您可以使用这个 JDBC 驱动程序:SimbaAthenaJDBC。对应的 Maven 依赖如下:
<!-- Maven coordinates of the Simba Athena JDBC driver artifact. -->
<dependency>
    <groupId>com.syncron.amazonaws</groupId>
    <artifactId>simba-athena-jdbc-driver</artifactId>
    <version>2.0.2</version>
</dependency>
在 Spark 应用程序中的用法:
// Obtain the Spark session for this example application.
SparkSession spark = SparkSession.builder().appName("My Spark Example").getOrCreate();

// Load the Simba Athena driver class up front; the same class name is also
// handed to Spark through the "driver" connection property below.
final String driverClass = "com.simba.athena.jdbc.Driver";
Class.forName(driverClass);

// Connection properties passed to the driver through Spark.
// NOTE(review): the access key / secret key values look like placeholders —
// replace them with real values or rely on the credentials provider settings.
Properties connectionProperties = new Properties();
connectionProperties.setProperty("User", "AWSAccessKey");
connectionProperties.setProperty("Password", "AWSSecretAccessKey");
connectionProperties.setProperty("S3OutputLocation", "s3://my-bucket/tmp/");
connectionProperties.setProperty(
    "AwsCredentialsProviderClass",
    "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider");
connectionProperties.setProperty(
    "AwsCredentialsProviderArguments", "/my-folder/.athenaCredentials");
connectionProperties.setProperty("driver", driverClass);

// Single predicate string, handed to DataFrameReader.jdbc as its predicates array.
List<String> predicateList =
    Stream.of("id = 'foo' and date >= DATE'2018-01-01' and date < DATE'2019-01-01'")
        .collect(Collectors.toList());
String[] predicates = predicateList.toArray(new String[0]);

// Read the table through JDBC using the URL, predicates and properties above.
final String athenaUrl = "jdbc:awsathena://AwsRegion=us-east-1;";
final String tableName = "my_env.my_table";
Dataset<Row> data = spark.read().jdbc(athenaUrl, tableName, predicates, connectionProperties);
您也可以在 Flink 应用程序中使用此驱动程序:
// Field types for the rows produced by the query below: one STRING entry per
// selected column (id, val_col). Declared with a wildcard instead of the raw
// TypeInformation type so the array matches RowTypeInfo's
// TypeInformation<?>... constructor without raw-type warnings.
TypeInformation<?>[] fieldTypes = {
    BasicTypeInfo.STRING_TYPE_INFO,
    BasicTypeInfo.STRING_TYPE_INFO
};
RowTypeInfo rowTypeInfo = new RowTypeInfo(fieldTypes);

// Build the JDBC input format: driver class, connection URL (region,
// credentials and S3 output location are all carried in the URL), the query
// to run, and the row type describing its result columns.
// NOTE(review): UID/PWD values look like placeholders — avoid committing real keys.
JDBCInputFormat jdbcInputFormat = JDBCInputFormat.buildJDBCInputFormat()
    .setDrivername("com.simba.athena.jdbc.Driver")
    .setDBUrl("jdbc:awsathena://AwsRegion=us-east-1;UID=my_access_key;PWD=my_secret_key;S3OutputLocation=s3://my-bucket/tmp/;")
    .setQuery("select id, val_col from my_env.my_table WHERE id = 'foo' and date >= DATE'2018-01-01' and date < DATE'2019-01-01'")
    .setRowTypeInfo(rowTypeInfo)
    .finish();

// `env` is not defined in this snippet.
// NOTE(review): presumably a Flink ExecutionEnvironment created earlier — confirm.
DataSet<Row> dbData = env.createInput(jdbcInputFormat, rowTypeInfo);