【发布时间】:2015-02-06 01:40:16
【问题描述】:
我正在尝试向 BigQuery 加载嵌套的 JSON 数据。
这是我正在使用的数据和架构..
schema -->
[{"name":"kind","type":"string"},{"name":"fullName","type":"string"},{"name":"age","type":"integer"},{"name":"citiesLived","type":"record","fields":[{"name":"place","type":"string"},{"name":"numberOfYears","type":"integer"}]}]
data--> 它存在于 gc_data/load_data.json 中
{"kind": "person", "fullName": "John Doe", "age": 22, "citiesLived": [{"place": "Seattle", "numberOfYears": 5}, {"place": "Stockholm", "numberOfYears": 6}]}
{"kind": "person", "fullName": "Jane Austen", "age": 24, "citiesLived": [{"place": "Los Angeles", "numberOfYears": 2}, {"place": "Tokyo", "numberOfYears": 2}]}
// Loads newline-delimited JSON from Google Cloud Storage into a BigQuery table
// whose schema contains a nested record column ("citiesLived").
// Uses bigquery, loadConfig, job, projectId, PROJECT_ID and dataset, which are
// declared outside this fragment.

// Create the dataset before loading. An IOException is only printed and the
// load still proceeds.
// NOTE(review): presumably this tolerates "dataset already exists" -- confirm.
try {
    bigquery.datasets().insert(PROJECT_ID, dataset).execute();
} catch (IOException e) {
    System.out.println(e);
}

// Set where you are importing from (i.e. the Google Cloud Storage paths).
// NOTE(review): confirm this object is the file that holds the nested records.
List<String> sources = new ArrayList<String>();
sources.add("gs://gc_data/json_test_new_flat.json");
loadConfig.setSourceUris(sources);
// One JSON object per line; no field delimiter is configured for this format.
loadConfig.setSourceFormat("NEWLINE_DELIMITED_JSON");

// Describe the resulting table you are importing to:
TableReference tableRef = new TableReference();
tableRef.setDatasetId("myDataset");
tableRef.setTableId("myTableJSONNew");
tableRef.setProjectId(projectId);
loadConfig.setDestinationTable(tableRef);

// Top-level scalar columns.
List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
TableFieldSchema fieldKind = new TableFieldSchema();
fieldKind.setName("kind");
fieldKind.setType("STRING");
TableFieldSchema fieldFullName = new TableFieldSchema();
fieldFullName.setName("fullName");
fieldFullName.setType("STRING");
TableFieldSchema fieldAge = new TableFieldSchema();
fieldAge.setName("age");
fieldAge.setType("INTEGER");

// Nested column: each row carries a JSON *array* of {place, numberOfYears}
// objects, so the RECORD must be declared REPEATED. Without an explicit mode
// the field is NULLABLE (a single record) and array values are rejected.
TableFieldSchema fieldJSON = new TableFieldSchema();
fieldJSON.setName("citiesLived");
fieldJSON.setType("RECORD");
fieldJSON.setMode("REPEATED");

// Sub-fields of the citiesLived record.
List<TableFieldSchema> listOfJSonSchema = new ArrayList<TableFieldSchema>();
TableFieldSchema fieldPlace = new TableFieldSchema();
fieldPlace.setName("place");
fieldPlace.setType("STRING");
TableFieldSchema fieldnumberOfYears = new TableFieldSchema();
fieldnumberOfYears.setName("numberOfYears");
fieldnumberOfYears.setType("INTEGER");
listOfJSonSchema.add(fieldPlace);
listOfJSonSchema.add(fieldnumberOfYears);
fieldJSON.setFields(listOfJSonSchema);

// Assemble the table schema in column order and attach it to the load config.
fields.add(fieldKind);
fields.add(fieldFullName);
fields.add(fieldAge);
fields.add(fieldJSON);
TableSchema schema = new TableSchema();
schema.setFields(fields);
loadConfig.setSchema(schema);

// Submit the load job and print the reference of the created job.
Insert insert = bigquery.jobs().insert(projectId, job);
// projectId was already passed to jobs().insert(...) above; kept as in the original.
insert.setProjectId(projectId);
JobReference jobRef = insert.execute().getJobReference();
System.out.println(jobRef.toPrettyString());
【问题讨论】:
标签: json nested google-bigquery