要读取 JSON 文件,必须将 "multiline" 选项设置为 true。
(如果是 JSONL 文件,即每行一个 JSON 对象,则不需要启用 "multiline" 选项,保持为 false 即可。)
对于映射(MAP)类型的字段,必须填写 "mapKeyType" 和 "mapValueType"。
对于数组(ARRAY)类型的字段,必须填写 "arraySubtype"。
对于结构体(STRUCT)类型的字段,必须填写 "subSchemas"。
对于日期(DATE)类型的字段,如果数据中的日期格式不是 "yyyy-MM-dd",则可能需要设置 "dateFormat" 选项。
所有选项都正确设置后,将得到如下预览:
使用的架构:
{
"fieldSchemaList": [
{
"type": "STRING",
"name": "name",
"nullable": null,
"userDefinedTypeClass": null,
"customMetadata": {},
"arraySubtype": null,
"precision": null,
"scale": null,
"mapKeyType": null,
"mapValueType": null,
"subSchemas": null
},
{
"type": "DATE",
"name": "born",
"nullable": null,
"userDefinedTypeClass": null,
"customMetadata": {},
"arraySubtype": null,
"precision": null,
"scale": null,
"mapKeyType": null,
"mapValueType": null,
"subSchemas": null
},
{
"type": "MAP",
"name": "location",
"nullable": null,
"userDefinedTypeClass": null,
"customMetadata": {},
"arraySubtype": null,
"precision": null,
"scale": null,
"mapKeyType": {
"type": "STRING",
"name": null,
"nullable": null,
"userDefinedTypeClass": null,
"customMetadata": {},
"arraySubtype": null,
"precision": null,
"scale": null,
"mapKeyType": null,
"mapValueType": null,
"subSchemas": null
},
"mapValueType": {
"type": "STRING",
"name": null,
"nullable": null,
"userDefinedTypeClass": null,
"customMetadata": {},
"arraySubtype": null,
"precision": null,
"scale": null,
"mapKeyType": null,
"mapValueType": null,
"subSchemas": null
},
"subSchemas": null
},
{
"type": "ARRAY",
"name": "scores",
"nullable": null,
"userDefinedTypeClass": null,
"customMetadata": {},
"arraySubtype": {
"type": "STRUCT",
"name": null,
"nullable": null,
"userDefinedTypeClass": null,
"customMetadata": {},
"arraySubtype": null,
"precision": null,
"scale": null,
"mapKeyType": null,
"mapValueType": null,
"subSchemas": [
{
"type": "DATE",
"name": "date",
"nullable": null,
"userDefinedTypeClass": null,
"customMetadata": {},
"arraySubtype": null,
"precision": null,
"scale": null,
"mapKeyType": null,
"mapValueType": null,
"subSchemas": null
},
{
"type": "INTEGER",
"name": "score",
"nullable": null,
"userDefinedTypeClass": null,
"customMetadata": {},
"arraySubtype": null,
"precision": null,
"scale": null,
"mapKeyType": null,
"mapValueType": null,
"subSchemas": null
}
]
},
"precision": null,
"scale": null,
"mapKeyType": null,
"mapValueType": null,
"subSchemas": null
}
],
"primaryKey": null,
"dataFrameReaderClass": "com.palantir.foundry.spark.input.DataSourceDataFrameReader",
"customMetadata": {
"format": "json",
"options": {
"multiline": true,
"dateFormat": "yyyy MM dd"
}
}
}
可在此处查看可用的文件读取选项,其中包括读取 JSON 文件时可用的选项。