【发布时间】:2017-06-03 13:30:15
【问题描述】:
我有一份多行格式的 JSON 数据,并创建了一个 Hive 表来加载它。我还有另一份 JSON,它是一条单行 JSON 记录。当我把单行 JSON 记录加载到对应的 Hive 表并进行查询时,一切正常。但是当我把多行 JSON 加载到它的 Hive 表中时,查询抛出了以下异常:
Failed with exception java.io.IOException:org.apache.hadoop.hive.serde2.SerDeException: org.codehaus.jackson.JsonParseException: Unexpected end-of-input: expected close marker for OBJECT (from [Source: java.io.ByteArrayInputStream@8b89b3a; line: 1, column: 0]) at [Source: java.io.ByteArrayInputStream@8b89b3a; line: 1, column: 3]
以下是我的 JSON 数据:
{
"uploadTimeStamp" : "1486631318873",
"PDID" : "123",
"data" : [ {
"Data" : {
"unit" : "rpm",
"value" : "0"
},
"EventID" : "E1",
"PDID" : "123",
"Timestamp" : 1486631318873,
"Timezone" : 330,
"Version" : "1.0",
"pii" : { }
}, {
"Data" : {
"heading" : "N",
"loc3" : "false",
"loc" : "14.022425",
"loc1" : "78.760587",
"loc4" : "false",
"speed" : "10"
},
"EventID" : "E2",
"PDID" : "123",
"Timestamp" : 1486631318873,
"Timezone" : 330,
"Version" : "1.1",
"pii" : { }
}, {
"Data" : {
"x" : "1.1",
"y" : "1.2",
"z" : "2.2"
},
"EventID" : "E3",
"PDID" : "123",
"Timestamp" : 1486631318873,
"Timezone" : 330,
"Version" : "1.0",
"pii" : { }
}, {
"EventID" : "E4",
"Data" : {
"value" : "50",
"unit" : "percentage"
},
"Version" : "1.0",
"Timestamp" : 1486631318873,
"PDID" : "123",
"Timezone" : 330
}, {
"Data" : {
"unit" : "kmph",
"value" : "70"
},
"EventID" : "E5",
"PDID" : "123",
"Timestamp" : 1486631318873,
"Timezone" : 330,
"Version" : "1.0",
"pii" : { }
} ]
}
我正在使用 /hive/lib/hive-hcatalog-core-0.13.0.jar
下面是我的创建表语句:
-- First attempt at a Hive table over the multi-line JSON sample shown above,
-- read through the HCatalog JsonSerDe from the directory '/xyz/abc/'.
-- The statement is kept exactly as posted. The discussion that follows it
-- attributes the all-NULL query results to a mismatch between this definition
-- and the data; the mismatches visible when comparing it with the sample are:
--   * Column `uploadtime` vs. JSON key "uploadTimeStamp"; struct field `time`
--     vs. JSON key "Timestamp".
--   * Struct fields `Location`, `latitude`, `longitude`, `Location2` vs. the
--     JSON keys "loc3", "loc", "loc1", "loc4" of the second array element.
--   * Several fields are declared numeric while the sample holds quoted or
--     fractional values: `uploadtime bigint` ("1486631318873"),
--     `value:int` ("0"), `pdid:bigint` ("123"), `version:int` ("1.0"),
--     `x/y/z:int` ("1.1", "1.2", "2.2"), `latitude/longitude:bigint`
--     ("14.022425", "78.760587").
--   * `pii` is declared as struct<pii:string>, while the sample value is the
--     empty object { } and the fourth element has no "pii" key at all.
--   * The `data` array lists five comma-separated struct types.
--     NOTE(review): Hive's array<T> presumably accepts a single element type,
--     so one struct covering all keys would be needed - confirm against the
--     Hive types documentation.
create table test7(
uploadtime bigint,
pdid string,
data array<
struct<Data:struct<
unit:string,
value:int>,
eventid:string,
pdid:bigint,
time:bigint,
timezone:int,
version:int,
pii:struct<pii:string>>,
struct<Data:struct<
heading:string,
Location:string,
latitude:bigint,
longitude:bigint,
Location2:string,
speed:int>,
eventid:string,
pdid:bigint,
time:bigint,
timezone:int,
version:int,
pii:struct<pii:string>>,
struct<Data:struct<
unit:string,
value:int>,
eventid:string,
pdid:bigint,
time:bigint,
timezone:int,
version:int,
pii:struct<pii:string>>,
struct<Data:struct<
x:int,
y:int,
z:int>,
eventid:string,
pdid:bigint,
time:bigint,
timezone:int,
version:int,
pii:struct<pii:string>>,
struct<Data:struct<
heading:string,
loc3:string,
latitude:bigint,
longitude:bigint,
loc4:string,
speed:int>,
eventid:string,
pdid:bigint,
time:bigint,
timezone:int,
version:int,
pii:struct<pii:string>>
>
)
ROW FORMAT SERDE
'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE
LOCATION
'/xyz/abc/';
编辑:
补充单行 JSON、新的建表语句以及执行时的报错信息:
{"uploadTimeStamp":"1487183800905","PDID":"123","data":[{"Data":{"unit":"rpm","value":"0"},"EventID":"event1","PDID":"123","Timestamp":1487183800905,"Timezone":330,"Version":"1.0","pii":{}},{"Data":{"heading":"N","loc1":"false","latitude":"16.032425","longitude":"80.770587","loc2":"false","speed":"10"},"EventID":"event2","PDID":"123","Timestamp":1487183800905,"Timezone":330,"Version":"1.1","pii":{}},{"Data":{"x":"1.1","y":"1.2","z":"2.2"},"event3":"AccelerometerInfo","PDID":"123","Timestamp":1487183800905,"Timezone":330,"Version":"1.0","pii":{}},{"EventID":"event4","Data":{"value":"50","unit":"percentage"},"Version":"1.0","Timestamp":1487183800905,"PDID":"123","Timezone":330},{"Data":{"unit":"kmph","value":"70"},"EventID":"event5","PDID":"123","Timestamp":1487183800905,"Timezone":330,"Version":"1.0","pii":{}}]}
-- Second attempt: a Hive table over the single-line JSON record shown above,
-- read through the HCatalog JsonSerDe from the directory '/ABC/XYZ/'.
-- The statement is kept exactly as posted because it is the one that raises
-- the ParseException quoted right after it. Problems visible in the text:
--   * `pii:struct<>` declares a struct with no fields. The quoted error
--     ("line 11:10 mismatched input '<>' expecting <") points at the first
--     occurrence, on the 11th line of the statement.
--   * `unit:percentage` uses `percentage` as a type name; in the sample JSON
--     "percentage" is the *value* of "unit", so `string` was presumably
--     intended.
--   * The `(` opened after `test1` is never closed before ROW FORMAT.
--   * The `data` array lists five comma-separated struct types.
--     NOTE(review): Hive's array<T> presumably accepts a single element type,
--     so one struct covering all keys would be needed - confirm against the
--     Hive types documentation.
--   * Struct field `TimeS` has no same-named key in the sample, which uses
--     "Timestamp". NOTE(review): presumably the field would stay NULL -
--     verify against the JsonSerDe documentation.
create table test1(
uploadTimeStamp string,
PDID string,
data array<struct<
Data:struct<unit:string,value:int>,
EventID:string,
PDID:string,
TimeS:bigint,
Timezone:int,
Version:float,
pii:struct<>>,
struct<
Data:struct<heading:string,loc1:string,latitude:double,longitude:double,loc2:string,speed:int>,
EventID:string,
PDID:string,
TimeS:bigint,
Timezone:int,
Version:float,
pii:struct<>>,
struct<
Data:struct<x:float,y:float,z:float>,
EventID:string,
PDID:string,
TimeS:bigint,
Timezone:int,
Version:float,
pii:struct<>>,
struct<
EventID:string,
Data:struct<value:int,unit:percentage>,
Version:float,
TimeS:bigint,
PDID:string,
Timezone:int>,
struct<
Data:struct<unit:string,value:int>,
EventID:string,
PDID:string,
TimeS:bigint,
Timezone:int,
Version:float,
pii:struct<>>
>
ROW FORMAT SERDE
'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE
LOCATION
'/ABC/XYZ/';
MismatchedTokenException(320!=313)
...
...
...
FAILED: ParseException line 11:10 mismatched input '<>' expecting < near 'struct' in struct type
【问题讨论】:
-
似乎不支持多行 JSON,您必须将文档压缩为单行才能使用 JSON SERDE。
-
我已经把多行 JSON 转换成了单行 JSON。现在创建表并加载数据后,执行 SELECT 查询得到的所有值都是 null。
-
这表示表定义与数据结构不匹配。稍后我会看看。
-
我已用最新的单行 JSON、新的建表语句和最新的错误信息更新了问题。
-
这个 JSON 是一团糟。你有一个包含不同结构元素的数组。