【发布时间】:2021-10-18 03:41:00
【问题描述】:
我使用胶水爬虫生成了一个 Athena DDL,以从存储在 S3 中的 Parquet 文件创建 AWS Athena 表。但是,在复制 DDL 并在不同的 AWS 账户中使用它时,我收到以下错误:
第 7:25 行:输入“
雅典娜 DDL
CREATE TABLE x.y(
"questiontext" string,
"dataexporttag" string,
"questiontype" string,
"selector" string,
"subselector" string,
"configuration" struct<ChoiceColumnWidth:bigint,MobileFirst:boolean,QuestionDescriptionOption:string,RepeatHeaders:string,TextPosition:string,WhiteSpace:string>,
"questiondescription" string,
"choices" struct<1:struct<Display:string>,2:struct<Display:string>,3:struct<Display:string>,4:struct<Display:string>,5:struct<Display:string>,6:struct<Display:string>,7:struct<Display:string>,8:struct<Display:string,ExclusiveAnswer:boolean>,9:struct<Display:string>>,
"choiceorder" array<bigint>,
"validation" struct<Settings:struct<ForceResponse:string,ForceResponseType:string,Type:string>>,
"language" array<int>,
"nextchoiceid" bigint,
"nextanswerid" bigint,
"questionid" string,
"questiontext_unsafe" string,
"variablenaming" struct<1:string,2:string,3:string,4:string,5:string,6:string,7:string,8:string,9:string>,
"datavisibility" struct<Hidden:boolean,Private:boolean>,
"recodevalues" struct<1:string,2:string,3:string,4:string,5:string,6:string,7:string,8:string,9:string>,
"randomization" struct<Advanced:struct<FixedOrder:array<string>,RandomSubSet:array<int>,RandomizeAll:array<string>,TotalRandSubset:bigint,Undisplayed:array<int>>,EvenPresentation:boolean,TotalRandSubset:string,Type:string>,
"defaultchoices" boolean,
"gradingdata" array<int>,
"searchsource" struct<AllowFreeResponse:string>,
"displaylogic" struct<0:struct<0:struct<ChoiceLocator:string,Description:string,LeftOperand:string,LogicType:string,Operator:string,QuestionID:string,QuestionIDFromLocator:string,QuestionIsInLoop:string,RightOperand:string,Type:string>,1:struct<Conjuction:string,Description:string,LeftOperand:string,LogicType:string,Operator:string,RightOperand:string,Type:string>,2:struct<Conjuction:string,Description:string,LeftOperand:string,LogicType:string,Operator:string,RightOperand:string,Type:string>,3:struct<Conjuction:string,Description:string,LeftOperand:string,LogicType:string,Operator:string,RightOperand:string,Type:string>,4:struct<Conjuction:string,Description:string,LeftOperand:string,LogicType:string,Operator:string,RightOperand:string,Type:string>,5:struct<Conjuction:string,Description:string,LeftOperand:string,LogicType:string,Operator:string,RightOperand:string,Type:string>,6:struct<Conjuction:string,Description:string,LeftOperand:string,LogicType:string,Operator:string,RightOperand:string,Type:string>,Type:string>,Type:string,inPage:boolean>,
"analyzechoices" struct<6:string,8:string>,
"answers" struct<1:struct<Display:string>,2:struct<Display:string>,3:struct<Display:string>,4:struct<Display:string>,5:struct<Display:string>,6:struct<Display:string>>,
"answerorder" array<bigint>,
"choicedataexporttags" boolean)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
我能够使用爬虫生成的表来查询表,并且架构似乎是正确的。谁能帮我理解为什么我不能将 DDL abd 用于不同 AWS 账户中的同一个文件?
【问题讨论】:
标签: amazon-s3 parquet amazon-athena presto