【发布时间】:2017-07-05 11:27:31
【问题描述】:
我正在尝试对 Hive 表运行 Spark SQL,但遇到了一个我无法理解的问题。这是我的代码:
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.hive.*;
import org.apache.spark.sql.SQLContext;
/**
 * Minimal example that runs Spark SQL queries against Hive tables.
 *
 * <p>Every query goes through a single Hive-enabled {@link SparkSession}.
 * In Spark 2.x the session is the unified entry point, so no separate
 * JavaSparkContext, SQLContext or (deprecated) HiveContext is created.
 */
public class queryhive {
    /**
     * Builds a local, Hive-enabled Spark session, issues two queries and then
     * stops the session.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args)
    {
        // NOTE(review): this value is passed to Spark unchanged; whether the
        // "${system:user.dir}" placeholder is expanded depends on the Spark
        // version in use - confirm, or replace with an absolute path.
        String warehouseLocation = "file:${system:user.dir}/spark-warehouse";

        SparkSession spark = SparkSession
                .builder()
                .appName("SparkSessionZipsExample")
                .master("local")
                .config("spark.sql.warehouse.dir", warehouseLocation)
                .enableHiveSupport()
                .getOrCreate();
        try {
            // The returned Dataset is discarded, so no action is triggered here.
            spark.sql("select count(*) from SparkHive.health");

            // collect() is the action that actually executes this query; it now
            // runs on the Hive-enabled session instead of a plain SQLContext.
            spark.sql("FROM src SELECT key, value").collect();
        } finally {
            // Release the underlying SparkContext even when a query fails.
            spark.stop();
        }
    }
}
我得到的例外是:
17/02/16 16:36:51 INFO SparkSqlParser: Parsing command: select count(*) from SparkHive.health
Exception in thread "main" java.util.ServiceConfigurationError: org.apache.spark.sql.sources.DataSourceRegister: Provider org.apache.spark.sql.hive.orc.DefaultSource could not be instantiated
at java.util.ServiceLoader.fail(ServiceLoader.java:232)
at java.util.ServiceLoader.access$100(ServiceLoader.java:185)
at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384)
at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404)
at java.util.ServiceLoader$1.next(ServiceLoader.java:480)
at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:43)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike$class.filterImpl(TraversableLike.scala:247)
at scala.collection.TraversableLike$class.filter(TraversableLike.scala:259)
at scala.collection.AbstractTraversable.filter(Traversable.scala:104)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:550)
at org.apache.spark.sql.execution.datasources.DataSource.providingClass$lzycompute(DataSource.scala:86)
at org.apache.spark.sql.execution.datasources.DataSource.providingClass(DataSource.scala:86)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:325)
at org.apache.spark.sql.execution.datasources.ResolveDataSource$$anonfun$apply$1.applyOrElse(rules.scala:58)
at org.apache.spark.sql.execution.datasources.ResolveDataSource$$anonfun$apply$1.applyOrElse(rules.scala:41)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$1.apply(LogicalPlan.scala:58)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$1.apply(LogicalPlan.scala:58)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:331)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:329)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:58)
at org.apache.spark.sql.execution.datasources.ResolveDataSource.apply(rules.scala:41)
at org.apache.spark.sql.execution.datasources.ResolveDataSource.apply(rules.scala:40)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82)
at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
at scala.collection.immutable.List.foldLeft(List.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74)
at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:64)
at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:62)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:48)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:63)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:592)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:699)
at SparkHiveSql.sparkhivesql.queryhive.main(queryhive.java:27)
Caused by: java.lang.VerifyError: Bad return type
Exception Details:
Location:
org/apache/spark/sql/hive/orc/DefaultSource.createRelation(Lorg/apache/spark/sql/SQLContext;[Ljava/lang/String;Lscala/Option;Lscala/Option;Lscala/collection/immutable/Map;)Lorg/apache/spark/sql/sources/HadoopFsRelation; @35: areturn
Reason:
Type 'org/apache/spark/sql/hive/orc/OrcRelation' (current frame, stack[0]) is not assignable to 'org/apache/spark/sql/sources/HadoopFsRelation' (from method signature)
Current Frame:
bci: @35
flags: { }
locals: { 'org/apache/spark/sql/hive/orc/DefaultSource', 'org/apache/spark/sql/SQLContext', '[Ljava/lang/String;', 'scala/Option', 'scala/Option', 'scala/collection/immutable/Map' }
stack: { 'org/apache/spark/sql/hive/orc/OrcRelation' }
Bytecode:
0x0000000: b200 1c2b c100 1ebb 000e 592a b700 22b6
0x0000010: 0026 bb00 2859 2c2d b200 2d19 0419 052b
0x0000020: b700 30b0
at java.lang.Class.getDeclaredConstructors0(Native Method)
at java.lang.Class.privateGetDeclaredConstructors(Class.java:2671)
at java.lang.Class.getConstructor0(Class.java:3075)
at java.lang.Class.newInstance(Class.java:412)
at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:380)
... 43 more
17/02/16 16:36:55 INFO SparkContext: Invoking stop() from shutdown hook
17/02/16 16:36:55 INFO SparkUI: Stopped Spark web UI at http://10.0.0.3:4040
我不知道为什么会这样。在运行这个程序之前,我的 HIVE 运行良好,但现在它根本无法运行。
是什么原因以及如何使上述代码运行?
我正在使用 Eclipse IDE,我的 Spark 版本是 2.1.0
【问题讨论】:
-
@G_H 我所拥有的没问题。谢谢你的分享
-
如果您检查这两个问题的堆栈跟踪,您会发现根本原因是相同的。 ServiceLoader 在类路径上找到 DefaultSource 实现调用构造函数,该构造函数返回的类型与预期的返回类型不对应。在预期
HadoopFsRelation的地方返回OrcRelation,但 OrcRelation 没有实现 HadoopFsRelation。这可能是版本冲突,因为我在 2.1.0 中找不到 HadoopFsRelation,而在旧版本(例如 1.6.0)中却存在。您的类路径上是否有多个 Spark 版本,或混合的 Spark/Hive 实现? -
@G_H 可能是存在多个 Spark 版本。实际上,我在程序中使用了一些来自开源 Spark 的库。我是用 Bitnami 安装程序安装的 Hadoop,它自带了一套 Hive 和 Spark。我使用的就是这个捆绑包,但通过 Java 程序来运行它。
-
Hadoop 安装是否可能使用 2.0.0 之前的 Hive 和 Spark 版本(如 1.6.3),而您的代码中包含 2.1.0 库?或者反过来:在安装为 2.1.0 时使用 pre-2.0.0 库。您的类路径中有一个带有
META-INF/services/org.apache.spark.sql.sources.DataSourceRegister的jar,其中列出了org.apache.spark.sql.hive.orc.DefaultSource作为实现。它找到的 DefaultSource 有一个名为createRelation的方法,该方法返回一个不是 HadoopFsRelation 子类的 OrcRelation,但调用它的代码需要这样。
标签: java apache-spark hive apache-spark-sql