【发布时间】:2017-01-31 20:20:24
【问题描述】:
我有一个打包在 fatjar 中并在 EMR Hadoop 集群上运行的 Scalding 作业。最近我在 map 阶段中添加了需要 DynamoDB 连接的新功能。但是一旦映射器到达 DynamoDB 初始化,它就会抛出以下异常:
Error in configuring object
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:78)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:450)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:344)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:166)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:109)
... 9 more
Caused by: cascading.flow.FlowException: internal error during mapper configuration
at cascading.flow.hadoop.FlowMapper.configure(FlowMapper.java:102)
... 14 more
Caused by: com.esotericsoftware.kryo.KryoException: Unable to find class: com.amazonaws.http.conn.$Proxy7
Serialization trace:
connManager (com.amazonaws.http.impl.client.SdkHttpClient)
httpClient (com.amazonaws.http.AmazonHttpClient)
client (awscala.dynamodbv2.DynamoDBClient)
client (me.chuwy.enrich.hadoop.DuplicateStorage$DynamoDbStorage)
duplicateStorage (me.chuwy.enrich.hadoop.ShredJob)
$outer (me.chuwy.enrich.hadoop.ShredJob$$anonfun$11)
at com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:138)
at com.esotericsoftware.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:115)
at com.esotericsoftware.kryo.Kryo.readClass(Kryo.java:610)
at com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:599)
at com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
at com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:648)
at com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:605)
at com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
at com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:648)
at com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:605)
at com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
at com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:648)
at com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:605)
at com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:729)
at com.twitter.chill.SomeSerializer.read(SomeSerializer.scala:25)
at com.twitter.chill.SomeSerializer.read(SomeSerializer.scala:19)
at com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:648)
at com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:605)
at com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
at com.esotericsoftware.kryo.Kryo.readObject(Kryo.java:648)
at com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:605)
at com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:729)
at com.twitter.chill.SomeSerializer.read(SomeSerializer.scala:25)
at com.twitter.chill.SomeSerializer.read(SomeSerializer.scala:19)
at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:729)
at com.twitter.chill.SerDeState.readClassAndObject(SerDeState.java:61)
at com.twitter.chill.KryoPool.fromBytes(KryoPool.java:94)
at com.twitter.chill.Externalizer.fromBytes(Externalizer.scala:145)
at com.twitter.chill.Externalizer.maybeReadJavaKryo(Externalizer.scala:158)
at com.twitter.chill.Externalizer.readExternal(Externalizer.scala:148)
at java.io.ObjectInputStream.readExternalData(ObjectInputStream.java:1839)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1796)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1997)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1921)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1997)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1921)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1997)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1921)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
at java.util.HashMap.readObject(HashMap.java:1180)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1058)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1897)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1997)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1921)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1997)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1921)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
at cascading.flow.hadoop.util.JavaObjectSerializer.deserialize(JavaObjectSerializer.java:101)
at cascading.flow.hadoop.util.HadoopUtil.deserializeBase64(HadoopUtil.java:312)
at cascading.flow.hadoop.util.HadoopUtil.deserializeBase64(HadoopUtil.java:293)
at cascading.flow.hadoop.FlowMapper.configure(FlowMapper.java:81)
... 14 more
Caused by: java.lang.ClassNotFoundException: com.amazonaws.http.conn.$Proxy7
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass
我认为这个问题可能在于 EMR AMI 在 /usr/share/aws/aws-java-sdk/ 中自带了一套 jar,并且它们与我包含在 fat jar 中的库冲突。我试图删除它们,但随后其他 EMR 步骤失败了。
【问题讨论】:
-
您在 fat jar 中包含哪些 AWS Java SDK 依赖项(和版本)?因为,正如您所提到的,EMR 目前在 /usr/share/aws/aws-java-sdk 中包含 AWS Java SDK 1.10.x,在您的 fat jar 中包含不同版本的 AWS Java SDK 肯定会在运行时导致问题。您或许可以对 jar 中包含的版本进行 shade/重定位(relocate)处理(如果您使用的是 Maven,请参阅 maven-shade-plugin),但有时这会导致其他问题。
-
谢谢@JonathanKelly。我使用 1.10.77 作为传递依赖,但我也尝试了从 ~1.10.56 到 ~1.11.30 的不同版本。我还尝试将它们从 fat jar 中排除,并使用与 fat jar 中相同的版本覆盖 EMR jar。结果还是一样。现在我怀疑问题出在 OpenJDK 上,EMR 正是用它来运行 jar 的。
-
啊,好的,EMR 使用 AWS Java SDK 1.10.75.1 有一段时间了,还没有升级到 1.11.x,所以将 1.11.x 与您的应用程序捆绑可能会导致问题。如果您使用 1.10.75.1 作为 “provided” 依赖项(这将使您的应用程序针对此版本进行编译,但不将其包含在 fat jar 中),这会有帮助吗?
-
不,不幸的是它没有帮助。我试图从 fatjar 中排除 aws-core、aws-dynamodb、httpcore、httpclient(这应该与把传递依赖设为 “provided” 具有相同的效果)。但结果还是一样。
标签: hadoop serialization jar emr scalding