【问题标题】:How to create encoder for custom Java objects?如何为自定义 Java 对象创建编码器?
【发布时间】:2017-01-04 10:29:38
【问题描述】:

我正在尝试使用 Spark 的 Encoders.bean 为以下类创建 bean 编码器:

// Outer bean from the question: an int id plus a list of InnerClass elements,
// each exposed through a getter/setter pair.
// NOTE(review): `Class` is capitalized as posted; the Java keyword is lowercase `class`.
Class OuterClass implements Serializable {
    int id;
    ArrayList<InnerClass> listofInner;
    
    // Returns the current id.
    public int getId() {
        return id;
    }
    
    // Stores num as the id.
    public void setId (int num) {
        this.id = num;
    }

    // Returns the list reference held by this object (no copy is made).
    public ArrayList<InnerClass> getListofInner() {
        return listofInner;
    }
    
    // Stores the given list reference (no copy is made).
    public void setListofInner(ArrayList<InnerClass> list) {
        this.listofInner = list;
    }
}

// Element type used by OuterClass.listofInner; holds a single String property.
// NOTE(review): the `static` modifier and the reported name OuterClass$InnerClass
// suggest this class is nested inside OuterClass in the real code — confirm.
public static class InnerClass implements Serializable {
    String streetno;
    
    // Stores the given street number string.
    public void setStreetno(String streetno) {
        this.streetno= streetno;
    }

    // Returns the stored street number string.
    public String getStreetno() {
        return streetno;
    }
}

// Build a bean encoder for OuterClass and use it to create a Dataset.
Encoder<OuterClass> outerClassEncoder = Encoders.bean(OuterClass.class);
// NOTE(review): `singeltonList` is spelled as posted (java.util.Collections declares
// `singletonList`), and the statement has no terminating semicolon.
Dataset<OuterClass> ds = spark.createDataset(Collections.singeltonList(outerclassList), outerClassEncoder)

我收到以下错误

Exception in thread "main" java.lang.UnsupportedOperationException: Cannot infer type for class OuterClass$InnerClass because it is not bean-compliant

如何在 Java 中用 Spark 实现这种用例?如果我去掉内部类,程序可以正常运行,但我的用例确实需要一个内部类。

【问题讨论】:

    标签: java apache-spark apache-spark-2.0


    【解决方案1】:

    您的 JavaBean 类应该有一个公共的无参数构造函数以及 getter 和 setter,并且应该实现 Serializable 接口。Spark SQL 适用于符合规范的 JavaBean 类。

    编辑:添加内部类的工作示例

    OuterInnerDF.java

    package com.abaghel.examples;
    
    import java.util.ArrayList;
    import java.util.Collections;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoder;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.SparkSession;
    import com.abaghel.examples.OuterClass.InnerClass;
    
    /**
     * Minimal runnable example: encodes an {@code OuterClass} bean holding a list of
     * {@code OuterClass.InnerClass} beans into a Spark {@code Dataset} and prints it.
     */
    public class OuterInnerDF {
      /**
       * Creates a local Spark session, builds a one-row dataset from a bean and shows it.
       *
       * @param args command-line arguments (unused)
       */
      public static void main(String[] args) {
        SparkSession spark = SparkSession
                .builder()
                .appName("OuterInnerDF")
                .config("spark.sql.warehouse.dir", "/file:C:/temp")
                .master("local[2]")
                .getOrCreate();

        try {
          System.out.println("====> Create DataFrame");
          // Outer bean
          OuterClass us = new OuterClass();
          us.setId(111);
          // Nested bean
          OuterClass.InnerClass ic = new OuterClass.InnerClass();
          ic.setStreetno("My Street");
          // setListofInner takes an ArrayList, so the concrete type is kept here.
          ArrayList<InnerClass> ar = new ArrayList<>();
          ar.add(ic);
          us.setListofInner(ar);
          // Dataset built through the bean encoder
          Encoder<OuterClass> outerClassEncoder = Encoders.bean(OuterClass.class);
          Dataset<OuterClass> ds = spark.createDataset(Collections.singletonList(us), outerClassEncoder);
          ds.show();
        } finally {
          // Stop the session even when dataset creation or show() throws.
          spark.stop();
        }
      }
    }
    

    OuterClass.java

    package com.abaghel.examples;
    
    import java.io.Serializable;
    import java.util.ArrayList;
    
    /**
     * Serializable bean with an integer id and a list of {@link InnerClass} elements,
     * each exposed through a getter/setter pair.
     */
    public class OuterClass implements Serializable {
        int id;
        ArrayList<InnerClass> listofInner;

        /** @return the current id */
        public int getId() {
            return this.id;
        }

        /** @param num value to store as the id */
        public void setId(int num) {
            id = num;
        }

        /** @return the list reference held by this bean (not copied) */
        public ArrayList<InnerClass> getListofInner() {
            return this.listofInner;
        }

        /** @param list list reference to store (not copied) */
        public void setListofInner(ArrayList<InnerClass> list) {
            listofInner = list;
        }

        /**
         * Serializable static nested bean with a single String property.
         */
        public static class InnerClass implements Serializable {
            String streetno;

            /** @return the stored street number string */
            public String getStreetno() {
                return this.streetno;
            }

            /** @param streetno street number string to store */
            public void setStreetno(String streetno) {
                this.streetno = streetno;
            }
        }
    }
    

    控制台输出

    ====> Create DataFrame
    16/08/28 18:02:55 INFO CodeGenerator: Code generated in 32.516369 ms
    +---+-------------+
    | id|  listofInner|
    +---+-------------+
    |111|[[My Street]]|
    +---+-------------+
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2013-05-12
      • 1970-01-01
      • 2020-08-14
      • 2018-08-06
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2020-11-07
      相关资源
      最近更新 更多