在select 语句中使用to_json 而不是toJSON 并选择所需的列以及to_json 函数。
检查下面的代码。
val version = // Get version value from versionDF
parentDF.select($"id",struct(to_json(struct($"*")).as("value")).as("data"),lit(version).as("version"))
scala> parentDF.select($"id",struct(to_json(struct($"*")).as("value")).as("data"),lit(version).as("version")).printSchema
root
|-- id: integer (nullable = false)
|-- data: struct (nullable = false)
| |-- value: string (nullable = true)
|-- version: double (nullable = false)
更新
scala> df.select($"id",to_json(struct(struct($"*").as("value"))).as("data"),lit(version).as("version")).printSchema
root
|-- id: integer (nullable = false)
|-- data: string (nullable = true)
|-- version: integer (nullable = false)
scala> df.select($"id",to_json(struct(struct($"*").as("value"))).as("data"),lit(version).as("version")).show(false)
+---+------------------------------------------+-------+
|id |data |version|
+---+------------------------------------------+-------+
|1 |{"value":{"id":1,"col1":"a1","col2":"b1"}}|1 |
|2 |{"value":{"id":2,"col1":"a2","col2":"b2"}}|1 |
|3 |{"value":{"id":3,"col1":"a3","col2":"b3"}}|1 |
+---+------------------------------------------+-------+
Update-1
scala> df.select($"id",to_json(struct($"*").as("value")).as("data"),lit(version).as("version")).printSchema
root
|-- id: integer (nullable = false)
|-- data: string (nullable = true)
|-- version: integer (nullable = false)
scala> df.select($"id",to_json(struct($"*").as("value")).as("data"),lit(version).as("version")).show(false)
+---+--------------------------------+-------+
|id |data |version|
+---+--------------------------------+-------+
|1 |{"id":1,"col1":"a1","col2":"b1"}|1 |
|2 |{"id":2,"col1":"a2","col2":"b2"}|1 |
|3 |{"id":3,"col1":"a3","col2":"b3"}|1 |
+---+--------------------------------+-------+