【发布时间】:2021-06-02 16:57:14
【问题描述】:
如果我的输出格式是“控制台”,writeStream 似乎可以工作,但当我的输出格式是“镶木地板”时就不行。是否有一些我缺少的额外设置或配置?
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types._
object DataStreaming extends App with Context {
val customSchema = StructType(Array(
StructField("x", DoubleType),
StructField("y", DoubleType),
StructField("name", StringType)
))
def readFromParquet(): Unit = {
val sourceDF: DataFrame = spark.readStream
.schema(customSchema)
.parquet("/path/to/source/")
sourceDF
.writeStream
.format("console")
.outputMode("append")
.start()
.awaitTermination()
sourceDF
.writeStream
.format("parquet")
.option("path", "/output/path/")
.outputMode("append")
.start()
.awaitTermination()
}
readFromParquet()
}
【问题讨论】:
标签: scala apache-spark