【发布时间】:2018-03-27 05:21:15
【问题描述】:
我的代码出现以下错误,你能告诉我为什么吗?
notebook:28: error: not found: value month Retail_df = Retail_df.withColumn("月份", month(retail_df("Date")))
notebook:29: error: not found: value year Retail_df = Retail_df.withColumn("年份", year(retail_df("日期")))
import org.apache.spark.sql.types._
// Make cutom schema
var schema = StructType(Array(
StructField("Store", IntegerType, true),
StructField("DayOfWeek", IntegerType, true),
StructField("Date", DateType, true),
StructField("Sales", IntegerType, true),
StructField("Customers", IntegerType, true),
StructField("Open", IntegerType, true),
StructField("Promo", IntegerType, true),
StructField("StateHoliday", StringType, true),
StructField("SchoolHoliday", StringType, true)))
val retail_dfr = sqlContext.read.format("com.databricks.spark.csv").option("header", "true").schema(schema)
var retail_df = retail_dfr.load("/FileStore/tables/Rossman/train.csv")
val sales_custs_df = retail_df.select( "Store", "Sales", "Customers" )
val retails_open_df = retail_df.where( retail_df("Open") > 0)
val holidays_df = retail_df.filter(($"StateHoliday" === 1) && ($"SchoolHoliday" === 1))
val store_ids = retail_df.select(retail_df("Store")).distinct()
var weekday_promos = retail_df.stat.crosstab( "DayOfWeek" , "Promo" )
weekday_promos = weekday_promos.withColumnRenamed( "DayOfWeek_Promo", "DayOfWeek" )
.withColumnRenamed( "0", "NoPromo" )
.withColumnRenamed( "1","Promo" )
retail_df = retail_df.withColumn("Month", month(retail_df("Date")))
retail_df = retail_df.withColumn("Year", year(retail_df("Date")))
retail_df.show(5)
【问题讨论】:
标签: scala apache-spark