import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SaveMode;

/**
 * Demonstrates Spark SQL's generic load/save API: reads a Parquet file
 * (the default data-source format), writes a projected subset back to
 * HDFS, then reads the result again and prints it to verify the round trip.
 *
 * @author Administrator
 */
public class GenericLoadSave {

	public static void main(String[] args) {
		SparkConf conf = new SparkConf()
				.setAppName("GenericLoadSave")
				.setMaster("local");
		JavaSparkContext sc = new JavaSparkContext(conf);
		try {
			SQLContext sqlContext = new SQLContext(sc);

			// Parquet files are self-describing (they carry their own schema),
			// so load() needs no explicit format or schema here — Parquet is
			// the default data source.
			DataFrame usersDF = sqlContext.read().load("hdfs://hadoop1:9000/input/users.parquet");

			// When no format is specified on write(), the on-disk format also
			// defaults to Parquet. Overwrite any previous output so reruns work.
			usersDF.select("name", "favorite_color")
					.write()
					.mode(SaveMode.Overwrite)
					.save("hdfs://hadoop1:9000/output/namesAndFavColors_scala");

			// Read the written data back and print it to confirm the result.
			DataFrame pDF = sqlContext.read().parquet("hdfs://hadoop1:9000/output/namesAndFavColors_scala");
			pDF.show();
		} finally {
			// Fix: the context was previously never closed — stop the
			// SparkContext and release its resources even if a job fails.
			sc.close();
		}
	}

}

/*
 * NOTE(review): the text below is scraped-page residue (a "related articles"
 * link list with dates), not Java code. It is preserved here inside a comment
 * so the file remains compilable; consider removing it entirely.
 *
 * Related articles:
 *   - 2022-12-23
 *   - 2021-11-18
 *   - 2022-12-23
 *   - 2021-11-18
 *   - 2021-08-13
 *   - 2021-09-11
 *   - 2022-12-23
 *   - 2022-12-23
 * You may also like:
 *   - 2022-01-12
 *   - 2022-12-23
 *   - 2021-10-28
 *   - 2021-11-18
 *   - 2021-07-08
 *   - 2021-11-18
 * Related resources / Similar solutions
 */