Spark教程
Maven依赖:
<!-- Spark core artifact (the _2.11 suffix selects the Scala 2.11 build) -->
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-core_2.11</artifactId>
  <version>2.0.0</version>
</dependency>
<!-- Spark SQL artifact; supplies org.apache.spark.sql.SparkSession used by the program below -->
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-sql_2.11</artifactId>
  <version>2.0.0</version>
</dependency>
程序框架:
import org.apache.spark.sql.SparkSession

/**
 * Skeleton program for DataFrame experiments.
 *
 * Starts a SparkSession, builds a small in-memory `person` DataFrame with the
 * columns `id`, `name`, `age` and `sex`, and leaves a TODO marker where
 * example code can be added before the session is stopped.
 */
object DataFrameTest {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (not used by this skeleton)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("DataFrameTest")
      .master("local[4]") // run locally; per Spark's master-URL format this means 4 worker threads
      .getOrCreate()

    try {
      val sqlContext = spark.sqlContext

      // Build the sample DataFrame (the original comment calls it the "vertex" DataFrame).
      // Each tuple is one row; toDF assigns the column names in order.
      val person = sqlContext.createDataFrame(List(
        ("a", "Alice", 34, "w"),
        ("b", "Bob", 36, "m"),
        ("c", "Charlie", 30, "w"),
        ("d", "David", 29, "m"),
        ("e", "Esther", 32, "m"),
        ("f", "Fanny", 36, "m"),
        ("g", "Alice", 60, "w")
      )).toDF("id", "name", "age", "sex")

      // TODO: add example code that uses `person` here
    } finally {
      // Stop the session even if the code above throws, so the local
      // Spark context is not left running.
      spark.stop()
    }
  }
}