Spark教程
作者: 时海 风自在
程序基本框架

Maven依赖:


        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.0.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.0.0</version>
        </dependency>


程序框架:

import org.apache.spark.sql.SparkSession

object DataFrameTest {

  /**
   * Entry point: starts a local SparkSession, builds a small in-memory
   * DataFrame of people, and stops the session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    // SparkSession is the unified entry point since Spark 2.0;
    // run locally on 4 threads for this demo.
    val spark = SparkSession
      .builder()
      .appName("DataFrameTest")
      .master("local[4]")
      .getOrCreate()

    // Create the vertex DataFrame.
    // Call createDataFrame on the session directly — SQLContext is kept
    // in 2.x only for backward compatibility and adds no value here.
    val person = spark.createDataFrame(List(
      ("a", "Alice", 34, "w"),
      ("b", "Bob", 36, "m"),
      ("c", "Charlie", 30, "w"),
      ("d", "David", 29, "m"),
      ("e", "Esther", 32, "m"),
      ("f", "Fanny", 36, "m"),
      ("g", "Alice", 60, "w")
    )).toDF("id", "name", "age", "sex")


    //TODO


    // Release the session's resources before exiting.
    spark.stop()

  }

}

标签: spark、artifactid、groupid、dependency、sqlcontext
一个创业中的苦逼程序员
  • 回复
隐藏