Spark教程
1、
示例代码
import org.apache.spark.{SparkConf, SparkContext}

/** Minimal demonstration of `join` on two pair RDDs.
  *
  * Two integer RDDs are turned into key/value RDDs (value `1` for the first,
  * value `2` for the second) and joined on their keys; the two inputs and the
  * joined result are printed to standard output.
  */
object Test {

  /** Prints `label`, a colon and a space, then every element of `rows`
    * back-to-back (no separator), followed by a newline.
    *
    * @param label text shown before the elements
    * @param rows  elements to print, rendered with their `toString`
    */
  private def show[A](label: String, rows: Array[A]): Unit =
    println(rows.mkString(s"$label: ", "", ""))

  /** Entry point: builds a local 4-thread Spark context, runs the join demo
    * and always releases the context afterwards.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("join").setMaster("local[4]")
    val sc = new SparkContext(conf)
    try {
      val rdd1 = sc.parallelize(Seq(1, 2, 3))
      val rdd2 = sc.parallelize(Seq(2, 3, 4))

      // Tag each element so the origin of a value stays visible after the join.
      val rdd3 = rdd1.map(x => (x, 1))
      val rdd4 = rdd2.map(x => (x, 2))

      // Only keys present in both rdd3 and rdd4 appear in the result.
      val rdd5 = rdd3.join(rdd4)

      show("rdd3", rdd3.collect())
      show("rdd4", rdd4.collect())
      show("rdd5", rdd5.collect())
    } finally {
      // Stop the context even if a job fails, so it is not left running.
      sc.stop()
    }
  }
}

// Output reported for this program:
// rdd3: (1,1)(2,1)(3,1)
// rdd4: (2,2)(3,2)(4,2)
// rdd5: (2,(1,2))(3,(1,2))