工程相关
官网:https://graphframes.github.io/user-guide.html
Maven依赖:
<!-- GraphFrames build for Spark 2.1 / Scala 2.11 -->
<dependency>
    <groupId>org.graphframes</groupId>
    <artifactId>graphframes</artifactId>
    <version>0.5.0-spark2.1-s_2.11</version>
</dependency>
<!-- scala-logging (SLF4J flavour) for Scala 2.11 -->
<dependency>
    <groupId>com.typesafe.scala-logging</groupId>
    <artifactId>scala-logging-slf4j_2.11</artifactId>
    <version>2.1.2</version>
</dependency>
基本样例
import org.apache.spark.sql.SparkSession
import org.graphframes._

/** Minimal GraphFrames example.
  *
  * Builds a three-vertex, three-edge graph from in-memory data and prints the
  * vertices whose `age` is greater than 35.
  */
object GraphFrameExample {

  /** Runs the example on a local Spark session with four worker threads.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("GraphExample")
      .master("local[4]")
      .getOrCreate()

    // Stop the session even when building or querying the graph fails.
    try {
      // Vertex table: the GraphFrames user guide requires a column named "id".
      val vertices = spark.createDataFrame(List(
        ("a", "Alice", 34),
        ("b", "Bob", 36),
        ("c", "Charlie", 30)
      )).toDF("id", "name", "age")

      // Edge table: the GraphFrames user guide requires "src" and "dst" columns.
      val edges = spark.createDataFrame(List(
        ("a", "b", "friend"),
        ("b", "c", "follow"),
        ("c", "b", "follow")
      )).toDF("src", "dst", "relationship")

      val graph = GraphFrame(vertices, edges)

      // Vertices are a plain DataFrame, so a SQL filter expression applies.
      val olderThan35 = graph.vertices.filter("age>35")
      olderThan35.collect().foreach(row => println(row))
    } finally {
      spark.stop()
    }
  }
}
输出:
[b,Bob,36]
GraphFrame 转换成 GraphX:转换时会为每个顶点生成一个唯一的 Long 类型 ID
// Convert the GraphFrame into a GraphX graph and print its vertices and edges.
// `v` and `e` are the vertex and edge DataFrames built in the basic example.
val g = GraphFrame(v, e)
val graph = g.toGraphX
graph.vertices.collect().foreach(x => println(x))
graph.edges.collect().foreach(x => println(x))
输出结果:
(1047972020224,[c,Charlie,30])
(1382979469312,[b,Bob,36])
(1460288880640,[a,Alice,34])
Edge(1382979469312,1047972020224,[b,c,follow])
Edge(1047972020224,1382979469312,[c,b,follow])
Edge(1460288880640,1382979469312,[a,b,friend])
使用自带例子:
import org.apache.spark.sql.SparkSession
import org.graphframes._
import org.graphframes.examples

/** Shows the example "friends" graph that ships with GraphFrames. */
object GraphFrameExample {

  /** Prints the vertex and edge tables of the bundled example graph.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("GraphExample")
      .master("local[4]")
      .getOrCreate()

    // Stop the session even when loading or showing the graph fails.
    try {
      // NOTE(review): examples.Graphs presumably picks up the already-running
      // Spark context, so keep the session creation ahead of this call.
      val g: GraphFrame = examples.Graphs.friends // get example graph
      g.vertices.show()
      g.edges.show()
    } finally {
      spark.stop()
    }
  }
}
+---+-------+---+
| id| name|age|
+---+-------+---+
| a| Alice| 34|
| b| Bob| 36|
| c|Charlie| 30|
| d| David| 29|
| e| Esther| 32|
| f| Fanny| 36|
| g| Gabby| 60|
+---+-------+---+
+---+---+------------+
|src|dst|relationship|
+---+---+------------+
| a| b| friend|
| b| c| follow|
| c| b| follow|
| f| c| follow|
| e| f| follow|
| e| d| friend|
| d| a| friend|
| a| e| friend|
+---+---+------------+
参考:
http://www.infoq.com/cn/news/2016/03/GraphFrame-DataFrame-SQL