Spark MLlib 教程
1、RowMatrix
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.sql.SparkSession

/**
 * RowMatrix example: builds a distributed row-oriented matrix from four
 * dense 3-element vectors, prints its dimensions and runs a tall-skinny
 * QR decomposition on it.
 */
object Example {

  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .appName(this.getClass.getSimpleName)
      .master("local[4]")
      .getOrCreate()

    // Stop the session even when one of the Spark jobs below throws.
    try {
      val sc = spark.sparkContext

      // Each vector becomes one row of the matrix; rows carry no explicit index.
      val rows = sc.makeRDD(
        Seq(
          Vectors.dense(1.0, 2.0, 3.0),
          Vectors.dense(1.1, 2.1, 3.1),
          Vectors.dense(1.2, 2.2, 3.2),
          Vectors.dense(1.3, 2.3, 3.3)
        )
      )
      val mat: RowMatrix = new RowMatrix(rows)

      val m = mat.numRows()
      val n = mat.numCols()

      // QR decomposition; computeQ = true asks for the Q factor in addition to R.
      // The result is kept only to demonstrate the call and is not printed.
      val qrResult = mat.tallSkinnyQR(computeQ = true)

      println(s"rows:$m,cols:$n")
    } finally {
      spark.stop()
    }
  }
}
2、IndexedRowMatrix
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix}
import org.apache.spark.sql.SparkSession

/**
 * IndexedRowMatrix example: builds a distributed matrix whose rows carry an
 * explicit Long index, then prints its dimensions.
 */
object Example {

  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .appName(this.getClass.getSimpleName)
      .master("local[4]")
      .getOrCreate()

    // Stop the session even when one of the Spark jobs below throws.
    try {
      val sc = spark.sparkContext

      // Row indices start at 2 here, i.e. rows 0 and 1 are not supplied.
      val rows = sc.makeRDD(
        Seq(
          IndexedRow(2, Vectors.dense(1.0, 2.0, 3.0)),
          IndexedRow(3, Vectors.dense(1.1, 2.1, 3.1)),
          IndexedRow(4, Vectors.dense(1.2, 2.2, 3.2))
        )
      )
      val mat: IndexedRowMatrix = new IndexedRowMatrix(rows)

      // NOTE(review): per the Spark API docs, numRows() is derived from the
      // largest row index (max index + 1) when no size is given, so this is
      // expected to report 5 rather than 3 -- confirm against your Spark version.
      val m = mat.numRows()
      val n = mat.numCols()

      println(s"rows:$m,cols:$n")
    } finally {
      spark.stop()
    }
  }
}
3、CoordinateMatrix
CoordinateMatrix 由 (行下标, 列下标, 值) 形式的条目(MatrixEntry)构成,适用于维度很大且非常稀疏的矩阵。
import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}
import org.apache.spark.sql.SparkSession

/**
 * CoordinateMatrix example: builds a distributed matrix from
 * (row, column, value) entries, prints its dimensions and converts it to an
 * IndexedRowMatrix.
 */
object Example {

  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .appName(this.getClass.getSimpleName)
      .master("local[4]")
      .getOrCreate()

    // Stop the session even when one of the Spark jobs below throws.
    try {
      val sc = spark.sparkContext

      // Only the non-zero cells are listed: (1, 2) and (2, 3), both 1.0.
      val rows = sc.makeRDD(
        Seq(
          MatrixEntry(1, 2, 1.0),
          MatrixEntry(2, 3, 1.0)
        )
      )
      val mat: CoordinateMatrix = new CoordinateMatrix(rows)

      // NOTE(review): per the Spark API docs, dimensions are derived from the
      // largest row/column index (max + 1) when no size is given -- confirm
      // against your Spark version.
      val m = mat.numRows()
      val n = mat.numCols()

      println(s"rows:$m,cols:$n")

      // Convert to an IndexedRowMatrix.
      // The result is kept only to demonstrate the call and is not printed.
      val indexMat = mat.toIndexedRowMatrix()
    } finally {
      spark.stop()
    }
  }
}
4、
5、