Spark MLlib 教程
作者: 时海
Distributed Matrix

1、RowMatrix

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.sql.SparkSession

/** Tutorial example: build a `RowMatrix` from an RDD of dense vectors and inspect it. */
object Example {

  /**
    * Creates a local Spark session, builds a 4 x 3 `RowMatrix`, runs a tall-skinny QR
    * decomposition on it and prints the matrix dimensions.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName(s"${this.getClass.getSimpleName}")
      .master("local[4]")
      .getOrCreate()

    // Stop the session even when one of the Spark calls below throws.
    try {
      val sc = spark.sparkContext

      // Four rows of three values each; a RowMatrix carries no row indices.
      val rows = sc.makeRDD(Seq(
        Vectors.dense(1.0, 2.0, 3.0),
        Vectors.dense(1.1, 2.1, 3.1),
        Vectors.dense(1.2, 2.2, 3.2),
        Vectors.dense(1.3, 2.3, 3.3)))

      val mat: RowMatrix = new RowMatrix(rows)

      val m = mat.numRows()
      val n = mat.numCols()

      // QR decomposition; the result is kept only for demonstration and is not printed.
      val qrResult = mat.tallSkinnyQR(computeQ = true)

      println(s"rows:$m,cols:$n")
    } finally {
      spark.stop()
    }
  }
}


2、IndexedRowMatrix


import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix}
import org.apache.spark.sql.SparkSession

/** Tutorial example: build an `IndexedRowMatrix` from explicitly indexed rows. */
object Example {

  /**
    * Creates a local Spark session, builds an `IndexedRowMatrix` from three indexed
    * rows and prints the matrix dimensions.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName(s"${this.getClass.getSimpleName}")
      .master("local[4]")
      .getOrCreate()

    // Stop the session even when one of the Spark calls below throws.
    try {
      val sc = spark.sparkContext

      // Each row carries its own index (2, 3 and 4 here) alongside its values.
      // NOTE(review): the indices do not start at 0, so the reported row count may
      // differ from the three rows supplied — confirm against the numRows docs.
      val rows = sc.makeRDD(Seq(
        IndexedRow(2, Vectors.dense(1.0, 2.0, 3.0)),
        IndexedRow(3, Vectors.dense(1.1, 2.1, 3.1)),
        IndexedRow(4, Vectors.dense(1.2, 2.2, 3.2))))

      val mat: IndexedRowMatrix = new IndexedRowMatrix(rows)

      val m = mat.numRows()
      val n = mat.numCols()

      println(s"rows:$m,cols:$n")
    } finally {
      spark.stop()
    }
  }
}


3、CoordinateMatrix

CoordinateMatrix 由 (行下标, 列下标, 值) 形式的条目(MatrixEntry)构成,适用于维度很大且非常稀疏的矩阵。


import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}
import org.apache.spark.sql.SparkSession

/** Tutorial example: build a `CoordinateMatrix` from (row, column, value) entries. */
object Example {

  /**
    * Creates a local Spark session, builds a `CoordinateMatrix` from two entries,
    * prints its dimensions and converts it to an `IndexedRowMatrix`.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName(s"${this.getClass.getSimpleName}")
      .master("local[4]")
      .getOrCreate()

    // Stop the session even when one of the Spark calls below throws.
    try {
      val sc = spark.sparkContext

      // Two entries given as (row index, column index, value).
      val rows = sc.makeRDD(Seq(
        MatrixEntry(1, 2, 1.0),
        MatrixEntry(2, 3, 1.0)))

      val mat: CoordinateMatrix = new CoordinateMatrix(rows)

      val m = mat.numRows()
      val n = mat.numCols()

      println(s"rows:$m,cols:$n")

      // Convert to an IndexedRowMatrix; the result is kept only for demonstration.
      val indexMat = mat.toIndexedRowMatrix()
    } finally {
      spark.stop()
    }
  }
}


4、

5、

标签: val、mat、vectors、rows、spark
一个创业中的苦逼程序员
  • 回复
隐藏