
Advanced RDD

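A word-count variant that triggers two shuffles (one per reduceByKey call) and then prints the resulting RDD lineage with toDebugString: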

import Utils.SparkUtils
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

object Demo {
  def main(args: Array[String]): Unit = {
    val sc: SparkContext = SparkUtils.getSparkContext()
    val rdd: RDD[(String, Int)] = sc.textFile("data/cc.txt")
      .flatMap(_.split("\\s+"))  // split each line into words
      .map((_, 1))               // pair each word with a count of 1
      .reduceByKey(_ + _)        // first shuffle: count per word
      .map(_._1.toUpperCase)     // keep only the word, uppercased
      .map((_, 1))
      .reduceByKey(_ + _)        // second shuffle: count per uppercased word
    println(rdd.toDebugString)   // print the lineage of the final RDD
    sc.stop()
  }
}
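The post does not show the Utils.SparkUtils helper. A minimal sketch of what getSparkContext() might look like, assuming a local-mode context with two cores, which would match the (2) partition counts in the output below:

import org.apache.spark.{SparkConf, SparkContext}

object SparkUtils {
  // Hypothetical helper (not shown in the original post): builds a
  // local SparkContext with two cores, so textFile and reduceByKey
  // default to two partitions here.
  def getSparkContext(): SparkContext = {
    val conf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("Demo")
    new SparkContext(conf)
  }
}

Running the program prints the lineage: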

(2) ShuffledRDD[7] at reduceByKey at Demo.scala:16 []
 +-(2) MapPartitionsRDD[6] at map at Demo.scala:15 []
    |  MapPartitionsRDD[5] at map at Demo.scala:14 []
    |  ShuffledRDD[4] at reduceByKey at Demo.scala:13 []
    +-(2) MapPartitionsRDD[3] at map at Demo.scala:12 []
       |  MapPartitionsRDD[2] at flatMap at Demo.scala:11 []
       |  data/cc.txt MapPartitionsRDD[1] at textFile at Demo.scala:10 []
       |  data/cc.txt HadoopRDD[0] at textFile at Demo.scala:10 []
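Reading bottom-up: HadoopRDD[0] through MapPartitionsRDD[3] are pipelined narrow transformations in one stage; each +- marks a shuffle boundary introduced by a reduceByKey, so the two ShuffledRDDs split the job into three stages, and the (2) prefixes show each RDD's partition count. The shuffle boundary can also be checked programmatically. A small sketch, assuming the rdd value from the code above:

import org.apache.spark.ShuffleDependency

// rdd is the result of the last reduceByKey, so its only dependency
// on its parent is a shuffle (wide) dependency; the map/flatMap
// steps above would each report a narrow dependency instead.
rdd.dependencies.foreach {
  case _: ShuffleDependency[_, _, _] => println("wide (shuffle) dependency")
  case dep                           => println(s"narrow dependency: $dep")
}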

Tags: map, advanced, scala, cc, Demo, reduceByKey, RDD, MapPartitionsRDD
Source: https://blog.csdn.net/DearNingning/article/details/117589131