编程语言
首页 > 编程语言 > Spark Java版本wordCount

Spark Java版本wordCount

作者:互联网

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.*;
import scala.Tuple2;

import java.net.URL;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

/**
 * Word-count example using the Spark Java API.
 *
 * <p>Reads {@code /wc.txt} from the classpath, splits each line on single
 * spaces, counts the occurrences of every word, and prints the results
 * sorted by key in descending order.
 */
public class wordcount {

    public static void main(String[] args) {

        SparkConf conf = new SparkConf();
        conf.setMaster("local[*]").setAppName("wc");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // Fail fast with a clear message instead of an NPE on
            // url.getPath() when the input file is missing.
            URL url = wordcount.class.getResource("/wc.txt");
            if (url == null) {
                throw new IllegalStateException("Resource /wc.txt not found on the classpath");
            }
            JavaRDD<String> lineRDD = sc.textFile(url.getPath());

            // line -> words (split on a single space, matching the input format)
            JavaRDD<String> flatRDD =
                    lineRDD.flatMap((FlatMapFunction<String, String>) line ->
                            Arrays.asList(line.split(" ")).iterator());

            // word -> (word, 1)
            JavaPairRDD<String, Integer> mapRDD =
                    flatRDD.mapToPair((PairFunction<String, String, Integer>) s ->
                            new Tuple2<>(s, 1));

            // (word, 1), (word, 1), ... -> (word, count)
            JavaPairRDD<String, Integer> resultRDD =
                    mapRDD.reduceByKey((Function2<Integer, Integer, Integer>) Integer::sum);

            // Collect to the driver and print, keys in descending order.
            Iterator<Tuple2<String, Integer>> iter =
                    resultRDD.sortByKey(false).collect().iterator();
            while (iter.hasNext()) {
                Tuple2<String, Integer> wc = iter.next();
                System.out.println(wc._1 + " : " + wc._2.toString());
            }
        } finally {
            // Always release the SparkContext, even if the job fails.
            sc.stop();
        }
    }
}

太烦了

标签:java,import,wordCount,Tuple2,new,Java,Spark,public,wc
来源: https://www.cnblogs.com/kpwong/p/14036123.html