Spark 中迭代器的使用（求最大值或最小值）
作者:互联网
groupbykey
import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import jersey.repackaged.com.google.common.collect.Lists;
import scala.Tuple2;
/**
 * Example Spark job: groups (name, score) pairs by name with {@code groupByKey}
 * and reduces every group to its highest score by walking the grouped
 * {@code Iterable}.
 */
public class groupbykey {

    /**
     * Builds a small in-memory pair RDD, groups it by key, maps each group to
     * its maximum value, and prints every resulting (key, max) tuple.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("").setMaster("local");
        // try-with-resources closes the context even when the job throws,
        // instead of leaving it open as the original code did.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<Tuple2<String, Integer>> scores = Arrays.asList(
                    new Tuple2<String, Integer>("jac", 80),
                    new Tuple2<String, Integer>("jac", 100),
                    new Tuple2<String, Integer>("jac", 70),
                    new Tuple2<String, Integer>("gs", 80),
                    new Tuple2<String, Integer>("gs", 90)
            );
            JavaPairRDD<String, Integer> pardd = sc.parallelizePairs(scores);
            JavaPairRDD<String, Iterable<Integer>> pardd1 = pardd.groupByKey();
            JavaPairRDD<String, Integer> pardd2 = pardd1.mapValues(values -> maxOf(values));
            pardd2.foreach(f -> System.out.println(f));
        }
    }

    /**
     * Returns the largest element of {@code values}.
     *
     * <p>The running maximum is seeded from the first element rather than from
     * {@code 0}, so groups that contain only negative numbers are handled
     * correctly. The iterable is traversed directly; no intermediate list is
     * built.
     *
     * @param values the values to scan
     * @return the largest value found
     * @throws IllegalArgumentException if {@code values} has no elements
     */
    private static Integer maxOf(Iterable<Integer> values) {
        Integer best = null;
        for (Integer value : values) {
            if (best == null || value > best) {
                best = value;
            }
        }
        if (best == null) {
            throw new IllegalArgumentException("cannot take the maximum of an empty group");
        }
        return best;
    }
}
标签:java,迭代,最小,JavaPairRDD,Tuple2,new,import,spark 来源: https://blog.csdn.net/m0_53291740/article/details/121592892