
Spark ==> Sending, Receiving, and Analyzing JSON Data


Problem 1: Below is partial data from a students' final exam. Complete the statistics as required; each record has the following format:
{"name":"zhangsan","sex":"m","kemu":"yuwen","score":66}

1) Create the Kafka topic ods_score_topic with one replica and one partition (see the sketch after this list)

2) Create a producer and send at least 15 records to the topic

3) Create a Maven project

4) Add the Spark Streaming dependencies (see the coordinates after this list)

5) Create the SparkConf environment

6) Set the batch interval to 5 seconds

7) Set the log level to ERROR

8) Obtain the Kafka data source

9) Parse the JSON data and return 4-tuple records

10) Iterate over the tuples and compute each student's total score

11) Iterate over the tuples and compute the highest score for each subject

12) Iterate over the tuples and compute the average score for each subject
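
For step 1, the topic is usually created with the kafka-topics command-line tool; as a minimal sketch of doing the same thing from code (this class is not part of the original post and assumes the same brokers and the kafka-clients library already used by the producer below), Kafka's AdminClient can create ods_score_topic with one partition and a replication factor of 1:

package com.lq.scala

import java.util.{Collections, Properties}
import org.apache.kafka.clients.admin.{AdminClient, NewTopic}

object CreateTopicSketch {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "hdp1:9092,hdp2:9092,hdp3:9092")
    val admin = AdminClient.create(props)
    // 1) ods_score_topic: 1 partition, replication factor 1 (values from the requirements above)
    admin.createTopics(Collections.singleton(new NewTopic("ods_score_topic", 1, 1.toShort))).all().get()
    admin.close()
  }
}

For step 4, the original post does not show the pom.xml; the coordinates below (written in sbt syntax, with versions that are only an assumption) cover Spark Streaming, the Kafka 0.10 integration, the Kafka client, and fastjson used by the two programs:

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-streaming"            % "3.1.2",
  "org.apache.spark" %% "spark-streaming-kafka-0-10" % "3.1.2",
  "org.apache.kafka" %  "kafka-clients"              % "2.8.0",
  "com.alibaba"      %  "fastjson"                   % "1.2.83"
)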

 

Producer

package com.lq.scala

import com.alibaba.fastjson.JSON
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.clients.producer.ProducerRecord
import java.util.Properties
import scala.beans.BeanProperty

// @BeanProperty generates Java-style getters/setters so fastjson can read the case class fields
case class Stu(@BeanProperty name: String,
               @BeanProperty sex: String,
               @BeanProperty kemu: String,
               @BeanProperty score: Int)

object KafkaProducerTest {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "hdp1:9092,hdp2:9092,hdp3:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    val producer: KafkaProducer[String, String] = new KafkaProducer[String, String](props)

    // 2) Send at least 15 records (5 students x 3 subjects) to the topic as JSON strings
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("张三", "m", "yuwen", 66)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("张三", "m", "yingyu", 77)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("张三", "m", "shuxue", 88)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("李四", "m", "yuwen", 80)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("李四", "m", "yingyu", 90)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("李四", "m", "shuxue", 100)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("王五", "n", "yuwen", 100)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("王五", "n", "yingyu", 80)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("王五", "n", "shuxue", 70)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("赵六", "n", "yuwen", 80)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("赵六", "n", "yingyu", 80)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("赵六", "n", "shuxue", 80)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("郑七", "m", "yuwen", 95)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("郑七", "m", "yingyu", 85)).toString))
    producer.send(new ProducerRecord[String, String]("ods_score_topic", "", JSON.toJSON(Stu("郑七", "m", "shuxue", 75)).toString))
    producer.close()
  }
}
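
Each send serializes one Stu to a JSON string, so a record value on the topic looks like {"name":"张三","sex":"m","kemu":"yuwen","score":66} (the field order produced by fastjson may differ); the record key is left empty here.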

 

 

Consumer

package com.lq.scala

import com.alibaba.fastjson.JSON
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe

object KafkaConsumerTest {
  def main(args: Array[String]): Unit = {
    // 5) Create the SparkConf environment; local[*] uses as many worker threads as there are local cores
    val conf = new SparkConf().setMaster("local[*]").setAppName("week1")
    // 6) Set the batch interval to 5 seconds
    val ssc = new StreamingContext(conf, Seconds(5))
    // 7) Set the log level to ERROR
    ssc.sparkContext.setLogLevel("ERROR")
    // 8) Obtain the Kafka data source
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hdp1:9092,hdp2:9092,hdp3:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "group1",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topics = Array("ods_score_topic")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )
    val dataDS: DStream[(String, String, String, Int)] = stream.map(record => record.value())
      // 9) Parse the JSON and map each record to a 4-tuple (name, sex, subject, score)
      .map(JSON.parseObject(_, classOf[Stu]))
      .map(stu => (
        stu.name, stu.sex, stu.kemu, stu.score
      ))
    dataDS.foreachRDD(rdd=>{
      // 10) Compute each student's total score
      println("Total score per student")
      rdd.map(s => (s._1, s._4)).reduceByKey(_ + _).foreach(println)
      // 11) Compute the highest score for each subject
      println("Highest score per subject")
      rdd.groupBy(_._3).map(s=>(
        s._1,s._2.map(_._4).max
      )).foreach(println)
      // 12) Compute the average score for each subject
      println("Average score per subject")
      rdd.groupBy(_._3).map(s=>(
        s._1,s._2.map(_._4).sum * 1.0/s._2.size
      )).foreach(println)
    })
    ssc.start()
    ssc.awaitTermination()
  }
}
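
If the streaming job is already running when the producer is executed (the consumer subscribes with auto.offset.reset = latest, so it only sees records produced after it starts) and all 15 records fall into a single 5-second batch, that batch would print, in some order, results equivalent to the following (worked out by hand from the sample data above):

Total score per student
(张三,231)
(李四,270)
(王五,250)
(赵六,240)
(郑七,255)
Highest score per subject
(yuwen,100)
(yingyu,90)
(shuxue,100)
Average score per subject
(yuwen,84.2)
(yingyu,82.4)
(shuxue,82.6)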

 

Source: https://www.cnblogs.com/lenny-z/p/16194965.html