人工打标实现交通画像
作者:互联网
人工打标实现交通画像
4.0.0org.exampleAnalysisHotWords1.0-SNAPSHOTUTF-8com.janeluoikanalyzer2012_u6org.apache.sparkspark-core_2.112.4.5com.google.guavaguavacom.google.guavaguava14.0.1org.apache.sparkspark-sql_2.112.4.5org.apache.sparkspark-streaming_2.112.4.5org.apache.sparkspark-mllib_2.112.4.5org.apache.sparkspark-graphx_2.112.4.5mysqlmysql-connector-java8.0.18org.scala-toolsmaven-scala-plugin2.15.2modified-onlymain-scalacprocess-resourcesadd-sourcecompilescala-test-compileprocess-test-resourcestestCompilemaven-assembly-pluginfalsejar-with-dependenciescom.xxx.uploadFilemake-assemblypackageassemblyorg.apache.maven.pluginsmaven-compiler-plugin3.1compilecompiletargettarget/classestarget/test-classe***c
package main.scala.com.jsptpd.anylysishotwords

import java.util.Properties

import com.jsptpd.anylysishotwords.HostWordInfo
import org.apache.spark.sql.{Dataset, SparkSession}

import scala.collection.mutable
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

/**
 * One row appended to the `t_particpanalysis` table.
 *
 * @param item       compact JSON object mapping each label to its keyword-hit count
 * @param updatatype free-form tag stored with the result; `main` writes the literal "30"
 *                   (presumably the look-back window in days -- confirm against the table's consumers)
 */
case class ParticiplelabelAnalysis(item: String, updatatype: String)

/**
 * Keyword-based labelling job: reads request texts over JDBC, counts for every label how many
 * texts contain each of its keywords, and appends the totals as one JSON row to another database.
 */
object ParticiplelabelAnalysis {

  /** Label -> keywords whose presence in a request text counts as a hit for that label. */
  private val LabelKeywords: Seq[(String, Set[String])] = Seq(
    "设施不合理" -> Set("不合理", "设施", "规划", "设置"),
    "施工影响大" -> Set("工程", "建设", "影响", "施工"),
    "交通安全隐患" -> Set("安全隐患", "维修", "坏了"),
    "相关部门不作为" -> Set("事故", "不处理", "不作为", "不解决"),
    "公交不准时" -> Set("公交", "不准时", "晚点", "时间长"),
    "公交不按线路行驶" -> Set("公交", "站台", "线路", "溜站"),
    "交通拥堵" -> Set("堵塞", "拥堵", "车多"),
    "占道违停" -> Set("占道", "违停"),
    "出租车乱收费" -> Set("出租车", "费用", "收费", "打车", "计费"),
    "改善地铁设施" -> Set("地铁", "不方便", "坏了"),
    "处罚不认可" -> Set("曝光", "不认可", "罚单", "处罚"),
    "增加地铁站点" -> Set("地铁", "线路", "站点", "规划")
  )

  /**
   * Computes the hit total of every label in [[LabelKeywords]].
   *
   * A label's total is the sum, over its keywords, of the number of texts containing that
   * keyword; a text matching several keywords of one label is therefore counted once per
   * keyword. Each label is printed before it is processed.
   *
   * @param texts request texts to scan; one Spark `count()` job runs per keyword
   * @return (label, total) pairs in the declaration order of [[LabelKeywords]]
   */
  private def countLabelHits(texts: Dataset[String]): Seq[(String, Long)] =
    LabelKeywords.map { case (label, keywords) =>
      println(label)
      // Fold instead of keywords.map(...).sum: mapping a Set would collapse equal counts.
      val total = keywords.foldLeft(0L) { (acc, keyword) =>
        acc + texts.filter((text: String) => text.contains(keyword)).count()
      }
      label -> total
    }

  /**
   * Job entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("ParticiplelabelAnalysis").master("local[*]").getOrCreate()
    try {
      import spark.implicits._

      val prop = new Properties()
      prop.put("user", "xxxxxxxxxxxxxxxxx")
      prop.put("password", "xxxxxxxxxxxxx")
      prop.put("driver", "com.mysql.jdbc.Driver")

      // Filter before projecting so the `cagou` predicate is resolved against the full table,
      // then keep only the text column. Rows with a NULL text are dropped instead of raising
      // a NullPointerException inside the map.
      val texts: Dataset[String] = spark.read
        .jdbc("jdbc:mysql://xxxxxxxxxxxxxxxxxxxx1/city?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC", "xxxxxxxxxxxx", prop)
        .where("DATE_SUB(current_date,30) < rqsttime and cagou in ('市消防局','市交通局','市交通运输局', '市政务办','市城建集团'," +
          "'市民卡公司','南京市公共交通集团','12345','市地铁集团','信息中心','市城管局','市交管局','市应急管理局'," +
          "'市大数据局','市公安局交管局','市公交集团','市气象局','南京港集团','市公安局','南京地铁集团有限公司','市交通集团'," +
          "'南京公交集团','南京地铁集团')")
        .select("rqstcontent")
        .na.drop()
        .map(row => row.get(0).toString.trim.replace("\n", ""))
        // One count() job runs per keyword; without caching each job would re-read the table.
        .cache()

      val labelTotals: Seq[(String, Long)] =
        try countLabelHits(texts)
        finally texts.unpersist()

      // Build the JSON object from the ordered pairs so keys appear in declaration order.
      val jsons1 = compact(render(JObject(
        labelTotals.map { case (label, total) => JField(label, JInt(BigInt(total))) }.toList
      )))
      println(jsons1)

      val propitem = new Properties()
      val db2url = "jdbc:mysql://xxxxxxxxxxxxxxxxxx/pc?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC"
      propitem.setProperty("user", "xxxxxxxxxxxxxxxxx")
      propitem.setProperty("password", "xxxxxxxxxxxxxxxxxxxx")

      val value = Seq(ParticiplelabelAnalysis(jsons1, "30")).toDS()
      value.write.mode("append").jdbc(db2url, "t_particpanalysis", propitem)
    } finally {
      // Release the local Spark context even when reading, counting or writing fails.
      spark.stop()
    }
  }
}
标签:Set,String,val,打标,org,人工,apache,import,画像 来源: https://blog.51cto.com/u_13887992/2733375