spark-shell: enabling dynamic partitions at startup, snappy compression, parquet storage, and table backup
1. Launching spark-shell with dynamic partitioning enabled
spark-shell \
--executor-memory 16G \
--total-executor-cores 10 \
--executor-cores 10 \
--conf "spark.hadoop.hive.exec.dynamic.partition=true" \
--conf "spark.hadoop.hive.exec.dynamic.partition.mode=nonstrict" \
--conf spark.sql.shuffle.partitions=10 \
--conf spark.default.parallelism=10
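With these flags set, an INSERT into a partitioned Hive table can take its partition values from the query result instead of a literal per partition. A minimal sketch, run inside the spark-shell started above; the table names dbn.events and dbn.events_staging and the partition column dt are hypothetical, assuming events is partitioned by dt:

// nonstrict mode: no literal value for dt is given, each row's dt column
// decides which partition it is written to.
spark.sql("""
  INSERT OVERWRITE TABLE dbn.events PARTITION (dt)
  SELECT id, payload, dt
  FROM dbn.events_staging
""")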
2. Compressing and backing up tables with spark-sql
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FileUtil, Path, FileStatus}
import scala.collection.mutable.{ArrayBuffer, ListBuffer}
import scala.io.Source
import java.io.PrintWriter

val dbn = "src_es"                          // database to back up
val tbn = Array("middata", "decision_info") // tables to back up

for (tb <- tbn) {
  println(dbn + "." + tb)
  // Read the table and write a snappy-compressed parquet backup
  val df = sqlContext.sql("select * from " + dbn + "." + tb)
  df.write.option("compression", "snappy").format("parquet")
    .save("/backupdatafile/" + dbn + ".db/" + tb)
  // Read the backup back and overwrite the original table from it
  val dbtb = spark.read.parquet("/backupdatafile/" + dbn + ".db/" + tb)
  dbtb.createOrReplaceTempView("test_" + tb)
  spark.sql("insert overwrite table " + dbn + "." + tb + " select * from test_" + tb)
}
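The FileSystem import above is only needed if you also want to confirm that each backup actually landed on HDFS. A minimal sketch of such a check, assuming the same /backupdatafile layout; the size report is illustrative and not part of the original backup flow:

// Hypothetical follow-up: verify each backup directory exists and report its size.
val fs = FileSystem.get(sc.hadoopConfiguration)
for (tb <- tbn) {
  val p = new Path("/backupdatafile/" + dbn + ".db/" + tb)
  if (fs.exists(p)) {
    val bytes = fs.getContentSummary(p).getLength
    println(dbn + "." + tb + " backed up, " + bytes + " bytes")
  } else {
    println(dbn + "." + tb + " backup missing!")
  }
}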
Source: https://www.cnblogs.com/DengWhichone/p/14984591.html