Hudi-通过Hive查询hudi表数据
作者:互联网
环境准备
集成jar包:hudi-hadoop-mr-bundle-0.10.1.jar,放入$HIVE_HOME/lib目录下
建外部表
create database db_hudi; use db_hudi; CREATE EXTERNAL TABLE IF NOT EXISTS tbl_hudi_didi( order_id BIGINT, product_id INT, city_id INT, district INT, county INT, type INT, combo_type INT, traffic_type INT, passenger_count INT, driver_product_id INT, start_dest_distance INT, arrive_time STRING, departure_time STRING, pre_total_fee DOUBLE, normal_time STRING, bubble_trace_id STRING, product_1level INT, dest_lng DOUBLE, dest_lat DOUBLE, starting_lng DOUBLE, starting_lat DOUBLE, ts BIGINT, partitionpath STRING ) PARTITIONED BY( date_str string ) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hudi.hadoop.HoodieParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' LOCATION '/hudi-warehouse/tbl_didi_haikou';
手动加入分区
--手动添加分区 ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-22') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-22'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-23') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-23'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-24') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-24'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-25') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-25'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-26') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-26'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-27') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-27'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-28') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-28'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-29') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-29'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-30') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-30'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-31') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-31'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-1') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-1'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-2') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-2'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-3') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-3'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-4') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-4'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-5') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-5'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-6') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-6'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-7') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-7'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-8') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-8'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-9') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-9'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-10') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-10';
查看分区
SHOW PARTITIONS db_hudi.tbl_hudi_didi;指标统计
-- 开发测试,设置运行模式为本地模式 set hive.exec.mode.local.auto = true; set hive.exec.mode.local.auto.tasks.max = 10; set hive.exec.mode.local.auto.inputbytes.max=88801103; set hive.exec.mode.local.auto.input.files.max=50; SET hive.mapred.mode=nonstrict; -- 指标一:订单类型统计 WITH tmp as ( SELECT product_id, COUNT(1) AS total FROM db_hudi.tbl_hudi_didi GROUP BY product_id ) SELECT CASE product_id WHEN 1 THEN "滴滴专车" WHEN 2 THEN "滴滴企业专车" WHEN 3 THEN "滴滴快车" WHEN 4 THEN "滴滴企业快车" ELSE "未知" END AS order_type, total FROM tmp ; -- 指标二:订单时效性统计 WITH tmp as ( SELECT type, COUNT(1) AS total FROM db_hudi.tbl_hudi_didi GROUP BY type ) SELECT CASE type WHEN 0 THEN "实时" WHEN 1 THEN "预约" ELSE "未知" END AS order_type, total FROM tmp ; --指标三:订单交通类型统计 SELECT traffic_type, COUNT(1) AS total FROM db_hudi.tbl_hudi_didi GROUP BY traffic_type; -- 指标五:订单价格统计,先将价格划分区间,再统计,此处使用WHEN函数和SUM函数 SELECT SUM( CASE WHEN pre_total_fee BETWEEN 0 AND 15 THEN 1 ELSE 0 END ) AS 0_15, SUM( CASE WHEN pre_total_fee BETWEEN 16 AND 30 THEN 1 ELSE 0 END ) AS 16_30, SUM( CASE WHEN pre_total_fee BETWEEN 31 AND 50 THEN 1 ELSE 0 END ) AS 31_50, SUM( CASE WHEN pre_total_fee BETWEEN 51 AND 100 THEN 1 ELSE 0 END ) AS 51_100, SUM( CASE WHEN pre_total_fee > 100 THEN 1 ELSE 0 END ) AS 100_ FROM db_hudi.tbl_hudi_didi;
标签:Hudi,EXISTS,db,didi,Hive,tbl,hudi,2017 来源: https://www.cnblogs.com/EnzoDin/p/15962045.html