其他分享
首页 > 其他分享 > Hudi-通过Hive查询hudi表数据

Hudi-通过Hive查询hudi表数据

作者:互联网

环境准备

集成jar包:hudi-hadoop-mr-bundle-0.10.1.jar,放入$HIVE_HOME/lib目录下

建外部表

-- Create the database used for the Hive-side definition of the Hudi table.
-- IF NOT EXISTS keeps the script re-runnable instead of failing when the
-- database is already present.
CREATE DATABASE IF NOT EXISTS db_hudi;

-- Switch the session to db_hudi.
USE db_hudi;

-- External Hive table over the files stored under
-- /hudi-warehouse/tbl_didi_haikou. Being EXTERNAL, dropping the table in Hive
-- leaves those files in place.
-- The table name is database-qualified so the statement does not depend on
-- the session's current database.
CREATE EXTERNAL TABLE IF NOT EXISTS db_hudi.tbl_hudi_didi (
    order_id             BIGINT,
    product_id           INT,
    city_id              INT,
    district             INT,
    county               INT,
    type                 INT,
    combo_type           INT,
    traffic_type         INT,
    passenger_count      INT,
    driver_product_id    INT,
    start_dest_distance  INT,
    arrive_time          STRING,
    departure_time       STRING,
    pre_total_fee        DOUBLE,
    normal_time          STRING,
    bubble_trace_id      STRING,
    product_1level       INT,
    dest_lng             DOUBLE,
    dest_lat             DOUBLE,
    starting_lng         DOUBLE,
    starting_lat         DOUBLE,
    ts                   BIGINT,
    partitionpath        STRING
)
-- One partition per date_str value; partitions are registered separately
-- with ALTER TABLE ... ADD PARTITION.
PARTITIONED BY (
    date_str STRING
)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
-- Input format class shipped in the hudi-hadoop-mr bundle jar.
-- NOTE(review): presumably this is what restricts reads to the latest Hudi
-- file versions; confirm against the Hudi documentation.
STORED AS INPUTFORMAT
    'org.apache.hudi.hadoop.HoodieParquetInputFormat'
OUTPUTFORMAT
    'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
    '/hudi-warehouse/tbl_didi_haikou';

手动加入分区

-- Register each date partition manually, pointing it at the matching
-- sub-directory of the table location. IF NOT EXISTS makes every statement
-- safe to re-run.
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-22')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-22';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-23')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-23';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-24')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-24';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-25')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-25';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-26')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-26';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-27')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-27';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-28')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-28';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-29')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-29';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-30')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-30';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-31')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-31';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-1')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-1';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-2')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-2';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-3')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-3';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-4')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-4';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-5')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-5';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-6')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-6';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-7')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-7';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-8')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-8';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-9')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-9';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-10')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-10';

查看分区

-- List the partitions currently registered for the table.
SHOW PARTITIONS db_hudi.tbl_hudi_didi;

指标统计

-- Development/testing only: allow Hive to pick local execution mode
-- automatically.
SET hive.exec.mode.local.auto=true;

-- Limits attached to automatic local mode, as named by the properties:
-- task count, total input bytes, and number of input files.
SET hive.exec.mode.local.auto.tasks.max=10;
SET hive.exec.mode.local.auto.inputbytes.max=88801103;
SET hive.exec.mode.local.auto.input.files.max=50;

-- Run in non-strict mode so the queries are not rejected for scanning the
-- partitioned table without a partition filter.
SET hive.mapred.mode=nonstrict;
-- Metric 1: number of orders per product type.
-- Rows are grouped by product_id; the id is mapped to a display label in the
-- select list, with any unmapped (or NULL) id falling through to ELSE.
SELECT
    CASE
        WHEN product_id = 1 THEN "滴滴专车"
        WHEN product_id = 2 THEN "滴滴企业专车"
        WHEN product_id = 3 THEN "滴滴快车"
        WHEN product_id = 4 THEN "滴滴企业快车"
        ELSE "未知"
    END AS order_type,
    COUNT(*) AS total
FROM db_hudi.tbl_hudi_didi
GROUP BY product_id
;

-- Metric 2: number of orders per timeliness type.
-- Rows are grouped by type; 0 and 1 are mapped to display labels and every
-- other value (including NULL) falls through to ELSE.
SELECT
    CASE
        WHEN type = 0 THEN "实时"
        WHEN type = 1 THEN "预约"
        ELSE "未知"
    END AS order_type,
    COUNT(*) AS total
FROM db_hudi.tbl_hudi_didi
GROUP BY type
;

-- Metric 3: number of orders per traffic type.
SELECT
    traffic_type,
    COUNT(*) AS total
FROM
    db_hudi.tbl_hudi_didi
GROUP BY
    traffic_type;

-- Metric 5: order counts per estimated-price bucket, computed in one pass
-- with conditional aggregation (SUM over CASE WHEN).
--
-- pre_total_fee is a DOUBLE, so the buckets must be contiguous: integer
-- BETWEEN ranges such as 0-15 / 16-30 would silently drop fractional fees
-- like 15.5. Each bucket is therefore "greater than the previous upper bound,
-- up to and including its own upper bound". Whole-number fees land in the
-- same buckets as before.
--
-- The aliases start with a digit, so they are backtick-quoted to be
-- unambiguous identifiers; the output column names are unchanged.
-- Rows with a NULL or negative pre_total_fee are counted in no bucket.
SELECT
    SUM(
        CASE WHEN pre_total_fee >= 0 AND pre_total_fee <= 15 THEN 1 ELSE 0 END
    ) AS `0_15`,
    SUM(
        CASE WHEN pre_total_fee > 15 AND pre_total_fee <= 30 THEN 1 ELSE 0 END
    ) AS `16_30`,
    SUM(
        CASE WHEN pre_total_fee > 30 AND pre_total_fee <= 50 THEN 1 ELSE 0 END
    ) AS `31_50`,
    SUM(
        CASE WHEN pre_total_fee > 50 AND pre_total_fee <= 100 THEN 1 ELSE 0 END
    ) AS `51_100`,
    SUM(
        CASE WHEN pre_total_fee > 100 THEN 1 ELSE 0 END
    ) AS `100_`
FROM db_hudi.tbl_hudi_didi;

 

 

 

标签:Hudi,EXISTS,db,didi,Hive,tbl,hudi,2017
来源: https://www.cnblogs.com/EnzoDin/p/15962045.html