拉链表简单实现
作者:互联网
表说明:
ods_product_2 :产品 ods 表 ,
dw_product_2 :产品 dw 表
- 表初始化
-- database
create database if not exists demo ;
use demo;
-- create ods table
create table if not exists `demo`.`ods_product_2`(
goods_id string comment '商品编号',
goods_status string comment '商品状态(待审核,待售,在售,已删除)',
createtime string comment '创建日期',
modifytime string comment '修改日期'
)
partitioned by (dt string comment '日期分区')
row format delimited fields terminated by '\t' stored as TEXTFILE;
-- create dw table
create table if not exists `demo`.`dw_product_2`(
goods_id string comment '商品编号',
goods_status string comment '商品状态(待审核,待售,在售,已删除)',
createtime string comment '创建日期',
modifytime string comment '修改日期',
dw_start_date string comment '有效日期-起',
dw_end_date string comment '有效日期-止'
)
row format delimited fields terminated by '\t' stored as TEXTFILE;
-- add partition for ods
alter table `demo`.`ods_product_2` add if not exists partition (dt='2019-12-20');
alter table `demo`.`ods_product_2` add if not exists partition (dt='2019-12-21');
-- show partitions
show partitions `demo`.`ods_product_2`;
- ods data 1.txt
001 待审核 2019-12-18 2019-12-20
002 待售 2019-12-19 2019-12-20
003 在售 2019-12-20 2019-12-20
004 已删除 2019-12-15 2019-12-20
- ods data 2.txt
001 待售 2019-12-18 2019-12-21
002 待售 2019-12-19 2019-12-20
003 在售 2019-12-20 2019-12-20
004 已删除 2019-12-15 2019-12-20
005(新商品) 待审核 2019-12-21 2019-12-21
006(新商品) 待审核 2019-12-12 2019-12-21
- 操作
## put data file to hdfs
hdfs dfs -put 1.txt /apps/hive/warehouse/demo.db/ods_product_2/dt=2019-12-20/
hive> select *from ods_product_2;
OK
ods_product_2.goods_id ods_product_2.goods_status ods_product_2.createtime ods_product_2.modifytime ods_product_2.dt
001 待审核 2019-12-18 2019-12-20 2019-12-20
002 待售 2019-12-19 2019-12-20 2019-12-20
003 在售 2019-12-20 2019-12-20 2019-12-20
004 已删除 2019-12-15 2019-12-20 2019-12-20
Time taken: 0.134 seconds, Fetched: 4 row(s)
insert overwrite table `demo`.`dw_product_2`
select
t1.goods_id,
t1.goods_status,
t1.createtime,
t1.modifytime,
t1.dw_start_date,
case
when (t2.goods_id is null and t1.dw_end_date > '2019-12-20') then '2019-12-20'
else t1.dw_end_date
end
as dw_end_date
from dw_product_2 t1
left join
(select * from ods_product_2 where dt='2019-12-20'
and createtime != '2019-12-20' and modifytime != '2019-12-20'
) t2
on t1.goods_id = t2.goods_id
union all
select
goods_id,
goods_status,
createtime,
modifytime,
modifytime as dw_start_date,
'9999-12-31' as dw_end_date
from ods_product_2 where dt='2019-12-20' and ( createtime = '2019-12-20' or modifytime = '2019-12-20')
order by dw_start_date,goods_id ;
hive> select *from dw_product_2 ;
OK
dw_product_2.goods_id dw_product_2.goods_status dw_product_2.createtime dw_product_2.modifytime dw_product_2.dw_start_date dw_product_2.dw_end_date
001 待审核 2019-12-18 2019-12-20 2019-12-20 9999-12-31
002 待售 2019-12-19 2019-12-20 2019-12-20 9999-12-31
003 在售 2019-12-20 2019-12-20 2019-12-20 9999-12-31
004 已删除 2019-12-15 2019-12-20 2019-12-20 9999-12-31
Time taken: 0.119 seconds, Fetched: 4 row(s)
## put data file to hdfs
hdfs dfs -put 2.txt /apps/hive/warehouse/demo.db/ods_product_2/dt=2019-12-21/
hive> select *from ods_product_2;
OK
ods_product_2.goods_id ods_product_2.goods_status ods_product_2.createtime ods_product_2.modifytime ods_product_2.dt
001 待审核 2019-12-18 2019-12-20 2019-12-20
002 待售 2019-12-19 2019-12-20 2019-12-20
003 在售 2019-12-20 2019-12-20 2019-12-20
004 已删除 2019-12-15 2019-12-20 2019-12-20
001 待售 2019-12-18 2019-12-21 2019-12-21
002 待售 2019-12-19 2019-12-20 2019-12-21
003 在售 2019-12-20 2019-12-20 2019-12-21
004 已删除 2019-12-15 2019-12-20 2019-12-21
005(新商品) 待审核 2019-12-21 2019-12-21 2019-12-21
006(新商品) 待审核 2019-12-12 2019-12-21 2019-12-21
Time taken: 0.123 seconds, Fetched: 10 row(s)
insert overwrite table `demo`.`dw_product_2`
select
t1.goods_id,
t1.goods_status,
t1.createtime,
t1.modifytime,
t1.dw_start_date,
case
when (t2.goods_id is null and t1.dw_end_date > '2019-12-21') then '2019-12-21'
else t1.dw_end_date
end
as dw_end_date
from dw_product_2 t1
left join
(select * from ods_product_2 where dt='2019-12-21'
and createtime != '2019-12-21' and modifytime != '2019-12-21'
) t2
on t1.goods_id = t2.goods_id
union all
select
goods_id,
goods_status,
createtime,
modifytime,
modifytime as dw_start_date,
'9999-12-31' as dw_end_date
from ods_product_2 where dt='2019-12-21' and ( createtime = '2019-12-21' or modifytime = '2019-12-21')
order by dw_start_date,goods_id ;
hive> select *from dw_product_2;
OK
dw_product_2.goods_id dw_product_2.goods_status dw_product_2.createtime dw_product_2.modifytime dw_product_2.dw_start_date dw_product_2.dw_end_date
001 待审核 2019-12-18 2019-12-20 2019-12-20 2019-12-21
002 待售 2019-12-19 2019-12-20 2019-12-20 9999-12-31
003 在售 2019-12-20 2019-12-20 2019-12-20 9999-12-31
004 已删除 2019-12-15 2019-12-20 2019-12-20 9999-12-31
001 待售 2019-12-18 2019-12-21 2019-12-21 9999-12-31
005(新商品) 待审核 2019-12-21 2019-12-21 2019-12-21 9999-12-31
006(新商品) 待审核 2019-12-12 2019-12-21 2019-12-21 9999-12-31
Time taken: 0.097 seconds, Fetched: 7 row(s)
标签:product,12,20,21,拉链,实现,2019,简单,dw 来源: https://blog.csdn.net/hochoy/article/details/117400860