其他分享
首页 > 其他分享> > 拉链表简单实现

拉链表简单实现

作者:互联网

表说明:

ods_product_2 :产品 ods 表 ,

dw_product_2 :产品 dw 表

  1. 表初始化
-- database
create database if not exists demo ;

use demo;
-- create ods table
create   table if not exists  `demo`.`ods_product_2`(
goods_id string comment '商品编号',
goods_status string comment '商品状态(待审核,待售,在售,已删除)',
createtime string comment '创建日期',
modifytime string comment '修改日期'
)
partitioned by (dt string comment '日期分区')
row format delimited fields terminated by '\t' stored as TEXTFILE;

-- create dw table 
create table if not exists `demo`.`dw_product_2`(
goods_id string comment '商品编号',
goods_status string comment '商品状态(待审核,待售,在售,已删除)',
createtime string comment '创建日期',
modifytime string comment '修改日期',
dw_start_date string comment '有效日期-起',
dw_end_date string  comment '有效日期-止'
)
row format delimited fields terminated by '\t' stored as TEXTFILE;


-- add partition for ods
alter table `demo`.`ods_product_2` add if not exists partition (dt='2019-12-20');
alter table `demo`.`ods_product_2` add if not exists partition (dt='2019-12-21');

-- show partitions
show partitions `demo`.`ods_product_2`;


  1. ods data 1.txt
001	待审核	2019-12-18	2019-12-20
002	待售	2019-12-19	2019-12-20
003	在售	2019-12-20	2019-12-20
004	已删除	2019-12-15	2019-12-20
  1. ods data 2.txt
001	待售	2019-12-18	2019-12-21
002	待售	2019-12-19	2019-12-20
003	在售	2019-12-20	2019-12-20
004	已删除	2019-12-15	2019-12-20
005(新商品)	待审核	2019-12-21	2019-12-21
006(新商品)	待审核	2019-12-12	2019-12-21
  1. 操作
## put data file to hdfs 
hdfs dfs -put 1.txt  /apps/hive/warehouse/demo.db/ods_product_2/dt=2019-12-20/
hive> select *from ods_product_2;
OK
ods_product_2.goods_id	ods_product_2.goods_status	ods_product_2.createtime	ods_product_2.modifytime	ods_product_2.dt
001	待审核	2019-12-18	2019-12-20	2019-12-20
002	待售	2019-12-19	2019-12-20	2019-12-20
003	在售	2019-12-20	2019-12-20	2019-12-20
004	已删除	2019-12-15	2019-12-20	2019-12-20
Time taken: 0.134 seconds, Fetched: 4 row(s)

insert overwrite table `demo`.`dw_product_2`
select 
t1.goods_id,
t1.goods_status,
t1.createtime,
t1.modifytime,
t1.dw_start_date,
case 
when (t2.goods_id is null and t1.dw_end_date > '2019-12-20') then  '2019-12-20' 
else t1.dw_end_date 
end  
as dw_end_date
from dw_product_2 t1
left join 
(select * from ods_product_2 where  dt='2019-12-20' 
and createtime != '2019-12-20' and  modifytime != '2019-12-20'
) t2 
on t1.goods_id = t2.goods_id

union all 

select
goods_id,
goods_status,
createtime,
modifytime,
modifytime as dw_start_date,
'9999-12-31' as dw_end_date
from ods_product_2  where  dt='2019-12-20' and ( createtime = '2019-12-20' or modifytime = '2019-12-20')
order by dw_start_date,goods_id ;
hive> select *from dw_product_2 ;
OK
dw_product_2.goods_id	dw_product_2.goods_status	dw_product_2.createtime	dw_product_2.modifytime	dw_product_2.dw_start_date	dw_product_2.dw_end_date
001	待审核	2019-12-18	2019-12-20	2019-12-20	9999-12-31
002	待售	2019-12-19	2019-12-20	2019-12-20	9999-12-31
003	在售	2019-12-20	2019-12-20	2019-12-20	9999-12-31
004	已删除	2019-12-15	2019-12-20	2019-12-20	9999-12-31
Time taken: 0.119 seconds, Fetched: 4 row(s)

## put data file to hdfs 
hdfs dfs -put 2.txt  /apps/hive/warehouse/demo.db/ods_product_2/dt=2019-12-21/
hive> select *from ods_product_2;
OK
ods_product_2.goods_id	ods_product_2.goods_status	ods_product_2.createtime	ods_product_2.modifytime	ods_product_2.dt
001	待审核	2019-12-18	2019-12-20	2019-12-20
002	待售	2019-12-19	2019-12-20	2019-12-20
003	在售	2019-12-20	2019-12-20	2019-12-20
004	已删除	2019-12-15	2019-12-20	2019-12-20
001	待售	2019-12-18	2019-12-21	2019-12-21
002	待售	2019-12-19	2019-12-20	2019-12-21
003	在售	2019-12-20	2019-12-20	2019-12-21
004	已删除	2019-12-15	2019-12-20	2019-12-21
005(新商品)	待审核	2019-12-21	2019-12-21	2019-12-21
006(新商品)	待审核	2019-12-12	2019-12-21	2019-12-21
Time taken: 0.123 seconds, Fetched: 10 row(s)

insert overwrite table `demo`.`dw_product_2`
select 
t1.goods_id,
t1.goods_status,
t1.createtime,
t1.modifytime,
t1.dw_start_date,
case 
when (t2.goods_id is null and t1.dw_end_date > '2019-12-21') then  '2019-12-21' 
else t1.dw_end_date 
end  
as dw_end_date
from dw_product_2 t1
left join 
(select * from ods_product_2 where  dt='2019-12-21' 
and createtime != '2019-12-21' and  modifytime != '2019-12-21'
) t2 
on t1.goods_id = t2.goods_id

union all 

select
goods_id,
goods_status,
createtime,
modifytime,
modifytime as dw_start_date,
'9999-12-31' as dw_end_date
from ods_product_2  where  dt='2019-12-21' and ( createtime = '2019-12-21' or modifytime = '2019-12-21')
order by dw_start_date,goods_id ;
hive> select *from dw_product_2;
OK
dw_product_2.goods_id	dw_product_2.goods_status	dw_product_2.createtime	dw_product_2.modifytime	dw_product_2.dw_start_date	dw_product_2.dw_end_date
001	待审核	2019-12-18	2019-12-20	2019-12-20	2019-12-21
002	待售	2019-12-19	2019-12-20	2019-12-20	9999-12-31
003	在售	2019-12-20	2019-12-20	2019-12-20	9999-12-31
004	已删除	2019-12-15	2019-12-20	2019-12-20	9999-12-31
001	待售	2019-12-18	2019-12-21	2019-12-21	9999-12-31
005(新商品)	待审核	2019-12-21	2019-12-21	2019-12-21	9999-12-31
006(新商品)	待审核	2019-12-12	2019-12-21	2019-12-21	9999-12-31
Time taken: 0.097 seconds, Fetched: 7 row(s)

标签:product,12,20,21,拉链,实现,2019,简单,dw
来源: https://blog.csdn.net/hochoy/article/details/117400860