其他分享
首页 > 其他分享> > 电商项目_dws层建设

电商项目_dws层建设

作者:互联网

三、DWS层建设----临时聚合表的设计与实现

# 事实表中一条记录所表达的业务细节程度被称为粒度。这种粒度通常由2种类型组成:一种是主题信息的隶属维度属性组合所表示的细节程度,另一种是此事实数据中涉及的其他主题域的维度信息(一般是关联key)。
# DWS是基于DWD数据,以其中的主题信息为维度,以其他主题数据为统计度量的数据集合表

示例:
主题为【交易主题】下的订单记录
粒度1:【order_id, order_status,payment_money, order_ctime】是订单业务相关的维度信息
粒度2:【customer_id】用户主题相关,另外与支付主题相关

创建dws_nshop.dws_nshop_order_customer
大概的schema如下:
【order_id, order_status, payment_money, order_ctime】 【customer_count】
用户启动日志表(当天)
-- DWS: user launch log aggregate (external Parquet table, one partition per bdp_day).
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_ulog_launch (
    user_id      STRING COMMENT '用户id',        -- user id
    device_num   STRING COMMENT '设备号',        -- device number
    device_type  STRING COMMENT '设备类型',      -- device type
    os           STRING COMMENT '手机系统',      -- phone OS
    os_version   STRING COMMENT '手机系统版本',  -- phone OS version
    manufacturer STRING COMMENT '手机制造商',    -- phone manufacturer
    carrier      STRING COMMENT '电信运营商',    -- telecom carrier
    network_type STRING COMMENT '网络类型',      -- network type
    area_code    STRING COMMENT '地区编码',      -- area code
    launch_count INT    COMMENT '启动次数'       -- launch count
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_ulog_launch/';
用户浏览日志表(当天)
-- DWS: user product-view log aggregate (external Parquet table, one partition per bdp_day).
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_ulog_view (
    user_id      STRING COMMENT '用户id',        -- user id
    device_num   STRING COMMENT '设备号',        -- device number
    device_type  STRING COMMENT '设备类型',      -- device type
    os           STRING COMMENT '手机系统',      -- phone OS
    os_version   STRING COMMENT '手机系统版本',  -- phone OS version
    manufacturer STRING COMMENT '手机制造商',    -- phone manufacturer
    carrier      STRING COMMENT '电信运营商',    -- telecom carrier
    network_type STRING COMMENT '网络类型',      -- network type
    area_code    STRING COMMENT '地区编码',      -- area code
    view_count   INT    COMMENT '浏览次数'       -- view count
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_ulog_view/';
用户查询日志表(当天)
-- DWS: user search log aggregate (external Parquet table, one partition per bdp_day).
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_ulog_search (
    user_id      STRING COMMENT '用户id',        -- user id
    device_num   STRING COMMENT '设备号',        -- device number
    device_type  STRING COMMENT '设备类型',      -- device type
    os           STRING COMMENT '手机系统',      -- phone OS
    os_version   STRING COMMENT '手机系统版本',  -- phone OS version
    manufacturer STRING COMMENT '手机制造商',    -- phone manufacturer
    carrier      STRING COMMENT '电信运营商',    -- telecom carrier
    network_type STRING COMMENT '网络类型',      -- network type
    area_code    STRING COMMENT '地区编码',      -- area code
    search_count INT    COMMENT '搜索次数'       -- search count
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_ulog_search/';
用户关注日志表(当天)
-- DWS: user follow ("comment") log aggregate (external Parquet table, one partition per bdp_day).
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_ulog_comment (
    user_id              STRING COMMENT '用户id',        -- user id
    device_num           STRING COMMENT '设备号',        -- device number
    device_type          STRING COMMENT '设备类型',      -- device type
    os                   STRING COMMENT '手机系统',      -- phone OS
    os_version           STRING COMMENT '手机系统版本',  -- phone OS version
    manufacturer         STRING COMMENT '手机制造商',    -- phone manufacturer
    carrier              STRING COMMENT '电信运营商',    -- telecom carrier
    network_type         STRING COMMENT '网络类型',      -- network type
    area_code            STRING COMMENT '地区编码',      -- area code
    comment_count        INT    COMMENT '关注次数',      -- follow count, not deduplicated
    comment_target_count INT    COMMENT '关注产品次数',  -- followed-product count, deduplicated
    ct                   BIGINT COMMENT '产生时间'       -- generation time
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_ulog_comment/';
用户交易记录表(当天)
-- DWS: per-user order summary (external Parquet table, one partition per bdp_day).
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_user_orders (
    user_id          STRING        COMMENT '用户id',          -- user id
    customer_natives STRING        COMMENT '所在区域',        -- region the customer is in
    orders_count     INT           COMMENT '订单数量',        -- number of orders
    orders_pay       DECIMAL(10,1) COMMENT '订单金额',        -- order amount
    orders_shipping  DECIMAL(10,1) COMMENT '订单运费金额',    -- order shipping amount
    orders_district  DECIMAL(10,1) COMMENT '订单优惠金额',    -- order discount amount
    ct               BIGINT        COMMENT '产生时间'         -- generation time
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_user_orders/';
用户投诉订单表(当天)
-- DWS: per-user complained-order summary (external Parquet table, one partition per bdp_day).
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_user_complainant (
    user_id              STRING        COMMENT '用户id',    -- user id
    area_code            STRING        COMMENT '地区编码',  -- area code
    compl_orders_count   INT           COMMENT '订单数量',  -- number of orders
    compl_orders_pay     DECIMAL(10,1) COMMENT '订单金额',  -- order amount
    compl_supplier_count INT           COMMENT '商家数量',  -- number of merchants
    ct                   BIGINT        COMMENT '产生时间'   -- generation time
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_user_complainant/';
商家用户交互记录表(当天)
-- Join path that ties user activity to a supplier (the numbers look like sample key values):
--   view log target_id (4320505513101)        -> page layout table page_code (4320101010101)
--   product table product_code (43201010101)  -> page layout table page_target (43201010101)
--   supplier table supplier_code (32010101)   -> product table supplier_code
-- DWS: supplier/user interaction summary (external Parquet table, one partition per bdp_day).
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_supplier_user (
    supplier_id       STRING COMMENT '商家id',                           -- merchant id
    supplier_type     INT    COMMENT '供应商类型:1.自营,2.官方 3其他',   -- supplier type: 1 self-operated, 2 official, 3 other
    view_count        INT    COMMENT '浏览次数',                         -- view count
    comment_users     INT    COMMENT '关注人数',                         -- number of following users
    comment_area_code INT    COMMENT '关注地区数量',                     -- number of areas with followers
    ct                BIGINT COMMENT '产生时间'                          -- generation time
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/supplier/dws_nshop_supplier_user/';
商家日流水表(当天)
-- DWS: supplier daily sales summary (external Parquet table, one partition per bdp_day).
-- NOTE(review): the "salaes_" column prefix looks like a typo of "sales_"; it is kept because
-- renaming columns would break existing readers and writers of this table.
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_supplier_sales (
    supplier_id            STRING        COMMENT '商家id',                           -- merchant id
    supplier_type          INT           COMMENT '供应商类型:1.自营,2.官方 3其他',   -- supplier type: 1 self-operated, 2 official, 3 other
    sales_users            INT           COMMENT '购物人数',                         -- number of buyers
    sales_users_area       INT           COMMENT '购物地区数量',                     -- number of buyer areas
    sales_orders           INT           COMMENT '购物订单数',                       -- number of orders
    salaes_orders_pay      DECIMAL(10,1) COMMENT '订单金额',                         -- order amount
    salaes_orders_district DECIMAL(10,1) COMMENT '订单优惠金额',                     -- order discount amount
    ct                     BIGINT        COMMENT '产生时间'                          -- generation time
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/supplier/dws_nshop_supplier_sales/';
广告投放用户表(当天)
-- DWS: ad-release summary per channel and category (external Parquet table, one partition per bdp_day).
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_release_user (
    release_sources      STRING COMMENT '投放渠道',            -- release channel
    release_category     STRING COMMENT '投放浏览产品分类',    -- category of products viewed via the release
    release_users        INT    COMMENT '投放浏览用户数',      -- number of users who viewed via the release
    release_product_page INT    COMMENT '投放浏览产品页面数',  -- number of product pages viewed via the release
    ct                   BIGINT COMMENT '创建时间'             -- creation time
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/release/dws_nshop_release_user/';
用户营销活动表(当天)
-- DWS: per-user marketing (ad-release) activity summary (external Parquet table, one partition per bdp_day).
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_user_release (
    user_id      STRING COMMENT '用户id',        -- user id
    os           STRING COMMENT '手机系统',      -- phone OS
    os_version   STRING COMMENT '手机系统版本',  -- phone OS version
    manufacturer STRING COMMENT '手机制造商',    -- phone manufacturer
    carrier      STRING COMMENT '电信运营商',    -- telecom carrier
    network_type STRING COMMENT '网络类型',      -- network type
    area_code    STRING COMMENT '地区编码',      -- area code
    source_count INT    COMMENT '投放来源数量',  -- number of release sources
    ct           BIGINT COMMENT '产生时间'       -- generation time
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_user_release/';

DWS层Hql语句实现(注意去重 collect_set)

用户启动日志表
-- Rebuild the 20220630 partition of dws_nshop_ulog_launch from the DWD launch log.
-- The window count attaches the number of launch rows of each device_num to every row;
-- SELECT DISTINCT then collapses rows that are identical across all selected columns.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_ulog_launch PARTITION (bdp_day = '20220630')
SELECT DISTINCT
    launch.user_id,
    launch.device_num,
    launch.device_type,
    launch.os,
    launch.os_version,
    launch.manufacturer,
    launch.carrier,
    launch.network_type,
    launch.area_code,
    COUNT(launch.device_num) OVER (PARTITION BY launch.device_num) AS launch_count
FROM dwd_nshop.dwd_nshop_actlog_launch launch
WHERE launch.bdp_day = '20220630';
用户浏览日志表
-- Rebuild the 20220630 partition of dws_nshop_ulog_view: one output row per device_num.
-- COLLECT_SET(col)[0] keeps a single value of each attribute out of the distinct values
-- seen for the device; which value is kept is not specified by an ORDER BY.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_ulog_view PARTITION (bdp_day = '20220630')
SELECT
    COLLECT_SET(pv.user_id)[0]      AS user_id,
    COLLECT_SET(pv.device_num)[0]   AS device_num,
    COLLECT_SET(pv.device_type)[0]  AS device_type,
    COLLECT_SET(pv.os)[0]           AS os,
    COLLECT_SET(pv.os_version)[0]   AS os_version,
    COLLECT_SET(pv.manufacturer)[0] AS manufacturer,
    COLLECT_SET(pv.carrier)[0]      AS carrier,
    COLLECT_SET(pv.network_type)[0] AS network_type,
    COLLECT_SET(pv.area_code)[0]    AS area_code,
    COUNT(pv.device_num)            AS view_count  -- number of view rows for the device
FROM dwd_nshop.dwd_nshop_actlog_pdtview pv
WHERE pv.bdp_day = '20220630'
GROUP BY pv.device_num;
用户查询日志表
-- Rebuild the 20220630 partition of dws_nshop_ulog_search: one output row per device_num.
-- COLLECT_SET(col)[0] keeps a single value of each attribute out of the distinct values
-- seen for the device; which value is kept is not specified by an ORDER BY.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_ulog_search PARTITION (bdp_day = '20220630')
SELECT
    COLLECT_SET(srch.user_id)[0]      AS user_id,
    COLLECT_SET(srch.device_num)[0]   AS device_num,
    COLLECT_SET(srch.device_type)[0]  AS device_type,
    COLLECT_SET(srch.os)[0]           AS os,
    COLLECT_SET(srch.os_version)[0]   AS os_version,
    COLLECT_SET(srch.manufacturer)[0] AS manufacturer,
    COLLECT_SET(srch.carrier)[0]      AS carrier,
    COLLECT_SET(srch.network_type)[0] AS network_type,
    COLLECT_SET(srch.area_code)[0]    AS area_code,
    COUNT(srch.device_num)            AS search_count  -- number of search rows for the device
FROM dwd_nshop.dwd_nshop_actlog_pdtsearch srch
WHERE srch.bdp_day = '20220630'
GROUP BY srch.device_num;
用户关注日志表
-- Rebuild the 20220630 partition of dws_nshop_ulog_comment: one output row per device_num.
-- COLLECT_SET(col)[0] keeps a single value of each attribute out of the distinct values
-- seen for the device; which value is kept is not specified by an ORDER BY.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_ulog_comment PARTITION (bdp_day = '20220630')
SELECT
    COLLECT_SET(fl.user_id)[0]      AS user_id,
    COLLECT_SET(fl.device_num)[0]   AS device_num,
    COLLECT_SET(fl.device_type)[0]  AS device_type,
    COLLECT_SET(fl.os)[0]           AS os,
    COLLECT_SET(fl.os_version)[0]   AS os_version,
    COLLECT_SET(fl.manufacturer)[0] AS manufacturer,
    COLLECT_SET(fl.carrier)[0]      AS carrier,
    COLLECT_SET(fl.network_type)[0] AS network_type,
    COLLECT_SET(fl.area_code)[0]    AS area_code,
    COUNT(fl.target_id)             AS comment_count,         -- every follow row counts
    COUNT(DISTINCT fl.target_id)    AS comment_target_count,  -- each followed target counts once
    COLLECT_SET(fl.ct)[0]           AS ct
FROM dwd_nshop.dwd_actlog_product_comment fl
WHERE fl.bdp_day = '20220630'
GROUP BY fl.device_num;
用户交易记录表
-- Rebuild the 20220630 partition of dws_nshop_user_orders: one row per customer and region.
WITH order_lines AS (
    -- Order-detail rows of the day, enriched with the customer's region.
    SELECT
        a.customer_id,
        a.order_id,
        a.district_money,
        a.shipping_money,
        b.customer_natives,
        a.payment_money
    FROM dwd_nshop.dwd_nshop_orders_details a
    INNER JOIN ods_nshop.ods_02_customer b
        ON a.customer_id = b.customer_id
    -- Qualify the partition column so it stays unambiguous if the customer table
    -- also exposes a bdp_day column.
    WHERE a.bdp_day = '20220630'
)
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_user_orders PARTITION (bdp_day = '20220630')
SELECT
    customer_id                         AS user_id,
    customer_natives,
    -- An order id may repeat across detail rows, so count each order once.
    COUNT(DISTINCT order_id)            AS orders_count,
    -- NOTE(review): the sums run over detail rows; if the money columns are order-level
    -- values repeated on every detail row they would need de-duplication too — confirm.
    SUM(payment_money)                  AS orders_pay,
    SUM(shipping_money)                 AS orders_shipping,
    SUM(district_money)                 AS orders_district,
    -- ct is declared BIGINT: store epoch seconds explicitly rather than relying on an
    -- implicit TIMESTAMP -> BIGINT conversion at insert time.
    UNIX_TIMESTAMP(CURRENT_TIMESTAMP()) AS ct
FROM order_lines
GROUP BY
    customer_id,
    customer_natives;
用户投诉订单表
-- Rebuild the 20220630 partition of dws_nshop_user_complainant: one row per customer and region,
-- restricted to orders with order_status = 6.
-- NOTE(review): status 6 presumably marks complained orders — confirm against the status dictionary.
WITH complained_lines AS (
    -- Order-detail rows of the day with status 6, enriched with the customer's region.
    SELECT
        a.customer_id,
        a.order_id,
        a.supplier_code,
        b.customer_natives,
        a.payment_money
    FROM dwd_nshop.dwd_nshop_orders_details a
    INNER JOIN ods_nshop.ods_02_customer b
        ON a.customer_id = b.customer_id
    -- Qualify both filter columns so they stay unambiguous if the customer table
    -- exposes columns with the same names.
    WHERE a.order_status = 6
      AND a.bdp_day = '20220630'
)
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_user_complainant PARTITION (bdp_day = '20220630')
SELECT
    customer_id                         AS user_id,
    customer_natives                    AS area_code,  -- the region value fills the area_code column
    -- An order id may repeat across detail rows, so count each order once.
    COUNT(DISTINCT order_id)            AS compl_orders_count,
    SUM(payment_money)                  AS compl_orders_pay,
    -- The column holds the number of merchants, so each supplier is counted once.
    COUNT(DISTINCT supplier_code)       AS compl_supplier_count,
    -- ct is declared BIGINT: store epoch seconds explicitly rather than relying on an
    -- implicit TIMESTAMP -> BIGINT conversion at insert time.
    UNIX_TIMESTAMP(CURRENT_TIMESTAMP()) AS ct
FROM complained_lines
GROUP BY
    customer_id,
    customer_natives;
商家用户交互记录表
-- Join path used below (the numbers look like sample key values):
--   a: view log target_id (4320505513101)        -> b: page layout table page_code (4320101010101)
--   c: product table product_code (43201010101)  -> b: page layout table page_target (43201010101)
--   d: supplier table supplier_code (32010101)   -> c: product table supplier_code
--
-- Rebuild the 20220630 partition of dws_nshop_supplier_user: one row per supplier and supplier type.
WITH supplier_views AS (
    -- Number of product-view events per supplier.
    SELECT
        c.supplier_code,
        d.supplier_type,
        COUNT(*) AS view_count
    FROM dwd_nshop.dwd_nshop_actlog_pdtview a
    INNER JOIN ods_nshop.dim_pub_page b
        ON a.target_id = b.page_code
    INNER JOIN ods_nshop.dim_pub_product c
        ON b.page_target = c.product_code
    INNER JOIN ods_nshop.dim_pub_supplier d
        ON c.supplier_code = d.supplier_code
    WHERE a.bdp_day = '20220630'
    GROUP BY
        c.supplier_code,
        d.supplier_type
),
supplier_follows AS (
    -- Distinct following users and distinct follower areas per supplier.
    SELECT
        c.supplier_code,
        d.supplier_type,
        COUNT(DISTINCT a.user_id)   AS comment_users,
        COUNT(DISTINCT a.area_code) AS comment_area_code
    FROM dwd_nshop.dwd_actlog_product_comment a
    INNER JOIN ods_nshop.dim_pub_page b
        ON a.target_id = b.page_code
    INNER JOIN ods_nshop.dim_pub_product c
        ON b.page_target = c.product_code
    INNER JOIN ods_nshop.dim_pub_supplier d
        ON c.supplier_code = d.supplier_code
    WHERE a.bdp_day = '20220630'
    GROUP BY
        c.supplier_code,
        d.supplier_type
)
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_supplier_user PARTITION (bdp_day = '20220630')
SELECT
    COALESCE(v.supplier_code, f.supplier_code) AS supplier_id,
    COALESCE(v.supplier_type, f.supplier_type) AS supplier_type,
    COALESCE(v.view_count, 0)                  AS view_count,
    COALESCE(f.comment_users, 0)               AS comment_users,
    COALESCE(f.comment_area_code, 0)           AS comment_area_code,
    -- ct is declared BIGINT: store epoch seconds explicitly rather than relying on an
    -- implicit TIMESTAMP -> BIGINT conversion at insert time.
    UNIX_TIMESTAMP(CURRENT_TIMESTAMP())        AS ct
FROM supplier_views v
-- FULL OUTER JOIN keeps suppliers that only have views or only have follows;
-- an inner join would silently drop them from the day's partition.
FULL OUTER JOIN supplier_follows f
    ON  v.supplier_code = f.supplier_code
    AND v.supplier_type = f.supplier_type;
商家日流水表
-- Rebuild the 20220630 partition of dws_nshop_supplier_sales: one row per supplier and supplier type.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_supplier_sales PARTITION (bdp_day = '20220630')
SELECT
    a.supplier_code                     AS supplier_id,
    c.supplier_type,
    COUNT(DISTINCT a.customer_id)       AS sales_users,
    -- The consignee zip code is used as the stand-in for the buyer's area.
    COUNT(DISTINCT a.consignee_zipcode) AS sales_users_area,
    -- An order id may repeat across detail rows, so count each order once.
    COUNT(DISTINCT a.order_id)          AS sales_orders,
    SUM(a.payment_money)                AS salaes_orders_pay,
    SUM(a.district_money)               AS salaes_orders_district,
    -- ct is declared BIGINT: store epoch seconds explicitly rather than relying on an
    -- implicit TIMESTAMP -> BIGINT conversion at insert time.
    UNIX_TIMESTAMP(CURRENT_TIMESTAMP()) AS ct
FROM dwd_nshop.dwd_nshop_orders_details a
INNER JOIN ods_nshop.dim_pub_product b
    ON a.product_code = b.product_code
-- NOTE(review): the supplier type is resolved through the product's supplier_code while the
-- output key is the order's supplier_code; this assumes the two always agree — confirm.
INNER JOIN ods_nshop.dim_pub_supplier c
    ON b.supplier_code = c.supplier_code
WHERE a.bdp_day = '20220630'
GROUP BY
    a.supplier_code,
    c.supplier_type;
广告投放用户表
-- Rebuild the 20220630 partition of dws_nshop_release_user: one row per release channel and category.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_release_user PARTITION (bdp_day = '20220630')
SELECT
    rel.release_sources,
    rel.release_category,
    COUNT(DISTINCT rel.customer_id)     AS release_users,
    -- Every release row in the group counts as one viewed product page.
    COUNT(*)                            AS release_product_page,
    -- ct is declared BIGINT: store epoch seconds explicitly rather than relying on an
    -- implicit TIMESTAMP -> BIGINT conversion at insert time.
    UNIX_TIMESTAMP(CURRENT_TIMESTAMP()) AS ct
FROM dwd_nshop.dwd_nshop_releasedatas rel
WHERE rel.bdp_day = '20220630'
GROUP BY
    rel.release_sources,
    rel.release_category;
用户营销活动表
-- Rebuild the 20220630 partition of dws_nshop_user_release: one row per user and device/network
-- profile, with the number of distinct release sources that reached the user on that day.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_user_release PARTITION (bdp_day = '20220630')
SELECT
    a.customer_id                     AS user_id,
    a.os,
    a.os_version,
    a.manufacturer,
    b.carrier,
    b.network_type,
    a.area_code,
    -- Distinct sources per user; a window count partitioned by release_sources would instead
    -- return the number of rows per source across all users.
    COUNT(DISTINCT a.release_sources) AS source_count,
    MAX(a.ct)                         AS ct  -- latest event time among the user's release rows
FROM dwd_nshop.dwd_nshop_releasedatas a
INNER JOIN (
    -- Collapse the raw action log to one row per user/carrier/network so the join does not
    -- multiply release rows by the number of logged actions.
    -- NOTE(review): if ods_nshop_01_useractlog is partitioned by bdp_day as well, add the same
    -- day filter here to avoid scanning the full history — confirm against its DDL.
    SELECT DISTINCT
        customer_id,
        carrier,
        network_type
    FROM ods_nshop.ods_nshop_01_useractlog
) b
    ON a.customer_id = b.customer_id
-- Only the day being rebuilt may go into the target partition.
WHERE a.bdp_day = '20220630'
GROUP BY
    a.customer_id,
    a.os,
    a.os_version,
    a.manufacturer,
    b.carrier,
    b.network_type,
    a.area_code;

作业:为以下两张表编写HQL实现并查询数据,分区日期(bdp_day)设置为当天的日期

中间层:当天用户启动统计表
-- MID layer: per-day user launch statistics (external Parquet table, one partition per bdp_day).
CREATE EXTERNAL TABLE IF NOT EXISTS mid_nshop.mid_nshop_actlog_launch_timesegs (
    user_id      STRING COMMENT '用户id',                      -- user id
    os           STRING COMMENT '手机系统',                    -- phone OS
    os_version   STRING COMMENT '手机系统版本',                -- phone OS version
    manufacturer STRING COMMENT '手机制造商',                  -- phone manufacturer
    carrier      STRING COMMENT '电信运营商',                  -- telecom carrier
    network_type STRING COMMENT '网络类型',                    -- network type
    area_code    STRING COMMENT '地区编码',                    -- area code
    launch_times STRING COMMENT '24bit表示24小时启动情况',     -- 24 bits describing launches across the 24 hours
    launch_count INT    COMMENT '用户每天启动次数',            -- user's launch count for the day
    ct           BIGINT COMMENT '产生时间'                     -- generation time
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/data/nshop/mid/actlog/mid_nshop_actlog_launch_timesegs/';
中间层:当天流量指标统计表
-- MID layer: per-day traffic metrics (external Parquet table, one partition per bdp_day).
CREATE EXTERNAL TABLE IF NOT EXISTS mid_nshop.mid_nshop_user_action_log (
    device_type          STRING COMMENT '设备类型',      -- device type
    os                   STRING COMMENT '手机系统',      -- phone OS
    manufacturer         STRING COMMENT '手机制造商',    -- phone manufacturer
    carrier              STRING COMMENT '电信运营商',    -- telecom carrier
    network_type         STRING COMMENT '网络类型',      -- network type
    area_code            STRING COMMENT '地区编码',      -- area code
    launch_count         INT    COMMENT '启动次数',      -- launch count
    view_count           INT    COMMENT '浏览次数',      -- view count
    search_count         INT    COMMENT '搜索次数',      -- search count
    comment_count        INT    COMMENT '关注次数',      -- follow count
    comment_target_count INT    COMMENT '关注产品次数',  -- followed-product count
    ct                   BIGINT COMMENT '产生时间'       -- generation time
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/data/nshop/mid/user/mid_nshop_user_action_log/';

标签:comment,dws,code,string,nshop,建设,电商,id
来源: https://www.cnblogs.com/atao-BigData/p/16448943.html