电商项目_dws层建设
作者:互联网
三、DWS层建设----临时聚合表的设计与实现
# 事实表中一条记录所表达的业务细节程度被称为粒度。这种粒度通常由2种类型组成:一种是主题信息所隶属的维度属性组合所表示的细节程度,另一种是此事实数据中涉及的其他主题域的维度信息(一般是关联key)。
# DWS是基于DWD数据,以其中的主题信息为维度,以其他主题数据为统计度量的数据集合表
示例:
主题为【交易主题】下的订单记录
粒度1:【order_id, order_status,payment_money, order_ctime】是订单业务相关的维度信息
粒度2:【customer_id】与用户主题相关(此外还与支付主题相关)
创建dws_nshop.dws_nshop_order_customer
大概的schema如下:
【order_id, order_status, payment_money, order_ctime】 【customer_count】
用户启动日志表(当天)
-- DWS launch-log summary table.
-- External table partitioned by bdp_day, stored as Parquet at the LOCATION below.
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_ulog_launch (
    user_id       STRING COMMENT '用户id',
    device_num    STRING COMMENT '设备号',
    device_type   STRING COMMENT '设备类型',
    os            STRING COMMENT '手机系统',
    os_version    STRING COMMENT '手机系统版本',
    manufacturer  STRING COMMENT '手机制造商',
    carrier       STRING COMMENT '电信运营商',
    network_type  STRING COMMENT '网络类型',
    area_code     STRING COMMENT '地区编码',
    launch_count  INT    COMMENT '启动次数'
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_ulog_launch/'
用户浏览日志表(当天)
-- DWS product-view log summary table.
-- External table partitioned by bdp_day, stored as Parquet at the LOCATION below.
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_ulog_view (
    user_id       STRING COMMENT '用户id',
    device_num    STRING COMMENT '设备号',
    device_type   STRING COMMENT '设备类型',
    os            STRING COMMENT '手机系统',
    os_version    STRING COMMENT '手机系统版本',
    manufacturer  STRING COMMENT '手机制造商',
    carrier       STRING COMMENT '电信运营商',
    network_type  STRING COMMENT '网络类型',
    area_code     STRING COMMENT '地区编码',
    view_count    INT    COMMENT '浏览次数'
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_ulog_view/'
用户查询日志表(当天)
-- DWS search-log summary table.
-- External table partitioned by bdp_day, stored as Parquet at the LOCATION below.
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_ulog_search (
    user_id       STRING COMMENT '用户id',
    device_num    STRING COMMENT '设备号',
    device_type   STRING COMMENT '设备类型',
    os            STRING COMMENT '手机系统',
    os_version    STRING COMMENT '手机系统版本',
    manufacturer  STRING COMMENT '手机制造商',
    carrier       STRING COMMENT '电信运营商',
    network_type  STRING COMMENT '网络类型',
    area_code     STRING COMMENT '地区编码',
    search_count  INT    COMMENT '搜索次数'
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_ulog_search/'
用户关注日志表(当天)
-- DWS follow-log summary table (the column comments describe the counts as "follow" counts).
-- External table partitioned by bdp_day, stored as Parquet at the LOCATION below.
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_ulog_comment (
    user_id               STRING COMMENT '用户id',
    device_num            STRING COMMENT '设备号',
    device_type           STRING COMMENT '设备类型',
    os                    STRING COMMENT '手机系统',
    os_version            STRING COMMENT '手机系统版本',
    manufacturer          STRING COMMENT '手机制造商',
    carrier               STRING COMMENT '电信运营商',
    network_type          STRING COMMENT '网络类型',
    area_code             STRING COMMENT '地区编码',
    -- not de-duplicated
    comment_count         INT    COMMENT '关注次数',
    -- de-duplicated
    comment_target_count  INT    COMMENT '关注产品次数',
    ct                    BIGINT COMMENT '产生时间'
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_ulog_comment/'
用户交易记录表(当天)
-- DWS per-user order summary table.
-- External table partitioned by bdp_day, stored as Parquet at the LOCATION below.
-- Monetary columns are DECIMAL(10,1), i.e. one fractional digit.
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_user_orders (
    user_id           STRING        COMMENT '用户id',
    customer_natives  STRING        COMMENT '所在区域',
    orders_count      INT           COMMENT '订单数量',
    orders_pay        DECIMAL(10,1) COMMENT '订单金额',
    orders_shipping   DECIMAL(10,1) COMMENT '订单运费金额',
    orders_district   DECIMAL(10,1) COMMENT '订单优惠金额',
    ct                BIGINT        COMMENT '产生时间'
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_user_orders/'
用户投诉订单表(当天)
-- DWS per-user complaint-order summary table.
-- External table partitioned by bdp_day, stored as Parquet at the LOCATION below.
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_user_complainant (
    user_id               STRING        COMMENT '用户id',
    area_code             STRING        COMMENT '地区编码',
    compl_orders_count    INT           COMMENT '订单数量',
    compl_orders_pay      DECIMAL(10,1) COMMENT '订单金额',
    compl_supplier_count  INT           COMMENT '商家数量',
    ct                    BIGINT        COMMENT '产生时间'
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_user_complainant/'
商家用户交互记录表(当天)
-- 4320505513101 浏览表 target_id 关联 页面布局表 4320101010101 page_code
-- 43201010101 商品信息表 product_code 关联 页面布局表 43201010101 page_target
-- 32010101 供应商表 supplier_code 关联 商品信息表 supplier_code
-- DWS supplier/user interaction summary table (views and follows per supplier).
-- External table partitioned by bdp_day, stored as Parquet at the LOCATION below.
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_supplier_user (
    supplier_id        STRING COMMENT '商家id',
    supplier_type      INT    COMMENT '供应商类型:1.自营,2.官方 3其他',
    view_count         INT    COMMENT '浏览次数',
    comment_users      INT    COMMENT '关注人数',
    comment_area_code  INT    COMMENT '关注地区数量',
    ct                 BIGINT COMMENT '产生时间'
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/supplier/dws_nshop_supplier_user/'
商家日流水表(当天)
-- DWS per-supplier daily sales summary table.
-- External table partitioned by bdp_day, stored as Parquet at the LOCATION below.
-- NOTE(review): the "salaes_" prefix on the two amount columns looks like a typo for
-- "sales_"; it is kept as-is because readers of this table may reference these names.
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_supplier_sales (
    supplier_id             STRING        COMMENT '商家id',
    supplier_type           INT           COMMENT '供应商类型:1.自营,2.官方 3其他',
    sales_users             INT           COMMENT '购物人数',
    sales_users_area        INT           COMMENT '购物地区数量',
    sales_orders            INT           COMMENT '购物订单数',
    salaes_orders_pay       DECIMAL(10,1) COMMENT '订单金额',
    salaes_orders_district  DECIMAL(10,1) COMMENT '订单优惠金额',
    ct                      BIGINT        COMMENT '产生时间'
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/supplier/dws_nshop_supplier_sales/'
广告投放用户表(当天)
-- DWS ad-release summary table, keyed by release channel and product category.
-- External table partitioned by bdp_day, stored as Parquet at the LOCATION below.
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_release_user (
    release_sources       STRING COMMENT '投放渠道',
    release_category      STRING COMMENT '投放浏览产品分类',
    release_users         INT    COMMENT '投放浏览用户数',
    release_product_page  INT    COMMENT '投放浏览产品页面数',
    ct                    BIGINT COMMENT '创建时间'
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/release/dws_nshop_release_user/'
用户营销活动表(当天)
-- DWS per-user marketing (ad-release) summary table.
-- External table partitioned by bdp_day, stored as Parquet at the LOCATION below.
CREATE EXTERNAL TABLE IF NOT EXISTS dws_nshop.dws_nshop_user_release (
    user_id       STRING COMMENT '用户id',
    os            STRING COMMENT '手机系统',
    os_version    STRING COMMENT '手机系统版本',
    manufacturer  STRING COMMENT '手机制造商',
    carrier       STRING COMMENT '电信运营商',
    network_type  STRING COMMENT '网络类型',
    area_code     STRING COMMENT '地区编码',
    source_count  INT    COMMENT '投放来源数量',
    ct            BIGINT COMMENT '产生时间'
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/shujia/bigdata17/data/nshop/dws/user/dws_nshop_user_release/'
DWS层HQL语句实现(注意去重,可使用 collect_set)
用户启动日志表
-- Load the 20220630 partition of the launch summary from the DWD launch log.
-- One output row per distinct combination of the nine dimension columns;
-- launch_count is the number of source rows with a non-null device_num in that group.
-- Grouping by the full dimension set keeps launch_count additive: a device that
-- reports more than one attribute combination in a day contributes each launch
-- to exactly one output row, so summing launch_count never double-counts.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_ulog_launch PARTITION (bdp_day = '20220630')
SELECT
    user_id,
    device_num,
    device_type,
    os,
    os_version,
    manufacturer,
    carrier,
    network_type,
    area_code,
    count(device_num)
FROM dwd_nshop.dwd_nshop_actlog_launch
WHERE bdp_day = '20220630'
GROUP BY
    user_id,
    device_num,
    device_type,
    os,
    os_version,
    manufacturer,
    carrier,
    network_type,
    area_code;
用户浏览日志表
-- Load the 20220630 partition of the product-view summary: one row per device_num.
-- Each descriptive column is element [0] of the set of distinct values seen for
-- that device (which element comes first is not specified by this query).
-- The last column counts the group's rows that have a non-null device_num.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_ulog_view PARTITION (bdp_day = '20220630')
SELECT
    collect_set(user_id)[0],
    collect_set(device_num)[0],
    collect_set(device_type)[0],
    collect_set(os)[0],
    collect_set(os_version)[0],
    collect_set(manufacturer)[0],
    collect_set(carrier)[0],
    collect_set(network_type)[0],
    collect_set(area_code)[0],
    count(device_num)
FROM dwd_nshop.dwd_nshop_actlog_pdtview
WHERE bdp_day = '20220630'
GROUP BY device_num
用户查询日志表
-- Load the 20220630 partition of the search summary: one row per device_num.
-- Each descriptive column is element [0] of the set of distinct values seen for
-- that device (which element comes first is not specified by this query).
-- The last column counts the group's rows that have a non-null device_num.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_ulog_search PARTITION (bdp_day = '20220630')
SELECT
    collect_set(user_id)[0],
    collect_set(device_num)[0],
    collect_set(device_type)[0],
    collect_set(os)[0],
    collect_set(os_version)[0],
    collect_set(manufacturer)[0],
    collect_set(carrier)[0],
    collect_set(network_type)[0],
    collect_set(area_code)[0],
    count(device_num)
FROM dwd_nshop.dwd_nshop_actlog_pdtsearch
WHERE bdp_day = '20220630'
GROUP BY device_num
用户关注日志表
-- Load the 20220630 partition of the follow summary: one row per device_num.
-- Each descriptive column (and ct) is element [0] of the set of distinct values
-- seen for that device (which element comes first is not specified by this query).
-- count(target_id) is the raw number of non-null targets; count(DISTINCT target_id)
-- is the number of different targets.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_ulog_comment PARTITION (bdp_day = '20220630')
SELECT
    collect_set(user_id)[0],
    collect_set(device_num)[0],
    collect_set(device_type)[0],
    collect_set(os)[0],
    collect_set(os_version)[0],
    collect_set(manufacturer)[0],
    collect_set(carrier)[0],
    collect_set(network_type)[0],
    collect_set(area_code)[0],
    count(target_id),
    count(DISTINCT target_id),
    collect_set(ct)[0]
FROM dwd_nshop.dwd_actlog_product_comment
WHERE bdp_day = '20220630'
GROUP BY device_num
用户交易记录表
-- Load the 20220630 partition of the per-user order summary.
-- order_rows: the day's order-detail rows enriched with the customer's region.
-- The partition filter is qualified with the order-detail alias so it cannot be
-- confused with a same-named column on the customer table.
WITH order_rows AS (
    SELECT
        a.customer_id,
        a.order_id,
        a.district_money,
        a.shipping_money,
        b.customer_natives,
        a.payment_money
    FROM dwd_nshop.dwd_nshop_orders_details a
    INNER JOIN ods_nshop.ods_02_customer b
        ON a.customer_id = b.customer_id
    WHERE a.bdp_day = '20220630'
)
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_user_orders PARTITION (bdp_day = '20220630')
-- One row per (customer_id, customer_natives).
-- Orders are counted with DISTINCT because the detail table also carries
-- product-level columns, so one order may span several rows.
-- NOTE(review): the three sums add the amounts of every detail row; if those
-- amounts are order-level values repeated per row they would be overstated — confirm.
-- NOTE(review): current_timestamp() is a TIMESTAMP while the target ct column is
-- BIGINT; this relies on Hive converting the value on insert — confirm the unit.
SELECT
    customer_id,
    customer_natives,
    count(DISTINCT order_id),
    sum(payment_money),
    sum(shipping_money),
    sum(district_money),
    current_timestamp()
FROM order_rows
GROUP BY
    customer_id,
    customer_natives;
用户投诉订单表
-- Load the 20220630 partition of the per-user complaint-order summary.
-- complaint_rows: the day's order-detail rows with order_status = 6, enriched with
-- the customer's region.
-- NOTE(review): status 6 is presumably the "complaint" status given the target
-- table; the code list is not visible here — confirm.
WITH complaint_rows AS (
    SELECT
        a.customer_id,
        a.order_id,
        a.supplier_code,
        b.customer_natives,
        a.payment_money
    FROM dwd_nshop.dwd_nshop_orders_details a
    INNER JOIN ods_nshop.ods_02_customer b
        ON a.customer_id = b.customer_id
    WHERE order_status = 6
      AND a.bdp_day = '20220630'
)
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_user_complainant PARTITION (bdp_day = '20220630')
-- One row per (customer_id, customer_natives).
-- Both counts use DISTINCT: the order count must not grow with the number of
-- detail rows per order, and the supplier count must be the number of different
-- suppliers rather than the number of rows.
-- NOTE(review): current_timestamp() is a TIMESTAMP while the target ct column is
-- BIGINT; this relies on Hive converting the value on insert — confirm the unit.
SELECT
    customer_id,
    customer_natives,
    count(DISTINCT order_id),
    sum(payment_money),
    count(DISTINCT supplier_code),
    current_timestamp()
FROM complaint_rows
GROUP BY
    customer_id,
    customer_natives;
商家用户交互记录表
-- Load the 20220630 partition of the supplier/user interaction summary.
-- Join path used by both CTEs (numbers are the sample values from the original notes):
--   a: event log          a.target_id   (4320505513101) -> b.page_code    (4320101010101)
--   b: page-layout dim    b.page_target (43201010101)   -> c.product_code (43201010101)
--   c: product dim        c.supplier_code (32010101)    -> d.supplier_code
--   d: supplier dim
-- supplier_views: number of product-view events per supplier.
WITH supplier_views AS (
    SELECT
        c.supplier_code,
        d.supplier_type,
        count(*) AS view_count
    FROM dwd_nshop.dwd_nshop_actlog_pdtview a
    INNER JOIN ods_nshop.dim_pub_page b
        ON a.target_id = b.page_code
    INNER JOIN ods_nshop.dim_pub_product c
        ON b.page_target = c.product_code
    INNER JOIN ods_nshop.dim_pub_supplier d
        ON c.supplier_code = d.supplier_code
    WHERE a.bdp_day = '20220630'
    GROUP BY
        c.supplier_code,
        d.supplier_type
),
-- supplier_follows: distinct following users and distinct areas per supplier.
supplier_follows AS (
    SELECT
        c.supplier_code,
        d.supplier_type,
        count(DISTINCT a.user_id)   AS comment_users,
        count(DISTINCT a.area_code) AS comment_area_code
    FROM dwd_nshop.dwd_actlog_product_comment a
    INNER JOIN ods_nshop.dim_pub_page b
        ON a.target_id = b.page_code
    INNER JOIN ods_nshop.dim_pub_product c
        ON b.page_target = c.product_code
    INNER JOIN ods_nshop.dim_pub_supplier d
        ON c.supplier_code = d.supplier_code
    WHERE a.bdp_day = '20220630'
    GROUP BY
        c.supplier_code,
        d.supplier_type
)
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_supplier_user PARTITION (bdp_day = '20220630')
-- FULL OUTER JOIN keeps suppliers that appear on only one side (views without
-- follows, or follows without views); the missing side's counts become 0.
SELECT
    coalesce(v.supplier_code, f.supplier_code),
    coalesce(v.supplier_type, f.supplier_type),
    coalesce(v.view_count, 0),
    coalesce(f.comment_users, 0),
    coalesce(f.comment_area_code, 0),
    current_timestamp()
FROM supplier_views v
FULL OUTER JOIN supplier_follows f
    ON v.supplier_code = f.supplier_code
   AND v.supplier_type = f.supplier_type;
商家日流水表
-- Load the 20220630 partition of the per-supplier daily sales summary.
-- One row per (a.supplier_code, c.supplier_type); supplier_type is looked up
-- through the product dimension (a.product_code -> b -> c).
-- Orders are counted with DISTINCT because each source row carries a product_code,
-- so one order may span several rows.
-- NOTE(review): the two sums add the amounts of every detail row; if those amounts
-- are order-level values repeated per row they would be overstated — confirm.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_supplier_sales PARTITION (bdp_day = '20220630')
SELECT
    a.supplier_code,
    c.supplier_type,
    count(DISTINCT a.customer_id),
    count(DISTINCT a.consignee_zipcode),
    count(DISTINCT a.order_id),
    sum(a.payment_money),
    sum(a.district_money),
    current_timestamp()
FROM dwd_nshop.dwd_nshop_orders_details a
INNER JOIN ods_nshop.dim_pub_product b
    ON a.product_code = b.product_code
INNER JOIN ods_nshop.dim_pub_supplier c
    ON b.supplier_code = c.supplier_code
WHERE a.bdp_day = '20220630'
GROUP BY
    a.supplier_code,
    c.supplier_type;
广告投放用户表
-- Load the 20220630 partition of the ad-release summary:
-- one row per (release_sources, release_category) with the number of distinct
-- customers and the number of source rows in the group.
-- NOTE(review): the target column for count(*) is described as a page count;
-- count(*) counts rows, not distinct pages — confirm this is intended.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_release_user PARTITION (bdp_day = '20220630')
SELECT
    release_sources,
    release_category,
    count(DISTINCT customer_id),
    count(*),
    current_timestamp()
FROM dwd_nshop.dwd_nshop_releasedatas
WHERE bdp_day = '20220630'
GROUP BY
    release_sources,
    release_category
用户营销活动表
-- Load the 20220630 partition of the per-user marketing summary.
-- One row per user and device/network attribute combination, with:
--   source_count = number of different release sources that reached the user
--   ct           = latest ct among the user's release rows in the group
-- The release data is restricted to the partition being written, and the action
-- log is reduced to distinct (customer_id, carrier, network_type) before joining
-- so that repeated log rows cannot multiply the release rows.
-- NOTE(review): the action-log lookup is not restricted to a partition because
-- that table's partitioning is not visible here — add a filter if it has one.
INSERT OVERWRITE TABLE dws_nshop.dws_nshop_user_release PARTITION (bdp_day = '20220630')
SELECT
    a.customer_id,
    a.os,
    a.os_version,
    a.manufacturer,
    b.carrier,
    b.network_type,
    a.area_code,
    count(DISTINCT a.release_sources),
    max(a.ct)
FROM dwd_nshop.dwd_nshop_releasedatas a
INNER JOIN (
    SELECT DISTINCT
        customer_id,
        carrier,
        network_type
    FROM ods_nshop.ods_nshop_01_useractlog
) b
    ON a.customer_id = b.customer_id
WHERE a.bdp_day = '20220630'
GROUP BY
    a.customer_id,
    a.os,
    a.os_version,
    a.manufacturer,
    b.carrier,
    b.network_type,
    a.area_code;
作业:为以下两张表编写HQL实现并查询数据,分区时间设置为当天日期
中间层:当天用户启动统计表
-- Mid-layer per-user daily launch statistics table.
-- launch_times is a string described as a 24-bit map of the hours with a launch.
-- External table partitioned by bdp_day, stored as Parquet at the LOCATION below.
CREATE EXTERNAL TABLE IF NOT EXISTS mid_nshop.mid_nshop_actlog_launch_timesegs (
    user_id       STRING COMMENT '用户id',
    os            STRING COMMENT '手机系统',
    os_version    STRING COMMENT '手机系统版本',
    manufacturer  STRING COMMENT '手机制造商',
    carrier       STRING COMMENT '电信运营商',
    network_type  STRING COMMENT '网络类型',
    area_code     STRING COMMENT '地区编码',
    launch_times  STRING COMMENT '24bit表示24小时启动情况',
    launch_count  INT    COMMENT '用户每天启动次数',
    ct            BIGINT COMMENT '产生时间'
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/data/nshop/mid/actlog/mid_nshop_actlog_launch_timesegs/'
中间层:当天流量指标统计表
-- Mid-layer daily traffic-metrics table: launch, view, search and follow counts
-- by device type, OS, manufacturer, carrier, network type and area.
-- External table partitioned by bdp_day, stored as Parquet at the LOCATION below.
CREATE EXTERNAL TABLE IF NOT EXISTS mid_nshop.mid_nshop_user_action_log (
    device_type           STRING COMMENT '设备类型',
    os                    STRING COMMENT '手机系统',
    manufacturer          STRING COMMENT '手机制造商',
    carrier               STRING COMMENT '电信运营商',
    network_type          STRING COMMENT '网络类型',
    area_code             STRING COMMENT '地区编码',
    launch_count          INT    COMMENT '启动次数',
    view_count            INT    COMMENT '浏览次数',
    search_count          INT    COMMENT '搜索次数',
    comment_count         INT    COMMENT '关注次数',
    comment_target_count  INT    COMMENT '关注产品次数',
    ct                    BIGINT COMMENT '产生时间'
)
PARTITIONED BY (bdp_day STRING)
STORED AS PARQUET
LOCATION '/data/nshop/mid/user/mid_nshop_user_action_log/'
标签:comment,dws,code,string,nshop,建设,电商,id 来源: https://www.cnblogs.com/atao-BigData/p/16448943.html