其他分享
首页 > 其他分享> > 电商项目_ods层建设

电商项目_ods层建设

作者:互联网

ODS贴源层

1 ODS层存放您从业务系统获取的最原始的数据,是其他上层数据的源数据。
2 本项目中使用的ODS层数据主要包括: 
	用户基本信息、商品分类信息、商品信息、店铺信息、
	订单数据、订单支付信息、活动信息、行为日志信息           
业务数据库表介绍
中文含义 MYSQL表名 HIVE表名
用户基本信息表 nshop.customer ods_nshop.ods_nshop_02_customer
收货地址管理表 nshop.customer_consignee ods_nshop.ods_nshop_02_customer_consignee
订单表 nshop.orders ods_nshop.ods_nshop_02_orders
订单详情表 nshop.order_detail ods_nshop.ods_nshop_02_order_detail
订单支付记录表 nshop.orders_pay_records ods_nshop.ods_nshop_02_orders_pay_records
用户相关
create table if not exists nshop.customer (
    customer_id varchar(20) NOT NULL COMMENT '用户ID',
    customer_login varchar(20) NOT NULL COMMENT '用户登录名',
	customer_nickname varchar(10) NOT NULL COMMENT '用户名(昵称)',
    customer_name varchar(10) NOT NULL COMMENT '用户真实姓名',
	customer_pass varchar(8) NOT NULL COMMENT '用户密码',
	customer_mobile varchar(20) NOT NULL COMMENT '用户手机',
    customer_idcard varchar(20) NOT NULL COMMENT '身份证',
    customer_gender TINYINT NOT NULL COMMENT '性别:1男 0女',
    customer_birthday varchar(10) NOT NULL COMMENT '出生年月',
    customer_age TINYINT NOT NULL COMMENT '年龄',
    customer_age_range varchar(2) NOT NULL COMMENT '年龄段',
	customer_email varchar(50)  NULL COMMENT '用户邮箱',
    customer_natives varchar(10)  NULL COMMENT '所在地区',
	customer_ctime bigint NULL COMMENT '创建时间',
	customer_utime bigint NULL COMMENT '修改时间',
    customer_device_num varchar(20) NOT NULL COMMENT '用户手机设备号',
  	PRIMARY KEY (`customer_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
create table if not exists nshop.customer_consignee (
    consignee_id varchar(20) NOT NULL COMMENT '收货地址ID',
	customer_id varchar(20) NOT NULL COMMENT '用户ID',
	consignee_name varchar(10) NOT NULL COMMENT '收货人',
	consignee_mobile varchar(15) NOT NULL COMMENT '收货人电话',
	consignee_zipcode varchar(10) NOT NULL COMMENT '收货人地区',
	consignee_addr varchar(200) NOT NULL COMMENT '收货人详细地址',
	consignee_tag varchar(10) NOT NULL COMMENT '标签:1家 2公司 3学校',
	ctime bigint NULL COMMENT '创建时间',
  	PRIMARY KEY (`consignee_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `customer_attention` (
  `customer_id` varchar(20) NOT NULL,
  `attention_id` varchar(20) NOT NULL,
  `attention_type` tinyint(4) NOT NULL,
  `attention_status` int(11) NOT NULL DEFAULT '1',
  `attention_ctime` bigint(20) NOT NULL,
  KEY `customer_id` (`customer_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
商品相关
create table if not exists nshop.category(
  category_code VARCHAR(10) NOT NULL COMMENT '分类编码',
  category_name VARCHAR(10) NOT NULL COMMENT '分类名称',
  category_parent_id VARCHAR(10) NULL COMMENT '父分类ID',
  category_status TINYINT NOT NULL DEFAULT 1 COMMENT '分类状态:0禁止,1启用',
  category_utime bigint NULL COMMENT  '最后修改时间',
  PRIMARY KEY (`category_code`)
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
create table if not exists nshop.supplier(
  supplier_code varchar(10) NOT NULL COMMENT '供应商编码',
  supplier_name varchar(30) NOT NULL COMMENT '供应商名称',
  supplier_type TINYINT NOT NULL COMMENT '供应商类型:1.自营,2.官方 3其他',
  supplier_status TINYINT NOT NULL DEFAULT 1 COMMENT '状态:0禁止,1启用',
  supplier_utime bigint NULL COMMENT  '最后修改时间',
  PRIMARY KEY (`supplier_code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
create table if not exists nshop.product(
  product_code VARCHAR(20) NOT NULL COMMENT '商品ID(分类编码+供应商编码+编号)',
  product_name VARCHAR(20) NOT NULL COMMENT '商品名称',
  product_remark TEXT NOT NULL COMMENT '商品描述',
  category_code VARCHAR(10) NOT NULL COMMENT '分类ID',
  supplier_code varchar(10) NOT NULL COMMENT '商品的供应商编码', 
  product_price DECIMAL(5,1) NOT NULL COMMENT '商品销售价格',
  product_weighing_cost DECIMAL(2,1) NOT NULL COMMENT '商品加权价格',
  product_publish_status TINYINT NOT NULL DEFAULT 0 COMMENT '上下架状态:0下架 1上架',
  product_audit_status TINYINT NOT NULL DEFAULT 0 COMMENT '审核状态:0未审核,1已审核',
  product_bar_code VARCHAR(50) NOT NULL COMMENT '国条码',
  product_weight FLOAT NULL COMMENT '商品重量',
  product_length FLOAT NULL COMMENT '商品长度',
  product_height FLOAT NULL COMMENT '商品高度',
  product_width FLOAT NULL COMMENT '商品宽度',
  product_colors SMALLINT UNSIGNED NOT NULL COMMENT '0:白|1:赤|2:红|3:黄|4:绿|5:青|6:蓝|7:紫|8:黑|9:彩色',
  product_date varchar(10) NOT NULL COMMENT '生产日期',
  product_shelf_life INT NOT NULL COMMENT '商品有效期',
  product_ctime bigint NULL COMMENT '商品录入时间',
  product_utime bigint NULL COMMENT '最后修改时间',
  PRIMARY KEY (`product_code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
订单相关
create table if not exists nshop.orders (
    order_id VARCHAR(50) NOT NULL COMMENT '订单ID(时间+商品ID+4位随机)',
	customer_id varchar(20) NOT NULL COMMENT '下单用户ID',
	order_status TINYINT NOT NULL COMMENT '订单状态:1已提交; 2待支付 3出货中 4已发货 5已收货(完成) 6投诉 7退货',
    customer_ip varchar(20) NULL COMMENT '下单用户IP',
	user_longitude varchar(20) NULL COMMENT '用户地理:经度',
	user_latitude varchar(20) NULL COMMENT '用户地理:纬度',
	user_areacode varchar(10) NULL COMMENT '用户所在地区',
	consignee_name varchar(10) NOT NULL COMMENT '收货人',
	consignee_mobile varchar(15) NOT NULL COMMENT '收货人电话',
	consignee_zipcode varchar(10) NOT NULL COMMENT '收货人地址',
	pay_type varchar(5) NULL COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支付(货到付款) 20 ',
	pay_code varchar(30) NULL COMMENT '支付对应唯一标识,如微信号、支付宝号',
	pay_nettype varchar(1) NOT NULL COMMENT '支付网络方式:0 wifi | 1 4g | 2 3g |3 线下支付',
    district_money DECIMAL(8,1) NOT NULL DEFAULT 0.0 COMMENT '优惠金额',
  	shipping_money DECIMAL(8,1) NOT NULL DEFAULT 0.0 COMMENT '运费金额',
  	payment_money DECIMAL(10,1) NOT NULL DEFAULT 0.0 COMMENT '支付金额',
	order_ctime bigint NULL COMMENT '创建时间',
    shipping_time bigint NULL COMMENT '发货时间',
  	receive_time bigint NULL COMMENT '收货时间',
  	PRIMARY KEY (`order_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
create table if not exists nshop.order_detail(
  order_detail_id varchar(20) NOT NULL COMMENT '订单详情表ID',
  order_id VARCHAR(50) NOT NULL COMMENT '订单表ID',
  product_id varchar(20) NOT NULL COMMENT '订单商品ID',
  product_name VARCHAR(50) NOT NULL COMMENT '商品名称',
  product_remark VARCHAR(100) NOT NULL COMMENT '商品描述',
  product_cnt INT NOT NULL DEFAULT 1 COMMENT '购买商品数量',
  product_price DECIMAL(5,1) NOT NULL COMMENT '购买商品单价',
  weighing_cost DECIMAL(2,1) NOT NULL COMMENT '商品加权价格',
  district_money DECIMAL(4,1) NOT NULL DEFAULT 0.0 COMMENT '优惠金额',
  is_activity NOT NULL DEFAULT 0 COMMENT '1:参加活动|0:没有参加活动',
  order_detail_ctime bigint NULL COMMENT '下单时间',
  PRIMARY KEY (`order_detail_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

create table if not exists nshop.orders_pay_records (
    pay_id varchar(20) NOT NULL COMMENT '支付记录ID',
    order_id varchar(30) NOT NULL COMMENT '订单ID',
	customer_id varchar(20) NOT NULL COMMENT '用户ID',
	pay_status varchar(5) NOT NULL COMMENT '支付状态:0 支付失败| 1 支付成功',
	pay_type varchar(5) NULL COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支付(货到付款) 20 ',
	pay_code varchar(30) NULL COMMENT '支付对应唯一标识,如微信号、支付宝号',
	pay_nettype varchar(1) NOT NULL COMMENT '支付网络方式:1 wifi | 2 4g | 3 3g |4 线下支付',
	pay_amount double(10,1) NOT NULL COMMENT '支付金额',
	pay_ctime bigint NULL COMMENT '创建时间',
  	PRIMARY KEY (`pay_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
时间维度
create table if not exists nshop.date_dim (
	date_day varchar(30) NOT NULL COMMENT '日期:yyyyMMdd 如20190520',
	date_day_desc varchar(30) NOT NULL COMMENT '日期格式:yyyy年MM月dd日 如2019年05月20日',
	date_day_month varchar(30) NOT NULL COMMENT '日期:20 本月第几天 如2019年5月20日为5月第20天',
	date_day_year varchar(30) NOT NULL COMMENT '日期:139 本年第几天 如2019年5月20日为139天',
	date_day_en varchar(30) NOT NULL COMMENT '日期:monday 星期几',
	date_week varchar(30) NOT NULL COMMENT '周:本月第几周 4 如 20190520为本月第4周',
	date_week_desc varchar(30) NOT NULL COMMENT '周:本月第几周 如 20190504',
	date_month varchar(30) NOT NULL COMMENT '月: 5 如 20190520为本年5月',
	date_month_en varchar(30) NOT NULL COMMENT '月: may ',
	date_month_desc varchar(30) NOT NULL COMMENT '月:如 201905',
	date_quarter varchar(30) NOT NULL COMMENT '季度:2',
	date_quarter_en varchar(30) NOT NULL COMMENT '季度:Q2',
	date_quarter_desc varchar(30) NOT NULL COMMENT '季度:2019Q2 如 20190520为2019Q2',
	date_year varchar(30) NOT NULL COMMENT '年:2019',
  	PRIMARY KEY (`date_day`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
地区列表
create table if not exists nshop.area_dim (
	region_code varchar(30) NOT NULL COMMENT '地区编码 如110105  | 130406 ',
	region_code_desc varchar(30) NOT NULL COMMENT '地区编码 如朝阳区 | 峰峰矿区',
	region_city varchar(30) NOT NULL COMMENT '地区编码 如1101 北京市朝阳区 | 1304 邯郸',
	region_city_desc varchar(30) NOT NULL COMMENT '地区编码 如1101 | 1304 邯郸市',
	region_province varchar(30) NOT NULL COMMENT '地区编码 如11 北京市 | 13 河北省',
	region_province_desc varchar(30) NOT NULL COMMENT '地区编码 如 北京市 | 河北',
  	PRIMARY KEY (`region_code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
通用字典表
create table if not exists nshop.comm_dim(
  dim_type VARCHAR(10) not NULL COMMENT '字典类型',
  dim_code VARCHAR(30) not NULL COMMENT '字典编码',
  dim_remark varchar(30) NULL COMMENT '字段描述',
  dim_ext1 varchar(30) NULL COMMENT '扩展1',
  dim_ext2 varchar(30) NULL COMMENT '扩展2',
  dim_ext3 varchar(30) NULL COMMENT '扩展3',
  dim_ext4 varchar(30) NULL COMMENT '扩展4',
  ct timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  PRIMARY KEY (`dim_type`,`dim_code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
页面布局表
create table if not exists nshop.page_dim (
	page_code varchar(30) NOT NULL COMMENT '页面编码',
	page_remark varchar(30) NULL COMMENT '页面描述',
	page_type varchar(5) NOT NULL COMMENT '页面类型(1:导航页 2:分类页 3:店铺页 4:产品页)',
    page_target varchar(30) NULL COMMENT '页面对应实体编号(如产品、店铺)',
	page_ctime bigint NOT NULL COMMENT '创建时间',
  	PRIMARY KEY (`page_code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

同步业务数据到ODS
数仓ods层和业务数据表的对应关系
中文含义 MYSQL表名 HIVE表名
用户基本信息表 nshop.customer ods_nshop.ods_02_customer
用户关注表 nshop_customer_attention ods_nshop.ods_02_customer_attention
用户收货地址管理表 nshop.customer_consignee ods_nshop.ods_02_customer_consignee
订单表 nshop.orders ods_nshop.ods_02_orders
订单详情表 nshop.order_detail ods_nshop.ods_02_order_detail
订单支付记录表 nshop.orders_pay_records ods_nshop.ods_02_orders_pay_records
商品分类表 nshop.category ods_nshop.dim_pub_category
商品信息表 nshop.product ods_nshop.dim_pub_product
供应商信息表 nshop.supplier ods_nshop.dim_pub_supplier
页面布局表 nshop.page_dim ods_nshop.dim_pub_page
地域字典表 nshop.area_dim ods_nshop.dim_pub_area
通用字典表 nshop.comm_dim ods_nshop.dim_pub_comm
时间字典表 nshop.date_dim ods_nshop.dim_pub_date
数仓ods层库表建设

创建数据库

create database if not exists ods_nshop;
use ods_nshop;

用户基本信息表

create external table if not exists ods_nshop.ods_02_customer (
 customer_id string COMMENT '用户ID',
 customer_login string COMMENT '用户登录名',
 customer_nickname string COMMENT '用户名(昵称)',
 customer_name string COMMENT '用户真实姓名',
 customer_pass string COMMENT '用户密码',
 customer_mobile string COMMENT '用户手机',
 customer_idcard string COMMENT '身份证',
 customer_gender TINYINT COMMENT '性别:1男 0女',
 customer_birthday string COMMENT '出生年月',
 customer_age TINYINT COMMENT '年龄',
 customer_age_range string  COMMENT '年龄段',
 customer_email string COMMENT '用户邮箱',
 customer_natives string COMMENT '所在地区',
 customer_ctime bigint COMMENT '创建时间',
 customer_utime bigint COMMENT '修改时间',
 customer_device_num string COMMENT '用户手机设备号'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_customer/'

用户关注表

create external table if not exists ods_nshop.ods_02_customer_attention (
  customer_id string COMMENT '用户ID',
  attention_id string COMMENT '关注对象ID',
  attention_type TINYINT COMMENT '关注类型:3店铺 4商品',
  attention_status bigint  COMMENT '关注状态:1关注 0取消',
  attention_ctime bigint COMMENT '生成时间'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_customer_attention/'

收货地址管理表

create external table if not exists ods_nshop.ods_02_customer_consignee (
 consignee_id string COMMENT '收货地址ID',
 customer_id string COMMENT '用户ID',
 consignee_name string COMMENT '收货人',
 consignee_mobile string COMMENT '收货人电话',
 consignee_zipcode string COMMENT '收货人地区',
 consignee_addr string COMMENT '收货人详细地址',
 consignee_tag string COMMENT '标签:1家 2公司 3学校',
 ctime bigint COMMENT '创建时间'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_customer_consignee/'

商品分类表

create external table if not exists ods_nshop.dim_pub_category (
 category_code string COMMENT '分类编码',
 category_name string COMMENT '分类名称',
 category_parent_id string COMMENT '父分类ID',
 category_status TINYINT  COMMENT '分类状态:0禁止,1启用',
 category_utime bigint COMMENT  '最后修改时间'
)
location '/shujia/bigdata17/data/nshop/ods/dim_pub_category/'

供应商信息表

create external table if not exists  ods_nshop.dim_pub_supplier(
 supplier_code string COMMENT '供应商编码',
 supplier_name string COMMENT '供应商名称',
 supplier_type TINYINT  COMMENT '供应商类型:1.自营,2.官方 3其他',
 supplier_status TINYINT  COMMENT '状态:0禁止,1启用',
 supplier_utime bigint  COMMENT  '最后修改时间'
)
location '/shujia/bigdata17/data/nshop/ods/dim_pub_supplier/'

订单表

create external table if not exists ods_nshop.ods_02_orders (
 order_id string COMMENT '订单ID(时间+商品ID+4位随机)',
 customer_id string COMMENT '下单用户ID',
 order_status TINYINT COMMENT '订单状态',
 customer_ip string COMMENT '下单用户IP',
 user_longitude string COMMENT ':经度',
 user_latitude string COMMENT '用户地理:纬度',
 user_areacode string COMMENT '用户所在地区',
 consignee_name string COMMENT '收货人',
 consignee_mobile string COMMENT '收货人电话',
 consignee_zipcode string COMMENT '收货人地址',
 pay_type string COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支付(货到付款) 20 ',
 pay_code string COMMENT '支付对应唯一标识,如微信号、支付宝号',
 pay_nettype string COMMENT '支付网络方式:0 wifi | 1 4g | 2 3g |3 线下支付',
 district_money DECIMAL(8,1) COMMENT '优惠金额',
 shipping_money DECIMAL(8,1)  COMMENT '运费金额',
 payment_money DECIMAL(10,1) COMMENT '支付金额',
 order_ctime bigint  COMMENT '创建时间',
 shipping_time bigint  COMMENT '发货时间',
 receive_time bigint  COMMENT '收货时间'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_orders/'

订单详情表

create external table if not exists ods_nshop.ods_02_order_detail(
 order_detail_id string COMMENT '订单详情表ID',
 order_id string COMMENT '订单表ID',
 product_id string COMMENT '订单商品ID',
 product_name string COMMENT '商品名称',
 product_remark string COMMENT '商品描述',
 product_cnt INT  COMMENT '购买商品数量',
 product_price DECIMAL(5,1)  COMMENT '购买商品单价',
 weighing_cost DECIMAL(2,1)  COMMENT '商品加权价格',
 district_money DECIMAL(4,1)  COMMENT '优惠金额',
 is_activity bigint COMMENT '1:参加活动|0:没有参加活动',
 order_detail_ctime bigint COMMENT '下单时间'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_order_detail/'

页面布局

create external table if not exists ods_nshop.dim_pub_page (
 page_code string COMMENT '页面编码',
 page_remark string COMMENT '页面描述',
 page_type string COMMENT '页面类型(1:导航页 2:分类页 3:店铺页 4:产品页)',
 page_target string COMMENT '页面对应实体编号(如产品、店铺)',
 page_ctime bigint COMMENT '创建时间'
)
location '/shujia/bigdata17/data/nshop/ods/dim_pub_page/'

地域表

create external table if not exists ods_nshop.dim_pub_area (
 region_code string COMMENT '地区编码 如110105  | 130406 ',
 region_code_desc string COMMENT '地区编码 如朝阳区 | 峰峰矿区',
 region_city string COMMENT '地区编码 如1101 北京市朝阳区 | 1304 邯郸',
 region_city_desc string COMMENT '地区编码 如1101 | 1304 邯郸市',
 region_province string COMMENT '地区编码 如11 北京市 | 13 河北省',
 region_province_desc string COMMENT '地区编码 如 北京市 | 河北'
)
location '/shujia/bigdata17/data/nshop/ods/dim_pub_area/'

订单支付记录表

create external table if not exists ods_nshop.ods_02_orders_pay_records (
 pay_id string COMMENT '支付记录ID',
 order_id string COMMENT '订单ID',
 customer_id string COMMENT '用户ID',
 pay_status string COMMENT '支付状态:0 支付失败| 1 支付成功',
 pay_type string COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支付(货到付款) 20 ',
 pay_code string COMMENT '支付对应唯一标识,如微信号、支付宝号',
 pay_nettype string COMMENT '支付网络方式:1 wifi | 2 4g | 3 3g |4 线下支付',
 pay_amount double COMMENT '支付金额',
 pay_ctime bigint  COMMENT '创建时间'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_orders_pay_records/'

商品信息表

create external table if not exists ods_nshop.dim_pub_product(
  product_code string COMMENT '商品ID(分类编码+供应商编码+编号)',
  product_name string COMMENT '商品名称',
  product_remark string  COMMENT '商品描述',
  category_code string COMMENT '分类ID',
  supplier_code string COMMENT '商品的供应商编码', 
  product_price DECIMAL COMMENT '商品销售价格',
  product_weighing_cost DECIMAL(2,1)  COMMENT '商品加权价格',
  product_publish_status TINYINT  COMMENT '上下架状态:0下架 1上架',
  product_audit_status TINYINT  COMMENT '审核状态:0未审核,1已审核',
  product_bar_code string COMMENT '国条码',
  product_weight FLOAT  COMMENT '商品重量',
  product_length FLOAT  COMMENT '商品长度',
  product_height FLOAT  COMMENT '商品高度',
  product_width FLOAT  COMMENT '商品宽度',
  product_colors SMALLINT  COMMENT '0:白|1:赤|2:红|3:黄|4:绿|5:青|6:蓝|7:紫|8:黑|9:彩色',
  product_date string COMMENT '生产日期',
  product_shelf_life bigint  COMMENT '商品有效期',
  product_ctime bigint  COMMENT '商品录入时间',
  product_utime bigint  COMMENT '最后修改时间'
)
location '/shujia/bigdata17/data/nshop/ods/dim_pub_product/'

日期维度表

create external table if not exists ods_nshop.dim_pub_date (
 date_day string COMMENT '日期:yyyyMMdd 如20190520',
 date_day_desc string COMMENT '日期格式:yyyy年MM月dd日 如2019年05月20日',
 date_day_month string COMMENT '日期:20 本月第几天 如2019年5月20日为5月第20天',
 date_day_year string COMMENT '日期:139 本年第几天 如2019年5月20日为139天',
 date_day_en string COMMENT '日期:monday 星期几',
 date_week string COMMENT '周:本月第几周 4 如 20190520为本月第4周',
 date_week_desc string COMMENT '周:本月第几周 如 20190504',
 date_month string COMMENT '月: 5 如 20190520为本年5月',
 date_month_en string COMMENT '月: may ',
 date_month_desc string COMMENT '月:如 201905',
 date_quarter string COMMENT '季度:2',
 date_quarter_en string COMMENT '季度:Q2',
 date_quarter_desc string COMMENT '季度:2019Q2 如 20190520为2019Q2',
 date_year string COMMENT '年:2019'
)
location '/shujia/bigdata17/data/nshop/ods/dim_pub_date/'

Sqoop导入脚本
create external table if not exists ods_nshop.ods_02_customer (
 customer_id string COMMENT '用户ID',
 customer_login string COMMENT '用户登录名',
 customer_nickname string COMMENT '用户名(昵称)',
 customer_name string COMMENT '用户真实姓名',
 customer_pass string COMMENT '用户密码',
 customer_mobile string COMMENT '用户手机',
 customer_idcard string COMMENT '身份证',
 customer_gender TINYINT COMMENT '性别:1男 0女',
 customer_birthday string COMMENT '出生年月',
 customer_age TINYINT COMMENT '年龄',
 customer_age_range string  COMMENT '年龄段',
 customer_email string COMMENT '用户邮箱',
 customer_natives string COMMENT '所在地区',
 customer_ctime bigint COMMENT '创建时间',
 customer_utime bigint COMMENT '修改时间',
 customer_device_num string COMMENT '用户手机设备号'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_customer/'

--这两个同步通过命令行执行,其他的通过脚本执行 

sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--query "select customer_id,customer_login,customer_nickname,customer_name,customer_pass,customer_mobile,customer_idcard,customer_gender,customer_birthday,customer_age,customer_age_range,customer_email,customer_natives,customer_ctime,customer_utime,customer_device_num from customer where \$CONDITIONS" \
--username root \
--password 123456 \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_customer/ \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_customer \
--m 1

sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--query "select customer_id,customer_login,customer_nickname,customer_name,customer_pass,customer_mobile,customer_idcard,customer_gender,customer_birthday,customer_age,customer_age_range,customer_email,customer_natives,customer_ctime,customer_utime,customer_device_num from customer where \$CONDITIONS" \
--username root \
--password 123456 \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_customer \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_customer \
--delete-target-dir \
-m 1


sqoop脚本文件如下:
#!/bin/bash



sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--query "select customer_id,attention_id,attention_type,attention_status,attention_ctime from customer_attention where \$CONDITIONS" \
--username root \
--password 123456 \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_customer_attention \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_customer_attention \
--delete-target-dir \
-m 1

sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--query "select consignee_id,customer_id,consignee_name,consignee_mobile,consignee_zipcode,consignee_addr,consignee_tag,ctime from customer_consignee where \$CONDITIONS" \
--username root \
--password 123456 \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_customer_consignee \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_customer_consignee \
--delete-target-dir \
-m 1

sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select category_code,category_name,category_parent_id,category_status,category_utime from category where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_category \
--hive-import \
--hive-database ods_nshop \
--hive-table dim_pub_category \
--delete-target-dir \
-m 1

sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select supplier_code,supplier_name,supplier_type,supplier_status,supplier_utime from supplier where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_supplier \
--hive-import \
--hive-database ods_nshop \
--hive-table dim_pub_supplier \
--delete-target-dir \
-m 1

sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select order_id,customer_id,order_status,customer_ip,customer_longitude,customer_latitude,customer_areacode,consignee_name,consignee_mobile,consignee_zipcode,pay_type,pay_code,pay_nettype,district_money,shipping_money,payment_money,order_ctime,shipping_time,receive_time from orders where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_orders \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_orders \
--delete-target-dir \
-m 1

sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select order_detail_id,order_id,product_id,product_name,product_remark,product_cnt,product_price,weighing_cost,district_money,is_activity,order_detail_ctime from order_detail where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_order_detail \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_order_detail \
--delete-target-dir \
-m 1

sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select page_code,page_remark,page_type,page_target,page_ctime from page_dim where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_page \
--hive-import \
--hive-database ods_nshop \
--hive-table dim_pub_page \
--delete-target-dir \
-m 1

sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select region_code,region_code_desc,region_city,region_city_desc,region_province,region_province_desc from area_dim where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_area \
--hive-import \
--hive-database ods_nshop \
--hive-table dim_pub_area \
--delete-target-dir \
-m 1

sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select date_day,date_day_desc,date_day_month,date_day_year,date_day_en,date_week,date_week_desc,date_month,date_month_en,date_month_desc,date_quarter,date_quarter_en,date_quarter_desc,date_year from date_dim where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_date \
--hive-import \
--hive-database ods_nshop \
--hive-table dim_pub_date \
--delete-target-dir \
-m 1

sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select pay_id,order_id,customer_id,pay_status,pay_type,pay_code,pay_nettype,pay_amount,pay_ctime from orders_pay_records where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_orders_pay_records \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_orders_pay_records \
--delete-target-dir \
-m 1

sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select product_code,product_name,product_remark,category_code,supplier_code,product_price,product_weighing_cost,product_publish_status,product_audit_status,product_bar_code,product_weight,product_length,product_height,product_width,product_colors,product_date,product_shelf_life,product_ctime,product_utime from product where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_product \
--hive-import \
--hive-database ods_nshop \
--hive-table dim_pub_product \
--delete-target-dir \
-m 1

定时任务:

#设定定时任务
#crond定时方式的配置
vim /etc/crontab 
#在最后一行加上定时任务的配置 每小时执行一次
* */1 * * * root /usr/local/shell/test.sh

/usr/local/soft/atao_file/xiangmu_1/sqoopshell.sh
#修改crontab的配置后需要重启crontab,使配置生效
/bin/systemctl restart crond.service


埋点日志

日志数据我们直接load就可以了,下面的外部数据再使用flume采集,为了演示不同的导数据方式,这里就直接load了

电商平台在网页版或APP版终端上进行业务埋点,采集相关用户行为数据发送给服务器进行用户行为分析。
日志文件如下(截取部分)
{"action":"05","event_type":"01","customer_id":"20101000324999676","device_num":"586344","device_type":"9","os":"2","os_version":"2.2","manufacturer":"05","carrier":"2","network_type":"2","area_code":"41092","longitude":"116.35636","latitude":"40.06919","extinfo":"{\"target_type\":\"4\",\"target_keys\":\"20402\",\"target_order\":\"31\",\"target_ids\":\"[\\\"4320402595801\\\",\\\"4320402133801\\\",\\\"4320402919201\\\",\\\"4320402238501\\\"]\"}","ct":1567896035000}
{"action":"05","event_type":"02","customer_id":"20101000405999595","device_num":"956236","device_type":"9","os":"2","os_version":"4.3","manufacturer":"08","carrier":"2","network_type":"3","area_code":"10015","longitude":"116.35985","latitude":"40.069590000000005","extinfo":"{\"target_type\":\"3\",\"target_id\":\"4320406544401\",\"target_action\":\"01\"}","ct":1567896035000}
{"action":"05","event_type":"02","customer_id":"20101000648999352","device_num":"876263","device_type":"2","os":"2","os_version":"6.2","manufacturer":"07","carrier":"2","network_type":"2","area_code":"42058","longitude":"116.34721","latitude":"40.06685","extinfo":"{\"target_type\":\"3\",\"target_id\":\"4320701402201\",\"target_action\":\"01\"}","ct":1567896035000}
{"action":"07","event_type":"02","customer_id":"20101000729999271","device_num":"451481","device_type":"9","os":"2","os_version":"4.5","manufacturer":"09","carrier":"3","network_type":"2","area_code":"37162","longitude":"116.35737","latitude":"40.06962","extinfo":"{\"target_id\":\"4320207200701\"}","ct":1567896035000}
{"action":"06","event_type":"","customer_id":"20101000810999190","device_num":"114649","device_type":"9","os":"2","os_version":"8.2","manufacturer":"02","carrier":"3","network_type":"0","area_code":"35062","longitude":"116.38244999999999","latitude":"40.06447","extinfo":"{}","ct":1567896035000}
{"action":"07","event_type":"01","customer_id":"20101000972999028","device_num":"332835","device_type":"2","os":"2","os_version":"7.5","manufacturer":"02","carrier":"2","network_type":"0","area_code":"15082","longitude":"116.33457","latitude":"40.06449","extinfo":"{\"target_id\":\"4320701224301\"}","ct":1567896035000}
{"action":"05","event_type":"02","customer_id":"20101000972999028","device_num":"332835","device_type":"2","os":"2","os_version":"7.5","manufacturer":"02","carrier":"2","network_type":"0","area_code":"15082","longitude":"116.33457","latitude":"40.06449","extinfo":"{\"target_type\":\"3\",\"target_id\":\"4320701224301\",\"target_action\":\"01\"}","ct":1567896035000}
{"action":"08","event_type":"02","customer_id":"20101001377998623","device_num":"269526","device_type":"9","os":"2","os_version":"2.2","manufacturer":"05","carrier":"2","network_type":"3","area_code":"13053","longitude":"116.36325","latitude":"40.06456","extinfo":"{\"target_id\":\"4320901702901\"}","ct":1567896035000}
{"action":"08","event_type":"01","customer_id":"20101001458998542","device_num":"252612","device_type":"9","os":"2","os_version":"7.5","manufacturer":"03","carrier":"2","network_type":"2","area_code":"10013","longitude":"116.39949","latitude":"40.069250000000004","extinfo":"{\"target_id\":\"4320901657101\"}","ct":1567896035000}
中文含义 HIVE表名
用户行为日志表 ods_nshop.ods_nshop_01_useractlog
用户行为日志表

创建hadoop目录

hadoop fs -mkdir -p '/shujia/bigdata17/data/nshop/ods/user_action_log/'
create external table if not exists ods_nshop.ods_nshop_01_useractlog(
  action string comment '行为类型:install安装|launch启动|interactive交互|page_enter_h5页面曝光|page_enter_native页面进入|exit退出',
  event_type string comment '行为类型:click点击|view浏览|slide滑动|input输入',
  customer_id string comment '用户id',
  device_num string comment '设备号',
  device_type string comment '设备类型',
  os string comment '手机系统',
  os_version string comment '手机系统版本',
  manufacturer string comment '手机制造商',
  carrier string comment '电信运营商',
  network_type string comment '网络类型',
  area_code string comment '地区编码',
  longitude string comment '经度',
  latitude string comment '纬度',
  extinfo string comment '扩展信息(json格式)',
  duration string comment '停留时长',
  ct bigint comment '创建时间'
) partitioned by (bdp_day string)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE
location '/shujia/bigdata17/data/nshop/ods/user_action_log/'

注意

json格式数据表需要通过serde机制处理
1 在hive-site.xml中设置三方jar包
<property>  
	<name>hive.aux.jars.path</name>
	<value>/usr/local/hive-3.1.2/lib/</value>
</property>

2 在hive.aux.jars.path设置的路径中增加hive-hcatalog-core-3.1.2.jar 
[root@master hive-3.1.2]# cp $HIVE_HOME/hcatalog/share/hcatalog/hive-hcatalog-core-3.1.2.jar /usr/local/hive-3.1.2/lib/
同步数据到表

将0000文件上传服务器,然后使用hive命令映射到hive表

hive> load data local inpath "/usr/local/soft/atao_file/xiangmu_file/000000_0" into table ods_nshop.ods_nshop_01_useractlog partition(bdp_day="20220630");
外部数据
外部数据来源主要有:
1 企业间的接口调用(如其他企业平台上的广告投放营销)
2 公共数据的爬虫数据(公共数据、友商数据、社区|社交平台公开数据)
中文含义 HIVE表名
广告投放信息表 ods_nshop.ods_nshop_01_releasedatas
广告投放信息
create external table if not exists ods_nshop.ods_nshop_01_releasedatas(
  device_num string comment '设备号',
  device_type string comment '设备类型',
  os string comment '手机系统',
  os_version string comment '手机系统版本',
  manufacturer string comment '手机制造商',
  area_code string comment '地区编码',
  release_sid string comment '投放请求id',
  release_session string comment '投放会话id',
  release_sources string comment '投放渠道',
  release_params string comment '投放请求参数',
  ct string comment '创建时间'
) partitioned by (bdp_day string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
stored as textfile
location '/shujia/bigdata17/data/nshop/ods/release/'
采集数据

外部数据需要使用Flume进行数据采集操作,将采集到的数据映射到Hive中,首先创建Hive的表

外部数据如下(截取)
723748,2,2,6.1,3,540529,1572654785000fvkpnp,1572650000000000000000000,tencent,ip=3.61.2.222&deviceNum=723748&lon=null&lat=null&aid=null&ctime=1572654905000&sources=tencent&session=1572654785000723748416198&productPage=4320308033801,1573260000000
928223,2,2,2.8,7,441881,1572654785000mvfnnn,1572650000000000000000000,liebao,ip=95.219.76.152&deviceNum=928223&lon=null&lat=null&aid=null&ctime=1572654919000&sources=liebao&session=1572654785000928223624678&productPage=4320102710201,1573260000000
928223,2,2,7.1,3,653123,1572654785000mvfnnn,1572650000000000000000000,liebao,ip=95.219.76.152&deviceNum=928223&lon=null&lat=null&aid=null&ctime=1572654919000&sources=liebao&session=1572654785000928223624678&productPage=4320102710201,1573260000000
784295,9,2,9.7,5,632726,1572654785000qvisko,1572650000000000000000000,baidu,ip=210.121.223.225&deviceNum=784295&lon=null&lat=null&aid=null&ctime=1572655033000&sources=baidu&session=1572654785000784295423824&productPage=4320206519001,1573260000000
496626,9,2,4.8,5,540522,1572654785000jdesth,1572650000000000000000000,douyin,ip=231.61.62.159&deviceNum=496626&lon=null&lat=null&aid=null&ctime=1572654947000&sources=douyin&session=1572654785000496626329353&productPage=4320303786701,1573260000000
235966,2,2,2.7,2,610602,1572654785000updxen,1572650000000000000000000,ximalaya,ip=246.85.131.184&deviceNum=235966&lon=null&lat=null&aid=null&ctime=1572654853000&sources=ximalaya&session=1572654785000235966421378&productPage=4320505806501,1573260000000
275157,9,2,2.4,5,632724,1572654785000pqwgch,1572650000000000000000000,liebao,ip=106.227.203.6&deviceNum=275157&lon=null&lat=null&aid=null&ctime=1572655014000&sources=liebao&session=1572654785000275157768731&productPage=4320603904901,1573260000000
322548,2,2,5.2,2,632822,1572654785000wtjppb,1572650000000000000000000,liebao,ip=105.247.218.235&deviceNum=322548&lon=null&lat=null&aid=null&ctime=1572654981000&sources=liebao&session=1572654785000322548126172&productPage=4320403173201,1573260000000
432626,9,2,1.4,5,540227,1572654785000dxifed,1572650000000000000000000,netyex,ip=216.169.184.123&deviceNum=432626&lon=null&lat=null&aid=null&ctime=1572654916000&sources=netyex&session=1572654785000432626892945&productPage=4320808279201,1573260000000
693898,9,2,2.2,9,620924,1572654785000tnjomq,1572650000000000000000000,ximalaya,ip=130.238.115.6&deviceNum=693898&lon=null&lat=null&aid=null&ctime=1572654861000&sources=ximalaya&session=1572654785000693898367422&productPage=4320503604301,1573260000000
164237,9,2,9.2,3,360423,1572654785000bpxghm,1572650000000000000000000,douyin,ip=119.26.191.164&deviceNum=164237&lon=null&lat=null&aid=null&ctime=1572655050000&sources=douyin&session=1572654785000164237721644&productPage=4320804560801,1573260000000

配置Flume采集文件

监控文件如下
# 命名
a1.sources = r1
a1.channels = c1
a1.sinks = s1
# 关联
a1.sources.r1.channels = c1
a1.sinks.s1.channel = c1
# 配置source类型和属性
a1.sources.r1.type =  TAILDIR
a1.sources.r1.filegroups = g1
a1.sources.r1.filegroups.g1 = /usr/local/soft/flume-1.9.0/data/.*.csv
# 元数据保存位置
a1.sources.r1.positionFile = /usr/local/soft/flume-1.9.0/flume-log/taildir_position.json
# 配置channel类型属性
a1.channels.c1.type = memory
# 缓存池大小
a1.channels.c1.capacity = 1000
# 每个事务sink拉取的大小
a1.channels.c1.transactionCapacity = 100
# 配置Sink类型和属性
a1.sinks.s1.type = hdfs
a1.sinks.s1.hdfs.path = hdfs://master:9000/data/nshop/ods/release/%Y%m%d
a1.sinks.s1.hdfs.fileSuffix = .log
# 下面三个配置参数如果都设置为0,那么表示不执行次参数(失效)
a1.sinks.s1.hdfs.rollInterval = 10
a1.sinks.s1.hdfs.rollSize = 0
a1.sinks.s1.hdfs.rollCount = 0
# 设置采集文件格式 如果你是存文本文件,就是用DataStream
a1.sinks.s1.hdfs.fileType = DataStream
a1.sinks.s1.hdfs.writeFormat = Text
# 开启本地时间戳获取参数,因为我们的目录上面已经使用转义符号,所以要使用时间戳
a1.sinks.s1.hdfs.useLocalTimeStamp = true

创建文件

[root@master conf]# vim agentData.conf

启动执行

[root@master conf]# flume-ng agent -n a1 -c conf -f agentData.conf -Dflume.root.logger=INFO,console

编写一个sh脚本,然后生产数据到对应位置的文件夹下面

#!/bin/base
cat 2.txt >> /usr/local/soft/flume-1.9.0/data/2.txt

执行映射

hive> load data inpath "/data/nshop/ods/release22/20220630/*" into table ods_nshop.ods_nshop_01_releasedatas partition(bdp_day='20220630');

标签:COMMENT,customer,NULL,string,nshop,ods,建设,电商
来源: https://www.cnblogs.com/atao-BigData/p/16448884.html