数据库
首页 > 数据库> > mysql 练习基础 11:行转列的方法

mysql 练习基础 11:行转列的方法

作者:互联网

-- 数据准备
create database taobao;

use taobao;

create table UserBehavior(
    user_id int,
    item_id int,
    item_category int,
    behavior_type varchar(10),
    user_geohash varchar(10),
    times datetime,
    amount decimal(5,2)
);

show variables like '%secure%';-- 查看安全路径,随后将表格粘贴到该安全路径下,
load data infile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/UserBehavior.csv"
into table UserBehavior
fields terminated by ','
ignore 1 lines;

select * from UserBehavior limit 10;
select count(*) from UserBehavior;#1048575



-- 数据清洗
-- 缺失值处理
select
count(user_id),
count(item_id),
count(item_category),
count(behavior_type),
count(user_geohash),
count(times),
count(amount)
from UserBehavior;



-- 异常值检查

select min(times),max(times),min(amount),max(amount) from userbehavior;

-- 重复记录处理

select distinct * from userbehavior;

-- 字段处理:根据times字段增加计算字段用户行为日期、周和小时,排除后续分析不需要的user_geohash字段,并将筛选后的结果保存到新表
#保存方式一:直接导出表格csv文件
select 
user_id,
item_id,
item_category,
behavior_type,
date(times) as 日期,
hour(times) as 小时,
date_format(times,'%w') as 星期,
amount
from (select distinct * from userbehavior) as t #从去重的表里面去查询
limit 100;

#保存方式二:代码保存(至安全路径)
select 
user_id,
item_id,
item_category,
behavior_type,
date(times) as 日期,
hour(times) as 小时,
date_format(times,'%w') as 星期,
amount
from (select distinct * from userbehavior) as t  
limit 100
into outfile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/t2.csv"-- 保存到安全路径并命名为t2
fields terminated by ','
lines terminated by '\r\n';

#保存方式三:视图 
#视图:优点 1.简化SQL语句,快速提取,将复杂查询语句进行封装;2.安全,保护数据,设置访问权限 让不同级别用户看到不同的视图;缺点 操作视图比直接操作基础表要慢
create view userbehavior_new as 
select 
user_id,
item_id,
item_category,
behavior_type,
date(times) as 日期,
hour(times) as 小时,
date_format(times,'%w') as 星期,
amount
from (select distinct * from userbehavior) as t;

select * from userbehavior_new limit 100;

select count(*) from userbehavior_new;#962097

  
-- 数据预览
select
    count(distinct user_id) as 用户数,
    count(distinct item_id) as 商品数,
    count(distinct item_category) as 类目数
from userbehavior_new;

select 
    behavior_type,
    count(*) as 行为次数
from userbehavior_new
group by behavior_type;



-- 数据分析
-- 1.流量指标分析
-- 每日PV、UV、人均浏览量、成交量、销售额
select 
日期,
sum(behavior_type="pv") as 浏览量,
count(distinct user_id) as 访客数,
sum(behavior_type="pv")/count(distinct user_id) as 人均浏览量,
sum(behavior_type="buy") as 成交量,
sum(if(behavior_type="buy",amount,0)) as 销售额
from userbehavior_new
group by 日期;


-- 周一到周日PV、UV、人均浏览量、成交量、销售额
select 
星期,
sum(behavior_type="pv") as 浏览量,
count(distinct user_id) as 访客数,
sum(behavior_type="pv")/count(distinct user_id) as 人均浏览量,
sum(behavior_type="buy") as 成交量,
sum(if(behavior_type="buy",amount,0)) as 销售额
from userbehavior_new
group by 星期;


-- 每小时PV、UV、人均浏览量、成交量、销售额
select 
小时,
sum(behavior_type="pv") as 浏览量,
count(distinct user_id) as 访客数,
sum(behavior_type="pv")/count(distinct user_id) as 人均浏览量,
sum(behavior_type="buy") as 成交量,
sum(if(behavior_type="buy",amount,0)) as 销售额
from userbehavior_new
group by 小时;



-- 2.行为转化分析(转化率=当前行为用户数/上一行为用户数)
select 
    behavior_type,
    count(distinct user_id) as 用户人数,
    lag(count(distinct user_id),1) over(order by behavior_type desc) as 上一行为用户人数,
    ifnull(count(distinct user_id)/lag(count(distinct user_id),1) over(order by behavior_type desc),1) as 转化率
from userbehavior_new
group by behavior_type;


-- 浏览—加购—购买的转化率
select 
    behavior_type,
    count(distinct user_id) as 用户人数,
    lag(count(distinct user_id),1) over(order by behavior_type desc) as 上一行为用户人数,
    ifnull(count(distinct user_id)/lag(count(distinct user_id),1) over(order by behavior_type desc),1) as 转化率
from userbehavior_new
where behavior_type in ('pv','cart','buy')
group by behavior_type;


-- 每日浏览—加购—购买的转化率
# 行转列的方法
-- charlly  整理
select 
日期,
sum(if(behavior_type="pv",用户人数,0)) as 浏览人数, -- 用if条件判断将行转化成列,然后对其进行聚合
sum(if(behavior_type="cart",用户人数,0)) as 加购人数,
sum(if(behavior_type="cart",用户人数,0))/sum(if(behavior_type="pv",用户人数,0)) as 浏览_加购转化率,
sum(if(behavior_type="buy",用户人数,0)) as 购买人数,
sum(if(behavior_type="buy",用户人数,0))/sum(if(behavior_type="cart",用户人数,0)) as 加购_购买转化率
from
    (select 
        日期,
        behavior_type,
        count(distinct user_id) as 用户人数     
    from userbehavior_new
    where behavior_type in ('pv','cart','buy')
    group by 日期, behavior_type) as t
group by 日期;



-- 每日浏览—加购—购买的转化率  老师答案
select 
    日期,
    sum(if(behavior_type='pv',用户人数,0)) as 浏览人数,
    sum(if(behavior_type='cart',用户人数,0)) as 加购人数,
    sum(if(behavior_type='buy',用户人数,0)) as 购买人数,
    sum(if(behavior_type='cart',用户人数,0))/sum(if(behavior_type='pv',用户人数,0)) as 浏览_加购转化率,
    sum(if(behavior_type='buy',用户人数,0))/sum(if(behavior_type='cart',用户人数,0)) as 加购_够买转化率
from 
    (select 
        日期,
        behavior_type,
        count(distinct user_id) as 用户人数
    from userbehavior_new
    where behavior_type in ('pv','cart','buy')
    group by 日期,behavior_type) as t
group by 日期;

 

标签:11,count,sum,转列,user,behavior,mysql,type,id
来源: https://www.cnblogs.com/charlly/p/15415272.html