其他分享
首页 > 其他分享> > 数据分析--基础指标多维统计

数据分析--基础指标多维统计

作者:互联网

1.1. 多维度统计PV总量
--第一种方式:直接在dw_weblog_detail单表上进行查询
1.1.1 计算该处理批次(一天)中的各小时pvs

-- Hourly page-view (PV) totals for one processing batch (one day).
-- The result table is rebuilt from scratch each time.
drop table dw_pvs_everyhour_oneday;

create table dw_pvs_everyhour_oneday (
    month string,
    day   string,
    hour  string,
    pvs   bigint
)
partitioned by (datestr string);

-- Count the detail rows of the 20181101 batch per (month, day, hour)
-- and store them in the matching partition of the result table.
insert into table dw_pvs_everyhour_oneday partition (datestr = '20181101')
select
    d.month,
    d.day,
    d.hour,
    count(*) as pvs
from dw_weblog_detail d
where d.datestr = '20181101'
group by
    d.month,
    d.day,
    d.hour;

-- Inspect the result.
select * from dw_pvs_everyhour_oneday;
+--------------------------------+------------------------------+-------------------------------+------------------------------+----------------------------------+--+
| dw_pvs_everyhour_oneday.month  | dw_pvs_everyhour_oneday.day  | dw_pvs_everyhour_oneday.hour  | dw_pvs_everyhour_oneday.pvs  | dw_pvs_everyhour_oneday.datestr  |
+--------------------------------+------------------------------+-------------------------------+------------------------------+----------------------------------+--+
| 11                             | 01                           | 06                            | 222                          | 20181101                         |
| 11                             | 01                           | 07                            | 2020                         | 20181101                         |
| 11                             | 01                           | 08                            | 4104                         | 20181101                         |
| 11                             | 01                           | 09                            | 2748                         | 20181101                         |
| 11                             | 01                           | 10                            | 1136                         | 20181101                         |
| 11                             | 01                           | 11                            | 1142                         | 20181101                         |
| 11                             | 01                           | 12                            | 1242                         | 20181101                         |
| 11                             | 01                           | 13                            | 1062                         | 20181101                         |
| 11                             | 01                           | 14                            | 1028                         | 20181101                         |
| 11                             | 01                           | 15                            | 1518                         | 20181101                         |
| 11                             | 01                           | 16                            | 950                          | 20181101                         |
| 11                             | 01                           | 17                            | 764                          | 20181101                         |
| 11                             | 01                           | 18                            | 524                          | 20181101                         |
| 11                             | 01                           | 19                            | 780                          | 20181101                         |
| 11                             | 01                           | 20                            | 422                          | 20181101                         |
| 11                             | 01                           | 21                            | 426                          | 20181101                         |
| 11                             | 01                           | 22                            | 702                          | 20181101                         |
| 11                             | 01                           | 23                            | 764                          | 20181101                         |
| 11                             | 02                           | 00                            | 624                          | 20181101                         |
| 11                             | 02                           | 01                            | 648                          | 20181101                         |
| 11                             | 02                           | 02                            | 1092                         | 20181101                         |
| 11                             | 02                           | 03                            | 1104                         | 20181101                         |
| 11                             | 02                           | 04                            | 1138                         | 20181101                         |
| 11                             | 02                           | 05                            | 1080                         | 20181101                         |
| 11                             | 02                           | 06                            | 300                          | 20181101                         |
+--------------------------------+------------------------------+-------------------------------+------------------------------+----------------------------------+--+

-- Daily PV totals.
drop table dw_pvs_everyday;

create table dw_pvs_everyday (
    pvs   bigint,
    month string,
    day   string
);

-- NOTE: keep a plain space between `as` and the alias. A TAB character in that
-- position can be dropped when the statement is pasted into an interactive Hive
-- shell (TAB triggers completion), which turns `as<TAB>pvs` into the alias `aspvs`.
insert into table dw_pvs_everyday
select
    count(*) as pvs,
    a.month  as month,
    a.day    as day
from dw_weblog_detail a
group by
    a.month,
    a.day;

-- Inspect the result.
select * from dw_pvs_everyday;
+--------+--------+------+--+
| aspvs  | month  | day  |
+--------+--------+------+--+
| 21554  | 11     | 01   |
| 5986   | 11     | 02   |
+--------+--------+------+--+

-- Demonstration: Hive rejects UPDATE/DELETE when the configured transaction
-- manager does not support these operations. The statement below fails with:
--   Error: Error while compiling statement: FAILED: SemanticException [Error 10294]:
--   Attempt to do update or delete using transaction manager that does not support these operations. (state=42000,code=10294)
-- The predicate uses IN because `day='01' and day='02'` can never be true for a row.
delete from dw_pvs_everyday where day in ('01', '02');

-- Dimension: month
drop table dw_pvs_everymonth;

create table dw_pvs_everymonth (
    pvs   bigint,
    month string
);

-- Describe the table only after it has been re-created; directly after the
-- drop it does not exist.
desc dw_pvs_everymonth;

--所需技术:
--常用的join有四种,inner join, left outer join, right outer join, full outer join
INNER JOIN
		跟JOIN是一样的,一般INNER关键字可以省略。
		INNER JOIN将只会返回相匹配的元素项,即不会返回结果为NULL的数据项。
left join 
		--左连接,返回左表中的所有记录,即使在右表中没有匹配的行。

right outer join 
		--右连接,返回右表中的所有记录,即使在左表中没有记录与它匹配

full outer join 
		--全连接,返回左右表中的所有记录

-- Insert: PV total per month. The distinct months from t_dim_time are joined
-- to the detail rows on month, and the joined rows are counted per month.
-- NOTE: tokens are separated by plain spaces or newlines only. A TAB between
-- `join` and the table name can be dropped when the statement is pasted into
-- an interactive Hive shell, fusing the two tokens.
insert into table dw_pvs_everymonth
select
    count(*) as pvs,
    a.month  as month
from (select distinct(month) from t_dim_time) a
join dw_weblog_detail b
    on a.month = b.month
group by a.month;

-- Inspect the result.
select * from dw_pvs_everymonth;
+------------------------+--------------------------+--+
| dw_pvs_everymonth.pvs  | dw_pvs_everymonth.month  |
+------------------------+--------------------------+--+
| 27540                  | 11                       |
+------------------------+--------------------------+--+


--(1.2)统计每小时各来访url产生的pv量,查询结果存入:( "dw_pvs_referer_everyhour" )
drop table dw_pvs_referer_everyhour;

-- Result table for the PV count per referer URL and hour, partitioned by batch date.
create table dw_pvs_referer_everyhour (
    referer_url    string,
    referer_host   string,
    month          string,
    day            string,
    hour           string,
    pv_referer_cnt bigint
)
partitioned by (datestr string);

-- Describe the table only after it has been re-created; directly after the
-- drop it does not exist.
desc dw_pvs_referer_everyhour;
+--------------------------+-----------------------+-----------------------+--+
|         col_name         |       data_type       |        comment        |
+--------------------------+-----------------------+-----------------------+--+
| referer_url              | string                |                       |
| referer_host             | string                |                       |
| month                    | string                |                       |
| day                      | string                |                       |
| hour                     | string                |                       |
| pv_referer_cnt           | bigint                |                       |
| datestr                  | string                |                       |
|                          | NULL                  | NULL                  |
| # Partition Information  | NULL                  | NULL                  |
| # col_name               | data_type             | comment               |
|                          | NULL                  | NULL                  |
| datestr                  | string                |                       |
+--------------------------+-----------------------+-----------------------+--+


dw_weblog_detail 表的样例数据:
+-------------------------+-------------------------------+-------------------------------+------------------------------+--------------------------+---------------------------+-------------------------+-----------------------+------------------------+-----------------------------------------------+--------------------------+-----------------------------------+------------------------------------------------+----------------------------+-------------------------------+-----------------------------+--------------------------------+----------------------------------------------------+---------------------------+--+
| dw_weblog_detail.valid  | dw_weblog_detail.remote_addr  | dw_weblog_detail.remote_user  | dw_weblog_detail.time_local  | dw_weblog_detail.daystr  | dw_weblog_detail.timestr  | dw_weblog_detail.month  | dw_weblog_detail.day  | dw_weblog_detail.hour  |           dw_weblog_detail.request            | dw_weblog_detail.status  | dw_weblog_detail.body_bytes_sent  |         dw_weblog_detail.http_referer          | dw_weblog_detail.ref_host  |   dw_weblog_detail.ref_path   | dw_weblog_detail.ref_query  | dw_weblog_detail.ref_query_id  |          dw_weblog_detail.http_user_agent          | dw_weblog_detail.datestr  |
+-------------------------+-------------------------------+-------------------------------+------------------------------+--------------------------+---------------------------+-------------------------+-----------------------+------------------------+-----------------------------------------------+--------------------------+-----------------------------------+------------------------------------------------+----------------------------+-------------------------------+-----------------------------+--------------------------------+----------------------------------------------------+---------------------------+--+
| false                   | 194.237.142.21                | -                             | 2018-11-01 06:49:18          | 2018-11-01               | 06:49:18                  | 11                      | 01                    | 06                     | /wp-content/uploads/2013/07/rstudio-git3.png  | 304                      | 0                                 | "-"                                            | NULL                       | NULL                          | NULL                        | NULL                           | "Mozilla/4.0(compatible;)"                         | 20181101                  |
| false                   | 163.177.71.12                 | -                             | 2018-11-01 06:49:33          | 2018-11-01               | 06:49:33                  | 11                      | 01                    | 06                     | /                                             | 200                      | 20                                | "-"                                            | NULL                       | NULL                          | NULL                        | NULL                           | "DNSPod-Monitor/1.0"                               | 20181101                  |
| false                   | 163.177.71.12                 | -                             | 2018-11-01 06:49:36          | 2018-11-01               | 06:49:36                  | 11                      | 01                    | 06                     | /                                             | 200                      | 20                                | "-"                                            | NULL                       | NULL                          | NULL                        | NULL                           | "DNSPod-Monitor/1.0"                               | 20181101                  |
| false                   | 101.226.68.137                | -                             | 2018-11-01 06:49:42          | 2018-11-01               | 06:49:42                  | 11                      | 01                    | 06                     | /                                             | 200                      | 20                                | "-"                                            | NULL                       | NULL                          | NULL                        | NULL                           | "DNSPod-Monitor/1.0"                               | 20181101                  |
| false                   | 101.226.68.137                | -                             | 2018-11-01 06:49:45          | 2018-11-01               | 06:49:45                  | 11                      | 01                    | 06                     | /                                             | 200                      | 20                                | "-"                                            | NULL                       | NULL                          | NULL                        | NULL                           | "DNSPod-Monitor/1.0"                               | 20181101                  |
| false                   | 60.208.6.156                  | -                             | 2018-11-01 06:49:48          | 2018-11-01               | 06:49:48                  | 11                      | 01                    | 06                     | /wp-content/uploads/2013/07/rcassandra.png    | 200                      | 185524                            | "http://cos.name/category/software/packages/"  | cos.name                   | /category/software/packages/  | NULL                        | NULL                           | "Mozilla/5.0(WindowsNT6.1)AppleWebKit/537.36(KHTML,likeGecko)Chrome/29.0.1547.66Safari/537.36" | 20181101                  |
| false                   | 222.68.172.190                | -                             | 2018-11-01 06:49:57          | 2018-11-01               | 06:49:57                  | 11                      | 01                    | 06                     | /images/my.jpg                                | 200                      | 19939                             | "http://www.angularjs.cn/A00n"                 | www.angularjs.cn           | /A00n                         | NULL                        | NULL                           | "Mozilla/5.0(WindowsNT6.1)AppleWebKit/537.36(KHTML,likeGecko)Chrome/29.0.1547.66Safari/537.36" | 20181101                  |
| false                   | 183.195.232.138               | -                             | 2018-11-01 06:50:16          | 2018-11-01               | 06:50:16                  | 11                      | 01                    | 06                     | /                                             | 200                      | 20                                | "-"                                            | NULL                       | NULL                          | NULL                        | NULL                           | "DNSPod-Monitor/1.0"                               | 20181101                  |
| false                   | 183.195.232.138               | -                             | 2018-11-01 06:50:16          | 2018-11-01               | 06:50:16                  | 11                      | 01                    | 06                     | /                                             | 200                      | 20                                | "-"                                            | NULL                       | NULL                          | NULL                        | NULL                           | "DNSPod-Monitor/1.0"                               | 20181101                  |
| false                   | 66.249.66.84                  | -                             | 2018-11-01 06:50:28          | 2018-11-01               | 06:50:28                  | 11                      | 01                    | 06                     | /page/6/                                      | 200                      | 27777                             | "-"                                            | NULL                       | NULL                          | NULL                        | NULL                           | "Mozilla/5.0(compatible;Googlebot/2.1;+http://www.google.com/bot.html)" | 20181101                  |
+-------------------------+-------------------------------+-------------------------------+------------------------------+--------------------------+---------------------------+-------------------------+-----------------------+------------------------+-----------------------------------------------+--------------------------+-----------------------------------+------------------------------------------------+----------------------------+-------------------------------+-----------------------------+--------------------------------+----------------------------------------------------+---------------------------+--+

--统计每小时各来访url产生的pv量,查询结果存入:( "dw_pvs_referer_everyhour" )


-- Preview (first 10 rows) of the hourly PV count per referer URL.
-- This is a plain SELECT on purpose: the complete INSERT into
-- dw_pvs_referer_everyhour follows further down, and loading these 10 rows
-- here as well would store them twice in the same partition.
select
    http_referer,
    ref_host,
    month,
    day,
    hour,
    count(1) as pv_referer_cnt
from dw_weblog_detail
where datestr = '20181101'      -- only the batch being analysed
  and ref_host is not null      -- row filter belongs in WHERE, not HAVING
group by
    http_referer,
    ref_host,
    month,
    day,
    hour
order by
    month,
    day,
    hour,
    pv_referer_cnt
limit 10;
+----------------------------------------------------+-------------------+--------+------+-------+-----------------+--+
|                    http_referer                    |     ref_host      | month  | day  | hour  | pv_referer_cnt  |
+----------------------------------------------------+-------------------+--------+------+-------+-----------------+--+
| "http://blog.fens.me/series-nodejs/"               | blog.fens.me      | 11     | 01   | 06    | 2               |
| "http://www.angularjs.cn/"                         | www.angularjs.cn  | 11     | 01   | 06    | 2               |
| "http://www.google.com/url?sa=t&rct=j&q=nodejs%20%E5%BC%82%E6%AD%A5%E5%B9%BF%E6%92%AD&source=web&cd=1&cad=rja&ved=0CCgQFjAA&url=%68%74%74%70%3a%2f%2f%62%6c%6f%67%2e%66%65%6e%73%2e%6d%65%2f%6e%6f%64%65%6a%73%2d%73%6f%63%6b%65%74%69%6f%2d%63%68%61%74%2f&ei=rko5UrylAefOiAe7_IGQBw&usg=AFQjCNG6YWoZsJ_bSj8kTnMHcH51hYQkAA&bvm=bv.52288139,d.aGc" | www.google.com    | 11     | 01   | 06    | 2               |
| "http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=6&cad=rja&ved=0CHIQFjAF&url=http%3A%2F%2Fblog.fens.me%2Fvps-ip-dns%2F&ei=j045UrP5AYX22AXsg4G4DQ&usg=AFQjCNGsJfLMNZnwWXNpTSUl6SOEzfF6tg&sig2=YY1oxEybUL7wx3IrVIMfHA&bvm=bv.52288139,d.b2I" | www.google.com    | 11     | 01   | 06    | 2               |
| "http://cos.name/category/software/packages/"      | cos.name          | 11     | 01   | 06    | 2               |
| "http://www.baidu.com/s?tn=baiduhome_pg&ie=utf-8&bs=%E5%9C%A8linux%E5%90%AF%E5%8A%A8%E4%B8%8Bmongodb.conf&f=8&rsv_bp=1&wd=about+to+fork+child+process%2C+waiting+until+server+is+ready+for+connections.&rsv_n=2&rsv_sug3=1&rsv_sug1=1&rsv_sug4=187&inputT=906" | www.baidu.com     | 11     | 01   | 06    | 2               |
| "http://www.angularjs.cn/A00n"                     | www.angularjs.cn  | 11     | 01   | 06    | 4               |
| "http://blog.fens.me/nodejs-express3/"             | blog.fens.me      | 11     | 01   | 06    | 4               |
| "http://blog.fens.me/nodejs-async/"                | blog.fens.me      | 11     | 01   | 06    | 10              |
| "http://blog.fens.me/wp-content/themes/silesia/style.css" | blog.fens.me      | 11     | 01   | 06    | 14              |
+----------------------------------------------------+-------------------+--------+------+-------+-----------------+--+


-- Load the hourly PV count per referer URL for the 20181101 batch.
-- The source is restricted to the same datestr as the target partition so that
-- rows from other batches are never written into this partition.
insert into table dw_pvs_referer_everyhour partition (datestr = '20181101')
select
    http_referer,
    ref_host,
    month,
    day,
    hour,
    count(1) as pv_referer_cnt
from dw_weblog_detail
where datestr = '20181101'
  and ref_host is not null      -- row filter belongs in WHERE, not HAVING
group by
    http_referer,
    ref_host,
    month,
    day,
    hour
order by
    month,
    day,
    hour,
    pv_referer_cnt;

-- Inspect a few loaded rows.
select * from dw_pvs_referer_everyhour limit 5;
+----------------------------------------------------+----------------------------------------+---------------------------------+-------------------------------+--------------------------------+------------------------------------------+-----------------------------------+--+
|        dw_pvs_referer_everyhour.referer_url        | dw_pvs_referer_everyhour.referer_host  | dw_pvs_referer_everyhour.month  | dw_pvs_referer_everyhour.day  | dw_pvs_referer_everyhour.hour  | dw_pvs_referer_everyhour.pv_referer_cnt  | dw_pvs_referer_everyhour.datestr  |
+----------------------------------------------------+----------------------------------------+---------------------------------+-------------------------------+--------------------------------+------------------------------------------+-----------------------------------+--+
| "http://blog.fens.me/series-nodejs/"               | blog.fens.me                           | 11                              | 01                            | 06                             | 2                                        | 20181101                          |
| "http://www.angularjs.cn/"                         | www.angularjs.cn                       | 11                              | 01                            | 06                             | 2                                        | 20181101                          |
| "http://www.google.com/url?sa=t&rct=j&q=nodejs%20%E5%BC%82%E6%AD%A5%E5%B9%BF%E6%92%AD&source=web&cd=1&cad=rja&ved=0CCgQFjAA&url=%68%74%74%70%3a%2f%2f%62%6c%6f%67%2e%66%65%6e%73%2e%6d%65%2f%6e%6f%64%65%6a%73%2d%73%6f%63%6b%65%74%69%6f%2d%63%68%61%74%2f&ei=rko5UrylAefOiAe7_IGQBw&usg=AFQjCNG6YWoZsJ_bSj8kTnMHcH51hYQkAA&bvm=bv.52288139,d.aGc" | www.google.com                         | 11                              | 01                            | 06                             | 2                                        | 20181101                          |
| "http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=6&cad=rja&ved=0CHIQFjAF&url=http%3A%2F%2Fblog.fens.me%2Fvps-ip-dns%2F&ei=j045UrP5AYX22AXsg4G4DQ&usg=AFQjCNGsJfLMNZnwWXNpTSUl6SOEzfF6tg&sig2=YY1oxEybUL7wx3IrVIMfHA&bvm=bv.52288139,d.b2I" | www.google.com                         | 11                              | 01                            | 06                             | 2                                        | 20181101                          |
| "http://cos.name/category/software/packages/"      | cos.name                               | 11                              | 01                            | 06                             | 2                                        | 20181101                          |
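+----------------------------------------------------+----------------------------------------+---------------------------------+-------------------------------+--------------------------------+------------------------------------------+-----------------------------------+--+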

-- Count the PVs produced by each referer host per hour, sorted.
drop table dw_pvs_refererhost_everyhour;

-- Result table, partitioned by batch date.
create table dw_pvs_refererhost_everyhour (
    ref_host      string,
    month         string,
    day           string,
    hour          string,
    ref_host_cnts bigint
)
partitioned by (datestr string);

-- Show the resulting schema.
desc dw_pvs_refererhost_everyhour;
+--------------------------+-----------------------+-----------------------+--+
|         col_name         |       data_type       |        comment        |
+--------------------------+-----------------------+-----------------------+--+
| ref_host                 | string                |                       |
| month                    | string                |                       |
| day                      | string                |                       |
| hour                     | string                |                       |
| ref_host_cnts            | bigint                |                       |
| datestr                  | string                |                       |
|                          | NULL                  | NULL                  |
| # Partition Information  | NULL                  | NULL                  |
| # col_name               | data_type             | comment               |
|                          | NULL                  | NULL                  |
| datestr                  | string                |                       |
+--------------------------+-----------------------+-----------------------+--+

-- Preview (first 10 rows) of the hourly PV count per referer host.
-- The sort key starts with hour, so rows are ordered by hour of day first and
-- hours from different days sort next to each other.
select
    ref_host,
    month,
    day,
    hour,
    count(1) as ref_host_cnts
from dw_weblog_detail
where datestr = '20181101'      -- only the batch being analysed
  and ref_host is not null      -- row filter belongs in WHERE, not HAVING
group by
    ref_host,
    month,
    day,
    hour
order by
    hour asc,
    day asc,
    month asc,
    ref_host_cnts desc
limit 10;

+---------------------+--------+------+-------+----------------+--+
|      ref_host       | month  | day  | hour  | ref_host_cnts  |
+---------------------+--------+------+-------+----------------+--+
| blog.fens.me        | 11     | 02   | 00    | 222            |
| www.fens.me         | 11     | 02   | 00    | 26             |
| h2w.iask.cn         | 11     | 02   | 00    | 12             |
| www.google.com.hk   | 11     | 02   | 00    | 6              |
| angularjs.cn        | 11     | 02   | 00    | 6              |
| cnodejs.org         | 11     | 02   | 00    | 2              |
| www.leonarding.com  | 11     | 02   | 00    | 2              |
| www.itpub.net       | 11     | 02   | 00    | 2              |
| blog.fens.me        | 11     | 02   | 01    | 178            |
| cos.name            | 11     | 02   | 01    | 6              |
+---------------------+--------+------+-------+----------------+--+

-- Load the hourly PV count per referer host for the 20181101 batch.
-- The source is restricted to the same datestr as the target partition so that
-- rows from other batches are never written into this partition.
insert into table dw_pvs_refererhost_everyhour partition (datestr = '20181101')
select
    ref_host,
    month,
    day,
    hour,
    count(1) as ref_host_cnts
from dw_weblog_detail
where datestr = '20181101'
  and ref_host is not null      -- row filter belongs in WHERE, not HAVING
group by
    ref_host,
    month,
    day,
    hour
order by
    hour asc,
    day asc,
    month asc,
    ref_host_cnts desc;

-- Inspect a few loaded rows.
select * from dw_pvs_refererhost_everyhour limit 5;
+----------------------------------------+-------------------------------------+-----------------------------------+------------------------------------+---------------------------------------------+---------------------------------------+--+
| dw_pvs_refererhost_everyhour.ref_host  | dw_pvs_refererhost_everyhour.month  | dw_pvs_refererhost_everyhour.day  | dw_pvs_refererhost_everyhour.hour  | dw_pvs_refererhost_everyhour.ref_host_cnts  | dw_pvs_refererhost_everyhour.datestr  |
+----------------------------------------+-------------------------------------+-----------------------------------+------------------------------------+---------------------------------------------+---------------------------------------+--+
| blog.fens.me                           | 11                                  | 02                                | 00                                 | 222                                         | 20181101                              |
| www.fens.me                            | 11                                  | 02                                | 00                                 | 26                                          | 20181101                              |
| h2w.iask.cn                            | 11                                  | 02                                | 00                                 | 12                                          | 20181101                              |
| www.google.com.hk                      | 11                                  | 02                                | 00                                 | 6                                           | 20181101                              |
| angularjs.cn                           | 11                                  | 02                                | 00                                 | 6                                           | 20181101                              |
+----------------------------------------+-------------------------------------+-----------------------------------+------------------------------------+---------------------------------------------+---------------------------------------+--+

 

标签:数据分析,11,pvs,01,20181101,指标,多维,dw,NULL
来源: https://blog.csdn.net/longyanchen/article/details/98865467