# 数据分析(pandas)---06.分组聚合
# 作者:互联网
import numpy as np
import pandas as pd
# Load the first worksheet of the order-detail workbook.
detail = pd.read_excel('output.xlsx', sheet_name=0)

# Keep the grouping key and the two numeric columns, then group rows by order.
lite_detail = detail[['order_id', 'counts', 'amounts']]
detail_group = lite_detail.groupby('order_id')
print(detail_group['order_id'])

# Data analysis: print the first rows of each per-order statistic.
# The reductions are stored as bound methods and only called inside the loop,
# so each statistic is computed immediately before it is printed.
for label, statistic in (
    ('每个订单的均值:\n', detail_group.mean),
    ('每个订单的标准差:\n', detail_group.std),
    ('每个订单的大小:\n', detail_group.size),
):
    print(label, statistic().head())
# 任务
# task_detail = detail[['dishes_name','amounts']]
# detail_group = task_detail.groupby(by='dishes_name')
# print(detail_group.mean())
# print(detail_group.size().tail())
# 2. agg --- use different aggregation functions for different columns
# (1) Compute several aggregate statistics in a single call.
# a. Every listed aggregation is applied to every column.
lite_detail = detail[['counts', 'amounts']]
# Aggregations are given by name. pandas currently maps np.sum / np.mean /
# np.std onto its own sum / mean / std, and recent versions warn that the
# NumPy callable will be invoked directly in the future (which would also move
# std from pandas' ddof=1 to NumPy's ddof=0). The string names pin the
# behaviour this script has today.
print('菜品销量和售价的的均值:\n', lite_detail.agg(['sum', 'mean']))
# b. Apply specific aggregations to specific columns.
print(
    '菜品销量总和 与 售价的均值:\n',
    lite_detail.agg({'counts': 'sum', 'amounts': ['mean', 'std']}),
)
# c. A user-defined function can be passed as the aggregation.
def double_sum(col):
    """Return ``col`` multiplied by two.

    Despite the name, nothing is summed here: the operand is simply doubled
    with ``*``, so whatever ``col * 2`` means for the given type is what comes
    back (a number for a number, an element-wise doubled object for array-like
    operands that define multiplication that way).

    Args:
        col: The value or column to double.

    Returns:
        The result of ``col * 2``.
    """
    return col * 2
# Show the custom function applied through agg, first to one named column and
# then to both numeric columns.
print('某上市公司为美化数据逼迫技术人员的数据分析操作:\n')
doubled_counts = lite_detail.agg({'counts': double_sum}, axis=0)
print(doubled_counts)
doubled_columns = detail[['counts', 'amounts']].agg(double_sum)
print(doubled_columns)
# apply
# Mean of each column.
print('订单详情表的菜品销量与售价的均值:\n', detail[['counts', 'amounts']].apply(np.mean))
# Mean / standard deviation of each group.
new_detail = detail[['order_id', 'counts', 'amounts']].groupby(by='order_id')
# groupby.apply passes each group as a DataFrame. Calling np.mean / np.std on
# a DataFrame goes through DataFrame.mean/std with axis=None, which newer
# pandas treats as "reduce over both axes" (one scalar per group) instead of
# one value per column. Reducing explicitly per column, on the numeric columns
# only, gives one value per column per group on every pandas version.
group_values = new_detail[['counts', 'amounts']]
print(
    '订单详情表中分组后每组的均值:\n',
    group_values.apply(lambda group: group.mean()).head(),
)
# ddof=0 keeps the population standard deviation that np.std uses by default.
print(
    '订单详情表中分组后每组的均方差:\n',
    group_values.apply(lambda group: group.std(ddof=0)).head(),
)
# transform
def _twice(values):
    """Return the values multiplied by two."""
    return 2 * values


def _min_max_scale(values):
    """Rescale values as (current - minimum) / (maximum - minimum)."""
    lowest = values.min()
    return (values - lowest) / (values.max() - lowest)


doubled_head = detail[['counts', 'amounts']].transform(_twice).head()
print('订单详情表中菜品销量与售价的两倍:\n', doubled_head)

# Min-max normalisation: removes the effect of large versus small units
# (i.e. removes the dimension of the data).
# The original author's debugging note records that the callable receives a
# sequence (one column of values at a time).
scaled_in_group = new_detail[['counts', 'amounts']].transform(_min_max_scale)
print('订单详情表分组后实现组内离差标准化:\n', scaled_in_group)
# 标签:数据分析,06,amounts,detail,np,print,group,counts,pandas 来源: https://blog.csdn.net/weixin_44111377/article/details/96566458