2019年华中杯数学建模B题数据处理代码
作者:互联网
在5月一号晚上,我参加了华中杯数学建模。对于数据处理,我用到了下面代码,希望能帮助到大家。
myDivision.py文件,用于对附件一的数据进行划分:
import pandas as pd #分析excel表格
import matplotlib.pyplot as plt # matplotlib中有很多可用的模块,我们使用pyplot模块
from numpy import *
#读取excel表格并返回日期
def readExcel():
df = pd.read_excel('附件1.xlsx',sheet_name='销量数据')
return set(df['货号']) #按货号分
#return set(df['日期']) # 按日期分
#按货号进行分开
def changeGoods(x):
df2 = pd.read_excel('附件1.xlsx',sheet_name='销量数据')
#按货号分
for i in range(len(x)):
k = df2[df2['货号'] == x[i]]
k.to_csv('huowu//{name}.csv'.format(name = x[i]), sep=',', header=True, index=True)
#按日期进行分开
def changeDay(x):
df2 = pd.read_excel('附件1.xlsx',sheet_name='销量数据')
#按日期分
for i in range(len(x)):
k = df2[df2['日期'] == x[i]]
# my_data = x[i][0:4] + x[i][5:7] + x[i][8:10]
k.to_csv('data//{name}.csv'.format(name = x[i]), sep=',', header=True, index=True)
#把每个日期和货物总量算出来
def summaryAll(x):
sum = 0
df = pd.read_csv(x)
for m in range(len(df['销售件数'])):
sum += df['销售件数'][m]
return sum
#创建表格
def createExcel(day,data):
my_day = []
for i in range(len(day)):
# my_day.append(day[i][0:4] + '/' + day[i][4:6] + '/' +day[i][6:8])
my_day.append(day[i][0:4] + day[i][4:6] + day[i][6:8])
df1 = pd.DataFrame({'日期': my_day ,'总销量': data})
df1.to_excel('每个日期的总销量二.xlsx', sheet_name='每天的总销量', startcol=0, index=False)
def main():
this = readExcel() #读取excel表格并返回日期
my_list = list(this)
#changeDay(my_list) #按日期进行分开
changeGoods(my_list) # 按货号进行分开
my_data = []
for i in range(len(my_list)) :
my_data.append(str(my_list[i][0:4]+my_list[i][5:7]+my_list[i][8:10]))
my_data.sort()
my_data_sum = []
for j in range(len(my_data)):
a = summaryAll('day/{my_day}.csv'.format(my_day = my_data[j]))
my_data_sum.append(a)
print(my_data)
print(my_data_sum)
data = []
for k in range(len(my_data)):
data.append(k)
my_data_sum2 = []
for n in range(len(my_data_sum)):
my_data_sum2.append(math.log(my_data_sum[n]))
print(my_data_sum2)
createExcel(my_data, my_data_sum)
if __name__ == '__main__':
main()
myDivision2.py用于对每月的货号总销量进行汇总:
import pandas as pd #分析excel表格
#读取excel表格并返回产品
def readExcelProdect():
# df = pd.read_excel('附件一.xlsx',sheet_name='销量数据')
df = pd.read_excel('附件1.xlsx', sheet_name='销量数据')
return set(df['货号']) #按货物分
#读取excel表格并返回日期
def readExcelDay():
# df = pd.read_excel('附件一.xlsx',sheet_name='销量数据')
df = pd.read_excel('附件1.xlsx', sheet_name='销量数据')
return set(df['日期']) # 按日期分
#生成汇总表
def createExcel(x,y):
dict = {}
dict[x[0]] = y
for i in range(1,len(x)):
w = []
for j in range(0,len(y)):
w.append('')
dict[x[i]] = w
df = pd.DataFrame(dict)
df.to_excel('汇总.xlsx')
#处理每一种货物的表格
def createData(x,day,goods):
every_month = []
for i in range(0,len(x['月份'])):
every_month.append(str(x['月份'][i]))
month = list(set(every_month))
month.sort()
all_num = []
for i in range(len(month)):
sum = 0
my = x[x['月份'] == int(month[i])]
n = list(my['销售件数'])
for j in range(len(n)):
sum += n[j]
all_num.append(sum)
# print(month)
# print(all_num)
#进行补零
all_month = day
all_xiao_shou = []
#print(day)
n1 = 0
for m in range(len(day)):
if day[m] == month[0]:
n1 = m #每个货物的月份开始日期
n2 = n1 + len(month) - 1 #每个货物的二月份结束日期
for n in range(len(day)):
if n < n1 or n > n2 :
all_xiao_shou.append(0)
else:
all_xiao_shou.append(all_num[n-n1])
# print(day)
print(all_xiao_shou)
def main():
this1 = readExcelProdect() # 读取excel表格并返回货号
my_goods = list(this1)
my_goods_new = []
for i in range(len(my_goods)):
my_goods_new.append(my_goods[i][2:7])
my_goods_new.sort()
this_goods = []
for j in range(len(my_goods_new)):
this_goods.append('SS'+my_goods_new[j])
#print(this_goods)
#print(len(this_goods))
this2 = readExcelDay() # 读取excel表格并返回日期
my_day = list(this2)
my_day_new = []
for i in range(len(my_day)):
this_day = str(my_day[i])
my_day_new.append(this_day[0:6])
my_day_new = list(set(my_day_new))
my_day_new.sort()
# print(my_day_new)
# createExcel(my_day_new, this_goods)
df = pd.read_csv('huowu/SS61146.csv')
createData(df, my_day_new, this_goods)
for j in range(len(this_goods)):
df = pd.read_csv('huowu/{n}.csv'.format(n = this_goods[j]))
# print(this_goods[j])
createData(df,my_day_new,this_goods)
if __name__ == '__main__':
main()
处理延期比数据并生成频率直方图,并把每个区间赋值为1-10以内数据,自动生成txt文件,方便matlib处理:
yanQiBi1.py
import pandas as pd
# 柱形图-折线图
from pyecharts import Bar, Line, Overlap
import numpy as np
import matplotlib.pyplot as plt
#读取按季度分的表格并统计每个区间的个数
def readExcel(x):
yy = []
a1 = [];a11 = [];a12 = [];a13 = []
a2 = [];a3 = [];a4 = [];a5 = [];a6 = []
for i in range(len(x['延期比'])):
# if x['延期比'][i] >= 0 and x['延期比'][i] < 0.01:
# a1.append(x['延期比'][i])
#0-0.01
# if x['延期比'][i] == 0:
# # a11.append(x['延期比'][i])
# a11.append(1)
if x['延期比'][i] > 0 and x['延期比'][i] < 0.001:
# a12.append(x['延期比'][i])
a12.append(1)
elif x['延期比'][i] >= 0.001 and x['延期比'][i] < 0.01:
# a13.append(x['延期比'][i])
a13.append(2)
elif x['延期比'][i] >= 0.01 and x['延期比'][i] < 0.02:
# a2.append(x['延期比'][i])
a2.append(3)
elif x['延期比'][i] >= 0.02 and x['延期比'][i] < 0.1:
# a3.append(x['延期比'][i])
a3.append(4)
elif x['延期比'][i] >= 0.1 and x['延期比'][i] < 0.2:
# a4.append(x['延期比'][i])
a4.append(5)
elif x['延期比'][i] >= 0.2 and x['延期比'][i] < 0.35:
# a5.append(x['延期比'][i])
a5.append(6)
elif x['延期比'][i] >= 0.35 and x['延期比'][i] <= 0.5:
# a6.append(x['延期比'][i])
a6.append(7)
# yy.append(len(a11))
yy.append(len(a12))
yy.append(len(a13))
yy.append(len(a2))
yy.append(len(a3))
yy.append(len(a4))
yy.append(len(a5))
yy.append(len(a6))
return yy
#生成matlab需要数据
def createMatl(myData):
print(myData)
my_list = []
for i in range(myData[0]):
my_list.append(1)
for i in range(myData[1]):
my_list.append(2)
for i in range(myData[2]):
my_list.append(3)
for i in range(myData[3]):
my_list.append(4)
for i in range(myData[4]):
my_list.append(5)
for i in range(myData[5]):
my_list.append(6)
for i in range(myData[6]):
my_list.append(7)
print(my_list)
return my_list
# with open('all_data.txt','w') as f:
# f.write(str(my_list))
def createChart(my_list):
xx = [1,2,3,4,5,6,7]
bar = Bar("柱形图-折线图")
bar.add('bar', xx, my_list)
line = Line()
line.add('line', xx, my_list)
overlap = Overlap()
overlap.add(bar)
overlap.add(line)
overlap.show_config()
overlap.render(path='第二题延期比.html')
def main():
df = pd.read_excel('附件二.xlsx',sheet_name='汇总')
list = readExcel(df)
#createChart(list)
#createMatl(list)
list = createMatl(list)
# 求均值
arr_mean = np.mean(list)
# 求方差
arr_var = np.var(list)
# 求标准差
arr_std = np.std(list)
print(arr_mean)
print(arr_var)
print(arr_std)
# plt.style.use('seaborn-white')
# # 最基本的频次直方图命令
# plt.hist(list)
# plt.show()
if __name__ == '__main__':
main()
yanQiBi2.py
import pandas as pd
# 柱形图-折线图
from pyecharts import Bar, Line, Overlap
#读取按季度分的表格并统计每个区间的个数
def readExcel(x):
yy = []
a1 = [];a2 = [];a3 = [];a4 = [];a5 = []
a6 = [];a7 = [];a8 = [];a9 = [];a10 = []
for i in range(len(x['延期比'])):
if x['延期比'][i] >= 0 and x['延期比'][i] < 0.1:
# a1.append(x['延期比'][i])
a1.append(1)
elif x['延期比'][i] >= 0.1 and x['延期比'][i] < 0.2:
# a2.append(x['延期比'][i])
a2.append(2)
elif x['延期比'][i] >= 0.2 and x['延期比'][i] < 0.3:
#a3.append(x['延期比'][i])
a3.append(3)
elif x['延期比'][i] >= 0.3 and x['延期比'][i] < 0.4:
# a4.append(x['延期比'][i])
a4.append(4)
elif x['延期比'][i] >= 0.4 and x['延期比'][i] < 0.5:
# a5.append(x['延期比'][i])
a5.append(5)
elif x['延期比'][i] >= 0.5 and x['延期比'][i] < 0.6:
# a6.append(x['延期比'][i])
a6.append(6)
elif x['延期比'][i] >= 0.6 and x['延期比'][i] < 0.7:
# a7.append(x['延期比'][i])
elif x['延期比'][i] >= 0.7 and x['延期比'][i] < 0.8:
a8.append(x['延期比'][i])
elif x['延期比'][i] >= 0.8 and x['延期比'][i] < 0.9:
a9.append(x['延期比'][i])
elif x['延期比'][i] >= 0.9 and x['延期比'][i] <= 1:
a10.append(x['延期比'][i])
yy.append(len(a1))
yy.append(len(a2))
yy.append(len(a3))
yy.append(len(a4))
yy.append(len(a5))
yy.append(len(a6))
yy.append(len(a7))
yy.append(len(a8))
yy.append(len(a9))
yy.append(len(a10))
print(yy)
return yy
def createChart(my_list):
xx = ['0-0.01','0.1-0.2','0.2-0.3','0.3-0.4','0.4-0.5',
'0.5-0.6','0.6-0.7','0.7-0.8','0.8-0.9','0.9-1']
bar = Bar("柱形图-折线图")
bar.add('bar', xx, my_list)
line = Line()
line.add('line', xx, my_list)
overlap = Overlap()
overlap.add(bar)
overlap.add(line)
overlap.show_config()
overlap.render(path='第三季度.html')
def main():
df = pd.read_excel('按季度分.xlsx',sheet_name='第三季度')
list = readExcel(df)
createChart(list)
if __name__ == '__main__':
main()
还有上新量的值类似:
import pandas as pd
# 柱形图-折线图
from pyecharts import Bar, Line, Overlap
import numpy as np
import matplotlib.pyplot as plt
#统计每个区间的个数
def readExcel(x):
shang_xin = []
for i in range(len(x['上新日销量'])):
shang_xin.append(x['上新日销量'][i])
shang_xin.sort()
jian_ge = (max(shang_xin) - min(shang_xin))/10
yy = []
a1 = [];a2 = [];a3 = [];a4 = []
a5 = [];a6 = [];a7 = [];a8 = []
for i in range(len(x['上新日销量'])):
if x['上新日销量'][i] >= shang_xin[0] and x['上新日销量'][i] < shang_xin[0] + jian_ge/5:
a1.append(x['上新日销量'][i])
elif x['上新日销量'][i] >= shang_xin[0] + jian_ge/5 and x['上新日销量'][i] < shang_xin[0] + jian_ge/5*2:
a2.append(x['上新日销量'][i])
elif x['上新日销量'][i] >= shang_xin[0] + jian_ge/5*2 and x['上新日销量'][i] < shang_xin[0] + jian_ge/5*3:
a3.append(x['上新日销量'][i])
elif x['上新日销量'][i] >= shang_xin[0] + jian_ge/5*3 and x['上新日销量'][i] < shang_xin[0] + jian_ge:
a4.append(x['上新日销量'][i])
elif x['上新日销量'][i] >= shang_xin[0] + jian_ge and x['上新日销量'][i] < shang_xin[0] + jian_ge*2/3*2:
a5.append(x['上新日销量'][i])
elif x['上新日销量'][i] >= shang_xin[0] + jian_ge*2/3*2 and x['上新日销量'][i] < shang_xin[0] + jian_ge*2:
a6.append(x['上新日销量'][i])
elif x['上新日销量'][i] >= shang_xin[0] + jian_ge*2 and x['上新日销量'][i] < shang_xin[0] + jian_ge*3:
a7.append(x['上新日销量'][i])
elif x['上新日销量'][i] >= shang_xin[0] + jian_ge*3 and x['上新日销量'][i] <= shang_xin[0] + jian_ge*10:
a8.append(x['上新日销量'][i])
yy.append(len(a1))
yy.append(len(a2))
yy.append(len(a3))
yy.append(len(a4))
yy.append(len(a5))
yy.append(len(a6))
yy.append(len(a7))
yy.append(len(a8))
return yy
#生成matlab需要数据
def createMatl(myData):
print(myData)
my_list = []
for i in range(myData[0]):
my_list.append(1)
for i in range(myData[1]):
my_list.append(2)
for i in range(myData[2]):
my_list.append(3)
for i in range(myData[3]):
my_list.append(4)
for i in range(myData[4]):
my_list.append(5)
for i in range(myData[5]):
my_list.append(6)
for i in range(myData[6]):
my_list.append(7)
for i in range(myData[7]):
my_list.append(8)
#print(my_list)
with open('第二题上新量.txt','w') as f:
f.write(str(my_list))
return my_list
#生成图表
def createChart(my_list):
xx = ['294-806','806-1319','1319-1832','1832-2857',
'2857-3712','3712-5421','5421-7984','7984-25930']
bar = Bar("柱形图-折线图")
bar.add('bar', xx, my_list)
line = Line()
line.add('line', xx, my_list)
overlap = Overlap()
overlap.add(bar)
overlap.add(line)
overlap.show_config()
overlap.render(path='第二题上新量未赋值区间.html')
def main():
df = pd.read_excel('附件二.xlsx',sheet_name='汇总')
list = readExcel(df) #统计每个区间的个数
#createChart(list) #生成图表
#createMatl(list)
list = createMatl(list)
# 求均值
arr_mean = np.mean(list)
# 求方差
arr_var = np.var(list)
# 求标准差
arr_std = np.std(list)
print(arr_mean)
print(arr_var)
print(arr_std)
# plt.style.use('seaborn-white')
# # 最基本的频次直方图命令
# plt.hist(list)
# plt.show()
if __name__ == '__main__':
main()
其他的数据类似,祝福大家有参加建模的同学能够马到成功,如果有需求的话,请联系qq:1657264184。题目的话,在建模完后会上传到资源下载区,有想研究的可以下载。
标签:建模,len,range,2019,数据处理,my,day,append,延期 来源: https://blog.csdn.net/ITxiaoangzai/article/details/89789496