数据可视化之二手房数据实战(一)
作者:互联网
1、导包
import pandas as pd
from pyecharts.charts import Bar, Pie, Map, Line, Scatter
from pyecharts import options as opts
from pyecharts.render import make_snapshot
from snapshot_selenium import snapshot
from pyecharts.globals import CurrentConfig
2、读取数据
# Load the second-hand housing listings from the Excel workbook into a DataFrame.
excel_path = "./xlsx/二手房数据.xlsx"
df = pd.read_excel(excel_path)
3、数据清洗
# Preview the first five rows of the data set.
print(df.head())
# Summary statistics as produced by DataFrame.describe().
print(df.describe())
# Number of missing values in each column.
print(df.isnull().sum())
# Replace missing values in the "电梯" (elevator) column with "未知" (unknown).
# The filled column is assigned back instead of calling
# fillna(..., inplace=True) on the selected column: the in-place form returns
# None (so printing it only shows "None") and modifying a column selection in
# place is deprecated in recent pandas and has no effect under copy-on-write.
df["电梯"] = df["电梯"].fillna("未知")
# Check the missing-value counts again after the fill.
print(df.isnull().sum())
4、数据可视化
可视化展示-北京各城区二手房数量地图分布
# Count the listings per district: non-null "小区" entries grouped by "市区".
nums = df.groupby("市区")["小区"].agg("count")
# District names, in group order.
citys = nums.index.tolist()
# Append "区" to every district name.
# NOTE(review): presumably needed so the names match the region names of the
# "北京" map — confirm against the map's region list.
city = [i + "区" for i in citys]
# Listing counts, aligned with `citys`.
xqnum = nums.values.tolist()
# The chart is bound to `district_map` rather than `map` so the builtin
# `map` function is not shadowed for the rest of the script.
district_map = (
    Map(init_opts=opts.InitOpts(bg_color="white"))
    .add("", [list(z) for z in zip(city, xqnum)], "北京")
    .set_global_opts(
        title_opts=opts.TitleOpts(title="北京市二手房区分布"),
        visualmap_opts=opts.VisualMapOpts(is_show=True, max_=3000),
    )
)
# If only the HTML page is wanted, calling
# district_map.render("beiJingMap.html") on its own is enough; the call
# below renders the HTML and additionally saves a PNG snapshot of it.
make_snapshot(snapshot, district_map.render("beiJingMap.html"), "./beiJingMap.png")
可视化展示-北京各城区二手房数量-平均价格柱状图
# Average listing price per district, rounded to two decimals.
prices = df.groupby("市区")["价格(万元)"].agg("mean").round(2)
price = prices.values.tolist()

# Option objects for the combined chart, built up front.
bar_init_opts = opts.InitOpts(bg_color="white")
secondary_price_axis = opts.AxisOpts(is_show=True, max_=900, min_=200, name="价格(万元)")
count_axis = opts.AxisOpts(name="数量")
chart_title = opts.TitleOpts(title="各城区二手房数量-平均价格柱状图")
cross_tooltip = opts.TooltipOpts(is_show=True, trigger="axis", axis_pointer_type="cross")
district_axis = opts.AxisOpts(
    axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow")
)

# Bar series: number of listings per district, plus a second y-axis for price.
bar = Bar(init_opts=bar_init_opts)
bar.add_xaxis(citys)
bar.add_yaxis("数量", xqnum)
bar.extend_axis(yaxis=secondary_price_axis)
bar.set_global_opts(
    yaxis_opts=count_axis,
    title_opts=chart_title,
    tooltip_opts=cross_tooltip,
    xaxis_opts=district_axis,
)

# Line series: average price, attached to the second y-axis (index 1).
line = Line()
line.add_xaxis(citys)
line.add_yaxis("价格", price, z=10, yaxis_index=1)

# Draw the line on top of the bars, then render the HTML and snapshot it to PNG.
bar.overlap(line)
mean_price_html = bar.render("./Mean房价数量.html")
make_snapshot(snapshot, mean_price_html, "./Mean房价数量.png")
可视化展示-二手房价格最高的TOP15
# Sort the listings by price, most expensive first (in place, as before).
df.sort_values(by="价格(万元)", ascending=False, inplace=True)
# Take the 15 most expensive rows first, then format only those prices,
# instead of formatting the whole column and discarding all but 15 values.
top_rows = df.head(15)
# Prices as strings with no decimal places.
top_price = top_rows["价格(万元)"].apply("{0:.0f}".format).tolist()
# Community names for the same 15 rows.
xiaoqu = top_rows["小区"].tolist()
top_bar = (
    Bar(init_opts=opts.InitOpts(bg_color="white"))
    .add_xaxis(xiaoqu)
    # The series holds prices, so it is labelled "价格" (was mislabelled "数量").
    .add_yaxis("价格", top_price)
    .set_global_opts(
        # The x-axis lists community names, so it is named "小区"
        # (was mislabelled "数量").
        xaxis_opts=opts.AxisOpts(name="小区"),
        yaxis_opts=opts.AxisOpts(name="价格(万元)"),
    )
)
# Render the HTML page and save a PNG snapshot of it.
make_snapshot(snapshot, top_bar.render("./TOP15房价最高.html"), "./TOP15房价最高.png")
可视化展示-装修情况(条形图)与有无电梯占比(玫瑰图)
# Bar data: number of listings per decoration status.
zhuangxiu = df.groupby("装修情况")["装修情况"].agg("count")
x_zx = zhuangxiu.index.tolist()
y_num = zhuangxiu.values.tolist()
# Pie data: number of listings per elevator category.
dianti = df.groupby("电梯")["电梯"].agg("count")
# Leave the "未知" (unknown) category out of the pie. It is removed by label
# instead of popping the last element: the positional pop only worked while
# "未知" happened to sort last, and would silently drop a real category
# whenever "未知" is absent from the data.
known_dianti = dianti.drop("未知", errors="ignore")
youdt = known_dianti.index.tolist()
dt_num = known_dianti.values.tolist()
zx_bar = (
    Bar(init_opts=opts.InitOpts(bg_color="white"))
    .add_xaxis(x_zx)
    .add_yaxis("", y_num, category_gap="50%")
    .set_global_opts(
        # "scroll" is a legend type, not an orientation, so it is passed as
        # type_ (the original passed it as orient, which is not a valid value).
        legend_opts=opts.LegendOpts(pos_left='85%', pos_top="63%", type_="scroll"),
        yaxis_opts=opts.AxisOpts(name="装修情况"),
        xaxis_opts=opts.AxisOpts(name="数量"),
    )
    .set_series_opts(label_opts=opts.LabelOpts(position="right"))
    # Swap the axes so the bars run horizontally.
    .reversal_axis()
)
zx_pie = (
    Pie()
    # Rose-type ring placed at 75% / 65% of the canvas.
    .add(
        "",
        [list(z) for z in zip(youdt, dt_num)],
        radius=["8%", "25%"],
        rosetype="radius",
        center=["75%", "65%"],
    )
    # Label format: name, value and percentage.
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{c}\n({d}%)"))
)
# Overlay the pie on the bar chart, then render the HTML and snapshot it to PNG.
zx_bar.overlap(zx_pie)
make_snapshot(snapshot, zx_bar.render("./装修情况.html"), "./装修情况.png")
二手房总价与面积(散点图)
# Price and floor-area columns as plain lists (price on y, area on x).
jg = df["价格(万元)"].tolist()
mj = df["面积(㎡)"].tolist()

# Axis options: the x-axis is declared as a numeric ("value") axis.
scatter_x_axis = opts.AxisOpts(type_="value", name="面积(㎡)")
scatter_y_axis = opts.AxisOpts(name="价格(万元)")

scatter = Scatter(init_opts=opts.InitOpts(bg_color="white"))
scatter.add_xaxis(mj)
scatter.add_yaxis("", jg)
scatter.set_global_opts(xaxis_opts=scatter_x_axis, yaxis_opts=scatter_y_axis)

# Render the HTML page first, then save a PNG snapshot of it.
scatter_html = scatter.render("./散点图.html")
make_snapshot(snapshot, scatter_html, "./散点图.png")
标签:实战,tolist,yaxis,df,add,可视化,snapshot,数据,opts 来源: https://www.cnblogs.com/Perfect6/p/15988944.html