Python爬虫:爬取12306高铁余票
作者:互联网
import requests
import pandas as pd
import arrow
import json
import os
from itertools import combinations
# Switch the working directory at import time; the CSV files produced by
# HighSpeed.to_csv use a relative file name and are therefore written here.
# NOTE(review): hard-coded, machine-specific Windows path -- os.chdir raises
# an OSError if the directory does not exist; adjust before running elsewhere.
os.chdir(r'C:/Users/Windows/Desktop')
class HighSpeed(object):
    """Query the 12306 ticket endpoint for one route/date and append the rows to a CSV.

    Attributes:
        date: Query date string, sent verbatim as the ``queryDate`` parameter
            and used as the prefix of the output file name.
        from_station: Departure station code, sent as ``from_station``.
        to_station: Arrival station code, sent as ``to_station``.
    """

    # Endpoint and request settings shared by every query.
    QUERY_URL = 'https://www.12306.cn/kfzmpt/lcxxcx/query'
    HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/80.0.3970.5 Safari/537.36'}
    # Seconds to wait for the server before giving up instead of hanging forever.
    TIMEOUT = 10

    def __init__(self, date, from_station, to_station):
        self.date = date
        self.from_station = from_station
        self.to_station = to_station

    def _query_params(self):
        """Return the query-string parameters for this route and date.

        The previous implementation sent ``to_station`` as ``from_station``
        and vice versa, so every query ran in the reverse direction; each
        attribute is now mapped to the parameter of the same name.
        """
        return {
            'purpose_codes': 'ADULT',
            'queryDate': self.date,
            'from_station': self.from_station,
            'to_station': self.to_station,
        }

    def get_pd(self):
        """Fetch the raw response body for this query.

        Returns:
            The response text (expected to be JSON) as a string.

        Raises:
            requests.RequestException: On connection failure or timeout.
        """
        response = requests.get(
            self.QUERY_URL,
            params=self._query_params(),
            headers=self.HEADERS,
            timeout=self.TIMEOUT,
        )
        return response.text

    def to_csv(self):
        """Append the query result to ``<date>test.csv`` in the working directory.

        Prints ``empty`` and writes nothing when the response is not valid
        JSON, lacks ``data.datas``, or contains no rows. A ``记录时间`` column
        holding the current local time (``HH:mm``) is added to every row.
        The header row is written only when the file is new or empty, so
        repeated appends do not scatter header lines through the data.

        Raises:
            requests.RequestException: Propagated from :meth:`get_pd`.
            OSError, UnicodeEncodeError: If the CSV file cannot be written
                (these are no longer silently reported as ``empty``).
        """
        body = self.get_pd()
        try:
            payload = json.loads(body)
            rows = payload['data']['datas']
            frame = pd.DataFrame(rows)
        except (KeyError, TypeError, ValueError):
            # Invalid JSON, missing keys, or a payload that is not tabular.
            print('empty')
            return
        if frame.empty:
            print('empty')
            return

        frame['记录时间'] = arrow.now().format('HH:mm')
        out_path = self.date + 'test.csv'
        # Only the first write to a file should emit the column names.
        needs_header = not os.path.exists(out_path) or os.path.getsize(out_path) == 0
        frame.to_csv(out_path, encoding='gbk', mode='a', index=False, header=needs_header)
if __name__ == '__main__':
    # Station codes to crawl; every unordered pair of them is queried once.
    # NOTE(review): 'RQ' is the only two-letter code in the list -- possibly
    # a typo for a three-letter code; confirm against the station table.
    station_codes = ['ICW','JOW','FYW','WZW','NKW','NWW','RQ','FQW','WMW','FZW','CYW','CQW','DYW','MYW','NIW','NCW','MSW','YBW','VJW','RXW']
    station_pairs = list(combinations(station_codes, 2))

    # NOTE: adjust this day range (and the month prefix below) to the dates
    # you want to query.
    for day in range(12, 13):
        query_date = '2020-01-{}'.format(str(day))
        for origin, destination in station_pairs:
            HighSpeed(query_date, origin, destination).to_csv()

    print('finished!')
之前一直没有找到12306的这个接口:通常的查询结果在余票多于20张时只显示“有”,而这个网址居然可以显示完整的余票数量。更良心的是,爬取到的数据本身就是JSON格式,不需要再做清洗,非常方便!
标签:__,余票,python,self,12306,import,date,station,data 来源: https://blog.csdn.net/weixin_44056948/article/details/103943048