Crawler: AJAX POST requests with urllib
Author: 互联网
Goal: download the restaurant (store) data from the KFC official website.

After analysing the page's AJAX request:

1. Request URL: http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname
2. Request method: POST
3. Request parameters (form fields): cname (city name, e.g. 北京), pid (left empty), pageIndex (page number), pageSize (results per page, here 10); how these fields are encoded into the POST body is sketched below.
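To see how these form fields become the bytes that urllib sends as the POST body, here is a minimal sketch; the field values mirror the captured request above, and the variable names (form, body) are only for illustration:

import urllib.parse

# The three form fields identified in the captured request
form = {
    'cname': '北京',   # city name
    'pid': '',          # left empty in the captured request
    'pageIndex': 2,     # which page of results to fetch
    'pageSize': 10,     # number of results per page
}

# urlencode() builds the application/x-www-form-urlencoded string,
# and encode() turns it into the bytes object urllib.request expects
body = urllib.parse.urlencode(form).encode('utf-8')
print(body)  # b'cname=%E5%8C%97%E4%BA%AC&pid=&pageIndex=2&pageSize=10'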
Full code:
# Download KFC official-site store data via the AJAX endpoint
import urllib.parse
import urllib.request

# Conclusions from the analysis:
# 1. Request URL: http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname
# 2. Request method: POST
# 3. Request parameters:
#       cname: 北京
#       pid:
#       pageIndex: 2
#       pageSize: 10

def create_request(page):
    """Build a customized Request object for the given page number."""
    base_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
    data = {
        'cname': '北京',
        'pid': '',
        'pageIndex': page,
        'pageSize': '10',
    }
    # POST data must be URL-encoded and then encoded to bytes
    data = urllib.parse.urlencode(data).encode('utf-8')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
    }
    request = urllib.request.Request(url=base_url, data=data, headers=headers)
    return request

def get_content(request):
    """Send the request and return the decoded response body."""
    response = urllib.request.urlopen(request)
    content = response.read().decode('utf-8')
    return content

def down_load(page, content):
    """Save one page of results to kfc_<page>.json."""
    with open('kfc_' + str(page) + '.json', 'w', encoding='utf-8') as fp:
        fp.write(content)

if __name__ == '__main__':
    start_page = int(input('Enter the starting page number: '))
    end_page = int(input('Enter the ending page number: '))
    for page in range(start_page, end_page + 1):  # +1 so the ending page is included
        # Build the customized request object
        request = create_request(page)
        # Fetch the page source (JSON text)
        content = get_content(request)
        # Save it to disk
        down_load(page, content)
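If you want to work with the data rather than only save the raw text, the response can be parsed with the standard json module. The following is a minimal sketch, assuming the endpoint returns a JSON document; fetch_page is a hypothetical helper that reuses the create_request and get_content functions defined in the script above:

import json

# Hypothetical helper that reuses create_request() and get_content() from above
def fetch_page(page):
    content = get_content(create_request(page))
    try:
        # Parse the JSON text into Python dicts/lists
        return json.loads(content)
    except json.JSONDecodeError:
        # The server may return an error page instead of JSON; report and skip
        print('Page', page, 'did not return valid JSON')
        return None

data = fetch_page(1)
if data is not None:
    # Pretty-print the parsed structure so the field names are easy to read
    print(json.dumps(data, ensure_ascii=False, indent=2))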
Test result: running the script writes one file per requested page (kfc_1.json, kfc_2.json, ...), each containing the raw JSON response for that page.
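A quick way to confirm a download worked is to load one of the saved files back; a small sketch, where kfc_1.json is assumed to be one of the files written by down_load():

import json

# Re-open one of the saved files and parse it
with open('kfc_1.json', encoding='utf-8') as fp:
    saved = json.load(fp)

# The top-level type depends on what the endpoint actually returns
print(type(saved))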
Tags: request, urllib, content, ajax, cname, ashx, post, page
Source: https://www.cnblogs.com/ckfuture/p/16301072.html