使用Python爬取中国银行保险监督管理委员会的保险许可证
作者:互联网
import requests
import json
import time
import random
import math
from retry import retry
# Cities to query; each entry is sent as the `address` query parameter.
address_list = [
    '临沂市', '南宁市', '保定市', '廊坊市', '阜阳市', '蚌埠市', '滁州市', '马鞍山市', '芜湖市', '安庆市',
    '六安市', '莆田市', '漳州市', '宁德市', '清远市', '潮州市', '汕头市', '揭阳市', '江门市', '肇庆市',
    '湛江市', '遵义市', '邯郸市', '唐山市', '秦皇岛市', '邢台市', '沧州市', '大庆市', '洛阳市', '新乡市',
    '南阳市', '商丘市', '周口市', '驻马店市', '信阳市', '荆州市', '宜昌市', '衡阳市', '岳阳市', '株洲市',
    '九江市', '赣州市', '上饶市', '宜春市', '连云港市', '宿迁市', '淮安市', '盐城市', '扬州市', '泰州市',
    '镇江市', '淄博市', '威海市', '济宁市', '菏泽市', '咸阳市', '绵阳市', '湖州市', '舟山市', '海口市',
    '三亚市', '桂林市', '柳州市', '呼和浩特市', '银川市', '乌鲁木齐市', '襄阳市', '三明市', '茂名市', '德州市',
]

# Endpoint that every request in this script is POSTed to.
url = r"http://xkz.cbirc.gov.cn/bx/getLicence.do?useState=3"

# Form body sent with each request: `start` is the record offset and `limit`
# the page size (both as strings).
data = {
    "start": "10",
    "limit": "10",
}

# HTTP headers sent with each request.
# NOTE(review): the Cookie carries a hard-coded JSESSIONID that has presumably
# expired -- confirm whether the endpoint needs a fresh session.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",
    "Origin": r"http://xkz.cbirc.gov.cn",
    "Referer": r"http://xkz.cbirc.gov.cn/bx/",
    "Cookie": r"isClick=true; JSESSIONID=0000yAISFDk5epshBvgldsOaQRQ:-1",
    'Connection': 'close',
}

# Query-string parameters; "XXX" is a placeholder that is overwritten with the
# current city before any request is sent.
params = {
    "address": "XXX",
}

# Proxy mapping handed to `requests.post(..., proxies=proxies)`. The request
# functions reference this name, so it must exist at module level; None means
# "connect directly". To route through a proxy use e.g.
# {"http": "http://host:port", "https": "http://host:port"}.
proxies = None
@retry(tries=7)
def get_baoxian(address):
    """Fetch every licence record for one city.

    Sends one request to learn the total record count, derives the number of
    pages from it, then fetches each page with ``get_single_baoxian`` and
    concatenates the results.

    Args:
        address: City name sent as the ``address`` query parameter.

    Returns:
        A list of formatted record strings, in page order.

    Raises:
        Whatever the request, JSON decoding or ``get_single_baoxian`` raises,
        once the ``@retry(tries=7)`` attempts are used up. A retry restarts
        this function from the beginning, so already fetched pages are
        requested again.

    Note:
        Reads the module-level ``url``, ``data``, ``headers``, ``params`` and
        ``proxies``; all of them must be defined at module level.
    """
    # The shared `params` dict is also read by the per-page fetch, so the
    # current city has to be stored in it (not just in a local copy).
    params["address"] = address

    # Page size comes from the configured `limit` instead of a second,
    # hard-coded 10, so the page count stays correct if `limit` changes.
    page_size = int(data["limit"])

    # Ask for the count starting at offset 0 rather than at whatever offset
    # the shared `data` dict happens to hold from an earlier request.
    count_payload = {**data, "start": "0"}
    response = requests.post(
        url=url,
        data=count_payload,
        headers=headers,
        params=params,
        proxies=proxies,
        timeout=1,
    )
    response.encoding = 'utf-8'
    summary = json.loads(response.text)
    page_count = math.ceil(summary["total"] / page_size)

    records = []
    for page_index in range(page_count):
        print(address, "已经开始第", page_index, "页")
        records.extend(get_single_baoxian(page_count, address, page_index))
    return records
@retry(tries=7)
def get_single_baoxian(count, address, i):
    """Fetch one page of licence records for a city and format each record.

    Args:
        count: Total number of pages for the city. Unused here; kept so
            existing callers keep working.
        address: City name; sent as the ``address`` query parameter and
            appended to every formatted record.
        i: Zero-based page index; the request offset is ``i`` times the
            configured page size.

    Returns:
        A list with one formatted string per entry of the response's
        ``datas`` array.

    Raises:
        Whatever the request, JSON decoding or a missing record key raises,
        once the ``@retry(tries=7)`` attempts are used up.

    Note:
        Reads the module-level ``url``, ``data``, ``headers``, ``params`` and
        ``proxies``; all of them must be defined at module level.
    """
    page_size = int(data["limit"])

    # Build per-call copies instead of writing into the shared dicts, so the
    # offset does not leak into later requests and the city always matches
    # the `address` argument.
    payload = {**data, "start": str(i * page_size)}
    query = {**params, "address": address}

    response = requests.post(
        url=url,
        data=payload,
        headers=headers,
        params=query,
        proxies=proxies,
        timeout=1,
    )
    response.encoding = 'utf-8'
    page = json.loads(response.text)

    return [
        "机构编码:{},证件流水号:{},机构名称:{},批准成立日期:{},发证日期:{},城市:{}".format(
            record["certCode"],
            record["flowNo"],
            record["fullName"],
            record["setDate"],
            record["printDate"],
            address,
        )
        for record in page["datas"]
    ]
def save(data):
    """Append each record in *data* as one line to the output text file.

    Args:
        data: Iterable of strings; each is written followed by a newline.

    The file is opened once per call in append mode and always encoded as
    UTF-8, so the Chinese text is written the same way on every platform
    instead of depending on the locale's default encoding.
    """
    lines = list(data)
    if not lines:
        # Nothing to write: avoid creating an empty output file.
        return
    with open("中国银行保险监督管理委员会1.txt", "a", encoding="utf-8") as f:
        for line in lines:
            f.write(line)
            f.write("\n")
# Script driver: scrape the cities one after another, appending each city's
# records to the output file before moving on to the next one.
for address in address_list:
    save(get_baoxian(address))
    print(address, "已完成")
标签:python,baoxian,data,爬取,params,all2,address,import,保险 来源: https://blog.csdn.net/weixin_49328057/article/details/121411243