# Python批量获取基金定期报告
# 作者:互联网
import pandas as pd
import numpy as np
import os
import urllib
import requests
from fake_useragent import UserAgent
import json
import time
def getpdfurl(codes, sdate, edate, org_ids=None):
    """Query cninfo for one fund's report announcements in a date range.

    Sends a single POST to the cninfo ``hisAnnouncement/query`` endpoint with
    ``column='fund'`` and three report categories, then returns the title and
    attachment path of every announcement in the response.

    Args:
        codes: Fund code as a string. It is used as the lookup key into the
            id mapping and as the first half of the ``stock`` parameter.
        sdate: Start of the date range; any value ``pd.Timestamp`` accepts.
        edate: End of the date range; any value ``pd.Timestamp`` accepts.
        org_ids: Optional mapping from fund code to the id string that cninfo
            expects after the comma in ``stock``. When omitted, the
            module-level name ``a`` is used, as the original code did.

    Returns:
        A ``pandas.DataFrame`` with the columns ``secName``,
        ``announcementTitle`` and ``adjunctUrl``, one row per announcement.
        The frame is empty (but keeps those columns) when nothing matches.

    Raises:
        KeyError: If ``codes`` is not present in the id mapping.
        requests.HTTPError: If the server answers with a non-2xx status.
    """
    if org_ids is None:
        # NOTE(review): `a` is not defined anywhere in the visible file --
        # presumably a {fund code: id} dict built by the caller; confirm.
        org_ids = a

    # Normalise both dates to the YYYY-MM-DD form used in 'seDate'.
    sdate = pd.Timestamp(sdate).strftime('%Y-%m-%d')
    edate = pd.Timestamp(edate).strftime('%Y-%m-%d')

    # The 'stock' parameter is "<fund code>,<id looked up for that code>".
    stocks = codes + ',' + org_ids[codes]

    # NOTE(review): only page 1 with pageSize 30 is requested, so presumably
    # at most 30 announcements come back -- confirm whether paging is needed.
    # The category codes presumably stand for annual / semi-annual /
    # quarterly fund reports -- confirm against the cninfo site.
    params = {
        'pageNum': '1',
        'pageSize': '30',
        'column': 'fund',
        'tabName': 'fulltext',
        'plate': '',
        'stock': stocks,
        'searchkey': '',
        'secid': '',
        'category': 'category_ndbg_jjgg;category_bndbg_jjgg;category_jdbg_jjgg',
        'trade': '',
        'seDate': '{}~{}'.format(sdate, edate),
        'sortName': '',
        'sortType': '',
        'isHLtitle': 'true',
    }
    url = 'http://www.cninfo.com.cn/new/hisAnnouncement/query'
    # Random User-Agent string supplied by the fake_useragent package.
    headers = {"User-Agent": UserAgent(verify_ssl=False).random}

    # A timeout keeps a stalled connection from blocking the caller forever.
    response_comment = requests.post(url, params=params, headers=headers,
                                     timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response_comment.raise_for_status()
    res = json.loads(response_comment.text)

    # 'announcements' may be missing, null or empty when nothing matches;
    # treat all three as "no rows" rather than crashing.
    records = res.get('announcements') or []
    wanted = ['secName', 'announcementTitle', 'adjunctUrl']
    # Building the frame from the list of dicts keeps only the wanted columns
    # and yields an empty frame with those columns when there are no records.
    return pd.DataFrame(records, columns=wanted)
def getFundReportpdf(allpdf, fpath):
    """Download each announcement PDF listed in *allpdf* and save it to disk.

    For every row, the file at ``http://static.cninfo.com.cn/<adjunctUrl>`` is
    fetched and written to ``fpath + '<announcementTitle>.pdf'``. The function
    pauses two seconds after each request.

    Args:
        allpdf: ``pandas.DataFrame`` with at least the columns ``adjunctUrl``
            and ``announcementTitle`` (the shape ``getpdfurl`` returns). Any
            index is accepted.
        fpath: String prepended verbatim to every file name. Include a
            trailing path separator to save into a directory.

    Returns:
        None. Files are written as a side effect. Rows whose download returns
        a non-2xx status are skipped with a ``RuntimeWarning``.
    """
    import warnings

    headers = {"User-Agent": UserAgent(verify_ssl=False).random}

    # Replace characters that cannot appear in file names ('/' everywhere,
    # the rest on Windows) so a title such as "A/B" does not break open().
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    # Iterate the columns positionally so frames with a non-default index
    # (e.g. after filtering) work too.
    for url, title in zip(allpdf['adjunctUrl'], allpdf['announcementTitle']):
        urls = 'http://static.cninfo.com.cn/{}#navpanes=0&toolbar=0&statusbar=0&pagemode=thumbs&page=1'.format(url)
        r = requests.get(urls, timeout=300, headers=headers)
        if r.ok:
            fname = str(title).translate(unsafe_chars)
            # The with-statement closes the file; no explicit close() needed.
            with open(fpath + '{}.pdf'.format(fname), 'wb') as f:
                f.write(r.content)
        else:
            # Do not save an HTTP error page under a .pdf name.
            warnings.warn(
                'skipped {!r}: HTTP {} for {}'.format(title, r.status_code, urls),
                RuntimeWarning,
            )
        # Throttle requests to the server.
        time.sleep(2)
# 标签:批量,headers,python,获取,params,pd,allpdf,import,edate 来源: https://blog.csdn.net/weixin_64577521/article/details/122014657