编程语言
首页 > 编程语言 > python批量获取基金定期报告

python批量获取基金定期报告

作者:互联网

import pandas as pd
import numpy as np
import os

import urllib
import requests
from fake_useragent import UserAgent
import json
import time

def getpdfurl(codes, sdate, edate):
    """Query cninfo for the periodic-report announcements of one fund.

    Sends a single search request (first page only, at most 30 results) to
    the cninfo ``hisAnnouncement/query`` endpoint and returns the matching
    announcements as a table.

    NOTE(review): this function reads a module-level mapping ``a`` (fund
    code -> cninfo org id) that is not defined in this block; it must exist
    before the function is called.

    Args:
        codes: Fund code as a string; used as the key into ``a`` and as the
            first half of the ``stock`` request parameter.
        sdate: Start of the announcement date range; anything accepted by
            ``pd.Timestamp``.
        edate: End of the announcement date range; anything accepted by
            ``pd.Timestamp``.

    Returns:
        A DataFrame with the columns ``secName``, ``announcementTitle`` and
        ``adjunctUrl``, one row per announcement. The frame is empty (but
        has those columns) when the query matches nothing.

    Raises:
        KeyError: If ``codes`` is not present in ``a``.
        requests.HTTPError: If the server answers with an error status.
    """
    columns = ['secName', 'announcementTitle', 'adjunctUrl']

    # Normalise both dates to the YYYY-MM-DD form used in the query.
    sdate = pd.Timestamp(sdate).strftime('%Y-%m-%d')
    edate = pd.Timestamp(edate).strftime('%Y-%m-%d')

    # The 'stock' parameter is "<fund code>,<org id>".
    ords = a[codes]
    stocks = codes + ',' + ords

    params = {
        'pageNum': '1',
        'pageSize': '30',
        'column': 'fund',
        'tabName': 'fulltext',
        'plate': '',
        'stock': stocks,
        'searchkey': '',
        'secid': '',
        # Presumably the annual / semi-annual / quarterly fund report
        # categories - TODO confirm against the cninfo site.
        'category': 'category_ndbg_jjgg;category_bndbg_jjgg;category_jdbg_jjgg',
        'trade': '',
        'seDate': '{}~{}'.format(sdate, edate),
        'sortName': '',
        'sortType': '',
        'isHLtitle': 'true',
    }

    url = 'http://www.cninfo.com.cn/new/hisAnnouncement/query'
    # Use a randomly chosen browser User-Agent string for the request.
    headers = {"User-Agent": UserAgent(verify_ssl=False).random}
    # A timeout keeps a stalled connection from hanging the script forever.
    response_comment = requests.post(
        url, params=params, headers=headers, timeout=300
    )
    response_comment.raise_for_status()
    res = json.loads(response_comment.text)

    # 'announcements' can be missing or null when nothing matches; treat
    # both as "no results" instead of crashing on len()/concat.
    announcements = res.get('announcements') or []
    if not announcements:
        return pd.DataFrame(columns=columns)

    # Build the table from the list of records in one step and keep only
    # the fields needed to download the reports.
    allpdf = pd.DataFrame(announcements)[columns].reset_index(drop=True)

    return allpdf

def getFundReportpdf(allpdf, fpath):
    """Download every report listed in *allpdf* and save it as a PDF file.

    For each row the file is fetched from ``static.cninfo.com.cn`` and
    written to ``fpath + <announcementTitle> + '.pdf'``. The function
    pauses two seconds after each download.

    Args:
        allpdf: DataFrame with ``adjunctUrl`` and ``announcementTitle``
            columns.
        fpath: String prepended verbatim to each file name. Include a
            trailing path separator to write into a directory.

    Raises:
        requests.HTTPError: If a download answers with an error status, so
            that an error page is never saved as a ``.pdf``.
    """
    # One random browser User-Agent string is reused for all downloads.
    headers = {"User-Agent": UserAgent(verify_ssl=False).random}

    # Iterate over the column values directly so the frame's index labels
    # do not matter (label lookup by 0..n-1 fails on a filtered frame).
    for url, title in zip(allpdf['adjunctUrl'], allpdf['announcementTitle']):
        urls = 'http://static.cninfo.com.cn/{}#navpanes=0&toolbar=0&statusbar=0&pagemode=thumbs&page=1'.format(url)

        # A path separator inside the title would be read as a directory
        # boundary by open(); replace it so the title stays one file name.
        fname = str(title)
        for sep in (os.sep, os.altsep):
            if sep:
                fname = fname.replace(sep, '_')

        r = requests.get(urls, timeout=300, headers=headers)
        r.raise_for_status()

        # The with-block closes the file; no explicit close() is needed.
        with open(fpath + '{}.pdf'.format(fname), 'wb') as f:
            f.write(r.content)

        # Throttle between downloads.
        time.sleep(2)

标签:批量,headers,python,获取,params,pd,allpdf,import,edate
来源: https://blog.csdn.net/weixin_64577521/article/details/122014657