案例 - 异步爬取网站小说
作者:互联网
小说 :
# Catalog endpoint (lists every chapter of a book):
#   https://dushu.baidu.com/api/pc/getCatalog?data={"book_id":"4306063500"}
# Chapter endpoint (returns the text of a single chapter):
#   https://dushu.baidu.com/api/pc/getChapterContent?data={"book_id":"4306063500","cid":"4306063500|1569782244","need_bookinfo":1}
import asyncio
import json
from urllib.parse import parse_qs, urlsplit

import aiofiles
import aiohttp
import requests


def _safe_filename(title):
    """Return *title* with path separators replaced so it names a single file."""
    return title.replace('/', '_').replace('\\', '_')


def _book_id_from_url(url):
    """Extract ``book_id`` from the JSON carried in the URL's ``data`` query parameter."""
    query = parse_qs(urlsplit(url).query)
    return json.loads(query['data'][0])['book_id']


async def aiodownload(cid, book_id, title, session=None):
    """Download one chapter and write its text to a file named after *title*.

    Args:
        cid: Chapter id as listed in the catalog response.
        book_id: Id of the book the chapter belongs to.
        title: Chapter title; used (with path separators replaced) as the
            output file name in the current working directory.
        session: Optional ``aiohttp.ClientSession`` to reuse. When omitted, a
            temporary session is opened for this single download.
    """
    if session is None:
        # Stand-alone call: open a session just for this chapter.
        async with aiohttp.ClientSession() as own_session:
            return await aiodownload(cid, book_id, title, own_session)

    # The API expects its arguments as a JSON string in the ``data`` parameter.
    data = json.dumps({
        "book_id": book_id,
        "cid": f"{book_id}|{cid}",
        "need_bookinfo": 1,
    })
    url = f'https://dushu.baidu.com/api/pc/getChapterContent?data={data}'

    async with session.get(url) as resp:
        dic = await resp.json()

    async with aiofiles.open(_safe_filename(title), mode='w', encoding='utf-8') as f:
        await f.write(dic['data']['novel']['content'])


async def getCatlog(url, book_id=None):
    """Fetch the chapter catalog at *url* and download every chapter concurrently.

    Args:
        url: Catalog endpoint URL, including its ``data`` query parameter.
        book_id: Id of the book. When omitted it is read from the ``data``
            query parameter of *url*.
    """
    if book_id is None:
        book_id = _book_id_from_url(url)

    # A blocking request is acceptable here: nothing else is scheduled on the
    # event loop until the catalog has been fetched.
    resp = requests.get(url)
    dic = resp.json()
    items = dic['data']['novel']['items']

    # One session is shared by all chapter downloads instead of one per chapter.
    async with aiohttp.ClientSession() as session:
        results = await asyncio.gather(
            *(aiodownload(item['cid'], book_id, item['title'], session) for item in items),
            return_exceptions=True,  # one failed chapter must not abort the others
        )

    for item, result in zip(items, results):
        if isinstance(result, Exception):
            print(f"failed to download {item['title']!r}: {result!r}")


if __name__ == '__main__':
    book_id = '4306063500'
    # Build the catalog URL for the chosen book.
    url = 'https://dushu.baidu.com/api/pc/getCatalog?data={"book_id":"' + book_id + '"}'
    asyncio.run(getCatlog(url, book_id))
标签:异步,cid,url,title,爬取,案例,book,data,id 来源: https://www.cnblogs.com/longly1111/p/16215591.html