爬取虎牙直播同一时间段的所有直播间信息
作者:互联网
import requests
from openpyxl import Workbook
from bs4 import BeautifulSoup
from tqdm import tqdm
class LiveSpider:
    """Crawl the room listing of every Huya (huya.com) live-stream game
    category and save one Excel workbook per category.

    For each game id the crawler pages through the ``cache.php`` listing
    API and writes one row per live room into ``huya_live_<gameId>.xlsx``.
    """

    # Huya's paged live-list JSON endpoint, shared by all requests below.
    API_URL = "https://www.huya.com/cache.php"

    def super_spider(self):
        """Fetch all categories from https://www.huya.com/g and dump each
        category's rooms (type, room name, room id, nick, intro, viewers)
        to its own ``huya_live_<gameId>.xlsx`` file.

        Performs network I/O and writes files; returns ``None``.
        """
        headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/85.0.4183.102 Safari/537.36",
            "x-requested-with": "XMLHttpRequest",
        }
        # Send the browser headers here too (the original omitted them on
        # this first request only) and bound the wait with a timeout.
        index_response = requests.get("https://www.huya.com/g",
                                      headers=headers, timeout=10)
        soup = BeautifulSoup(index_response.text, "html.parser")
        # NOTE(review): the number of <li> tags on the category page is used
        # as an upper bound for game ids — this assumes ids are contiguous
        # starting at 1; confirm against the live site.
        game_count = len(soup.find_all("li"))
        for game_id in tqdm(range(1, game_count)):
            workbook = Workbook()
            sheet = workbook.active
            sheet.append(["直播类型", "直播房间名", "房间号", "主播昵称", "直播介绍", "人流量"])
            params = {
                "m": "LiveList",
                "do": "getLiveListByPage",
                "gameId": game_id,
                "tagAll": "0",
                "page": "1",
            }
            # First request only to learn how many pages this category has.
            first_page = requests.get(self.API_URL, params=params,
                                      headers=headers, timeout=10)
            total_pages = int(first_page.json()["data"]["totalPage"])
            for page in range(1, total_pages + 1):
                params["page"] = str(page)
                page_response = requests.get(self.API_URL, params=params,
                                             headers=headers, timeout=10)
                # Parse the JSON once per page; the original called
                # .json() again for every single room in the inner loop.
                rooms = page_response.json()["data"]["datas"]
                for live in rooms:
                    row = [live["gameFullName"], live["roomName"],
                           live["uid"], live["nick"],
                           live["introduction"], live["totalCount"]]
                    try:
                        sheet.append(row)
                    except Exception:
                        # openpyxl rejects rows containing illegal
                        # characters/types; skip that row but keep the
                        # crawl going (was a bare ``except: pass``, which
                        # also swallowed KeyboardInterrupt).
                        continue
            workbook.save(f"huya_live_{game_id}.xlsx")
if __name__ == "__main__":
    # Run the crawler only when executed as a script, not on import
    # (the original ran unconditionally and used the confusable name `l`).
    spider = LiveSpider()
    spider.super_spider()
标签:直播间,headers,page,爬取,虎牙,live,params,import,response 来源: https://www.cnblogs.com/liuyuchao/p/14017154.html