Python网络爬虫爬取贴吧话题热议榜单(可自定义条数)
作者:互联网
import pandas as pd
import requests as rq
from bs4 import BeautifulSoup

url = "http://tieba.baidu.com/hottopic/browse/topicList?res_type=1"

# Bounds advertised to the user in the input prompt below.
DEFAULT_COUNT = 10
MAX_COUNT = 30

# Prefix of the message res_caputure() returns when the request fails.
_ERROR_PREFIX = "发生异常"


def res_caputure():
    """Download the page at ``url`` and return its decoded text.

    Returns:
        The response body on success. On a request failure, a message
        string starting with ``_ERROR_PREFIX`` that includes the HTTP
        status code, or ``None`` in its place when no response was
        received at all (e.g. timeout or connection error).
    """
    res = None  # stays None if rq.get() itself raises
    try:
        res = rq.get(url, timeout=30)
        res.raise_for_status()
        # Use the encoding detected from the body rather than the header.
        res.encoding = res.apparent_encoding
        return res.text
    except rq.RequestException:
        status = res.status_code if res is not None else None
        return "发生异常,响应码为{}".format(status)


def _parse_count(raw):
    """Turn the user's raw input into an item count in [1, MAX_COUNT].

    Blank or non-numeric input yields DEFAULT_COUNT.
    """
    raw = raw.strip()
    if not raw:
        return DEFAULT_COUNT
    try:
        wanted = int(raw)
    except ValueError:
        return DEFAULT_COUNT
    return max(1, min(wanted, MAX_COUNT))


def main():
    """Prompt for a count, fetch the hot-topic list and print it as a table."""
    r = res_caputure()
    if r.startswith(_ERROR_PREFIX):
        # The fetch failed; there is no HTML to parse.
        raise SystemExit(r)

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(r, "html.parser")
    links = soup.select('a[target]')
    spans = soup.select('span')

    s = _parse_count(
        input("输入要查看百度贴吧话题热议榜单的条数(直接回车默认为10条,最高为30):")
    )

    # Titles are taken from the <a target=...> elements in order; counts are
    # taken from every second <span> starting at index 3 (same positions the
    # original index loop used).
    titles = [link.string for link in links]
    counts = [span.string for span in spans[3::2]]

    # Never ask for more rows than the page actually provided.
    s = min(s, len(titles), len(counts))
    if s == 0:
        raise SystemExit("未能从页面中解析出榜单数据")

    dt = {
        '排名': range(1, s + 1),
        '标题': titles[:s],
        '内容数': counts[:s],
    }
    df = pd.DataFrame(dt)
    print(df)


if __name__ == "__main__":
    main()
可自定义查看条数(最多30条):输入数字后按回车确认;不输入直接回车则默认显示10条。
演示如下:
标签:__,10,30,自定义,Python,res,热议,soup,import 来源: https://www.cnblogs.com/chargecrystal/p/12540150.html