# 爬取三国演义的章节和内容
# 作者: 互联网
import requests
from bs4 import BeautifulSoup
# Download every chapter listed on the index page of the book at `url` and
# append "<title>:<content>" for each chapter to ./sanguoyanyi3.txt.

# Sent with every request so it carries a browser User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4315.5 Safari/537.36'
}
url = 'https://www.shicimingju.com/book/sanguoyanyi.html'

# A timeout keeps a stalled connection from hanging the script forever, and
# raise_for_status() stops an HTTP error page from being parsed as the index.
response = requests.get(url=url, headers=headers, timeout=30)
response.raise_for_status()
response.encoding = 'utf-8'  # force UTF-8 decoding so the text is not garbled
page_text = response.text
soup = BeautifulSoup(page_text, 'lxml')

# One <li> per chapter link inside the table-of-contents container.
li_list = soup.select('.book-mulu > ul >li')

# The context manager guarantees the file is flushed and closed even when a
# request or a parse step raises part-way through the download.
with open('./sanguoyanyi3.txt', 'w', encoding='utf-8') as fp:
    for li in li_list:
        if li.a is None:
            # A list item without a link has no chapter to fetch.
            continue

        # `.string` is None when the anchor wraps nested tags; fall back to
        # the concatenated text so the title can still be written.
        title = li.a.string or li.a.get_text()
        detail_url = 'https://www.shicimingju.com' + li.a['href']

        response_detail = requests.get(url=detail_url, headers=headers, timeout=30)
        response_detail.raise_for_status()
        response_detail.encoding = 'utf-8'
        detail_text = response_detail.text
        detail_soup = BeautifulSoup(detail_text, 'lxml')

        content_div = detail_soup.find('div', class_='chapter_content')
        if content_div is None:
            # Report the missing chapter body instead of failing with an
            # AttributeError on None.
            print(title, 'chapter_content not found, skipped:', detail_url)
            continue

        content = content_div.text
        fp.write(title + ':' + content + '\n')
        print(title, '下载完毕!!')
# 标签: 章节,url,text,detail,li,爬取,headers,response,三国演义
# 来源: https://www.cnblogs.com/niucunguo/p/14408090.html