import requests
# Send a browser-like User-Agent instead of the python-requests default.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'}
# requests applies no timeout unless one is given, so bound the wait
# explicitly; otherwise a stalled server would hang the script indefinitely.
response = requests.get('https://quanxiaoshuo.com/177913/', headers=headers, timeout=10)
# Fail loudly on a 4xx/5xx answer rather than parsing an error page.
response.raise_for_status()
from bs4 import BeautifulSoup
# Parse the raw bytes so that from_encoding actually takes effect:
# BeautifulSoup ignores from_encoding (and warns) when it is handed an
# already-decoded str such as response.text.
# Either 'html.parser' or 'lxml' can be used as the parser here.
soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')
# Build one {'volumn': <text>} record per element with class "volumn" that
# contains a <b> tag; elements without a <b> tag are left out.
volume_headings = (node.find('b') for node in soup.find_all(class_="volumn"))
title = [
    {'volumn': heading.string}
    for heading in volume_headings
    if heading is not None
]
# Collect the "title" attribute of the first <a> tag inside every element
# with class "chapter".
chapters = []
for chapter in soup.find_all(class_='chapter'):
    a = chapter.find('a')
    if a is None:
        # find() returns None when the element holds no link; calling
        # a.get() would then raise AttributeError, so skip the element.
        continue
    chapter_title = a.get('title')
    chapters.append({'chapter_title': chapter_title})
import json
# Write UTF-8 explicitly instead of relying on the platform's default
# encoding, and keep non-ASCII titles readable in the files rather than
# having json escape every character as \uXXXX.
with open('xylz_title.json', 'w', encoding='utf-8') as fp:
    json.dump(title, fp=fp, indent=4, ensure_ascii=False)
with open('xylz_chapters.json', 'w', encoding='utf-8') as fp:
    json.dump(chapters, fp=fp, indent=4, ensure_ascii=False)
# Source: https://blog.csdn.net/zcs2312852665/article/details/120893410