首页 > 其他分享 > 爬取中国所有成语

爬取中国所有成语

2021-07-16 22:02:03 作者：互联网

直接上代码

from requests import *
from lxml import etree
def _fetch_tree(url, timeout=10):
    """Download ``url`` and return it parsed as an lxml HTML tree.

    Args:
        url: Absolute URL of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value of ``etree.HTML(response.text)``, or ``None`` when the
        request or the parse failed. A short message is printed on failure
        so the caller can skip the page instead of aborting the crawl.
    """
    # Imported locally so this helper does not depend on which names
    # ``from requests import *`` happens to export.
    from requests.exceptions import RequestException

    try:
        # Without a timeout a single stalled connection blocks forever.
        response = get(url, timeout=timeout)
    except RequestException:
        print("网络异常")
        return None

    try:
        return etree.HTML(response.text)
    except (ValueError, etree.LxmlError):
        # Not a network problem, so it gets its own message.
        print("解析失败")
        return None


def main():
    """Query chengyucidian.net once per character and print the results.

    For every code point in ``range(0x4e00, 0x9fa6)`` the site's search page
    is requested with that character as the keyword. The text of each result
    link is printed (without separators), then every linked page is fetched
    and the text nodes selected by the detail XPath are printed as a list.

    A failed request or parse only skips the affected page; the remaining
    characters are still processed.
    """
    from urllib.parse import urljoin

    base_url = "https://www.chengyucidian.net"
    search_url = base_url + "/search/?type=1&keyword=%s"
    title_xpath = "/html/body/div[4]/div/ul//li/a/text()"
    href_xpath = "/html/body/div[4]/div/ul//li/a/@href"
    detail_xpath = "/html/body/div[4]/div[2]/div/div[1]/div[2]/p[2]/text()"

    # U+4E00..U+9FA5 inclusive: range() excludes its end point.
    for code_point in range(0x4e00, 0x9fa6):
        listing = _fetch_tree(search_url % chr(code_point))
        if listing is None:
            # Request failed, parse failed, or lxml produced no tree.
            continue

        for title in listing.xpath(title_xpath):
            print(title, end="")

        for href in listing.xpath(href_xpath):
            # urljoin copes with absolute hrefs and hrefs without a leading
            # slash, which plain string concatenation would mangle.
            detail = _fetch_tree(urljoin(base_url, href))
            if detail is None:
                continue
            print(detail.xpath(detail_xpath))
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

标签：__,body,text,所有,爬取,html,div,response,成语
来源： https://blog.csdn.net/zhouhongkai1/article/details/118825348