爬取中国所有成语
作者:互联网
直接上代码
from requests import *
from lxml import etree
def _fetch_tree(url, params=None, timeout=10):
    """Download *url* and return the page parsed as an lxml HTML tree.

    Args:
        url: Absolute URL to request.
        params: Optional mapping of query-string parameters; passed to
            ``get`` so the values are URL-encoded by the HTTP library.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the crawl forever.

    Returns:
        The root element produced by ``etree.HTML``.

    Raises:
        ValueError: If the response body could not be parsed into a tree.
        Exception: Whatever ``get`` or ``etree.HTML`` raise on failure.
    """
    response = get(url, params=params, timeout=timeout)
    tree = etree.HTML(response.text)
    if tree is None:
        # An empty body can yield no tree at all; fail loudly here instead
        # of with an AttributeError on the caller's first ``.xpath`` call.
        raise ValueError("empty or unparsable HTML document: %s" % url)
    return tree


def main():
    """Crawl chengyucidian.net for idioms, searching one character at a time.

    Every code point from U+4E00 through U+9FA5 is used as a search keyword.
    For each result page the link texts are printed back to back (no
    separator), then each linked detail page is fetched and the text nodes
    of one fixed paragraph are printed as a list.

    A failed request or parse prints "网络异常" and skips only the page
    concerned, so a single error no longer aborts the whole crawl.
    """
    base_url = "https://www.chengyucidian.net"
    result_items = "/html/body/div[4]/div/ul//li/a"
    # Presumably the idiom's explanation paragraph; confirm against the
    # site's current markup.
    detail_text = "/html/body/div[4]/div[2]/div/div[1]/div[2]/p[2]/text()"

    for code_point in range(0x4e00, 0x9fa6):
        keyword = chr(code_point)
        try:
            listing = _fetch_tree(
                base_url + "/search/",
                params={"type": 1, "keyword": keyword},
            )
        except Exception:
            # Best-effort crawl: report the failure and try the next keyword.
            print("网络异常")
            continue

        for title in listing.xpath(result_items + "/text()"):
            print(title, end="")

        for href in listing.xpath(result_items + "/@href"):
            try:
                detail = _fetch_tree(base_url + href)
            except Exception:
                print("网络异常")
                continue
            print(detail.xpath(detail_text))
# Start the crawl only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
标签:__,body,text,所有,爬取,html,div,response,成语 来源: https://blog.csdn.net/zhouhongkai1/article/details/118825348