爬取 pic
作者:互联网
# Wallpaper scraper: walks the paginated listing on desk.3gbizhi.com, follows
# every wallpaper detail link, and saves the linked image into OUTPUT_DIR.
import sys
import time
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Inclusive range of listing pages (index_1.html .. index_27.html).
FIRST_PAGE = 1
LAST_PAGE = 27
LIST_URL_TEMPLATE = 'https://desk.3gbizhi.com/deskDM/index_{}.html'

# Directory the images are written to (relative to the working directory).
OUTPUT_DIR = Path("cartoonImg")

# Seconds to wait for a server response; without a timeout a single stalled
# connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 10

# Pause after each detail-page request, in seconds. The value is kept from the
# original script; raise it to be gentler on the server.
REQUEST_DELAY = 0.001

# Browser-like User-Agent sent with every request, including image downloads.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/60.0.3100.0 Safari/537.36 "
}


def _warn(message):
    """Write a diagnostic line to stderr so it does not mix with progress output."""
    print(message, file=sys.stderr)


def _fetch(session, url):
    """GET *url* through *session* and return the response.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx status (so error pages are never mistaken for content).
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def _detail_links(session, list_url):
    """Return the de-duplicated detail-page URLs found on one listing page.

    Links are returned in document order; anchors without an ``href`` are
    ignored and relative links are resolved against *list_url*.
    """
    page = BeautifulSoup(_fetch(session, list_url).text, 'html.parser')
    anchors = page.find_all('a', style="display: block;", target="_blank")
    hrefs = (anchor.get('href') for anchor in anchors)
    # dict.fromkeys de-duplicates like a set but keeps a stable order.
    return list(dict.fromkeys(urljoin(list_url, href) for href in hrefs if href))


def _image_url(session, detail_url):
    """Return the image URL linked from a detail page, or None if absent.

    The link is the first ``<a>`` inside ``<div class="morew">``.
    """
    page = BeautifulSoup(_fetch(session, detail_url).text, 'html.parser')
    container = page.find('div', class_="morew")
    link = container.find('a') if container is not None else None
    href = link.get('href') if link is not None else None
    return urljoin(detail_url, href) if href else None


def _save_image(session, src):
    """Download *src* into OUTPUT_DIR, named after the last URL segment.

    Returns:
        True if the file was written, False if *src* has no usable file name
        (for example, it ends with a slash).
    """
    name = src.split("/")[-1]
    if not name:
        return False
    content = _fetch(session, src).content
    with open(OUTPUT_DIR / name, mode="wb") as f:
        f.write(content)
    return True


def main():
    """Crawl every listing page and download each wallpaper it links to.

    A failure on one page or image is reported on stderr and skipped so the
    rest of the crawl still runs.
    """
    # open() does not create missing directories, so make sure it exists.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    num = 1
    # One session reuses connections and applies HEADERS to every request.
    with requests.Session() as session:
        session.headers.update(HEADERS)

        for index in range(FIRST_PAGE, LAST_PAGE + 1):
            list_url = LIST_URL_TEMPLATE.format(index)
            try:
                links = _detail_links(session, list_url)
            except requests.RequestException as exc:
                _warn("skipping listing page {}: {}".format(list_url, exc))
                continue

            for detail_url in links:
                try:
                    src = _image_url(session, detail_url)
                    time.sleep(REQUEST_DELAY)
                    if src is None:
                        _warn("no image link found on {}".format(detail_url))
                        continue
                    if not _save_image(session, src):
                        _warn("no file name in image URL {}".format(src))
                        continue
                except requests.RequestException as exc:
                    _warn("skipping {}: {}".format(detail_url, exc))
                    continue

                print("图片下载中------------------" + str(num))
                num += 1

    print('\n下载完成!')


if __name__ == "__main__":
    main()
标签:index,bs4,pic,BeautifulSoup,爬取,import 来源: https://www.cnblogs.com/ang0/p/16325181.html