【我的Python之路】-记录人生第一只Python爬虫-爬图小蜘蛛~
作者:互联网
import requests
import re
import json
from lxml import etree
# Crawl the ZOL desktop-wallpaper listing pages, collect the album links found
# on them, and save images from each album to a local folder.

listurl = []   # listing-page URLs to visit
listres = []   # album-page URLs gathered from the listing pages

# Seconds to wait on any single HTTP request; without a timeout, requests can
# block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Destination folder. Backslashes are escaped explicitly: the original literal
# relied on invalid escape sequences (\w, \P, \S), which newer Python versions
# warn about. The resulting path string is unchanged.
SAVE_DIR = 'C:\\Users\\wu\\Pictures\\Saved Pictures\\'

# Compiled once, outside the loop. Captures the text assigned to the
# JavaScript variable `deskPicArr` up to the first ';' (re.S lets '.' span
# newlines).
deskPic = re.compile(r'var deskPicArr.*?=(?P<deskPicArr>.*?);', re.S)

# Listing pages 1 and 2 (the range end is exclusive).
for unum in range(1, 3):
    url = f'https://desk.zol.com.cn/pc/{unum}.html'
    listurl.append(url)

# Collect the album links from every listing page.
for ur in listurl:
    furl = requests.get(ur, timeout=REQUEST_TIMEOUT)
    ets = etree.HTML(furl.text)
    result = ets.xpath('//ul[@class="pic-list2 clearfix"]/li/a[@class="pic"]/@href')
    for href in result:
        album_url = 'https://desk.zol.com.cn/' + href
        # Links ending in .exe are not album pages; skip them.
        if album_url.endswith('.exe'):
            continue
        listres.append(album_url)

# Visit each album page and download from its embedded picture list.
for item in listres:
    url = item
    urlstr = requests.get(url, timeout=REQUEST_TIMEOUT)
    urlstrlist = deskPic.search(urlstr.text)
    if urlstrlist is None:
        # Page has no `deskPicArr` assignment; skip it rather than crash
        # the whole crawl with an AttributeError on `.group`.
        continue
    liststr = urlstrlist.group('deskPicArr')
    aaa = json.loads(liststr)
    for pic in aaa['list']:
        oriSize = pic['oriSize']
        # The image URL carries a '##SIZE##' placeholder; substitute the
        # entry's own 'oriSize' value.
        imgsrc = pic['imgsrc'].replace('##SIZE##', oriSize)
        # Use the part after the last '/' as the local file name.
        name = imgsrc.split('/')[-1]
        resp_img = requests.get(imgsrc, timeout=REQUEST_TIMEOUT)
        with open(f'{SAVE_DIR}{name}', mode='wb') as fa:
            fa.write(resp_img.content)
        # NOTE(review): the source's indentation was lost, so the scope of
        # this `break` is an assumption. Placed here it stops after the first
        # image of each album — confirm that this is the intended behaviour.
        break

print('下载完成!')
标签:item,python,get,爬虫,蜘蛛,url,imgsrc,import,requests 来源: https://blog.csdn.net/weixin_40551017/article/details/122475411