# 记录一个Python爬虫实践 (notes on a Python web-scraping exercise)
# 作者: 互联网 (author: reposted from the internet)
import requests
import os
from bs4 import BeautifulSoup
# 保存图片
def downloadImg(image_src):
    """Download one image URL and save it under the 爬虫图片/ directory.

    Args:
        image_src: Absolute URL of the image to fetch; the saved filename
            is the last path component of the URL.
    """
    # Create the output directory on first use; the original crashed with
    # FileNotFoundError if 爬虫图片/ did not already exist.
    os.makedirs("爬虫图片", exist_ok=True)
    temp_file_name = os.path.basename(image_src)
    # Send the same browser headers as the rest of the crawler and bound the
    # wait so a stalled connection cannot hang the whole run.
    response = requests.get(image_src, headers=headers, timeout=30)
    if response.status_code != 200:
        # Skip rather than save an HTML error page as a fake image file.
        print(f"{image_src} failed: HTTP {response.status_code}")
        return
    with open(f"爬虫图片/{temp_file_name}", "wb") as fout:
        fout.write(response.content)
    print(f"{image_src} ok")
# Browser-like User-Agent so the site serves pages instead of rejecting the bot.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
}

domain = "https://pic.netbian.com"


def main():
    """Crawl list pages 2-100 of the 4kmeinv section and download each image.

    The original code initialised ``page = 1`` and incremented it at the top
    of the ``while page < 100`` loop, so pages 2..100 were fetched; that
    behaviour is kept with an equivalent ``range``.
    """
    for page in range(2, 101):
        req = requests.get(
            f"https://pic.netbian.com/4kmeinv/index_{page}.html",
            headers=headers,
            timeout=30,
        )
        req.encoding = "gbk"  # 解决乱码问题: pages are GBK-encoded
        soup = BeautifulSoup(req.text, "html.parser")
        for link in soup.find_all("a"):
            href = link.get("href")
            # Only follow links into picture detail pages.
            if not (href and "/tupian" in href):
                continue
            detail = requests.get(domain + href, headers=headers, timeout=30)
            detail.encoding = "gbk"
            detail_soup = BeautifulSoup(detail.text, "html.parser")
            container = detail_soup.find(id="img")
            if container is None:
                # Layout changed or the request was blocked; the original
                # crashed with AttributeError here.
                continue
            for image_tag in container.find_all("img"):
                src = image_tag.get("src")
                if src and "/uploads" in src:
                    downloadImg(domain + src)


if __name__ == "__main__":
    main()
# 标签: src, Python, image, 爬虫, 实践, 537.36, fout, import, tempfileName
# 来源: https://blog.csdn.net/canyanol/article/details/122821749