# 某度图片抓取(代码)
# 作者:互联网
import requests
from urllib.parse import quote
import jsonpath
import json
# Address of the image-search JSON endpoint (without any query string).
url = 'https://image.baidu.com/search/acjson'

# HTTP headers sent with each request; only a browser-style User-Agent is set.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) '
        'AppleWebKit/535.11 (KHTML, like Gecko) '
        'Chrome/17.0.963.56 Safari/535.11'
    ),
}
def page_offset(page_number, per_page=30):
    """Return the ``pn`` value to request for a 1-based page number.

    The original script computed ``pn`` from the total page count, so every
    loop iteration asked for the same window of results. Deriving it from
    the current page number makes each iteration request a different window.

    NOTE(review): assumes ``pn`` is the zero-based index of the first result,
    so page 1 maps to 0 -- confirm against the endpoint's behaviour.
    """
    return (page_number - 1) * per_page


def build_search_url(word, pn, rn=30):
    """Build the search request URL for a keyword and result offset.

    Args:
        word: The raw (unquoted) search keyword; it is percent-encoded here.
        pn: Value of the ``pn`` query parameter (see ``page_offset``).
        rn: Value of the ``rn`` query parameter (results per request).

    Returns:
        The complete URL, including the fixed query parameters the original
        script sent.
    """
    keyword = quote(word)
    query = (
        'tn=resultjson_com&logid=6505551048133465805&ipn=rj&ct=201326592'
        '&is=&fp=result&fr='
        f'&word={keyword}&queryWord={keyword}'
        '&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd='
        # The source text had "&latest=©right=": "&copy" had been decoded as
        # an HTML entity. Restored to the intended "&copyright=" parameter.
        '&latest=&copyright='
        '&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1'
        '&expermode=&nojc=&isAsync='
        f'&pn={pn}&rn={rn}&gsm=78&1641108482235='
    )
    return 'https://image.baidu.com/search/acjson?' + query


def main():
    """Prompt for a keyword and page count, then download the thumbnails.

    Images are written to ``imgs/<k>.jpg`` where ``k`` counts successfully
    saved files from 0. A failed page or image is reported and skipped
    instead of aborting the whole run.

    Raises:
        ValueError: If the page count entered by the user is not an integer.
    """
    # Local import so this block stays self-contained without touching the
    # file-level import list.
    import os

    save_dir = r'imgs'
    # Create the output directory instead of requiring the user to do it.
    os.makedirs(save_dir, exist_ok=True)

    word = input('请输入需要查询的关键词:')
    page = input('请输入需要查询的页数 (默认每页30张图片):')
    page_count = int(page)

    k = 0
    for i in range(1, page_count + 1):
        print(i)
        main_url = build_search_url(word, page_offset(i))
        print(main_url)

        try:
            resp = requests.get(main_url, headers=headers, timeout=10)
            resp.raise_for_status()
            js_p = resp.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f'第{i}页获取失败: {e}')
            continue

        # jsonpath.jsonpath returns False (not an empty list) on no match.
        every_page_urls = jsonpath.jsonpath(js_p, '$..thumbURL') or []

        for img_src in every_page_urls:
            if not img_src:
                continue
            print(img_src)
            try:
                img_resp = requests.get(img_src, headers=headers, timeout=10)
                # Do not save an HTTP error page as if it were an image.
                img_resp.raise_for_status()
                with open(os.path.join(save_dir, f'{k}.jpg'), mode='wb') as f:
                    f.write(img_resp.content)
            except (requests.RequestException, OSError) as e:
                print(f'第{k}张下载失败!!!! {e}')
                continue
            # Count first so the message reports how many files exist now.
            k += 1
            print(f'已经下载了{k}张!!!!正在下载第{i}页的内容!!')


if __name__ == '__main__':
    main()
# 标签:word,img,代码,抓取,page,headers,某度,print,resp 来源: https://blog.csdn.net/ssunshining/article/details/122294291