首页 > 其他分享 > 某度图片抓取（代码）

某度图片抓取（代码）

2022-01-03 23:03:17 作者：互联网

import requests
from urllib.parse import quote
import jsonpath
import json

# Base endpoint of the Baidu image-search JSON API.
url = 'https://image.baidu.com/search/acjson'

# HTTP headers sent with every request; the User-Agent string mimics a
# desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) '
        'AppleWebKit/535.11 (KHTML, like Gecko) '
        'Chrome/17.0.963.56 Safari/535.11'
    ),
}


if __name__ == '__main__':
    # Script entry point: prompt for a keyword and a page count, query the
    # Baidu image-search JSON endpoint page by page, and save every thumbnail
    # found into `save_dir` as 0.jpg, 1.jpg, ...
    import os  # local import: only the script entry point needs it

    save_dir = r'imgs'
    # Create the output directory up front so a missing folder does not make
    # every single write fail.
    os.makedirs(save_dir, exist_ok=True)

    word = input('请输入需要查询的关键词：')
    page = input('请输入需要查询的页数 (默认每页30张图片)：')

    # URL-encode the keyword so non-ASCII text is safe inside the query string.
    word = quote(word)
    page_count = int(page)

    k = 0  # number of images saved so far; also the next file index
    for i in range(1, page_count + 1):
        print(i)
        # Result offset for the current page (30 results per page). It must
        # depend on the loop index; deriving it from the total page count
        # would request the same 30 results on every iteration.
        # NOTE(review): assumes `pn` is a zero-based result offset — confirm.
        pn = (i - 1) * 30

        pre = r'https://image.baidu.com/search/acjson?'
        back = f'tn=resultjson_com&logid=6505551048133465805&ipn=rj&ct=201326592&is=&fp=result&fr=&word={word}&queryWord={word}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright=&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&expermode=&nojc=&isAsync=&pn={pn}&rn=30&gsm=78&1641108482235='
        main_url = pre + back
        print(main_url)

        try:
            resp = requests.get(main_url, headers=headers, timeout=10)
            js_p = resp.json()
        except (requests.RequestException, ValueError) as e:
            # Network failure or a non-JSON body: skip this page, keep going.
            print(f'第{i}页请求失败: {e}')
            continue

        # jsonpath.jsonpath returns False (not an empty list) when nothing
        # matches, so normalise it before iterating.
        every_page_urls = jsonpath.jsonpath(js_p, '$..thumbURL') or []

        for img_src in every_page_urls:
            if not img_src:
                # Entries without a thumbnail URL cannot be downloaded.
                continue
            print(img_src)
            try:
                # The download itself is inside the try so one bad image does
                # not abort the whole run.
                img_resp = requests.get(img_src, headers=headers, timeout=10)
                img_resp.raise_for_status()
                with open(os.path.join(save_dir, f'{k}.jpg'), mode='wb') as f:
                    f.write(img_resp.content)
            except (requests.RequestException, OSError) as e:
                print(f'第{k}张下载失败!!!! {e}')
                continue
            # Count the image only after it is safely on disk, then report
            # the updated total.
            k += 1
            print(f'已经下载了{k}张!!!!正在下载第{i}页的内容！！')

标签：word,img,代码,抓取,page,headers,某度,print,resp
来源： https://blog.csdn.net/ssunshining/article/details/122294291