编程语言
首页 > 编程语言 > 【python爬虫】动态图片爬取

【python爬虫】动态图片爬取

作者:互联网

爬取网站上的动态图片

 代码

import os
from re import I
import urllib
import requests
from lxml import html #定位用的lxml 还有其他如beautifulsoup xpath  
import time
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Name of the folder that receives the downloaded files. It is written with a
# leading backslash because the code below appends it directly to a directory
# path (e.g. os.getcwd() + Aname).
Aname = r'\tup'
def CreateFolder(Aname):
    """Create the download folder inside the current working directory.

    Args:
        Aname: Folder name. Leading path separators (slash or backslash)
            are ignored, so the folder is always created relative to the
            current working directory.

    Returns:
        None. Nothing happens if the folder already exists.
    """
    # Build ONE path and use it for both the existence check and the
    # creation. A name that starts with a separator would otherwise be
    # resolved against the drive root on Windows, or be treated as a literal
    # backslash inside the directory name on POSIX.
    target = os.path.join(os.getcwd(), Aname.lstrip('\\/'))
    if not os.path.exists(target):
        print(target)
        print("not exist")
        # exist_ok guards against the folder appearing between check and create.
        os.makedirs(target, exist_ok=True)

def download(url,coverPath):
    """Fetch ``url`` and save the response body to ``coverPath``.

    The file is written only when the server answers with HTTP 200; any
    other status code is skipped without writing anything.

    Args:
        url: Address of the resource to download.
        coverPath: Destination file path; an existing file is overwritten.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the server does not respond within the timeout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11"}
    # NOTE(review): verify=False disables TLS certificate validation. It is
    # kept to preserve existing behaviour, but should be removed if the
    # target site serves a valid certificate.
    # The timeout stops a stalled server from hanging the whole crawl.
    r = requests.get(url, headers=headers, verify=False, timeout=30)
    if r.status_code == 200:
        # The context manager guarantees the file is flushed and closed.
        with open(coverPath, 'wb') as f:
            f.write(r.content)
        print("done")

def get_filename(url_str):
    """Return the file-name part (last path segment) of a URL.

    Any fragment (``#...``) or query string (``?...``) is dropped first, so
    the result can be used as a file name; characters such as ``?`` are not
    allowed in Windows file names.

    Args:
        url_str: URL or slash-separated path.

    Returns:
        The text after the last ``/`` of the path. This is an empty string
        when the path ends with ``/`` or the input is empty.
    """
    # Strip the fragment before the query: a '?' after '#' belongs to the fragment.
    without_fragment = url_str.partition('#')[0]
    path_only = without_fragment.partition('?')[0]
    return path_only.rsplit('/', 1)[-1]

from urllib.parse import urljoin

CreateFolder(Aname)
# Resolve the download directory once, using portable path handling instead
# of hand-written backslashes. makedirs makes sure the directory the files
# are written into really exists.
saveDir = os.path.join(os.getcwd(), Aname.lstrip('\\/'))
os.makedirs(saveDir, exist_ok=True)
for page in range(2,3):
    print('page:   ',page)
    # NOTE(review): the URL does not depend on ``page``, so every iteration
    # fetches the same document. Confirm the site's pagination scheme
    # before widening the range.
    url = "https://www.keaidian.com/biaoqingbao/9875.html"
    # The timeout keeps a stalled server from blocking the script forever.
    response = requests.get(url,verify=False,timeout=30).text
    selector = html.fromstring(response)
    # Each match is an <a> element, not yet a URL.
    imgEle2 = selector.xpath('//li[@class="tx-img"]/a[1]')
    for i in imgEle2:
        hrefs = i.xpath('@href')
        if not hrefs:
            # Anchor without an href: nothing to download.
            continue
        # urljoin handles root-relative, relative and absolute hrefs alike.
        imgUrl = urljoin(url, hrefs[0])
        imgName = get_filename(imgUrl)
        if not imgName:
            # The URL ends with '/', so there is no usable file name.
            continue
        coverPath = os.path.join(saveDir, imgName)
        download(imgUrl,coverPath)

 

0⭮⭯

标签:python,爬虫,coverPath,爬取,url,print,import,os,Aname
来源: https://www.cnblogs.com/shucode/p/15724734.html