首页 > 编程语言 > 利用Python实现图片批量下载
利用Python实现图片批量下载

2022-05-24 23:31:37 作者：互联网
import optparse
import os
import sys
import time
from email import header
from urllib import parse

import requests
from lxml import etree

 9 class ImagesDownloader:
10     def __init__(self) -> None:
11         self.url = self.get_params()
12         self.iamges_store = 'images/'
13         self.header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0'}
14 
15     def get_params(self):
16         parser = optparse.OptionParser("Usage: <Program> -u url")
17         parser.add_option('-u', '--url', dest='url', type='string', help='Specify url')
18         options, args = parser.parse_args()
19         if options.url is None:
20             print(parser.usage)
21             sys.exit(0)
22         return options.url
23     
24     def get_webpage(self, url):
25         try:
26             response = requests.get(url=url, headers=self.header).text
27             return response
28         except Exception as e:
29             print(e)
30             sys.exit(0)
31     
32     def download_image(self, url, filename):
33         try:
34             response = requests.get(url=url, headers=self.header).content
35             with open(filename, 'wb') as f:
36                 f.write(response)
37             print("Downloaded image successfully: %s" % filename.split('/')[-1])
38         except Exception as e:
39             print(e)
40             sys.exit(0)
41     
42 
43     def extract_images_links(self, response):
44         images_links = []
45         try:
46             html = etree.HTML(response)
47             res = html.xpath('//img/@src')
48             for each in res:
49                 if each.startswith('//'):
50                     images_links.append('http:'+each)
51                 elif each.startswith('http://') or each.startswith('https://'):
52                     images_links.append(each)
53                 else:
54                     images_links.append(parse.urljoin(self.url, each))
55                 
56             return images_links
57         except Exception as e:
58             print(e)
59             sys.exit(0)
60     
61     def run(self):
62         response = self.get_webpage(self.url)
63         images_links = self.extract_images_links(response)
64         # print(images_links)
65         for link in images_links:
66             print("Download image from : %s" % link)
67             filename = link.split('/')[-1]
68             filepath = self.iamges_store + filename
69             self.download_image(link, filepath)
70             time.sleep(2)
71 
if __name__ == "__main__":
    # Script entry point: construct the downloader (which reads the page URL
    # from the command line) and start the batch download.
    ImagesDownloader().run()
标签：links,批量,Python,self,url,each,images,response,下载
来源： https://www.cnblogs.com/jason-huawen/p/16307656.html