
Web scraping practice should start with scraping girl pics: interest is what drives motivation

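The script below walks through the list pages of www.2meinv.com (pages 1 through 263), pulls the thumbnail image URLs out of each page with an XPath query, and downloads every image into a 美女图片 folder under the current working directory, sleeping 1 to 3 seconds between requests to stay polite to the server.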

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author: YangPC
@file:爱美女网.py
@time:2021/06/08
@QQ:327844761
"""
import requests
import re
from lxml import etree
import os
import urllib.request
import time
import random


def get_html(page):
    # Sleep 1-3 seconds between pages to avoid hammering the server
    time.sleep(random.randint(1, 3))
    print("Crawling images on page {}".format(page).center(100, '-'))
    url = 'https://www.2meinv.com/index-{}.html'.format(page)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0'
    }
    response = requests.get(url=url, headers=headers).content.decode('utf-8')
    # Strip the copyright sign (\xa9) before handing the page to the parser
    html = re.sub('\xa9', '', response)
    root = etree.HTML(html)
    # Absolute XPath to the thumbnail <img> tags on the list page
    aNode = root.xpath('/html/body/div[4]/div[1]/div[1]/div[2]/ul/li/a/img/@src')
    # Save into a "美女图片" folder under the current directory;
    # os.path.join keeps the path portable instead of hard-coding "\\"
    path = os.path.join(os.getcwd(), '美女图片')
    # Create the download directory if it does not exist
    os.makedirs(path, exist_ok=True)

    for index, src in enumerate(aNode):
        print("Downloading image {}: {}".format(index + 1, src))
        filename = os.path.join(path, os.path.basename(src))
        urllib.request.urlretrieve(src, filename=filename)


if __name__ == '__main__':
    # Crawl list pages 1 through 263
    for page in range(1, 264):
        try:
            get_html(page)
        except Exception as e:
            print("Exception occurred: {}".format(e))
            # Retry the page once; skip it if it fails again so the loop keeps going
            try:
                get_html(page)
            except Exception as e:
                print("Retry failed, skipping page {}: {}".format(page, e))
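
One caveat with the listing above: urllib.request.urlretrieve sends urllib's default User-Agent rather than the browser header used for the list pages, so a site that filters on request headers can reject the image downloads even when the page fetch succeeds. Below is a minimal replacement sketch; the helper name download_image, the timeout, and the retry count are my own assumptions, not part of the original post.

def download_image(src, filename, headers, retries=2):
    # Hypothetical helper (not in the original post): fetch one image with the
    # same browser headers used for the list pages, retrying before giving up.
    for attempt in range(retries + 1):
        try:
            resp = requests.get(src, headers=headers, timeout=10)
            resp.raise_for_status()
            with open(filename, 'wb') as f:
                f.write(resp.content)
            return True
        except Exception as e:
            print("Attempt {} failed for {}: {}".format(attempt + 1, src, e))
            time.sleep(1)
    return False

Inside the download loop, download_image(src, filename, headers) would then take the place of the urllib.request.urlretrieve call.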

 

Source: https://blog.csdn.net/qq_19309473/article/details/117703719