其他分享
首页 > 其他分享 > 【爬虫】豆瓣电影

【爬虫】豆瓣电影

作者:互联网

# -*- coding:utf-8 -*-
# Filename:test_豆瓣250.py
import requests
import re
import csv


def douban_film():
    """Scrape the Douban Top 250 movie list and save it to ``data.csv``.

    The listing is fetched page by page through the ``start`` query
    parameter (offsets 0, 25, ..., 225). Every regex match on a page becomes
    one CSV row with the captured groups in pattern order: film name, year,
    score and number of ratings. ``data.csv`` in the current working
    directory is overwritten, and ``over!`` is printed when all pages have
    been processed.

    Raises:
        requests.RequestException: If a page cannot be downloaded, the
            request times out, or the server answers with an HTTP error
            status. Rows written before the failure are kept because the
            file is closed by the ``with`` block.
    """
    header = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/101.0.4951.64 Safari/537.36 "
    }
    # Compile the pattern once; it is identical for every page.
    obj = re.compile(r'<li>.*?<div class="item">.*?<span class="title">(?P<filmname>.*?)'
                     r'</span>.*?<p class="">.*?<br>(?P<year>.*?)&nbsp'
                     r'.*?<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
                     r'.*?<span>(?P<num>.*?)人评价</span>', re.S)
    # newline="" is what the csv module expects (otherwise blank lines appear
    # between rows on Windows); an explicit UTF-8 encoding keeps the Chinese
    # titles writable regardless of the platform's default locale encoding.
    with open("data.csv", mode="w", encoding="utf-8", newline="") as f:
        csvwriter = csv.writer(f)
        # Step through all ten offsets; stopping at 200 would drop the last
        # 50 entries of the Top 250 list.
        for i in range(0, 250, 25):
            url = "https://movie.douban.com/top250?start=" + str(i) + "&filter="
            # A timeout prevents a stalled connection from hanging forever.
            res = requests.get(url, headers=header, timeout=10)
            # Fail loudly on 4xx/5xx instead of silently writing no rows.
            res.raise_for_status()
            # Page markup
            page_content = res.text
            for it in obj.finditer(page_content):
                dic = it.groupdict()
                # The year capture is surrounded by whitespace in the HTML.
                dic['year'] = dic['year'].strip()
                csvwriter.writerow(dic.values())
    print("over!")


# Run the scraper only when this file is executed as a script, not on import.
if "__main__" == __name__:
    douban_film()

 

标签:__,douban,电影,爬虫,dic,re,豆瓣,import,csv
来源: https://www.cnblogs.com/hanyr/p/16325180.html