selenium+pyquery自动化
作者:互联网
使用 selenium + pyquery 爬取豆瓣 Top250,并将结果保存到数据库中
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
import os
import time
from selenium.webdriver.common.keys import Keys
from pyquery import PyQuery as pq
import pymysql
# Scrape the Douban Top 250 movie list with Selenium + PyQuery and store each
# entry (title, director/cast paragraph, rating text) in a MySQL table.

# Path to the local ChromeDriver binary. A raw string keeps the Windows
# backslashes literal instead of relying on "\c" being an unrecognised escape.
chromedriver = r"E:\chromeDriver\chromedriver.exe"
os.environ["webdriver.chrome.driver"] = chromedriver
start_url = "https://movie.douban.com/top250"

# Parameterised insert; the driver substitutes the %s placeholders from the
# row passed to cursor.execute(), so scraped text is never spliced into SQL.
INSERT_SQL = """INSERT INTO movie(name,
    daoyan, pingjia)
    VALUES (%s, %s,%s)"""


def _parse_movies(html):
    """Return one [name, daoyan, pingjia] row per ".grid_view li" item in *html*."""
    doc = pq(html)
    movies = []
    for item in doc(".grid_view li"):
        entry = pq(item)
        name = entry.find('.info .hd .title').text()
        daoyan = entry.find(".info .bd p").text()
        pingjia = entry.find(".star span").text()
        movies.append([name, daoyan, pingjia])
    return movies


def _save_movies(db, cursor, movies):
    """Insert *movies* row by row, committing each one individually.

    A failing row is rolled back and reported, and the remaining rows are
    still attempted, so one bad record does not discard the whole page.
    """
    for row in movies:
        try:
            cursor.execute(INSERT_SQL, row)
            db.commit()
        except Exception as exc:
            # Keep the original best-effort behaviour, but say what failed
            # instead of swallowing it silently (and let Ctrl-C through).
            db.rollback()
            print("insert failed for %r: %s" % (row[0], exc))


# NOTE(review): passing the driver path positionally is kept from the original;
# newer Selenium releases may require a Service object instead - confirm
# against the installed version.
driver = webdriver.Chrome(chromedriver)
try:
    driver.maximize_window()
    # NOTE(review): credentials are hard-coded; consider moving them to config.
    db = pymysql.connect(host="localhost", user="root", password="rootroot", database="pachong")
    try:
        cursor = db.cursor()
        # Ten requests with start offsets 0, 25, ..., 225.
        for page in range(10):
            url = start_url + '?start=' + str(25 * page) + "&filter="
            driver.get(url)
            # Crude fixed wait, presumably to let the page finish loading.
            time.sleep(2)
            _save_movies(db, cursor, _parse_movies(driver.page_source))
    finally:
        # Always release the database connection, even if scraping fails.
        db.close()
finally:
    # Always shut the browser down so no Chrome/ChromeDriver process leaks.
    driver.quit()
标签:webdriver,pq,pyquery,selenium,db,自动化,import,driver 来源: https://blog.csdn.net/m0_37486489/article/details/116502285