# Scrape the stocks held by each fund and store them in the database
# Author: the Internet
#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Author : He who seize the right moment, is the right man
# @Time : 2021/3/5 10:40
import re
import sys
import time
import datetime
import pymysql
from selenium import webdriver
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
# Build the headless Chrome session shared by the rest of the script.
# The flags are applied in the order listed; the notes are the original
# author's stated reasons for each one.
opt = Options()
for _chrome_flag in (
    '--no-sandbox',  # works around the "DevToolsActivePort file doesn't exist" error
    'window-size=1920x3000',  # browser resolution
    '--disable-gpu',  # Google's docs mention this flag is needed to avoid a bug
    '--hide-scrollbars',  # hide scrollbars, for some unusual pages
    'blink-settings=imagesEnabled=false',  # skip image loading to run faster
    '--headless',  # no visible UI; on Linux without a display, startup fails without it
):
    opt.add_argument(_chrome_flag)

# Headless browser instance used by dbconnections().
driver = Chrome(options=opt)
# Alternative with an explicit chromedriver path:
# driver = Chrome(executable_path='/opt/google/chrome/chromedriver',options=opt)
def dbconnections():
    """Refresh the ``holdstock`` column of ``tb_fund_sim`` for every fund code.

    Reads the distinct ``codes`` values from ``tb_fund_sim``, loads each
    fund's page on fund.eastmoney.com with the module-level ``driver``,
    takes the text of the holdings table, converts it to a small HTML
    fragment and writes it back to the row(s) with that code. Each visited
    URL and a final timestamped completion line are appended to ``py.log``.

    Returns:
        None.

    Raises:
        Any exception raised by pymysql or Selenium propagates to the
        caller; the cursor, the connection and the log file are closed
        first.
    """
    conn = pymysql.connect(host='localhost', user='root', password='rkm2018', port=3306, db='stock', charset='utf8')
    try:
        # One log handle for the whole run: the file is closed exactly once,
        # and the completion line can be written even when there are no rows.
        with conn.cursor() as cursor, open('py.log', 'a+') as log:
            cursor.execute("SELECT DISTINCT(codes) from tb_fund_sim;")
            for row in cursor.fetchall():
                # Each row is a 1-tuple holding the fund code.
                code = ''.join(row)
                url = ('http://fund.eastmoney.com/' + code + '.html').replace(' ', '')
                print(url, file=log)
                driver.get(url)
                # find_element('xpath', ...) is accepted by Selenium 3 and 4;
                # the find_element_by_* helpers were removed in Selenium 4.3.
                table_text = driver.find_element(
                    'xpath', '//*[@id="position_shares"]/div[1]/table'
                ).text
                # NOTE(review): the (' ', ' ') replacement is a no-op as
                # written; an HTML entity may have been lost - confirm.
                # Every '-' becomes a green "下跌" tag - presumably to flag
                # negative changes; verify this is intended for all dashes.
                holdings_html = (
                    table_text
                    .replace('\n', '<br/>')
                    .replace(' ', ' ')
                    .replace('-', '<font size=5 color="green">下跌</font>')
                    .replace('相关资讯', '')
                    .replace('股吧', '')
                )
                # Bound parameters: the scraped text is untrusted and may
                # contain quotes or backslashes that would break or inject
                # into a string-built statement.
                cursor.execute(
                    "update tb_fund_sim set holdstock=%s where codes=%s",
                    (holdings_html, code),
                )
                conn.commit()
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+",完成刷新。", file=log)
    finally:
        conn.close()
if __name__ == '__main__':
    # Poll forever. On weekdays between 09:00 and 15:00 (exclusive, local
    # time) refresh the holdings and pause 20 s; otherwise idle for 480 s.
    # The browser is shut down when the loop exits for any reason
    # (exception or Ctrl-C).
    try:
        while True:
            now = datetime.datetime.now()
            today = now.weekday() + 1  # 1 = Monday ... 7 = Sunday
            if today > 5:
                # Weekend: record the skip in the log, then idle.
                with open('py.log', 'a+') as doc:
                    print("今天是星期:"+str(today)+",不进行持有股获取,休眠Python Script.",file=doc)
                time.sleep(480)
                continue
            # Check whether the current time is inside the window.
            if datetime.time(9, 0) < now.time() < datetime.time(15, 0):
                dbconnections()
                time.sleep(20)
            else:
                print(False)
                # Idle outside the window as well, so the loop does not spin
                # at full speed printing False until the window opens.
                time.sleep(480)
    finally:
        driver.quit()
# Tags: opt, -%, doc, database, datetime, scraping, add, time, store
# Source: https://www.cnblogs.com/chenglee/p/14854692.html