编程语言
首页 > 编程语言 > python爬虫 python3+selenium+chrome

python爬虫 python3+selenium+chrome

作者:互联网

1、准备

  安装selenium   使用命令安装selenium: pip install selenium

 

  下载浏览器驱动:谷歌浏览器驱动下载地址:http://chromedriver.storage.googleapis.com/index.html

          驱动程序和浏览器的映射关系:https://blog.csdn.net/mcfnhm/article/details/85339414

        将下载后的浏览器驱动程序解压,然后将 chromedriver.exe 复制到 python 安装目录下的 Scripts 文件夹中

 2、设置浏览器无头模式

from  selenium import webdriver
from time import sleep
#无头模式
from selenium.webdriver.chrome.options import Options
#实现回避检测(此方式已弃用)
#from selenium.webdriver import ChromeOptions

# Headless mode: run Chrome without opening a visible window.
chrom_option = Options()
chrom_option.add_argument('--headless')
chrom_option.add_argument('--disable-gpu')

# Anti-detection: exclude the 'enable-automation' switch. The option is set on
# the same Options object; the original note marks the separate ChromeOptions
# approach as deprecated.
chrom_option.add_experimental_option('excludeSwitches', ['enable-automation'])

chrom = webdriver.Chrome(options=chrom_option)
try:
    chrom.get("https://www.baidu.com")
    print(chrom.page_source)
finally:
    # A headless browser has no window to close by hand, so always shut the
    # session down explicitly, even if loading the page fails.
    chrom.quit()

3、动作链示例

from selenium import webdriver
from time import sleep
#导入动作链
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import  By
from selenium.webdriver.chrome.options import Options


url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
chrom = webdriver.Chrome()
try:
    chrom.get(url)
    chrom.maximize_window()

    # The target element lives inside an iframe, so switch into that frame
    # before trying to locate it.
    chrom.switch_to.frame("iframeResult")
    div_ele = chrom.find_element(By.ID, 'draggable')

    action = ActionChains(chrom)
    action.click_and_hold(div_ele)

    for _ in range(5):
        # move_by_offset(x, y) queues a relative mouse move;
        # perform() executes the queued actions immediately.
        action.move_by_offset(17, 0).perform()
        sleep(1)

    # release() only queues the mouse-up; without perform() it is never sent
    # to the browser and the element stays "held".
    action.release().perform()
finally:
    # Close the browser even if locating or dragging the element fails.
    chrom.quit()

 4、读取excel后写入txt

import xlrd
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep



def _login_page_source(chrome_url, name, pwd):
    """Submit one set of credentials in a fresh Chrome session.

    Returns the page source captured after clicking the login button, or
    ``None`` when the login form could not be located or driven. The browser
    is always closed exactly once before returning or raising.
    """
    chrom = webdriver.Chrome()
    try:
        chrom.get(chrome_url)
        chrom.maximize_window()
        sleep(1)
        try:
            name_input_ele = chrom.find_element(By.ID, 'userName')
            pwd_input_ele = chrom.find_element(By.ID, 'password')
            btn = chrom.find_element(By.ID, 'login')
            name_input_ele.send_keys(name)
            pwd_input_ele.send_keys(pwd)
            btn.click()
            sleep(1)
            return chrom.page_source
        except Exception:
            # Best effort: a broken page must not abort the whole run, but
            # unlike a bare ``except`` this still lets Ctrl-C through.
            return None
    finally:
        chrom.quit()


def read_excel(url, chrome_url):
    """Try every account listed in an Excel sheet against a login page.

    Reads the first sheet of the workbook at ``url``. Starting from row
    index 2, column index 5 is used as the user name and column index 6 as
    the password; rows where either value is empty are skipped. Each
    remaining account is submitted to the login form at ``chrome_url``.

    Results are written to ``./reData`` (created if missing):

    * ``error.txt`` - the page reported '用户名或密码错误', or the attempt
      could not be completed (so it is not counted as a success).
    * ``succ.txt`` - every other attempt.

    Args:
        url: Path of the Excel workbook to read.
        chrome_url: Address of the login page to open for each account.
    """
    excel = xlrd.open_workbook(url)
    sheet = excel.sheets()[0]

    txt_path = './reData'
    os.makedirs(txt_path, exist_ok=True)
    error_path = os.path.join(txt_path, 'error.txt')
    succ_path = os.path.join(txt_path, 'succ.txt')

    # ``with`` guarantees both result files are flushed and closed even if a
    # row raises part-way through the sheet.
    with open(error_path, 'w', encoding='utf-8') as fp, \
            open(succ_path, 'w', encoding='utf-8') as fs:
        for row in range(2, sheet.nrows):
            name = sheet.cell_value(row, 5)
            pwd = sheet.cell_value(row, 6)
            if not (len(name) > 0 and len(pwd) > 0):
                # No browser was started for this row, so there is nothing
                # to quit (the original quit an undefined/stale driver here).
                continue

            page_text = _login_page_source(chrome_url, name, pwd)

            # ``in`` also matches at index 0, which ``find(...) > 0`` missed.
            if page_text is None or '用户名或密码错误' in page_text:
                fp.write('%10s—%10s\n' % (name, pwd))
            else:
                fs.write('%10s—%10s\n' % (name, pwd))



# Placeholder entry point: running this file directly does nothing, because
# read_excel() is never called here.
if __name__ == '__main__':
    pass

 

标签:webdriver,name,python,selenium,chrome,import,txt,chrom
来源: https://www.cnblogs.com/GOOGnine/p/15934587.html