python爬虫 python3+selenium+chrome
作者:互联网
1、准备
安装selenium 使用命令安装selenium: pip install selenium
下载浏览器驱动:谷歌浏览器驱动下载地址:http://chromedriver.storage.googleapis.com/index.html
驱动程序和浏览器的映射关系:https://blog.csdn.net/mcfnhm/article/details/85339414
将下载后的浏览器驱动程序解压,然后将 chromedriver.exe 复制到 Python 安装目录下的 Scripts 文件夹中
2、设置浏览器无头模式
# Fetch a page with Chrome running in headless mode and print its HTML.
from selenium import webdriver
from time import sleep
# Options carries the Chrome command-line switches (headless mode etc.).
from selenium.webdriver.chrome.options import Options
# Automation-detection evasion used to go through a separate ChromeOptions
# object (that approach is deprecated):
#from selenium.webdriver import ChromeOptions

# Headless: run Chrome without opening a visible window.
chrom_option = Options()
chrom_option.add_argument('--headless')
chrom_option.add_argument('--disable-gpu')
# Detection evasion: the deprecated form created `option = ChromeOptions()`;
# the switch is now set on the same Options object as the headless flags.
chrom_option.add_experimental_option('excludeSwitches', ['enable-automation'])

chrom = webdriver.Chrome(options=chrom_option)
try:
    chrom.get("https://www.baidu.com")
    print(chrom.page_source)
finally:
    # Always shut the browser down; otherwise the headless Chrome and
    # chromedriver processes stay alive after the script ends.
    chrom.quit()
3、动作链示例
# Drag an element horizontally in small steps using a Selenium action chain.
from selenium import webdriver
from time import sleep
# ActionChains queues low-level mouse/keyboard actions.
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
chrom = webdriver.Chrome()
try:
    chrom.get(url)
    chrom.maximize_window()
    # The target element lives inside an <iframe>, so the driver has to
    # switch into that frame before the element can be located.
    chrom.switch_to.frame("iframeResult")
    div_ele = chrom.find_element(By.ID, 'draggable')

    action = ActionChains(chrom)
    # Queued only; it is sent to the browser by the first perform() below.
    action.click_and_hold(div_ele)
    for _ in range(5):
        # move_by_offset(x, y) queues a relative move;
        # perform() executes the queued actions immediately.
        action.move_by_offset(17, 0).perform()
        sleep(1)
    # release() on its own is only queued -- it must be performed, otherwise
    # the mouse button is never actually released.
    action.release().perform()
finally:
    # Close the browser even if locating or dragging the element failed.
    chrom.quit()
4、读取excel后写入txt
import xlrd
import os
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from time import sleep


def read_excel(url, chrome_url):
    """Try the logins listed in a spreadsheet and record the outcomes.

    For every row of the first sheet (starting at row index 2) the values at
    column index 5 (user name) and column index 6 (password) are typed into
    the login form at ``chrome_url``. Pairs for which the resulting page
    contains the "wrong user name or password" message are written to
    ``./reData/error.txt``; all other completed attempts are written to
    ``./reData/succ.txt``. Both files are overwritten on each call.

    Args:
        url: Path of the Excel workbook to read.
        chrome_url: Address of the login page to open in Chrome.
    """
    # Load the workbook and take its first sheet.
    excel = xlrd.open_workbook(url)
    sheet = excel.sheets()[0]

    txt_path = './reData'
    os.makedirs(txt_path, exist_ok=True)

    error_file = os.path.join(txt_path, 'error.txt')
    succ_file = os.path.join(txt_path, 'succ.txt')
    # `with` guarantees both result files are flushed and closed even when a
    # row raises part-way through the sheet.
    with open(error_file, 'w', encoding='utf-8') as fp, \
            open(succ_file, 'w', encoding='utf-8') as fs:
        for row in range(2, sheet.nrows):
            name = sheet.cell_value(row, 5)
            pwd = sheet.cell_value(row, 6)
            # Truthiness instead of len(): skips empty cells without raising
            # TypeError when a cell holds a number rather than text.
            if not name or not pwd:
                continue

            # A fresh browser per row so every attempt starts from a clean
            # session.
            chrom = webdriver.Chrome()
            try:
                chrom.get(chrome_url)
                chrom.maximize_window()
                sleep(1)
                try:
                    name_input_ele = chrom.find_element(By.ID, 'userName')
                    pwd_input_ele = chrom.find_element(By.ID, 'password')
                    btn = chrom.find_element(By.ID, 'login')
                    name_input_ele.send_keys(name)
                    pwd_input_ele.send_keys(pwd)
                    btn.click()
                    sleep(1)
                    page_text = chrom.page_source
                except WebDriverException as exc:
                    # The attempt could not be carried out (missing element,
                    # dead session, ...). Its outcome is unknown, so it must
                    # not be recorded as a successful login.
                    print('login attempt for %s could not be completed: %s'
                          % (name, exc))
                    continue
            finally:
                # Runs on success, on `continue` and on errors alike, so the
                # browser is closed exactly once per row.
                chrom.quit()

            if '用户名或密码错误' in page_text:
                fp.write('%10s—%10s\n' % (name, pwd))
            else:
                fs.write('%10s—%10s\n' % (name, pwd))


if __name__ == '__main__':
    pass
标签:webdriver,name,python,selenium,chrome,import,txt,chrom 来源: https://www.cnblogs.com/GOOGnine/p/15934587.html