python爬虫: 抓取KK平台所有美女主播的照片
作者: 互联网
该网站需要滚动到页面底部才会加载出主播信息, 所以需要使用 selenium 库, 并借助 Chrome 浏览器
#coding=utf-8
import sys
import io
# Re-wrap stdout as UTF-8 so Chinese streamer names print correctly on
# consoles whose default encoding is not UTF-8 (e.g. Windows GBK consoles).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
import requests
from bs4 import BeautifulSoup
import os
from selenium import webdriver
import time
# Directory where downloaded cover images are saved (created by main()).
base_dir = "D:/python/src/download/"
def downloadSource(url, i):
    """Stream the image at *url* to disk as base_dir/<i>.png.

    Args:
        url: direct URL of an image file.
        i: sequence number used as the file-name stem.

    Raises:
        requests.RequestException: on network failure, timeout, or HTTP
            error status (the caller catches and reports per-item errors).
    """
    from contextlib import closing
    # timeout= keeps one stalled server from hanging the whole run forever.
    with closing(requests.get(url, stream=True, timeout=30)) as resp, \
            open(base_dir + str(i) + ".png", 'wb') as f:
        # Fail loudly instead of saving an HTML error page as a ".png".
        resp.raise_for_status()
        # 64 KiB chunks: the original 128-byte chunks meant thousands of
        # needless iterations per image.
        for chunk in resp.iter_content(64 * 1024):
            f.write(chunk)
def parseHtml(bsoup):
    """Find every streamer entry on the parsed listing page and download
    each one's cover image into base_dir.

    Args:
        bsoup: BeautifulSoup document of the fully-scrolled listing page.
    """
    try:
        usr_container = bsoup.find("div", class_="main_content live-list")
        all_user = usr_container.find_all("a")
    except AttributeError:
        # find() returned None: the page layout changed or never loaded.
        print("parse failed")
        return
    print(len(all_user))
    i = 0
    for item in all_user:
        # The per-item parsing lives inside the try so one malformed entry
        # is reported and skipped instead of aborting the whole loop
        # (originally the find() calls sat outside the inner try).
        try:
            user_name = item.find("span", class_="actor_name").string
            user_info = item.find("div", class_="video-cover")
            # Strip the "!<size>" suffix to request the full-resolution image.
            cover = user_info.img['src'].split("!")[0]
            print("downloading {0}:{1}".format(user_name, cover))
            downloadSource(cover, i)
            # Only successful downloads advance the counter, so saved file
            # numbers stay contiguous (matches the original behavior).
            i = i + 1
            print("download finish")
        except Exception as e:
            # Narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt); include the cause instead of hiding it.
            print("download error: {0}".format(e))
def get_content(driver, url):
    """Load *url*, scroll to the bottom repeatedly so the lazily-loaded
    streamer list is fully rendered, then return the parsed page.

    Args:
        driver: a selenium WebDriver instance; it is quit before returning,
            so it cannot be reused afterwards.
        url: listing page to load.

    Returns:
        BeautifulSoup document of the fully rendered page.
    """
    try:
        driver.get(url)
        # 29 scrolls, one second apart, to trigger the infinite-scroll
        # loading (same iteration count as the original range(1, 30)).
        for _ in range(29):
            time.sleep(1)
            driver.execute_script('window.scrollTo(0,1000000)')
        # page_source is already str; the original .encode('utf-8') only
        # forced BeautifulSoup to re-decode it.
        content = driver.page_source
    finally:
        # quit() (not close()) also terminates the chromedriver process,
        # which the original leaked on every run and on any exception.
        driver.quit()
    return BeautifulSoup(content, 'lxml')
def main():
    """Create the download directory, launch Chrome, render the listing
    page, and download every streamer's cover image."""
    listing_url = "http://www.kktv1.com/list/all/1/0"
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)
    browser = webdriver.Chrome()
    listing_page = get_content(browser, listing_url)
    parseHtml(listing_page)
if __name__ == "__main__":
    # Run the scraper only when executed as a script, not on import
    # (the scrape had stripped the guard body's indentation).
    main()
    print("Exit!!")
结果:
标签:女主播,driver,content,python,url,KK,user,print,import 来源: https://blog.csdn.net/xuyinxin/article/details/91306607