其他分享
首页 > 其他分享 > 3.10爬取网页数据示例(二)

3.10爬取网页数据示例(二)

作者:互联网

import os
from urllib.parse import urljoin

import bs4
import requests
# Download xkcd comic images into a local folder, starting from the front
# page and following each page's rel="prev" link until the next URL ends
# with '#'.
url = 'http://xkcd.com'
# Raw string: same value as before, but the backslash is no longer parsed as
# the start of an (invalid) escape sequence.
ml = r'F:\ABD'
os.makedirs(ml, exist_ok=True)
while not url.endswith('#'):
    # Fetch and parse the current comic page.
    print('Download page %s...' % url)
    res = requests.get(url)
    res.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    print('Done.')

    # Locate the comic image; a missing element or a missing src attribute
    # are both reported as "not found".
    comicElem = soup.select('#comic img')
    src = comicElem[0].get('src') if comicElem else None
    if not src:
        print('Could not find comic image.')
    else:
        # The src may be relative or protocol-relative (e.g. '//host/x.png');
        # requests.get needs an absolute URL, so resolve it against the page.
        comicUrl = urljoin(url, src)
        print('Downloading image %s...' % (comicUrl))
        res = requests.get(comicUrl)
        res.raise_for_status()
        print('Done.')
        # 'with' guarantees the file is closed even if a write fails.
        with open(os.path.join(ml, os.path.basename(comicUrl)), 'wb') as imageFile:
            for chunk in res.iter_content(100000):
                imageFile.write(chunk)

    # Move on to the previous comic; stop cleanly if the page has no such
    # link instead of failing with IndexError.
    prevLinks = soup.select('a[rel="prev"]')
    if not prevLinks:
        break
    prevLink = prevLinks[0]
    url = 'http://xkcd.com' + prevLink.get('href')
print('Done.')

标签:imageFile,get,3.10,res,示例,爬取,url,print,comicUrl
来源: https://www.cnblogs.com/cqkangle/p/10505248.html