首页 > 其他分享> > 3.10爬取网页数据示例（二）

3.10爬取网页数据示例（二）

2019-03-10 14:42:00 作者：互联网

import os
from urllib.parse import urljoin, urlsplit

import bs4
import requests
# Start page of the crawl; every "prev" link is followed from here.
url = 'http://xkcd.com'
# Directory the images are saved into. Raw string: the value is unchanged,
# but '\A' is no longer parsed as an (invalid) escape sequence.
ml = r'F:\ABD'

# Seconds to wait for the server before giving up on a request, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30


def previous_page_url(page_url, href):
    """Return the absolute URL of the previous comic page, or None to stop.

    Args:
        page_url: URL of the page the link was found on.
        href: Value of the "prev" link's ``href`` attribute (may be None).

    Returns:
        The absolute URL to fetch next, or None when there is no link or
        the link ends with '#' (the marker the crawl stops on).
    """
    # Checked before joining: urljoin may drop an empty fragment, which
    # would hide the trailing '#' and keep the loop running forever.
    if not href or href.endswith('#'):
        return None
    return urljoin(page_url, href)


def image_target(directory, image_url):
    """Return the local file path under *directory* for *image_url*.

    Only the path component of the URL is used, so a query string or
    fragment never ends up in the file name.
    """
    return os.path.join(directory, os.path.basename(urlsplit(image_url).path))


def download_comics(start_url, directory):
    """Download the comic image of *start_url* and of every previous page.

    Args:
        start_url: URL of the first page to fetch.
        directory: Directory the images are written to (created if missing).

    Raises:
        requests.HTTPError: If a page or image request returns an error
            status (via ``raise_for_status``).
    """
    os.makedirs(directory, exist_ok=True)
    page_url = start_url
    while page_url and not page_url.endswith('#'):
        print('Download page %s...' % page_url)
        res = requests.get(page_url, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = bs4.BeautifulSoup(res.text, 'html.parser')
        print('Done.')

        comicElem = soup.select('#comic img')
        src = comicElem[0].get('src') if comicElem else None
        if not src:
            print('Could not find comic image.')
        else:
            # The src may be protocol-relative ('//host/path'); resolve it
            # against the page URL so requests gets a full URL with a scheme.
            comicUrl = urljoin(page_url, src)
            print('Downloading image %s...' % (comicUrl))
            res = requests.get(comicUrl, timeout=REQUEST_TIMEOUT)
            res.raise_for_status()
            # The context manager closes the file once, after all chunks
            # have been written, even if a write fails.
            with open(image_target(directory, comicUrl), 'wb') as imageFile:
                for chunk in res.iter_content(100000):
                    imageFile.write(chunk)
            print('Done.')

        # Advance to the previous comic; a missing link ends the crawl
        # instead of raising IndexError.
        prevLinks = soup.select('a[rel="prev"]')
        href = prevLinks[0].get('href') if prevLinks else None
        page_url = previous_page_url(page_url, href)


if __name__ == '__main__':
    download_comics(url, ml)
    print('Done.')

标签：imageFile,get,3.10,res,示例,爬取,url,print,comicUrl
来源： https://www.cnblogs.com/cqkangle/p/10505248.html