The fourth day of Crawler learning
作者:互联网
爬取58同城
from bs4 import BeautifulSoup
import requests
url = "https://qd.58.com/diannao/35200617992782x.shtml"
web_data = requests.get(url)
soup = BeautifulSoup(web_data.text, 'lxml')
title = soup.title.text
cost = soup.select("div#basicinfo span.infocard__container__item__main__text--price")
time = soup.select(".detail-title__info__text:nth-child(1)")
visitor = soup.select("span#totalcount")
area = soup.select("div.infocard__container__item:nth-child(3)>div.infocard__container__item__main")
who = soup.select("div.infocard__container__item:nth-child(4)>div.infocard__container__item__main")
data = {
"title": title,
"cost": cost[0].get_text().strip(),
"time": time[0].get_text().strip(),
"area": list(area[0].stripped_strings),
"who": who[0].get_text().strip(),
"visitor": visitor[0].get_text().strip()
}
print(data)
{'title': '现货400多台液晶电脑,低价出售,保修一年,可送货,李村附近,需要请联系! - 青岛58同城', 'cost': '350 元', 'time': '2018-08-23 发布', 'area': ['李沧', '-', '李村'], 'who': '李先生', 'visitor': '0'}
标签:__,fourth,title,text,soup,item,learning,div,day 来源: https://www.cnblogs.com/moumangtai/p/10821347.html