爬取北京链家的租房信息 地址:https://bj.lianjia.com/zufang/rs/
作者:互联网
"""Scrape Beijing rental listings from Lianjia and print them to stdout.

For every result page in ``FIRST_PAGE..LAST_PAGE`` the script downloads the
page, evaluates the XPath expressions in ``FIELD_XPATHS`` against it, and
prints one line per listing: the eight extracted fields, whitespace-stripped
and separated by single spaces.  After the last page it prints the total
number of listings that were printed.

Diagnostics (failed requests, inconsistent column lengths) go to stderr so
the stdout data stream keeps the same shape as before.
"""
import sys
import time

import requests
from bs4 import BeautifulSoup  # noqa: F401  (not used below; kept from the original import list)
from lxml import etree

# @Author : 熊xiaohui
# @Software: PyCharm

FIRST_PAGE = 1
LAST_PAGE = 100  # inclusive

PAGE_URL_TEMPLATE = "https://bj.lianjia.com/zufang/pg{page}/#contentList"

# Browser-like User-Agent sent with every request.
REQUEST_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.70"
    )
}

# Without a timeout, requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10

# Pause after each page so the site is not hit in a tight loop.
PAGE_DELAY_SECONDS = 0.5

_ITEM_PREFIX = '//*[@id="content"]/div/div/div/div'
_DESCRIPTION = '//*[@class="content__list--item--des"]'

# One expression per output column, in print order.  The trailing notes are
# the sample values the original author recorded next to each expression;
# they have not been re-verified against the live page.
FIELD_XPATHS = (
    _ITEM_PREFIX + "/p[1]/a/text()",      # e.g. "whole-flat rental · Fangqunyuan Area 3"
    _ITEM_PREFIX + "/p[2]/a[1]/text()",   # author's note: "2 bedrooms, 1 living room"
    _ITEM_PREFIX + "/p[2]/a[2]/text()",   # author's note: "south/north"
    _ITEM_PREFIX + "/p[2]/a[3]/text()",   # author's note: "Fangqunyuan Area 3"
    _DESCRIPTION + "/text()[5]",          # 5th text node of the description, e.g. "60.00"
    _DESCRIPTION + "/text()[6]",          # 6th text node, e.g. "south-north"
    _DESCRIPTION + "/text()[7]",          # 7th text node, e.g. "2 bed, 1 living, 1 bath"
    # Normalised from a leading '///', which is not valid XPath 1.0 syntax.
    _ITEM_PREFIX + "/span/em/text()",     # e.g. "5800"
)


def _fetch_page(page):
    """Return the HTML text of result page *page*, or None if it could not be fetched.

    Network errors, timeouts, and HTTP 4xx/5xx responses are reported on
    stderr instead of aborting the whole run.
    """
    url = PAGE_URL_TEMPLATE.format(page=page)
    try:
        response = requests.get(
            url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"page {page}: request failed: {exc}", file=sys.stderr)
        return None
    return response.text


def _extract_rows(html, page):
    """Return the listings found in *html* as a list of tuples of stripped strings.

    Each tuple holds one value per entry of ``FIELD_XPATHS``.  *page* is only
    used to label diagnostics.
    """
    # A blank body has no root element to query.
    if not html or not html.strip():
        return []
    tree = etree.HTML(html)
    if tree is None:
        return []

    columns = [tree.xpath(expression) for expression in FIELD_XPATHS]

    # NOTE: every column is queried independently and then zipped, so a
    # listing that lacks one field shifts that column for all later listings
    # on the page, and zip() stops at the shortest column.  Surface that
    # instead of truncating silently.
    lengths = [len(column) for column in columns]
    if len(set(lengths)) > 1:
        print(
            f"page {page}: column lengths differ {lengths}; "
            "rows may be misaligned or truncated",
            file=sys.stderr,
        )

    return [
        tuple(str(value).strip() for value in values)
        for values in zip(*columns)
    ]


def main():
    """Scrape every page, print each listing, then print and return the total count."""
    total = 0
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        html = _fetch_page(page)
        if html is not None:
            for row in _extract_rows(html, page):
                print(*row)
                total += 1
        # Sleep after every page, including failed ones, to keep the request rate steady.
        time.sleep(PAGE_DELAY_SECONDS)
    print(total)
    return total


if __name__ == "__main__":
    main()
标签:content,rs,text,zufang,爬取,radsd,str,strip,div 来源: https://www.cnblogs.com/xxh12/p/16673893.html