赶集网
作者:互联网
import requests
import re
# First page of rental listings for the Suqian area on ganji.com.
base_url = "http://suqian.ganji.com/zufang/pn1/"

# A browser-like User-Agent is sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}

# Compiled once at import time. re.VERBOSE makes the whitespace and the
# trailing "#..." notes inside the pattern insignificant; re.DOTALL lets
# ".+?" span line breaks. The three capture groups are, in order (per the
# inline notes in the pattern): listing title, layout, price.
HOUSE_PATTERN = re.compile(r"""
<div.+?ershoufang-list.+?<a.+?js-title.+?>(.+?)</a> #获取房源标题
.+?<dd.+?dd-item.+?size.+?<span>(.+?)</span> #户型
.+?<div.+?price.+?<span.+?>(.+?)</span>#价格
""", re.VERBOSE | re.DOTALL)


def fetch_page(url, request_headers=None, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        request_headers: Optional mapping of HTTP headers to send.
        timeout: Seconds to wait before giving up, so a stalled server
            cannot hang the script forever.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx response status.
    """
    resp = requests.get(url, headers=request_headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page into
    # an empty result list.
    resp.raise_for_status()
    return resp.text


def parse_houses(html):
    """Extract listings from the page source *html*.

    Returns:
        A list of ``(title, layout, price)`` string tuples, one per match
        of ``HOUSE_PATTERN``; an empty list when nothing matches.
    """
    return HOUSE_PATTERN.findall(html)


def main():
    """Fetch the listing page and print every extracted listing tuple."""
    text = fetch_page(base_url, headers)
    for house in parse_houses(text):
        print(house)


if __name__ == "__main__":
    main()
标签:.+,re,text,resp,headers,537.36,赶集网 来源: https://blog.csdn.net/qq_45232776/article/details/111152136