Dianping (大众点评) Crawler
Author: Internet
import requests
from lxml import etree
import csv

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    # Raw cookie string copied from a logged-in browser session, sent as a
    # plain request header; Dianping generally will not return real shop data without it.
    'Cookie': 'fspop=test; cy=70; cye=changchun; __guid=169583271.1176092058052156700.1618064807707.5415; _lxsdk_cuid=178bc2d991bc8-06f82d2a1ad0c8-3e604809-1fa400-178bc2d991ec8; _lxsdk=178bc2d991bc8-06f82d2a1ad0c8-3e604809-1fa400-178bc2d991ec8; _hc.v=8e6ff184-ecf9-beda-8556-f21cac38d549.1618064809; s_ViewType=10; ctu=a39fa7b43d5011077a7a6a13b07f7eab2586a77330045fd09fb1ec9fcd4ecbef; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1618064809,1618108223; dplet=18e634c44bc3d5ff4dc4d2377c0348ce; dper=e18ff3f28e86ce6d07b46b29a43464b7a9697e4b309dc739fb65478c72a0a4e1ac4eeb1e4858e57828c84156f0e7221b89ce58e7174f2e6bf336e124ae5c277bbd1b72b6716c024fccd8bbd09c27536eb08f23c8a6e50a5b20884368c4b64588; ll=7fd06e815b796be3df069dec7836c3df; ua=dpuser_7353802477; monitor_count=40; _lxsdk_s=178bec40f52-ba4-7b6-bfe%7C%7C173; Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1618111329'
}

# Mapping from Dianping's obfuscated number glyphs (private-use code points)
# to real digits. As the original note says, the mapping changes over time,
# so look up the current values yourself.
num_map = {
    '\ue36f': '9',
    '\uea1a': '8',
    '\ue13b': '7',
    '\uf680': '6',
    '\uea7a': '5',
    '\uf6db': '4',
    '\ue1a7': '3',
    '\ueef3': '2',
    '\ue9cd': '0',
}


def gethtml(url):
    # Fetch a listing page and return its HTML as UTF-8 text.
    r = requests.get(url=url, headers=headers)
    r.encoding = 'utf-8'
    return r.text


def shiftnumber(num_list):
    # Join the extracted text nodes, replacing obfuscated code points with
    # real digits; anything not in the mapping is kept unchanged.
    count = ''
    for num in num_list:
        count += num_map.get(num, num)
    return count


for i in range(1, 5):
    print('Scraping page {}'.format(i))
    url = 'http://www.dianping.com/changchun/ch10/g110p{}'.format(i)
    html = gethtml(url)
    tree = etree.HTML(html)
    li_list = tree.xpath('//*[@id="shop-all-list"]/ul/li')
    for li in li_list:
        name = li.xpath('.//div[@class="tit"]/a/h4/text()')[0]
        total_score = li.xpath('./div[2]/div[2]/div/div[2]/text()')[0]
        evaluation_num = shiftnumber(li.xpath('./div[2]/div[2]/a[1]/b//text()'))
        per_capita = shiftnumber(li.xpath('./div[2]/div[2]/a[2]/b//text()'))
        taste_score = shiftnumber(li.xpath('./div[2]/span/span[1]/b//text()'))
        environment_score = shiftnumber(li.xpath('./div[2]/span/span[2]/b//text()'))
        service_score = shiftnumber(li.xpath('./div[2]/span/span[3]/b//text()'))
        print('Saving...')
        # newline='' prevents blank lines between rows on Windows.
        with open('长春火锅店.csv', 'a', encoding='utf-8', newline='') as csvfile:
            writer = csv.writer(csvfile)
            # writer.writerow(['火锅店名称', '总评分', '评价人数', '人均消费', '口味', '环境', '服务'])
            writer.writerow([name, total_score, evaluation_num, per_capita,
                             taste_score, environment_score, service_score])
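The trickiest part to maintain is the num_map table, since the obfuscated code points change whenever Dianping swaps its number font. A minimal sketch of one way to rebuild it, assuming the digits come from a custom webfont referenced in the page's CSS (the original post only says to look the mapping up yourself; the local file name dianping_num.woff is hypothetical), using fontTools to list every code point in the font next to its glyph name:

from fontTools.ttLib import TTFont

# Hypothetical local copy of the digit webfont saved from the page's CSS.
font = TTFont('dianping_num.woff')
# The cmap table maps Unicode code points to glyph names inside the font.
cmap = font['cmap'].getBestCmap()
for codepoint, glyph_name in sorted(cmap.items()):
    print(hex(codepoint), repr(chr(codepoint)), glyph_name)

If the glyph names do not reveal the digits directly, the font still has to be opened in a font viewer and each glyph read off by eye, which is what the original note means by finding the mapping yourself.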
Source: https://www.cnblogs.com/fengqing111/p/14643858.html