其他分享
首页 > 其他分享 > 拼接省份

拼接省份

作者:互联网

import datetime
import json
import random
import re
import threading
import time
from threading import Thread
from urllib import parse

import requests
from lxml import etree
from odps import ODPS

from models import *
# Base URL of the Qunar hotel SEO navigation endpoint. save_city_list() appends
# a city's 'url' value to it, which becomes the value of the `city` query parameter.
district_url = "https://hotel.qunar.com/napi/seo?path=%2Fseo%2Fnav&city="
def read_city():
    """Build ``[province, city]`` name pairs from the ``regions`` table.

    A row is treated as a province when the text ``'0000'`` occurs in the
    string form of its ``area_code``.  Every row whose ``pre_code`` equals a
    province's ``area_code`` then contributes one pair.  Rows named "市辖区"
    (presumably the placeholder child of a municipality -- confirm against the
    data) are listed under the province's own name instead.

    Returns:
        A list of two-element lists ``[province_name, city_name]``, ordered
        by province and, within a province, by row order of the query.
    """
    rows = regions.select()

    # (name, code) of every row that looks like a province.
    provinces = [
        (row.area_name, row.area_code)
        for row in rows
        if '0000' in str(row.area_code)
    ]

    pairs = []
    for province_name, province_code in provinces:
        children = (row for row in rows if row.pre_code == province_code)
        for child in children:
            is_municipal_district = child.area_name == "市辖区"
            city_name = province_name if is_municipal_district else child.area_name
            pairs.append([province_name, city_name])
    return pairs
# # Legacy version, kept for reference: fetch city data and store it, using the downloaded JSON text data
# def save_city_list():
#     with open('cityList.json','r',encoding='utf8')as fp:
#         json_data = json.load(fp)
#         for data in json_data:
#             for data_0 in data:
#                 for data_value in data_0['value']:
#                     district_url_0 = district_url + str(data_value['url'])
#                     response = requests.request("GET", district_url_0)
#                     json_city = json.loads(response.text)
#                     #if len(json_city) > 0 and len(json_city['data'] > 0):
#                     try:
#                         if  (json_city['data'][0]["name"] == data_value['name'] + "行政区酒店") and (json_city['data'][0]['type'] == "city"):
#                             for item in json_city['data'][0]['list']:
#                                 data_i = item["name"].split("酒店")[0]
#                                 data_i = data_i.split(" ")[0]
#                                 catalogue = qunar_List_City()
#                                 catalogue.district_name = data_i # district name
#                                 catalogue.district_spell = item['id'] # district pinyin
#                                 catalogue.city_name = data_value['name'] # city name
#                                 catalogue.city_spell = data_value['url'] # city pinyin
#                                 catalogue.create_time = datetime.datetime.now() # crawl time
#                                 existed_id = qunar_List_City.select().where(qunar_List_City.district_spell==item['id'])
#                                 if existed_id:
#                                     pass
#                                 else:
#                                     catalogue.save(force_insert=True)
#                         else:
#                             catalogue = qunar_List_City()
#                             catalogue.district_name = data_value['name'] # district name
#                             catalogue.district_spell = data_value['url'] # district pinyin
#                             catalogue.city_name = data_value['name'] # city name
#                             catalogue.city_spell = data_value['url'] # city pinyin
#                             catalogue.create_time = datetime.datetime.now() # crawl time
#                             existed_id = qunar_List_City.select().where(qunar_List_City.city_name==data_value['name'])
#                             if existed_id:
#                                 pass
#                             else:
#                                 catalogue.save(force_insert=True)
#                     except:
#                         #print(response.status_code)
#                         print("非大陆数据或者城市酒店数据为空")
#                         print(district_url_0)
def _match_province(pro_city, city_name):
    """Return the province whose city entry contains *city_name*.

    Each entry of *pro_city* is indexed as ``[province_name, city_name]``.
    The match is a substring test against the city element; when several
    entries match, the last one wins, and when none match a single space is
    returned.
    """
    province = " "
    for entry in pro_city:
        if city_name in entry[1]:
            province = entry[0]
    return province


def _store_districts(json_city, city, province):
    """Insert one ``List_City`` row per district listed in *json_city*.

    Nothing is stored unless the first element of ``json_city['data']`` is
    named ``<city name> + "行政区酒店"`` and has type ``"city"``.  Districts
    whose ``district_spell`` already exists in the table are skipped.

    Raises:
        KeyError, IndexError, TypeError, AttributeError: when the payload
            does not have the expected shape.
    """
    head = json_city['data'][0]
    if head["name"] != city['name'] + "行政区酒店" or head['type'] != "city":
        return

    for item in head['list']:
        # Drop the "酒店" suffix and anything after the first space.
        district_name = item["name"].split("酒店")[0].split(" ")[0]

        already_stored = (
            List_City.select()
            .where(List_City.district_spell == item['id'])
            .exists()
        )
        if already_stored:
            continue

        catalogue = List_City()
        catalogue.province_name = province
        catalogue.district_name = district_name
        catalogue.district_spell = item['id']
        catalogue.city_name = city['name']
        catalogue.city_spell = city['url']
        catalogue.create_time = datetime.datetime.now()
        catalogue.save(force_insert=True)


def save_city_list(pro_city, timeout=10):
    """Fetch the districts of every city in ``cityList.json`` and store them.

    ``cityList.json`` is read as a list of lists of dicts; each dict has a
    ``'value'`` list whose items carry a city's ``'name'`` and ``'url'``.
    For every city the district list is requested from ``district_url`` plus
    the city's ``'url'`` value, and each district not yet in ``List_City`` is
    inserted together with the province looked up in *pro_city*.

    A failed request, a non-JSON reply, or a payload with an unexpected shape
    is reported on stdout and the crawl moves on to the next city.  Other
    errors (for example from the database) are no longer swallowed and
    propagate to the caller.

    Args:
        pro_city: sequence of ``[province_name, city_name]`` entries, as
            returned by ``read_city()``.
        timeout: seconds to wait for each HTTP request before giving up.
    """
    # Load the whole file up front so it is not held open during the crawl.
    with open('cityList.json', 'r', encoding='utf8') as fp:
        json_data = json.load(fp)

    for data in json_data:
        for data_0 in data:
            for data_value in data_0['value']:
                province = _match_province(pro_city, data_value['name'])
                district_url_0 = district_url + str(data_value['url'])

                try:
                    response = requests.request(
                        "GET", district_url_0, timeout=timeout
                    )
                    json_city = json.loads(response.text)
                except (requests.RequestException, ValueError):
                    # Network failure or a body that is not valid JSON.
                    print("请求失败或返回内容不是合法的JSON")
                    print(district_url_0)
                    continue

                try:
                    _store_districts(json_city, data_value, province)
                except (KeyError, IndexError, TypeError, AttributeError):
                    # Payload lacks the expected structure.
                    print("非大陆数据或者城市酒店数据为空")
                    print(district_url_0)
if __name__ == "__main__":
    # create_tables()  # left disabled, as in the original script
    save_city_list(read_city())

标签:city,name,省份,json,value,catalogue,拼接,data
来源: https://www.cnblogs.com/dog-and-cat/p/13615479.html