多线程(线程池)解析 XML 文件及入库
作者:互联网
将 XML 文件解析成 JSON 字符串:
import json

import xmltodict
def parse(path):
    """Read an XML file and return its content as a JSON string.

    :param path: path of the XML file; it is opened as UTF-8 text.
    :return: the JSON string produced by ``json.dumps`` from the structure
        returned by ``xmltodict.parse``.
    :raises OSError: if the file cannot be opened (e.g. it does not exist).
    """
    with open(path, encoding="utf8") as f:
        data = f.read()
    # xmltodict turns the XML text into nested mappings, which json.dumps
    # can then serialise directly.
    xmlparse = xmltodict.parse(data)
    return json.dumps(xmlparse)
解析、入库、并发(线程池)示例代码:
# -*- coding: utf-8 -*-
import os
import json
import xmltodict
import pymysql
from concurrent.futures import ThreadPoolExecutor
def read_xml(path):
    """Collect the paths of all ``.xml`` files found under a directory tree.

    :param path: root directory to walk; sub-directories are included.
    :return: list of paths (each directory joined with the file name) for
        every file whose name ends with ``.xml``. The list is empty when
        nothing matches.
    """
    return [
        os.path.join(roots, name)
        for roots, _, names in os.walk(path)
        for name in names
        if name.endswith(".xml")
    ]
def parse(path):
    """Parse one XML file and store the extracted fields via ``insert``.

    The record passed to ``insert`` is ``[file_name, record_id, name]``.

    :param path: path of the XML file; it is opened as UTF-8 text.
    :return: None. The extracted record is handed to ``insert`` rather than
        returned.
    :raises KeyError: if the document lacks one of the expected elements or
        attributes under ``sapphire/dcarRecord/patientInfo``.
    """
    with open(path, encoding="utf8") as f:
        data = f.read()
    # The parsed mapping is indexed directly; a json.dumps/json.loads round
    # trip would not change any of the values looked up below.
    patient_info = xmltodict.parse(data)["sapphire"]["dcarRecord"]["patientInfo"]
    # xmltodict exposes XML attributes under keys prefixed with "@".
    record_id = patient_info["identifier"]["id"]["@V"]
    # NOTE(review): this reads the ``use`` attribute of <name>, not its text;
    # confirm against the XML schema that this is the intended field.
    name = patient_info["name"]["@use"]
    # basename() uses the platform's separators, whereas splitting on a
    # backslash only yields the file name for Windows-style paths.
    file_name = os.path.basename(path)
    insert([file_name, record_id, name])
def insert(res):
    """Insert one parsed XML record into the ``ecg`` table.

    :param res: sequence of at least three values; ``res[0]``, ``res[1]``
        and ``res[2]`` are written, in that order, as one row of ``ecg``.
        ``parse`` passes ``[file_name, record_id, name]``.
    :return: None
    """
    # Connection settings are placeholders and must be filled in before use.
    host = "***"
    user = "***"
    password = "***"
    db = "***"
    conn = pymysql.connect(host=host, user=user, password=password, db=db)
    # Bind the values as query parameters so the row really comes from
    # ``res`` and the driver handles quoting; a hard-coded or
    # string-formatted statement would ignore the input or break on quotes.
    sql = "insert into ecg values (%s, %s, %s)"
    print(sql)
    try:
        with conn.cursor() as a:
            a.execute(sql, (res[0], res[1], res[2]))
        conn.commit()
    finally:
        # Always release the connection, even when the insert fails.
        conn.close()
    print(res[0], "插入成功")
def main(path, max_workers=1):
    """Parse and store every XML file under ``path`` using a thread pool.

    The work is spread over threads (``ThreadPoolExecutor``), not processes.

    :param path: root directory that is searched for ``.xml`` files.
    :param max_workers: number of worker threads; the default of 1 handles
        the files one at a time.
    :return: None
    """
    p_list = read_xml(path)
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # submit() is used instead of map(): map() only re-raises a worker's
        # exception when its result iterator is consumed, so an unconsumed
        # map() hides every failure.
        futures = [(xml_path, pool.submit(parse, xml_path)) for xml_path in p_list]
        for xml_path, future in futures:
            # exception() waits for the task to finish and returns None on
            # success; a failure is reported without stopping the others.
            error = future.exception()
            if error is not None:
                print(xml_path, "处理失败:", error)
if __name__ == '__main__':
    # Directory that is scanned recursively for XML files when the module is
    # run as a script.
    path = r"G:\Desktop\text"
    main(path)
标签:xml、name、res、json、import、path、解析、入库
来源:https://blog.csdn.net/qq_42908378/article/details/121914766