Python读取Excel批量自动创建Hive数据表SQL
作者:互联网
需要使用请自行下载使用说明和Excel
百度网盘链接地址:https://pan.baidu.com/s/1RNqJwAF8T-KLGQUFvvyZwg/812l
import xlrd # 打开文件 data = xlrd.open_workbook("Create.xlsx") # 获取第一个sheet内容 table = data.sheet_by_index(0) tabNameList = [] # 表名称 tabColList = [] # 表列 tabTypeList = [] # 列类型 tabColCommList = [] # 列描述 tabCommList = [] # 表描述 tabParColList = [] # 分区字段 tabParTypeList = [] # 分区字段属性 tabParCommList = [] # 分区字段描述 index = [1]#下标值 result=[]#最终结果 def getIndex(): number = 0 name = table.col_values(0)[1:] # 表名称 for item in range(0, len(name)): if item + 1 == len(name): number = item + 2 index.append(number) else: if name[item] != name[item + 1]: number = item + 2 index.append(number) def Create_table(): creat = ("\nCREATE TABLE IF NOT EXISTS %s ( \n" % str(tabNameList[0])) # 添加表名称 col = [] # 存储列名和属性 for it in range(0, len(tabTypeList)): col.append(tabColList[it] + " " + tabTypeList[it] + " COMMENT \'" + tabColCommList[it] + "\'") for item in range(0, len(col)): creat += col[item] if item != len(tabTypeList) - 1: creat += ",\n" creat = (creat + "\n) COMMENT \'%s\'\n" % str(tabCommList[0])) par = [] # 存储分区列名和属性和备注 if len(tabParColList) > 0: for it in range(0, len(tabParColList)): par.append(tabParColList[it] + " " + tabParTypeList[it] + " COMMENT \'" + tabParCommList[it] + "\'") creat = creat + "PARTITIONED BY (\n" for item in range(0, len(par)): creat += par[item] if item != len(tabParColList) - 1: creat += ",\n" creat = creat + ");\n" #stored as parquet可以添加压缩属性 result.append(creat) print(result) def getInFo(): getIndex() for item in range(0, len(index)-1): tabNameList.clear() tabColList.clear() tabTypeList.clear() tabColCommList.clear() tabCommList.clear() tabParColList.clear() tabParTypeList.clear() tabParCommList.clear() if len(index)==2: left = index[0] right = index[1] else: left = index[item] right = index[item + 1] name = table.col_values(0)[left:right] # 表名称 for item in name: if (item not in tabNameList): tabNameList.append(item) col = table.col_values(1)[left:right] # 表列 for item in col: tabColList.append(item) type = table.col_values(2)[left:right] # 列类型 for item in type: if item != "": tabTypeList.append(item) else: tabTypeList.append("string") colcomm = table.col_values(3)[left:right] # 列描述 for item in colcomm: if item != "": tabColCommList.append(item) else: tabColCommList.append("") comm = table.col_values(4)[left:right] # 表描述 for item in comm: if item != "": tabCommList.append(item) break else: tabCommList.append("") break parcol = table.col_values(5)[left:right] # 分区字段 for item in parcol: if item != "": tabParColList.append(item) partype = table.col_values(6)[left:right] # 分区字段属性 for item in partype: if item != "": tabParTypeList.append(item) else: tabParTypeList.append("string") parcomm = table.col_values(7)[left:right] # 分区字段描述 for item in parcomm: if item != "": tabParCommList.append(item) else: tabParCommList.append("") Create_table() getInFo() # 输出文件设置 fname = "建表.sql" for item in result: print(item) outFile = open(fname, 'a+',encoding = 'utf-8') outFile.write(item) outFile.close()
标签:creat,item,Python,Excel,len,数据表,table,col,append 来源: https://www.cnblogs.com/tkzm/p/13880223.html