编程语言
首页 > 编程语言> > Python excel转换为table 主要是合并单元格问题解决

Python excel转换为table 主要是合并单元格问题解决

作者:互联网

我们公司最近又开始搞这些神奇的需求了

把excel表格 转换为 html里面的table,数量估计有几百个,分了我几十个,尼玛,不想像他们那样一个一个手搓,伤不起。。。
决定用python解决,后面附上代码

代码,未完成版



import os
import openpyxl
from openpyxl import Workbook
from copy import deepcopy
from openpyxl.utils import get_column_letter


# 原文:https://www.cnblogs.com/liuda9495/p/9039732.html



workbook2 = Workbook()




def in_area(arr_area, row, col):# 判断是否在区域中
    for area in arr_area:
        if row >= area['r1'] and row <= area['r2'] and col >= area['c1'] and col <= area['c2']:
            #print('in_area')
            #print(area)            
            return area
    #print('not_in_area')
    return None


def is_first(area, row, col):# 判断是否是区域的第一个单元格
    min_row = area['r1']  if area['r1'] < area['r2'] else area['r1']
    min_col = area['c1']  if area['c1'] < area['c2'] else area['c2']    
    if row==min_row and col==min_col:
        #print('is_first')
        #print('min_row,min_col', min_row, min_col)    
        return True
    #print('not_first')
    return False


def create_worksheet(path):

    #path='test1.xlsx'
    workbook = openpyxl.load_workbook(path)# 加载excel
    name_list = workbook.sheetnames# 所有sheet的名字
    
    for index, value in enumerate(name_list):
        print(index, value)    
        
        worksheet = workbook[name_list[index]]# 读取第一个工作表

        # 获取所有 合并单元格的 位置信息
        # 是个可迭代对象,单个对象类型:openpyxl.worksheet.cell_range.CellRange
        # print后就是excel坐标信息
        m_list = worksheet.merged_cells

        l = deepcopy(m_list)# 深拷贝
        arr_area = []
        
        # 拆分合并的单元格 并填充内容
        for m_area in l:
            
            # 这里的行和列的起始值(索引),和Excel的一样,从1开始,并不是从0开始(注意)
            r1, r2, c1, c2 = m_area.min_row, m_area.max_row, m_area.min_col, m_area.max_col

            worksheet.unmerge_cells(start_row=r1, end_row=r2, start_column=c1, end_column=c2)
            print('区域:', m_area, '  坐标:', r1, r2, c1, c2)
            arr_area.append({'r1':r1,'r2':r2,'c1':c1,'c2':c2,'m_area':str(m_area)})
            
            # # 获取一个单元格的内容
            # first_value = worksheet.cell(r1, c1).value

            # # 区域 数据填充
            # for r in range(r1, r2+1):# 遍历行        
                # if c2 - c1 > 0:# 多个列,遍历列
                    # for c in range(c1, c2+1):
                        # worksheet.cell(r, c).value = first_value
                # else:# 一个列
                    # worksheet.cell(r, c1).value = first_value


        strHtml = ''
        
        # 遍历行何列 坐标从0开始
        for x in range(worksheet.max_row):
            strHtml += '<tr>'
            for y in range(worksheet.max_column):
                row = x + 1
                col = y + 1
                cellValue = worksheet.cell(row, col).value
                # print('单元格内容:', cellValue)
                
                area = in_area(arr_area, row, col)
                if not area is None:
                    if is_first(area, row, col):
                        rowspan = area['r2']-area['r1']+1
                        colspan = area['c2']-area['c1']+1
                        print('合并单元格:', area)
                        print('rowspan,colspan:', rowspan, colspan)
                        strHtml = '<td rowspan="{}" colspan="{}">{}</td>'.format(rowspan, colspan, cellValue)
                        print(strHtml)
                    else:
                        print('在合并单元格中,跳过')
                else:
                    print('单独的单元格')
                
                
                #if cellValue is not None:
                    #print('null')
                    
            strHtml += '</tr>'

        print('arr_area------------')
        print(len(arr_area))
        print(arr_area)
        
        area = in_area(arr_area, 5, 1)
        if not area is None:
            is_first(area, 5, 1)        











#def each_files():    
    #pathDir =  os.listdir('./files/')
    #for index, value in enumerate(pathDir):
        #filepath2 = './files/' + value
        #print(filepath2)
        #create_worksheet(filepath2)
        


#each_files()
#workbook2.save('test2.xlsx')



# 遍历行
# 遍历列
# 单元格 在不在区域中
# 是否是 区域的开头
# 如果是开头,则用区域。如果不是开头,则跳过



# 判断一个单元格(行和列) 是否在区域中
# 判断一个单元格 是否是区域的开头







create_worksheet('E://1.xlsx')



















标签:arr,area,Python,worksheet,单元格,excel,print,row
来源: https://www.cnblogs.com/guxingy/p/16259007.html