编程语言
首页 > 编程语言 > python 使用 python-docx 操作 word

python 使用 python-docx 操作 word

作者:互联网

1. python-docx 库安装与介绍

注:安装python-docx也可能出现以下问题:
在这里插入图片描述
这里附上查找到的一个比较好的解决方法.

2. Python 读取 Word 文档内容

2.1 Word 文档结构介绍

在这里插入图片描述

2.2 python-docx 提取文字和文字块

① python-docx 提取文字

from docx import Document

# Open the sample document and show the list of paragraph objects it holds.
doc = Document(r"D:\PythonCode\test.docx")
all_paragraphs = doc.paragraphs
print(all_paragraphs)

# Print the plain text of every paragraph, one per line.
for para in all_paragraphs:
    print(para.text)

② python-docx 提取文字块

from docx import Document

doc = Document(r"D:\PythonCode\test.docx")
print(doc.paragraphs)

# For the first two paragraphs, show the list of runs and then the text
# carried by each individual run.
for index in (0, 1):
    paragraph = doc.paragraphs[index]
    runs = paragraph.runs
    print(runs)
    for run in runs:
        print(run.text)

2.3 利用 Python 向 Word 文档写入内容

① 添加段落

from docx import Document

source_path = r"D:\PythonCode\test.docx"
target_path = r"D:\PythonCode\test1.docx"

doc = Document(source_path)
# NOTE(author): print(doc.add_heading("一级标题", level=1)) raised an error
# when adding a level-1 heading; this is still unresolved.
# Each new paragraph is bound to a variable so that its formatting can be
# adjusted later on.
paragraph1 = doc.add_paragraph(text="这是一个段落")
paragraph2 = doc.add_paragraph(text="这是第二个段落")
doc.save(target_path)

② 添加文字块

from docx import Document

docx_path = r"D:\PythonCode\test.docx"
doc = Document(docx_path)

# Start from a paragraph with no text; the runs added below supply its content.
paragraph3 = doc.add_paragraph()

bold_run = paragraph3.add_run("我被加粗了文字块儿")
bold_run.bold = True

paragraph3.add_run(",我是普通文字块儿,")

italic_run = paragraph3.add_run("我是斜体文字块儿")
italic_run.italic = True

# Write the result back over the source file.
doc.save(docx_path)

③ 添加一个分页

from docx import Document

docx_path = r"D:\PythonCode\test.docx"

# Open the document, add a page break to it, and save it back in place.
doc = Document(docx_path)
doc.add_page_break()
doc.save(docx_path)

④ 添加图片

from docx import Document
from docx.shared import Cm

docx_path = r"D:\PythonCode\test.docx"
image_path = r"D:\PythonCode\test.png"

doc = Document(docx_path)

# Cm is used to express the picture dimensions in centimetres.
side = Cm(5)
doc.add_picture(image_path, width=side, height=side)

doc.save(docx_path)

⑤ 添加表格

from docx import Document


def _add_filled_table(document, rows):
    """Append a table to *document* sized to fit *rows* and fill in its cells.

    The table dimensions are derived from the data instead of being hard-coded,
    so adding or removing a record cannot leave the table out of sync.

    Args:
        document: The python-docx document to append the table to.
        rows: A sequence of records; each record is a sequence of cell values.
            Values are converted with ``str``. Records shorter than the widest
            one leave their trailing cells empty.

    Returns:
        The newly created table.
    """
    column_count = max((len(record) for record in rows), default=0)
    table = document.add_table(rows=len(rows), cols=column_count)
    for table_row, record in zip(table.rows, rows):
        for cell, value in zip(table_row.cells, record):
            cell.text = str(value)
    return table


docx_path = r"D:\PythonCode\test.docx"
doc = Document(docx_path)

# The first record of each list is the header row.
list1 = [
    ["姓名", "性别", "家庭地址"],
    ["唐僧", "男", "湖北省"],
    ["孙悟空", "男", "北京市"],
    ["猪八戒", "男", "广东省"],
    ["沙和尚", "男", "湖南省"],
]
list2 = [
    ["姓名", "性别", "家庭地址"],
    ["貂蝉", "女", "河北省"],
    ["杨贵妃", "女", "贵州省"],
    ["西施", "女", "山东省"],
]

table1 = _add_filled_table(doc, list1)
# A line of dashes visually separates the two tables in the document.
doc.add_paragraph("-----------------------------------------------------------")
table2 = _add_filled_table(doc, list2)

doc.save(docx_path)

⑥ 提取 word 表格,并保存在 excel 中

from docx import Document
from openpyxl import Workbook

# Copy the first table of the Word document into a new Excel worksheet,
# one worksheet row per table row.
doc = Document(r"D:\PythonCode\test.docx")
t0 = doc.tables[0]

workbook = Workbook()
sheet = workbook.active

for table_row in t0.rows:
    # Append each row exactly once, after all of its cells are collected.
    # The original appended inside the column loop, which wrote a partial
    # row for every cell.
    sheet.append([cell.text for cell in table_row.cells])

# The original only referenced `workbook.save` without calling it, so no file
# was ever written. The output path below is chosen to sit next to the source
# document; adjust it as needed.
workbook.save(r"D:\PythonCode\test.xlsx")

3. 利用 Python 调整 Word 文档样式

3.1 修改文字字体样式

from docx import Document
from docx.shared import Pt, RGBColor
from docx.oxml.ns import qn

docx_path = r"D:\PythonCode\test.docx"
doc = Document(docx_path)

# Apply the same character formatting to every run of every paragraph.
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        font = run.font
        font.bold = True
        font.italic = True
        font.underline = True
        font.strike = True
        font.shadow = True
        font.size = Pt(18)
        font.color.rgb = RGBColor(255, 255, 0)
        font.name = "宋体"
        # For a Chinese typeface such as this one, the East Asian font
        # attribute must be set as well, in addition to font.name.
        run._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")

doc.save(docx_path)

3.2 修改段落样式

① 对齐样式

from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

docx_path = r"D:\PythonCode\test.docx"
doc = Document(docx_path)

first_paragraph = doc.paragraphs[0]
print(first_paragraph.text)

# Centre the first paragraph. CENTER is only one of the available choices;
# the others listed by the author are:
# LEFT, CENTER, RIGHT, JUSTIFY, DISTRIBUTE, JUSTIFY_MED, JUSTIFY_HI,
# JUSTIFY_LOW, THAI_JUSTIFY
first_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

doc.save(docx_path)

② 行间距调整

from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT  # not referenced below

docx_path = r"D:\PythonCode\test.docx"
doc = Document(docx_path)

# Give every paragraph a line spacing of 5.0. Per the python-docx
# documentation, a float value is a multiple of the line height.
for paragraph in doc.paragraphs:
    fmt = paragraph.paragraph_format
    fmt.line_spacing = 5.0

doc.save(docx_path)

③ 段前与段后间距

from docx import Document
from docx.shared import Pt

docx_path = r"D:\PythonCode\test.docx"
doc = Document(docx_path)

gap = Pt(12)  # Pt(12) means 12 points

for paragraph in doc.paragraphs:
    fmt = paragraph.paragraph_format
    fmt.space_before = gap  # spacing before the paragraph
    fmt.space_after = gap  # spacing after the paragraph

doc.save(docx_path)

标签:docx,word,PythonCode,python,doc,paragraph,test,Document
来源: https://blog.csdn.net/qq_44284204/article/details/119373747