使用 Python 删除 Word 文档中的指定段落,并顺便实现文档中图片的导出
作者:互联网
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Delete paragraphs from Word (.docx) files and export embedded images.

Requires the third-party package python-docx:

    pip install python-docx
    https://python-docx.readthedocs.io/en/latest/
"""
import os
from pathlib import Path
from zipfile import ZipFile

from docx import Document


def extract_img_word(filename: str = '', doc_path: str = '') -> None:
    """Export the images embedded in a .docx file.

    A .docx file is an ordinary zip archive (renaming it to ``.zip`` works
    too), so the images are simply extracted from it.

    Args:
        filename: Path of the .docx file to read.
        doc_path: Directory to extract into. Archive paths are preserved,
            so images end up under ``<doc_path>/word/media/``.
    """
    with ZipFile(filename) as zip_file:
        for name in zip_file.namelist():
            if name.startswith("word/media/image"):
                zip_file.extract(name, doc_path)


def createWord(doc_path: str = 'H:/temp/test.docx') -> None:
    """Create a small sample document used to try out the deletion.

    Args:
        doc_path: Where the sample document is saved.
    """
    document = Document()
    document.add_heading('Document Title', 0)
    document.add_paragraph('A plain paragraph having some')
    document.add_heading('Heading, level 1', level=1)
    document.add_heading('Heading, level 1', level=2)
    document.add_paragraph('以下段落需要删除')
    document.add_paragraph('A plain paragraph')
    document.add_paragraph('A plain paragraph 新段落')
    document.add_heading('Heading, level 2', level=2)
    document.save(doc_path)


# NOTE: runs at import time, exactly as in the original script.
createWord()


def delete_paragraph(paragraph) -> None:
    """Remove *paragraph* from the document that contains it.

    python-docx has no public API for this, so the underlying XML element
    is detached from its parent directly.
    """
    element = paragraph._element
    element.getparent().remove(element)
    # Drop the proxy's references so the stale object cannot be reused.
    paragraph._p = paragraph._element = None


def delWordContent(docx_file: str = '', dest_file: str = '',
                   marker: str = '需要删除') -> bool:
    """Delete the first paragraph containing *marker* and all that follow.

    The result is saved to *dest_file* only when the marker was found;
    otherwise nothing is written.

    Args:
        docx_file: Path of the document to read.
        dest_file: Path the trimmed document is saved to.
        marker: Text that identifies the first paragraph to delete.

    Returns:
        True if the marker was found and *dest_file* was saved, else False.
    """
    doc = Document(docx_file)
    found = False
    # doc.paragraphs returns a list built up front, so removing elements
    # while looping over it is safe.
    for para in doc.paragraphs:
        if not found and marker in para.text:
            found = True
        if found:
            delete_paragraph(para)
    if found:
        doc.save(dest_file)
    return found


# NOTE: runs at import time, exactly as in the original script.
delWordContent(docx_file='H:/temp/test.docx', dest_file='H:/temp/test-new.docx')


def testDel() -> None:
    """Trim every .docx in ``H:/`` into ``H:/words/`` and remove the source.

    A source file is removed only after its trimmed copy was actually
    saved, so documents without the marker are left untouched instead of
    being lost.
    """
    dest_dir = 'words'
    # Snapshot the matches first: the loop removes files from the very
    # directory being scanned.
    for filename in list(Path('H:/').glob('*.docx')):
        print(str(filename))
        target_dir = filename.parent / dest_dir
        # Saving fails if the destination directory does not exist yet.
        target_dir.mkdir(exist_ok=True)
        dest_file = str(target_dir / filename.name)
        saved = delWordContent(docx_file=str(filename), dest_file=dest_file)
        if saved:
            os.remove(str(filename))
标签:docx,word,python,dest,add,paragraph,文档,file,document 来源: https://www.cnblogs.com/liangblog/p/16203382.html