编程语言
首页 > 编程语言 > PyPDF2,一个用Python拆分合并PDF的库

PyPDF2,一个用Python拆分合并PDF的库

作者:互联网

发现一个小众的库:PyPDF2。

可以用它非常方便地拆分、合并、调整PDF文件页面。

比如:

from PyPDF2 import PdfFileReader, PdfFileWriter 
import os
import re

def split_pdf(infn, outfn):
    """Split a PDF into one single-page PDF file per page.

    Page ``i`` (0-based) of the input is written to the path
    ``outfn + str(i) + ".pdf"``. The page count is printed first.

    Args:
        infn: Path of the PDF file to read.
        outfn: Prefix of the output paths; the page index and ".pdf" are
            appended to it for every page.
    """
    # The reader is handed the open file object, so the input stays open
    # until every page has been written out.
    with open(infn, 'rb') as in_file:
        pdf_input = PdfFileReader(in_file)
        # Number of pages in the input PDF.
        page_count = pdf_input.getNumPages()
        print(page_count)
        # Emit each page as its own single-page document.
        for i in range(page_count):
            pdf_output = PdfFileWriter()
            pdf_output.addPage(pdf_input.getPage(i))
            # Close each output file right away so it is flushed to disk
            # and handles do not pile up on large documents.
            with open(outfn + str(i) + ".pdf", 'wb') as out_file:
                pdf_output.write(out_file)

def _natural_sort_key(name):
    """Return a sort key that orders embedded numbers numerically.

    The name is wrapped as ``'a' + name + '0'`` so that it always starts
    with a non-digit and ends with a digit; the regex then splits it into
    (non-digit run, digit run) pairs, which are flattened into one tuple
    of alternating ``str`` and ``int`` items.
    """
    key = ()
    for text, digits in re.findall(r'(\D+)(\d+)', 'a%s0' % name):
        key += (text, int(digits))
    return key


def merge_pdf(infnList, outfn):
    """Merge several PDF files into one, adding a bookmark per input file.

    Inputs are processed in natural order (so "2.pdf" sorts before
    "10.pdf"). A file that cannot be opened or parsed is reported and
    skipped. Each merged file gets a bookmark, titled with its path, that
    points at the first page it contributed.

    Args:
        infnList: Iterable of paths of the PDF files to merge.
        outfn: Path of the merged PDF file to write.
    """
    pdf_output = PdfFileWriter()
    total_page = 0
    # Input handles stay open until the merged file has been written,
    # because the writer is only given page objects from the readers.
    open_files = []
    try:
        for infn in sorted(infnList, key=_natural_sort_key):
            try:
                in_file = open(infn, 'rb')
                open_files.append(in_file)
                pdf_input = PdfFileReader(in_file)
            except Exception as exc:
                # Skip the unreadable file; falling through would reuse the
                # previous reader (or hit an undefined name on the first).
                print("Error at ", infn, exc)
                continue
            # Number of pages in this input PDF.
            page_count = pdf_input.getNumPages()
            print(infn, " pages: ", page_count)
            for i in range(page_count):
                pdf_output.addPage(pdf_input.getPage(i))
            # total_page is the index of this file's first page in the output.
            pdf_output.addBookmark(infn, total_page)
            total_page += page_count
        print("Total pages: ", pdf_output.getNumPages())
        with open(outfn, 'wb') as out_file:
            pdf_output.write(out_file)
    finally:
        for in_file in open_files:
            in_file.close()

def walk_files(path, endpoint=None):
    """Recursively collect the paths of files under a directory.

    Args:
        path: Root directory to walk.
        endpoint: Suffix (or tuple of suffixes) a file path must end with
            to be included. ``None`` (the default) includes every file.

    Returns:
        A list of file paths, each joined with the directory it was found in.
    """
    file_list = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            file_path = os.path.join(root, name)
            # str.endswith(None) raises TypeError, so None means "no filter".
            if endpoint is None or file_path.endswith(endpoint):
                file_list.append(file_path)

    return file_list

if __name__ == '__main__':
    # Example paths: `infn` is only used by the split example below,
    # `outfn` is where the merged PDF is written.
    infn = 'infn.pdf'
    outfn = 'outfn.pdf'
    # To split instead of merge: split_pdf(infn, outfn)
    # Collect every .pdf under the folder, then merge them into one file.
    pdf_paths = walk_files("[FOLDER_NAME]", endpoint=".pdf")
    merge_pdf(pdf_paths, outfn)

这个库还在缓慢开发中,文档也不是很全,不过基本功能还是有的。更多内容请到 PyPDF2 项目主页了解:https://mstamy2.github.io/PyPDF2/#documentation

标签:infn,output,Python,PyPDF2,outfn,file,pdf,PDF,page
来源: https://blog.csdn.net/cg1985cg/article/details/120388908