PyPDF2,一个用Python拆分合并PDF的库
作者:互联网
发现一个小众的库:PyPDF2。
可以用它非常方便地拆分、合并、调整PDF文件页面。
比如:
from PyPDF2 import PdfFileReader, PdfFileWriter
import os
import re
def split_pdf(infn, outfn):
    """Split a PDF into one single-page PDF file per page.

    Args:
        infn: Path of the PDF file to read.
        outfn: Prefix for the output paths; the zero-based page ``i`` is
            written to ``outfn + str(i) + ".pdf"``.
    """
    # The source stays open until every page has been written, because the
    # reader was handed this stream and may still need it while pages are
    # being copied out (NOTE(review): confirm against the PyPDF2 docs).
    with open(infn, 'rb') as src:
        pdf_input = PdfFileReader(src)
        # Total number of pages in the source document.
        page_count = pdf_input.getNumPages()
        print(page_count)
        # Emit every page as its own single-page document.
        for i in range(page_count):
            pdf_output = PdfFileWriter()
            pdf_output.addPage(pdf_input.getPage(i))
            # Close each output file right away so its data is flushed and
            # the handle is released instead of waiting for garbage collection.
            with open(outfn + str(i) + ".pdf", 'wb') as dst:
                pdf_output.write(dst)
def merge_pdf(infnList, outfn):
    """Merge several PDF files into one, with a bookmark per source file.

    Inputs are processed in natural order of their names, so that embedded
    numbers compare numerically ("f2.pdf" before "f10.pdf"). A file that
    cannot be opened or parsed is reported with "Error at" and skipped.

    Args:
        infnList: Iterable of paths of the PDF files to merge.
        outfn: Path of the merged PDF to write.
    """
    pdf_output = PdfFileWriter()
    total_page = 0

    def natural_key(name):
        # Break the name into (non-digit run, number) pairs. The "a" prefix
        # and "0" suffix let the pattern also match names that start with a
        # digit or end without one.
        key = ()
        for text, number in re.findall(r'(\D+)(\d+)', 'a%s0' % name):
            key += (text, int(number))
        return key

    # Input streams are kept open until the merged file has been written,
    # since the writer only holds references to pages owned by the readers
    # (NOTE(review): confirm stream-lifetime requirements in the PyPDF2 docs).
    opened = []
    try:
        for infn in sorted(infnList, key=natural_key):
            try:
                stream = open(infn, 'rb')
                opened.append(stream)
                pdf_input = PdfFileReader(stream)
            except Exception:
                print("Error at ", infn)
                # Skip this file: falling through would reuse the previous
                # reader (or hit an unbound name on the first file).
                continue
            # Number of pages contributed by this file.
            page_count = pdf_input.getNumPages()
            print(infn, " pages: ", page_count)
            for i in range(page_count):
                pdf_output.addPage(pdf_input.getPage(i))
            # Bookmark the first page of this file inside the merged output.
            pdf_output.addBookmark(infn, total_page)
            total_page += page_count
        print("Total pages: ", pdf_output.getNumPages())
        with open(outfn, 'wb') as dst:
            pdf_output.write(dst)
    finally:
        for stream in opened:
            stream.close()
def walk_files(path, endpoint=None):
    """Recursively collect the paths of all files below ``path``.

    Args:
        path: Root directory to walk.
        endpoint: Suffix (or tuple of suffixes) that a file path must end
            with to be included. ``None`` (the default) includes every file.

    Returns:
        A list of file paths, each joined onto the directory that contains
        it, in the order ``os.walk`` yields them.
    """
    file_list = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            file_path = os.path.join(root, name)
            # str.endswith(None) raises TypeError, so treat None as "no filter".
            if endpoint is None or file_path.endswith(endpoint):
                file_list.append(file_path)
    return file_list
if __name__ == '__main__':
    # Sample input path; only the disabled split example below uses it.
    infn = 'infn.pdf'
    # Destination path of the merged document.
    outfn = 'outfn.pdf'
    # split_pdf(infn, outfn)
    # Gather every PDF under the folder, then merge them into one file.
    pdf_paths = walk_files("[FOLDER_NAME]", endpoint=".pdf")
    merge_pdf(pdf_paths, outfn)
这个库还在缓慢开发中,文档也不是很全,不过基本功能都已具备。更多信息可以到PyPDF2官网了解:Home page for the PyPDF2 project(https://mstamy2.github.io/PyPDF2/#documentation)
标签:infn,output,Python,PyPDF2,outfn,file,pdf,PDF,page 来源: https://blog.csdn.net/cg1985cg/article/details/120388908