-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPDF_Combiner.py
84 lines (68 loc) · 2.88 KB
/
PDF_Combiner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# coding: utf-8
import PyPDF2
import os
import logging
class Combine_PDF():
    """Merge all PDF files found in a folder into one PDF document.

    Reading and writing of PDF data is delegated to PyPDF2
    (``PdfFileReader`` / ``PdfFileWriter``).
    """

    # NOTE(review): PdfFileReader / PdfFileWriter / numPages / getPage /
    # addPage are the legacy camelCase PyPDF2 names; newer PyPDF2 releases
    # deprecate them -- confirm which PyPDF2 version this project pins.

    def __init__(self):
        """Attach the logger used by this combiner and log the creation."""
        self.logger = logging.getLogger("SR_E2W2P.Combine_PDF")
        self.logger.info("init Combine_PDF")

    def append_PDFReaderObj_to_PDFWriterObj(self, pdfFileObj, pdfWriter):
        """Append every page of a PDF reader to a PDF writer.

        Args:
            pdfFileObj: Reader exposing ``numPages`` and ``getPage(index)``,
                e.g. a ``PyPDF2.PdfFileReader``.
            pdfWriter: Writer exposing ``addPage(page)``, e.g. a
                ``PyPDF2.PdfFileWriter``.

        Returns:
            The ``pdfWriter`` object that was passed in, after the pages of
            ``pdfFileObj`` have been added to it in order.
        """
        for page_index in range(pdfFileObj.numPages):
            pdfWriter.addPage(pdfFileObj.getPage(page_index))
        return pdfWriter

    def read_pdf_files_in_folder(self, PDFFolder, Extention=".pdf"):
        """List the files in ``PDFFolder`` whose name ends with ``Extention``.

        The suffix comparison ignores case on both the file name and
        ``Extention``. Only regular files are returned, so a sub-folder
        whose name happens to end with the suffix is ignored.

        Args:
            PDFFolder: Path of the folder to scan (not recursive).
            Extention: File-name suffix to look for, ``".pdf"`` by default.

        Returns:
            A list of path strings (``PDFFolder`` joined with the file name),
            sorted by file name.
        """
        wanted_suffix = Extention.lower()
        matching_paths = []
        # os.listdir gives no ordering guarantee; sorting makes the page
        # order of the merged document reproducible between runs.
        for entry_name in sorted(os.listdir(PDFFolder)):
            if not entry_name.lower().endswith(wanted_suffix):
                continue
            entry_path = os.path.join(PDFFolder, entry_name)
            if os.path.isfile(entry_path):
                matching_paths.append(entry_path)
        return matching_paths

    def PDF_Combiner(self, PDFFolder, PDFOutputName):
        """Merge all PDFs of ``PDFFolder`` into the file ``PDFOutputName``.

        The input files are appended in the order returned by
        ``read_pdf_files_in_folder``. If ``PDFOutputName`` itself lies inside
        ``PDFFolder`` (for example left over from an earlier run) it is not
        used as an input, because it is about to be overwritten.

        Args:
            PDFFolder: Folder containing the PDF files to merge.
            PDFOutputName: Path of the merged PDF to write; an existing file
                is overwritten.

        Raises:
            OSError: If the folder cannot be listed or a file cannot be
                opened or written.
        """
        output_key = os.path.normcase(os.path.abspath(PDFOutputName))
        input_paths = []
        for candidate in self.read_pdf_files_in_folder(PDFFolder):
            if os.path.normcase(os.path.abspath(candidate)) == output_key:
                # Reading the file we are about to truncate would corrupt
                # the merge, so the output is never one of its own inputs.
                self.logger.info(
                    "Ausgabedatei '{}' wird nicht als Eingabe verwendet".format(candidate))
                continue
            input_paths.append(candidate)

        # Writer object that collects the pages of all input documents.
        pdfWriter = PyPDF2.PdfFileWriter()
        open_inputs = []
        self.logger.info("Füge alle PDFs aus dem Ordner '{}' zusammen in '{}'".format(PDFFolder, PDFOutputName))
        try:
            for input_path in input_paths:
                self.logger.info("Datei '{}' wird hinzugefügt".format(input_path))
                input_handle = open(input_path, 'rb')
                # Register the handle before parsing so it is closed even if
                # PyPDF2 rejects the file.
                open_inputs.append(input_handle)
                reader = PyPDF2.PdfFileReader(input_handle)
                pdfWriter = self.append_PDFReaderObj_to_PDFWriterObj(reader, pdfWriter)

            # The inputs stay open until the output has been written, as the
            # collected pages may still read from their source files.
            with open(PDFOutputName, 'wb') as output_handle:
                self.logger.info("Schreibe: {}".format(PDFOutputName))
                pdfWriter.write(output_handle)
        finally:
            # Close every input that was opened, also on failure.
            for input_handle in open_inputs:
                input_handle.close()
if __name__ == "__main__":
    # Manual run: merge every PDF in the folder below into one output file.
    # The paths are fixed example locations on a Windows machine.
    Combine_PDF().PDF_Combiner(
        PDFFolder=r"C:\Temp\PDF",
        PDFOutputName=r"C:\Temp\PDF\Output.pdf",
    )