-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDocx_to_pdf.py
163 lines (141 loc) · 6.52 KB
/
Docx_to_pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# coding: utf-8
# Converts Word documents to PDF.
# - Requires a local Microsoft Word installation (COM server 'Word.Application').
import win32com
import win32com.client
import os
import logging
class Docx_to_PDF:
    """Convert Word documents to PDF by automating a local Microsoft Word.

    The conversion drives the ``Word.Application`` COM server through
    ``win32com``, so Word has to be installed on the machine running this.
    """

    def __init__(self):
        """Set the default export format and attach the logger."""
        # Passed to Document.SaveAs as FileFormat to request PDF output.
        self.wdFormatPDF = 17
        self.logger = logging.getLogger("SR_E2W2P.Docx_to_PDF")
        self.logger.info("init Docx_to_PDF")

    def set_wdFormatPDF(self, wdFormatPDF=17):
        """Override the FileFormat value handed to Word when saving."""
        self.wdFormatPDF = wdFormatPDF

    def read_docx_files_in_folder(self, PDFFolder, Extention=".docx"):
        """List the files of a folder whose name ends with ``Extention``.

        Args:
            PDFFolder: Directory to scan (sub-folders are not visited).
            Extention: File-name suffix to look for. The comparison is
                case-insensitive on both sides.

        Returns:
            A list of ``[folder, filename]`` pairs in ``os.listdir`` order.
        """
        # File names are lower-cased for the comparison, so the suffix has
        # to be lower-cased too or an upper-case argument could never match.
        suffix = Extention.lower()
        return [
            [PDFFolder, filename]
            for filename in os.listdir(PDFFolder)
            if filename.lower().endswith(suffix)
        ]

    def check_file_exist(self, filepath):
        """Return True if ``filepath`` exists."""
        return os.path.exists(filepath)

    def docx_to_pdf(self, infile, outfile) -> bool:
        """Convert a single Word document to PDF.

        An already existing ``outfile`` is deleted first, so its presence
        afterwards shows that this call wrote it.

        Args:
            infile: Path of the Word document to open.
            outfile: Path of the PDF to write.

        Returns:
            True only if saving, closing the document and quitting Word all
            worked and ``outfile`` exists afterwards; False otherwise.
        """
        # TODO: build in a retry - converting multiple documents failed on
        # slow computers.

        # Word runs as a separate process with its own working directory, so
        # a relative path may point somewhere else there than it does for the
        # existence checks made here. Absolute paths keep both in agreement.
        infile = os.path.abspath(infile)
        outfile = os.path.abspath(outfile)

        success = False
        # Pre-set so the cleanup only touches what was actually acquired.
        word = None
        doc = None

        if self.check_file_exist(outfile):
            os.remove(outfile)
            self.logger.info("Delete File first: {}".format(outfile))

        try:
            self.logger.debug("Word is running before starting: {}".format(self.check_word_running()))
            word = win32com.client.Dispatch("Word.Application")
            self.logger.debug("Started wort application: {}".format(word))
            doc = word.Documents.Open(infile)
            self.logger.debug("Open Document: {}".format(doc))
            doc.SaveAs(outfile, FileFormat=self.wdFormatPDF)
            self.logger.debug("Save Document as PDF: {}".format(doc))
            if self.check_file_exist(outfile):
                self.logger.info("File written: {}".format(outfile))
                success = True
            else:
                self.logger.info("Failed to write file: {}".format(outfile))
        except Exception as e:
            self.logger.error("Error in Docx_to_PDF.docx_to_pdf. Input: {}\noutput: {}\nerror: {}".format(infile, outfile, e))
            success = False
        finally:
            # Best-effort cleanup: a failure is logged and marks the
            # conversion as failed, but never raises out of this method.
            if doc is not None:
                try:
                    doc.Close()
                except Exception:
                    self.logger.error("Error in Docx_to_PDF.docx_to_pdf. doc.Close() not possible")
                    success = False
            if word is not None:
                try:
                    word.Quit()
                except Exception:
                    self.logger.error("Error in Docx_to_PDF.docx_to_pdf. word.Quit() not possible")
                    success = False
                    self.logger.error("Check Word is running: {}".format(self.check_word_running()))

        # Final guard: whatever happened above, no file means no success.
        if not self.check_file_exist(outfile):
            self.logger.info("Failed to write file: {}".format(outfile))
            success = False
        self.logger.debug("Word is running at end: {}".format(self.check_word_running()))
        return success

    def FileExtention(self, FullString) -> str:
        """Return the text after the last ``.`` of ``FullString``.

        A string without any dot is returned unchanged. Typical use is
        comparing two extensions::

            FileExtention(name) == FileExtention(wanted_extension)
        """
        return FullString.rsplit('.', 1)[-1]

    def Compare_FileExtention(self, FullString, Extention):
        """Return True if both arguments end in the same extension."""
        return self.FileExtention(FullString) == self.FileExtention(Extention)

    def _pdf_target_path(self, source_name, outputFolder):
        """Build ``<outputFolder>/<stem of source_name>.pdf``."""
        # Only the trailing extension is swapped; a plain str.replace would
        # also rewrite the same text elsewhere in the name
        # (e.g. "docx_notes.docx" -> "pdf_notes.pdf").
        stem = os.path.splitext(os.path.basename(source_name))[0]
        return os.path.join(outputFolder, stem + ".pdf")

    def convert_all_docx_of_inputFolder_to_PDF_in_outputFolder(self, infolder, outputFolder):
        """Convert every ``.docx`` file in ``infolder`` to a PDF in ``outputFolder``.

        Logs "done" only if at least one file was found and every conversion
        succeeded; otherwise logs a "failed" warning.
        """
        results = []
        for folder, filename in self.read_docx_files_in_folder(infolder):
            actual_source_file = os.path.join(folder, filename)
            actual_target_file = self._pdf_target_path(filename, outputFolder)
            self.logger.info("Converting: {} ".format(actual_source_file))
            results.append(self.docx_to_pdf(actual_source_file, actual_target_file))
            self.logger.info("Converted: {}".format(actual_target_file))

        # Every file counts, not just the last one processed.
        if results and all(results):
            self.logger.info("convert all docx to pdf done")
        else:
            self.logger.warning("convert all docx to pdf failed")

    def convert_selected_docx_file_to_PDF_in_outputFolder(self, wordfile, outputFolder):
        """Convert the single document ``wordfile`` to a PDF in ``outputFolder``.

        The PDF keeps the document's base name with the extension replaced
        by ``.pdf``.
        """
        actual_target_file = self._pdf_target_path(wordfile, outputFolder)
        self.logger.info("Converting: {} ".format(wordfile))
        success = self.docx_to_pdf(wordfile, actual_target_file)
        self.logger.info("Converted: {}".format(actual_target_file))
        if success:
            self.logger.info("convert selected docx to pdf done")
        else:
            self.logger.warning("convert selected docx to pdf failed")

    def check_word_running(self):
        """Return True if ``winword.exe`` shows up in the ``tasklist`` output.

        Returns False when it is not listed or when ``tasklist`` cannot be
        started at all.
        """
        from subprocess import Popen, PIPE

        process_to_find = "winword.exe"
        try:
            # Argument list instead of a shell string: no shell is needed.
            process = Popen(["tasklist"], stdout=PIPE, stderr=PIPE)
            stdout, stderr = process.communicate()
        except OSError as e:
            self.logger.warning("tasklist could not be started: {}".format(e))
            return False

        reply = (stdout + stderr).lower()
        if process_to_find.encode("ascii") in reply:
            self.logger.info("{} is running".format(process_to_find))
            return True
        self.logger.info("{} is not running".format(process_to_find))
        return False
if __name__ == "__main__":
    # Script entry point: fill in the folder holding the .docx files and the
    # folder that should receive the PDFs before running.
    infolder, outputFolder = r"", r""
    Docx_to_PDF().convert_all_docx_of_inputFolder_to_PDF_in_outputFolder(infolder, outputFolder)