Let’s see what we can do with PDF files using Python.
- convert all docx into pdf files
Convert all docx into pdf files
This code converts all the Word documents (.docx and .doc) in a folder and its subfolders into the corresponding PDF files. I called it docpdf.
Converts all docx to pdf
The code
# docpdf: convert every .docx/.doc file found under `path` (subfolders
# included) into a PDF saved next to the source document.
# Requires Windows with Microsoft Word installed and the pywin32 package.
import os
import re

import win32com.client

# Word's WdSaveFormat value that selects PDF output (wdFormatPDF).
WD_FORMAT_PDF = 17
# Extensions treated as Word documents; compared case-insensitively.
WORD_EXTENSIONS = (".docx", ".doc")

path = os.getcwd()
# To convert a different folder, assign it explicitly instead, e.g.
# path = r'C:\some\folder'

word = win32com.client.Dispatch('Word.Application')
try:
    for dirpath, dirnames, filenames in os.walk(path):
        for f in filenames:
            if not f.lower().endswith(WORD_EXTENSIONS):
                continue
            # Word keeps hidden "~$name" owner files next to documents that
            # are open; they are not real documents and cannot be converted.
            if f.startswith("~$"):
                continue

            # Swap only the final extension. A case-sensitive str.replace
            # would leave "REPORT.DOCX" unchanged and the PDF would then be
            # written over the source document.
            new_name = os.path.splitext(f)[0] + ".pdf"
            in_file = os.path.join(dirpath, f)
            new_file = os.path.join(dirpath, new_name)

            doc = word.Documents.Open(in_file)
            try:
                doc.SaveAs(new_file, FileFormat=WD_FORMAT_PDF)
            finally:
                # Close the document even if the export failed.
                doc.Close()
finally:
    # Always shut Word down, otherwise a hidden WINWORD process stays alive.
    word.Quit()
A slightly different way to do the same thing
# Convert every .docx file in the current working directory to PDF.
# The outputs are written to the same directory as demo0.pdf, demo1.pdf, ...
# Requires Windows with Microsoft Word installed and the comtypes package.
import os

import comtypes.client

# Word's WdSaveFormat value that selects PDF output.
wdFormatPDF = 17
x = 0

# Sorted so the demoN.pdf numbering is reproducible between runs; the
# extension check is case-insensitive, and Word's "~$name" owner files
# (created next to documents that are open) are skipped.
docx_names = sorted(
    f for f in os.listdir()
    if f.lower().endswith(".docx") and not f.startswith("~$")
)

if docx_names:
    # Start Word once for the whole batch instead of once per document.
    word = comtypes.client.CreateObject('Word.Application')
    try:
        for f in docx_names:
            in_file = os.path.abspath(f)
            out_file = os.path.abspath("demo" + str(x) + ".pdf")
            doc = word.Documents.Open(in_file)
            try:
                doc.SaveAs(out_file, FileFormat=wdFormatPDF)
            finally:
                # Close the document even if the export failed.
                doc.Close()
            x += 1
    finally:
        # Always shut Word down so no hidden WINWORD process is left behind.
        word.Quit()
Convert all docx to pdf and merge all of them into one file
Modifying the code a little bit, with the use of the PdfMerger class from PyPDF2, we can also merge all the PDFs generated from the Word documents into a single PDF that contains all of them.
import os
import glob

import comtypes.client
from PyPDF2 import PdfMerger

# Word's WdSaveFormat value that selects PDF output (wdFormatPDF).
WD_FORMAT_PDF = 17


def docx_to_pdf(word, file, x):
    """Export one Word document to ``demo<x>.pdf`` in the working directory.

    Args:
        word: A running Word.Application COM object.
        file: Path of the document to convert.
        x: Number used to build the output file name.

    Returns:
        The absolute path of the PDF that was written.
    """
    input_file = os.path.abspath(file)
    output_file = os.path.abspath("demo" + str(x) + ".pdf")
    doc = word.Documents.Open(input_file)
    try:
        doc.SaveAs(output_file, FileFormat=WD_FORMAT_PDF)
    finally:
        # Closes the document, not the application.
        doc.Close()
    return output_file


def docx1pdf():
    """Convert every .docx in the working directory and merge the PDFs.

    Each document is exported as ``demo<N>.pdf``; the exports are then
    concatenated, in sorted file-name order, into ``result.pdf``.
    """
    # Sorted so the page order of result.pdf is reproducible; Word's
    # "~$name" owner files (created next to open documents) are skipped.
    docx_files = sorted(
        f for f in glob.glob("*.docx") if not f.startswith("~$")
    )

    word = comtypes.client.CreateObject('Word.Application')
    try:
        pdf_paths = [docx_to_pdf(word, f, x) for x, f in enumerate(docx_files)]
    finally:
        # Always shut Word down, even if one conversion failed.
        word.Quit()

    pdfslist = PdfMerger()
    try:
        for pdf_path in pdf_paths:
            # Pass the path rather than an open file object so no file
            # handle is left for this script to close.
            pdfslist.append(pdf_path)
        # Unite all the PDFs in one PDF.
        with open("result.pdf", "wb") as result_pdf:
            pdfslist.write(result_pdf)
    finally:
        pdfslist.close()


docx1pdf()
Subscribe to the newsletter for updates
Tkinter templates
My youtube channel
Twitter: @pythonprogrammi - python_pygame