How to convert docx files into one or more pdf

Let’s see some of the things that we can do with PDF files using Python.

Convert all docx into pdf files

This code converts all the docx files in a folder (and its subfolders) into the corresponding pdf files. I called this docpdf.

Converts all docx to pdf

The code

import os
import win32com.client
import re

# Convert every Word document (.doc / .docx) found under the current working
# directory, recursively, into a PDF saved next to the original file.

WD_FORMAT_PDF = 17  # FileFormat value that makes Word's SaveAs export a PDF

path = os.getcwd()
# To convert another folder, set an explicit path instead, e.g.:
# path = (r'G:\\programmi firmare studenti\\programma firma studenti 20 21\\')
word = win32com.client.Dispatch('Word.Application')
try:
    for dirpath, dirnames, filenames in os.walk(path):
        for f in filenames:
            # "~$..." files are the lock files Word leaves next to an open
            # document; they are not real documents and cannot be converted.
            if f.startswith("~$"):
                continue
            # Split off the real extension so the check and the rename agree
            # regardless of case (REPORT.DOCX) and so that ".doc" appearing
            # elsewhere in the name is left untouched.
            stem, ext = os.path.splitext(f)
            if ext.lower() not in (".doc", ".docx"):
                continue
            in_file = os.path.join(dirpath, f)
            new_file = os.path.join(dirpath, stem + ".pdf")
            doc = word.Documents.Open(in_file)
            try:
                doc.SaveAs(new_file, FileFormat=WD_FORMAT_PDF)
            finally:
                # Always release the document, even if the export failed.
                doc.Close()
finally:
    # Always shut Word down, otherwise a failure leaves it running.
    word.Quit()

A slightly different version of the code that does the same thing

import os
import comtypes.client

# Convert every .docx in the current working directory into demo0.pdf,
# demo1.pdf, ... (also written to the current working directory).
wdFormatPDF = 17  # FileFormat value that makes Word's SaveAs export a PDF

x = 0  # running index used to number the output files
# Start Word once, before the loop: this avoids launching a new instance per
# file and guarantees `word` exists even when the folder has no .docx files.
word = comtypes.client.CreateObject('Word.Application')
try:
    for f in os.listdir():
        if f.endswith(".docx"):
            in_file = os.path.abspath(f)
            out_file = os.path.abspath("demo" + str(x) + ".pdf")
            doc = word.Documents.Open(in_file)
            try:
                doc.SaveAs(out_file, FileFormat=wdFormatPDF)
            finally:
                # Always release the document, even if the export failed.
                doc.Close()
            x += 1
finally:
    # Always shut Word down, otherwise a failure leaves it running.
    word.Quit()

Convert all docx to pdf and merge all of them into one file

Modifying the code a little bit, and using the PdfMerger class from PyPDF2, we can also merge all the pdf files generated from the Word documents into a single pdf that contains all the others.

import os
import glob
import comtypes.client
from PyPDF2 import PdfMerger


def docx_to_pdf(word, file, x):
    """Export one Word document as a PDF and return the PDF's absolute path.

    Args:
        word: A Word.Application COM object. The caller owns it and is
            responsible for quitting it; only the document is closed here.
        file: Path of the Word document; it is made absolute before being
            passed to Word.
        x: Index used to build the output name ``demo<x>.pdf``, which is
            resolved against the current working directory.

    Returns:
        The absolute path of the generated PDF.
    """
    wd_format_pdf = 17  # FileFormat value that makes SaveAs export a PDF
    input_file = os.path.abspath(file)
    output_file = os.path.abspath(f"demo{x}.pdf")
    doc = word.Documents.Open(input_file)
    try:
        doc.SaveAs(output_file, FileFormat=wd_format_pdf)
    finally:
        # Close the document (not the application) even when the export
        # raises, so it is not left open inside Word.
        doc.Close()
    return output_file


def docx1pdf():
    """Convert every .docx in the current directory to PDF and merge them.

    Each document is exported through ``docx_to_pdf`` (as ``demo0.pdf``,
    ``demo1.pdf``, ...) and the resulting PDFs are then concatenated, in
    file-name order of the source documents, into ``result.pdf``.
    """
    word = comtypes.client.CreateObject('Word.Application')
    pdf_paths = []
    try:
        # sorted(): glob gives no ordering guarantee, and the order decides
        # both the demo<N> numbering and the page order of the merged file.
        for x, f in enumerate(sorted(glob.glob("*.docx"))):
            pdf_paths.append(docx_to_pdf(word, f, x))
    finally:
        # Quit Word even if one of the conversions failed.
        word.Quit()

    pdfslist = PdfMerger()
    try:
        for pdf_path in pdf_paths:
            # Hand the merger the path rather than an open file object, so
            # this function never holds a file handle it would have to close.
            pdfslist.append(pdf_path)
        # Unite all the PDFs in one PDF.
        with open("result.pdf", "wb") as result_pdf:
            pdfslist.write(result_pdf)
    finally:
        pdfslist.close()


# Run the conversion: export each .docx matched by glob("*.docx") and merge
# the generated PDFs into result.pdf.
docx1pdf()

Subscribe to the newsletter for updates
Tkinter templates
My youtube channel

Twitter: @pythonprogrammi - python_pygame

Videos

Speech recognition game

Pygame's Platform Game

Other Pygame's posts