How to batch convert documents to other formats using a VB macro in Microsoft Office

The linked page contains a Visual Basic macro that you can use with Microsoft Office (Word) to convert all files in a directory to a selected format (TXT, RTF, HTML and PDF are supported by the script). I've modified the script to support conversion to DOC and DOCX as well, and support embedding images into the document if you are converting to something else than HTML (this comes handy eg. in HTML->DOC(X) conversions). For the modifications I took advice from Rick Strahl, Calle Arnesten and Jouni Heikniemi.

Here's the original script by "Faster":
Option Explicit

Sub ChangeDocsToTxtOrRTFOrHTML()
'with export to PDF in Word 2007
    Dim fs As Object
    Dim oFolder As Object
    Dim tFolder As Object
    Dim oFile As Object
    Dim strDocName As String
    Dim intPos As Integer
    Dim locFolder As String
    Dim fileType As String
    On Error Resume Next
    locFolder = InputBox("Enter the folder path to DOCs", "File Conversion", "C:\myDocs")
    Select Case Application.Version
        Case Is < 12
            Do
                fileType = UCase(InputBox("Change DOC to TXT, RTF, HTML", "File Conversion", "TXT"))
            Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML")
        Case Is >= 12
            Do
                fileType = UCase(InputBox("Change DOC to TXT, RTF, HTML or PDF(2007+ only)", "File Conversion", "TXT"))
            Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML" Or fileType = "PDF")
    End Select
    Application.ScreenUpdating = False
    Set fs = CreateObject("Scripting.FileSystemObject")
    Set oFolder = fs.GetFolder(locFolder)
    Set tFolder = fs.CreateFolder(locFolder & "Converted")
    Set tFolder = fs.GetFolder(locFolder & "Converted")
    For Each oFile In oFolder.Files
        Dim d As Document
        Set d = Application.Documents.Open(oFile.Path)
        strDocName = ActiveDocument.Name
        intPos = InStrRev(strDocName, ".")
        strDocName = Left(strDocName, intPos - 1)
        ChangeFileOpenDirectory tFolder
        Select Case fileType
        Case Is = "TXT"
            strDocName = strDocName & ".txt"
            ActiveDocument.SaveAs FileName:=strDocName, FileFormat:=wdFormatText
        Case Is = "RTF"
            strDocName = strDocName & ".rtf"
            ActiveDocument.SaveAs FileName:=strDocName, FileFormat:=wdFormatRTF
        Case Is = "HTML"
            strDocName = strDocName & ".html"
            ActiveDocument.SaveAs FileName:=strDocName, FileFormat:=wdFormatFilteredHTML
        Case Is = "PDF"
            strDocName = strDocName & ".pdf"

            ' *** Word 2007 users - remove the apostrophe at the start of the next line ***
            'ActiveDocument.ExportAsFixedFormat OutputFileName:=strDocName, ExportFormat:=wdExportFormatPDF
           
        End Select
        d.Close
        ChangeFileOpenDirectory oFolder
    Next oFile
    Application.ScreenUpdating = True
End Sub

And here's the modded one:
Option Explicit

Private Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)

Sub ConvertDocs()
    Dim fs As Object
    Dim oFolder As Object
    Dim tFolder As Object
    Dim oFile As Object
    Dim strDocName As String
    Dim intPos As Integer
    Dim locFolder As String
    Dim fileType As String
    Dim office2007 As Boolean
    Dim lf As LinkFormat
    Dim oField As Field
    Dim oIShape As InlineShape
    Dim oShape As Shape
    On Error Resume Next
    locFolder = InputBox("Enter the path to the folder with the documents to be converted", "File Conversion", "C:\myDocs")
    If Application.Version >= 12 Then
        office2007 = True
        Do
            fileType = UCase(InputBox("Enter one of the following formats (to convert to): TXT, RTF, HTML, DOC, DOCX or PDF", "File Conversion", "TXT"))
        Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML" Or fileType = "PDF" Or fileType = "DOC" Or fileType = "DOCX")
    Else
        office2007 = False
        Do
            fileType = UCase(InputBox("Enter one of the following formats (to convert to): TXT, RTF, HTML or DOC", "File Conversion", "TXT"))
        Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML" Or fileType = "DOC")
    End Select
    Application.ScreenUpdating = False
    Set fs = CreateObject("Scripting.FileSystemObject")
    Set oFolder = fs.GetFolder(locFolder)
    Set tFolder = fs.CreateFolder(locFolder & "Converted")
    Set tFolder = fs.GetFolder(locFolder & "Converted")
    For Each oFile In oFolder.Files
        Dim d As Document
        Set d = Application.Documents.Open(oFile.Path)
        ' put the document into print view
        If fileType = "RTF" Or fileType = "DOC" Or fileType = "DOCX" Then
            With ActiveWindow.View
                .ReadingLayout = False
                .Type = wdPrintView
            End With
        End If
        ' try to embed linked images from fields, shapes and inline shapes into the document
        ' (for some reason this does not work for all images in all HTML files I've tested)
        If Not fileType = "HTML" Then
            For Each oField In d.Fields
                Set lf = oField.LinkFormat
                If oField.Type = wdFieldIncludePicture And Not lf Is Nothing And Not lf.SavePictureWithDocument Then
                    lf.SavePictureWithDocument = True
                    Sleep (2000)
                    lf.BreakLink()
                    d.UndoClear()
                End If
            Next
            For Each oShape In d.Shapes
                Set lf = oShape.LinkFormat
                If Not lf Is Nothing And Not lf.SavePictureWithDocument Then
                    lf.SavePictureWithDocument = True
                    Sleep (2000)
                    lf.BreakLink()
                    d.UndoClear()
                End If
            Next
            For Each oIShape In d.InlineShapes
                Set lf = oIShape.LinkFormat
                If Not lf Is Nothing And Not lf.SavePictureWithDocument Then
                    lf.SavePictureWithDocument = True
                    Sleep (2000)
                    lf.BreakLink()
                    d.UndoClear()
                End If
            Next
        End If
        strDocName = d.Name
        intPos = InStrRev(strDocName, ".")
        strDocName = Left(strDocName, intPos - 1)
        ChangeFileOpenDirectory(tFolder)
        ' Check out these links for a comprehensive list of supported file formats and format constants:
        ' http://msdn.microsoft.com/en-us/library/microsoft.office.interop.word.wdsaveformat.aspx
        ' http://msdn.microsoft.com/en-us/library/office/bb238158.aspx
        ' (In the latter list you can see the values that the constants are associated with.
        '  Office 2003 only supported values up to wdFormatXML(=11). Values from wdFormatXMLDocument(=12)
        '  til wdFormatDocumentDefault(=16) were added in Office 2007, and wdFormatPDF(=17) and wdFormatXPS(=18)
        '  were added in Office 2007 SP2. Office 2010 added the various wdFormatFlatXML* formats and wdFormatOpenDocumentText.)
        If Not office2007 And fileType = "DOCX" Then
            fileType = "DOC"
        End If
        Select Case fileType
            Case Is = "TXT"
                strDocName = strDocName & ".txt"
                d.SaveAs(FileName := strDocName, FileFormat := wdFormatText)
            Case Is = "RTF"
                strDocName = strDocName & ".rtf"
                d.SaveAs(FileName := strDocName, FileFormat := wdFormatRTF)
            Case Is = "HTML"
                strDocName = strDocName & ".html"
                d.SaveAs(FileName := strDocName, FileFormat := wdFormatFilteredHTML)
            Case Is = "DOC"
                strDocName = strDocName & ".doc"
                d.SaveAs(FileName := strDocName, FileFormat := wdFormatDocument)
            Case Is = "DOCX"
                strDocName = strDocName & ".docx"
                ' *** Word 2007+ users - remove the apostrophe at the start of the next line ***
                'd.SaveAs(FileName := strDocName, FileFormat := wdFormatDocumentDefault)
            Case Is = "PDF"
                strDocName = strDocName & ".pdf"
                ' *** Word 2007 SP2+ users - remove the apostrophe at the start of the next line ***
                'd.ExportAsFixedFormat(OutputFileName := strDocName, ExportFormat := wdExportFormatPDF)
        End Select
        d.Close
        ChangeFileOpenDirectory(oFolder)
    Next oFile
    Application.ScreenUpdating = True
End Sub

The Sleep() calls are there to let Office finish downloading the linked images. It's a pretty lame solution, but as Jouni Heikniemi admitted not to be able to find a better one, I couldn't either (or at least I didn't bother that much to find one). Probably some property of the linked fields/shapes changes, when the embedding is finished. But it would take a lot of effort to find out which property to watch for and I don't need it that much. I'm just converting a few dozen documents (HTML files) to another format, the linked images are local and performance is not a priority ... so a Sleep(2000) is just OK for my purposes.

Comments

Comment viewing options

Select your preferred way to display the comments and click "Save settings" to activate your changes.

Helpful

This was very helpful and saved me a lot of time - thank you very much!

Modifications: Folder picker, Cancel button and Doc to Docx

Thanks for the code, i'm really glad to have found it.

Few changes that I find convenient for my needs:
instead of:
locFolder = InputBox("Enter the folder path to DOCs", "File Conversion", "C:\myDocs")

i prefer to use folder picker:
Set fd = Application.FileDialog(msoFileDialogFolderPicker)
With fd
.IntialFileName = "D:\Users\Documents\"
.Title = "Select folder and click OK"
.AllowMultiSelect = False
If .Show <> -1 Then
MsgBox "Cancelled by user", , "List Folder Contents"
Exit Sub
End If
If .SelectedItems.Count > 0 Then
locFolder = .SelectedItems.Item(1)
End If
End With

To activate Cancel button, after the line:
fileType = UCase(InputBox("Change DOC to TXT, RTF, HTML", "File Conversion", "TXT"))

insert the following:
If fileType = vbNullString Then
MsgBox "Conversion cancelled" 'whatever message you like
End
End If
If Len(fileType) = 0 Then
Exit Do
End If

To include also Doc to DOCX conversions: add DOCX to both InputBox lines:
fileType = UCase(InputBox("Change DOC to DOCX, TXT, RTF, HTML or PDF(2007+ only)", "File Conversion", "DOCX"))

and add Case:

Case Is = "DOCX"
strDocName = strDocName & ".docx"
ActiveDocument.SaveAs FileName:=strDocName, FileFormat:=wdFormatXMLDocument

Re: Modifications: Folder picker, Cancel button and Doc to Docx

Thanks for your feedback and contribution! Smile

I'm getting an error when I run this

"End Select without End Case."

I looked through and don't see any Select Case.

Am I missing something? Note: I'm a newb.

Re: I'm getting an error when I run this

I believe that Ainar's mod has a small bug:
insert the following:
If fileType = vbNullString Then
MsgBox "Conversion cancelled" 'whatever message you like
End
End If
If Len(fileType) = 0 Then
Exit Do
End If

This seems to be better:
If fileType = vbNullString Or Len(fileType) = 0 Then
  MsgBox "Conversion cancelled" 'whatever message you like
  Exit Sub
End If

End Select without select case

I am also unable to use the macro; and get the same error message.

Where in the code should I insert the suggested extra code snippet in order to get the macro to run.
Thanks

Re: End Select without select case

Please, try my version. It should work just fine.