The linked page contains a Visual Basic macro that you can use with Microsoft Office (Word) to convert all files in a directory to a selected format (TXT, RTF, HTML and PDF are supported by the script). I've modified the script to support conversion to DOC and DOCX as well, and support embedding images into the document if you are converting to something else than HTML (this comes handy eg. in HTML->DOC(X) conversions). For the modifications I took advice from
Rick Strahl,
Calle Arnesten and
Jouni Heikniemi.
Here's the original script by "Faster":
Option Explicit
Sub ChangeDocsToTxtOrRTFOrHTML()
'with export to PDF in Word 2007
Dim fs As Object
Dim oFolder As Object
Dim tFolder As Object
Dim oFile As Object
Dim strDocName As String
Dim intPos As Integer
Dim locFolder As String
Dim fileType As String
On Error Resume Next
locFolder = InputBox("Enter the folder path to DOCs", "File Conversion", "C:\myDocs")
Select Case Application.Version
Case Is < 12
Do
fileType = UCase(InputBox("Change DOC to TXT, RTF, HTML", "File Conversion", "TXT"))
Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML")
Case Is >= 12
Do
fileType = UCase(InputBox("Change DOC to TXT, RTF, HTML or PDF(2007+ only)", "File Conversion", "TXT"))
Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML" Or fileType = "PDF")
End Select
Application.ScreenUpdating = False
Set fs = CreateObject("Scripting.FileSystemObject")
Set oFolder = fs.GetFolder(locFolder)
Set tFolder = fs.CreateFolder(locFolder & "Converted")
Set tFolder = fs.GetFolder(locFolder & "Converted")
For Each oFile In oFolder.Files
Dim d As Document
Set d = Application.Documents.Open(oFile.Path)
strDocName = ActiveDocument.Name
intPos = InStrRev(strDocName, ".")
strDocName = Left(strDocName, intPos - 1)
ChangeFileOpenDirectory tFolder
Select Case fileType
Case Is = "TXT"
strDocName = strDocName & ".txt"
ActiveDocument.SaveAs FileName:=strDocName, FileFormat:=wdFormatText
Case Is = "RTF"
strDocName = strDocName & ".rtf"
ActiveDocument.SaveAs FileName:=strDocName, FileFormat:=wdFormatRTF
Case Is = "HTML"
strDocName = strDocName & ".html"
ActiveDocument.SaveAs FileName:=strDocName, FileFormat:=wdFormatFilteredHTML
Case Is = "PDF"
strDocName = strDocName & ".pdf"
' *** Word 2007 users - remove the apostrophe at the start of the next line ***
'ActiveDocument.ExportAsFixedFormat OutputFileName:=strDocName, ExportFormat:=wdExportFormatPDF
End Select
d.Close
ChangeFileOpenDirectory oFolder
Next oFile
Application.ScreenUpdating = True
End Sub
And here's the modded one:
Option Explicit
Private Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
Sub ConvertDocs()
Dim fs As Object
Dim oFolder As Object
Dim tFolder As Object
Dim oFile As Object
Dim strDocName As String
Dim intPos As Integer
Dim locFolder As String
Dim fileType As String
Dim office2007 As Boolean
Dim lf As LinkFormat
Dim oField As Field
Dim oIShape As InlineShape
Dim oShape As Shape
On Error Resume Next
locFolder = InputBox("Enter the path to the folder with the documents to be converted", "File Conversion", "C:\myDocs")
If Application.Version >= 12 Then
office2007 = True
Do
fileType = UCase(InputBox("Enter one of the following formats (to convert to): TXT, RTF, HTML, DOC, DOCX or PDF", "File Conversion", "TXT"))
Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML" Or fileType = "PDF" Or fileType = "DOC" Or fileType = "DOCX")
Else
office2007 = False
Do
fileType = UCase(InputBox("Enter one of the following formats (to convert to): TXT, RTF, HTML or DOC", "File Conversion", "TXT"))
Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML" Or fileType = "DOC")
End Select
Application.ScreenUpdating = False
Set fs = CreateObject("Scripting.FileSystemObject")
Set oFolder = fs.GetFolder(locFolder)
Set tFolder = fs.CreateFolder(locFolder & "Converted")
Set tFolder = fs.GetFolder(locFolder & "Converted")
For Each oFile In oFolder.Files
Dim d As Document
Set d = Application.Documents.Open(oFile.Path)
' put the document into print view
If fileType = "RTF" Or fileType = "DOC" Or fileType = "DOCX" Then
With ActiveWindow.View
.ReadingLayout = False
.Type = wdPrintView
End With
End If
' try to embed linked images from fields, shapes and inline shapes into the document
' (for some reason this does not work for all images in all HTML files I've tested)
If Not fileType = "HTML" Then
For Each oField In d.Fields
Set lf = oField.LinkFormat
If oField.Type = wdFieldIncludePicture And Not lf Is Nothing And Not lf.SavePictureWithDocument Then
lf.SavePictureWithDocument = True
Sleep (2000)
lf.BreakLink()
d.UndoClear()
End If
Next
For Each oShape In d.Shapes
Set lf = oShape.LinkFormat
If Not lf Is Nothing And Not lf.SavePictureWithDocument Then
lf.SavePictureWithDocument = True
Sleep (2000)
lf.BreakLink()
d.UndoClear()
End If
Next
For Each oIShape In d.InlineShapes
Set lf = oIShape.LinkFormat
If Not lf Is Nothing And Not lf.SavePictureWithDocument Then
lf.SavePictureWithDocument = True
Sleep (2000)
lf.BreakLink()
d.UndoClear()
End If
Next
End If
strDocName = d.Name
intPos = InStrRev(strDocName, ".")
strDocName = Left(strDocName, intPos - 1)
ChangeFileOpenDirectory(tFolder)
' Check out these links for a comprehensive list of supported file formats and format constants:
' http://msdn.microsoft.com/en-us/library/microsoft.office.interop.word.wdsaveformat.aspx
' http://msdn.microsoft.com/en-us/library/office/bb238158.aspx
' (In the latter list you can see the values that the constants are associated with.
' Office 2003 only supported values up to wdFormatXML(=11). Values from wdFormatXMLDocument(=12)
' til wdFormatDocumentDefault(=16) were added in Office 2007, and wdFormatPDF(=17) and wdFormatXPS(=18)
' were added in Office 2007 SP2. Office 2010 added the various wdFormatFlatXML* formats and wdFormatOpenDocumentText.)
If Not office2007 And fileType = "DOCX" Then
fileType = "DOC"
End If
Select Case fileType
Case Is = "TXT"
strDocName = strDocName & ".txt"
d.SaveAs(FileName := strDocName, FileFormat := wdFormatText)
Case Is = "RTF"
strDocName = strDocName & ".rtf"
d.SaveAs(FileName := strDocName, FileFormat := wdFormatRTF)
Case Is = "HTML"
strDocName = strDocName & ".html"
d.SaveAs(FileName := strDocName, FileFormat := wdFormatFilteredHTML)
Case Is = "DOC"
strDocName = strDocName & ".doc"
d.SaveAs(FileName := strDocName, FileFormat := wdFormatDocument)
Case Is = "DOCX"
strDocName = strDocName & ".docx"
' *** Word 2007+ users - remove the apostrophe at the start of the next line ***
'd.SaveAs(FileName := strDocName, FileFormat := wdFormatDocumentDefault)
Case Is = "PDF"
strDocName = strDocName & ".pdf"
' *** Word 2007 SP2+ users - remove the apostrophe at the start of the next line ***
'd.ExportAsFixedFormat(OutputFileName := strDocName, ExportFormat := wdExportFormatPDF)
End Select
d.Close
ChangeFileOpenDirectory(oFolder)
Next oFile
Application.ScreenUpdating = True
End Sub
The Sleep() calls are there to let Office finish downloading the linked images. It's a pretty lame solution, but as Jouni Heikniemi admitted not to be able to find a better one, I couldn't either (or at least I didn't bother that much to find one). Probably some property of the linked fields/shapes changes, when the embedding is finished. But it would take a lot of effort to find out which property to watch for and I don't need it that much. I'm just converting a few dozen documents (HTML files) to another format, the linked images are local and performance is not a priority ... so a Sleep(2000) is just OK for my purposes.
Comments
Helpful
Modifications: Folder picker, Cancel button and Doc to Docx
Few changes that I find convenient for my needs:
instead of:
locFolder = InputBox("Enter the folder path to DOCs", "File Conversion", "C:\myDocs")
i prefer to use folder picker:
Set fd = Application.FileDialog(msoFileDialogFolderPicker)
With fd
.IntialFileName = "D:\Users\Documents\"
.Title = "Select folder and click OK"
.AllowMultiSelect = False
If .Show <> -1 Then
MsgBox "Cancelled by user", , "List Folder Contents"
Exit Sub
End If
If .SelectedItems.Count > 0 Then
locFolder = .SelectedItems.Item(1)
End If
End With
To activate Cancel button, after the line:
fileType = UCase(InputBox("Change DOC to TXT, RTF, HTML", "File Conversion", "TXT"))
insert the following:
If fileType = vbNullString Then
MsgBox "Conversion cancelled" 'whatever message you like
End
End If
If Len(fileType) = 0 Then
Exit Do
End If
To include also Doc to DOCX conversions: add DOCX to both InputBox lines:
fileType = UCase(InputBox("Change DOC to DOCX, TXT, RTF, HTML or PDF(2007+ only)", "File Conversion", "DOCX"))
and add Case:
Case Is = "DOCX"
strDocName = strDocName & ".docx"
ActiveDocument.SaveAs FileName:=strDocName, FileFormat:=wdFormatXMLDocument
Re: Modifications: Folder picker, Cancel button and Doc to Docx
I'm getting an error when I run this
I looked through and don't see any Select Case.
Am I missing something? Note: I'm a newb.
Re: I'm getting an error when I run this
This seems to be better:
If fileType = vbNullString Or Len(fileType) = 0 Then
MsgBox "Conversion cancelled" 'whatever message you like
Exit Sub
End If
End Select without select case
Where in the code should I insert the suggested extra code snippet in order to get the macro to run.
Thanks
Re: End Select without select case