' Extracts the text of every page of every PDF file found in the input folder,
' using the Acrobat OLE automation (IAC) objects, and writes each page to its
' own text file named "<pdf file name>-<zero-based page index>.txt" in the
' output folder.
'
' The early-bound declarations below need a project reference to the Acrobat
' type library. Any run-time error is re-raised to the caller after the open
' document and the Acrobat application object have been released.
Sub ExtractPDFText()
    Dim pdfApp As Acrobat.AcroApp
    Dim pdfDoc As Acrobat.CAcroPDDoc
    Dim pdfPage As Acrobat.AcroPDPage
    Dim hiliteList As Object        ' AcroExch.HiliteList (late bound)
    Dim pageText As Object          ' text selection returned by CreatePageHilite
    Dim txtData As String
    Dim i As Long                   ' zero-based page index
    Dim j As Long                   ' index of a text element within the page
    Dim inputFolder As String
    Dim pdfName As String
    Dim filePath As String
    Dim outputFolder As String
    Dim outputFileName As String
    Dim errNum As Long
    Dim errSrc As String
    Dim errDesc As String

    ' Input and output folders.
    inputFolder = "C:\path\to\your\pdf\files\" ' change to the folder that contains the PDF files
    outputFolder = "C:\path\to\output\folder\" ' change to the folder that receives the text files

    ' Normalise both folders so a file name can be appended directly.
    If Right$(inputFolder, 1) <> "\" Then inputFolder = inputFolder & "\"
    If Right$(outputFolder, 1) <> "\" Then outputFolder = outputFolder & "\"

    ' Create the output folder if it does not exist yet.
    ' MkDir creates one level only, so the parent folder must already exist.
    If Dir(outputFolder, vbDirectory) = vbNullString Then
        MkDir outputFolder
    End If

    ' Start Acrobat. From here on every error is routed through Fail so the
    ' application object is always shut down again.
    Set pdfApp = CreateObject("AcroExch.App")
    On Error GoTo Fail
    pdfApp.Show

    ' A highlight list covering "everything on the page"; it is reused for all
    ' pages. NOTE(review): 32767 is the largest length the list accepts, so a
    ' page with more text elements than that would be truncated - confirm
    ' against the Acrobat IAC reference if such pages are expected.
    Set hiliteList = CreateObject("AcroExch.HiliteList")
    hiliteList.Add 0, 32767

    ' Enumerate the PDFs in the input folder. Dir returns bare file names, so
    ' the folder is prepended for every file, not only the first one.
    pdfName = Dir(inputFolder & "*.pdf")
    Do While Len(pdfName) > 0
        filePath = inputFolder & pdfName

        Set pdfDoc = CreateObject("AcroExch.PDDoc")
        ' Open reports failure through its return value; skip files that
        ' Acrobat cannot open instead of failing on the first page access.
        If pdfDoc.Open(filePath) Then
            For i = 0 To pdfDoc.GetNumPages() - 1
                Set pdfPage = pdfDoc.AcquirePage(i)

                ' Collect the page text element by element. A page without
                ' any text yields no selection and therefore an empty file.
                txtData = vbNullString
                Set pageText = pdfPage.CreatePageHilite(hiliteList)
                If Not pageText Is Nothing Then
                    For j = 0 To pageText.GetNumText() - 1
                        txtData = txtData & pageText.GetText(j)
                    Next j
                End If

                ' The output name keeps the full PDF file name, extension included.
                outputFileName = outputFolder & pdfName & "-" & i & ".txt"
                SaveTextToFile txtData, outputFileName

                ' Release the per-page objects.
                Set pageText = Nothing
                Set pdfPage = Nothing
            Next i
            pdfDoc.Close
        End If
        Set pdfDoc = Nothing

        ' Next PDF file.
        pdfName = Dir()
    Loop

CleanUp:
    ' Reached on normal completion and, via Resume, after an error.
    ' Cleanup is best-effort: a failure here must not mask the original error.
    On Error Resume Next
    Set pageText = Nothing
    Set pdfPage = Nothing
    If Not pdfDoc Is Nothing Then pdfDoc.Close
    Set pdfDoc = Nothing
    Set hiliteList = Nothing
    pdfApp.Exit
    Set pdfApp = Nothing
    On Error GoTo 0
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc
    Exit Sub

Fail:
    ' Remember the error, leave the handler, clean up, then re-raise.
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Resume CleanUp
End Sub
' Returns the part of filePath that follows the last backslash, i.e. the file
' name without its folder. A path that contains no backslash is returned
' unchanged; a path that ends with a backslash yields an empty string.
Function GetFilenameFromPath(filePath As String) As String
    Dim lastSeparator As Long

    ' InStrRev returns 0 when there is no backslash, so Mid$ then starts at 1
    ' and returns the whole string.
    lastSeparator = InStrRev(filePath, "\")
    GetFilenameFromPath = Mid$(filePath, lastSeparator + 1)
End Function
' Writes textData to the file filePath, creating the file or replacing its
' previous content. Print # appends a line break after the text.
'
' If opening or writing fails, the file handle is closed before the error is
' re-raised to the caller, so a failed write does not leave the file locked.
Sub SaveTextToFile(textData As String, filePath As String)
    Dim fileNum As Integer
    Dim isOpen As Boolean
    Dim errNum As Long
    Dim errSrc As String
    Dim errDesc As String

    On Error GoTo Fail
    fileNum = FreeFile()
    Open filePath For Output As #fileNum
    isOpen = True
    Print #fileNum, textData
    Close #fileNum
    isOpen = False
    Exit Sub

Fail:
    ' Remember the error and leave the handler before touching the file again.
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Resume Release

Release:
    ' Best-effort close; the original error is the one reported.
    On Error Resume Next
    If isOpen Then Close #fileNum
    On Error GoTo 0
    Err.Raise errNum, errSrc, errDesc
End Sub