VBA批量提取PDF内容的程序
VBA批量提取PDF内容的程序
' Extracts the text of every PDF file in an input folder and writes it to
' plain-text files, one file per page, named "<pdf file name>-<page index>.txt"
' (page index is zero-based) inside the output folder.
'
' Requires Adobe Acrobat (the full product exposing the "AcroExch.*" COM
' classes). Objects are late-bound (As Object), so no project reference to
' the Acrobat type library is needed.
'
' Text is read through the document's JavaScript bridge (GetJSObject) using
' getPageNumWords / getPageNthWord, because the IAC PDPage object does not
' offer a text-extraction method.
Sub ExtractPDFText()
    Dim pdfApp As Object            ' AcroExch.App
    Dim pdfDoc As Object            ' AcroExch.PDDoc
    Dim jso As Object               ' JavaScript bridge object of the open document
    Dim inputFolder As String
    Dim outputFolder As String
    Dim fileName As String          ' bare file name as returned by Dir
    Dim filePath As String          ' full path of the PDF being processed
    Dim outputFileName As String
    Dim txtData As String
    Dim pageIndex As Long
    Dim wordIndex As Long
    Dim wordCount As Long

    On Error GoTo Fail

    ' Input and output folders
    inputFolder = "C:\path\to\your\pdf\files\" ' change to the folder that contains the PDF files
    outputFolder = "C:\path\to\output\folder\" ' change to the folder that receives the text files

    ' Make sure both folders end with a path separator so that plain
    ' concatenation with a file name always yields a valid path.
    If Right$(inputFolder, 1) <> "\" Then inputFolder = inputFolder & "\"
    If Right$(outputFolder, 1) <> "\" Then outputFolder = outputFolder & "\"

    ' Create the output folder if it does not exist yet.
    ' (Must happen before the Dir enumeration below: Dir keeps a single
    ' global search state.)
    If Len(Dir(outputFolder, vbDirectory)) = 0 Then
        MkDir outputFolder
    End If

    ' Start Acrobat. The window is intentionally NOT shown: per the IAC
    ' reference, App.Exit does not work while the application is visible.
    Set pdfApp = CreateObject("AcroExch.App")

    ' Enumerate the PDFs in the input folder. Dir returns bare file names,
    ' so the folder is prepended for every file, not only the first one.
    fileName = Dir(inputFolder & "*.pdf")
    Do While Len(fileName) > 0
        filePath = inputFolder & fileName

        Set pdfDoc = CreateObject("AcroExch.PDDoc")
        If pdfDoc.Open(filePath) Then
            Set jso = pdfDoc.GetJSObject

            ' Walk every page (zero-based) and rebuild its text word by word.
            For pageIndex = 0 To pdfDoc.GetNumPages() - 1
                txtData = vbNullString
                wordCount = jso.getPageNumWords(pageIndex)
                For wordIndex = 0 To wordCount - 1
                    ' bStrip = False keeps each word's trailing white space
                    ' and punctuation, so the words can simply be appended.
                    txtData = txtData & jso.getPageNthWord(pageIndex, wordIndex, False)
                Next wordIndex

                outputFileName = outputFolder & fileName & "-" & pageIndex & ".txt"
                SaveTextToFile txtData, outputFileName
            Next pageIndex

            Set jso = Nothing
            pdfDoc.Close
        Else
            ' Skip files Acrobat cannot open instead of aborting the batch.
            Debug.Print "Could not open PDF: " & filePath
        End If
        Set pdfDoc = Nothing

        ' Next PDF file
        fileName = Dir()
    Loop

CleanUp:
    ' Best-effort shutdown; errors here must not mask the original failure.
    On Error Resume Next
    Set jso = Nothing
    If Not pdfDoc Is Nothing Then pdfDoc.Close
    Set pdfDoc = Nothing
    If Not pdfApp Is Nothing Then pdfApp.Exit
    Set pdfApp = Nothing
    Exit Sub

Fail:
    MsgBox "PDF text extraction failed: " & Err.Description, vbExclamation
    Resume CleanUp
End Sub
' Returns the part of filePath that follows the last backslash, i.e. the
' file name without its directory. If filePath contains no backslash the
' whole string is returned; if it ends with a backslash the result is "".
Function GetFilenameFromPath(filePath As String) As String
    Dim lastSeparator As Long

    ' InStrRev yields 0 when no backslash is present, so Mid$ then starts
    ' at position 1 and returns the entire string.
    lastSeparator = InStrRev(filePath, "\")
    GetFilenameFromPath = Mid$(filePath, lastSeparator + 1)
End Function
' Writes textData to the file at filePath using VBA's native file I/O.
' The file is opened For Output, so an existing file is overwritten, and
' Print # appends a line terminator after the text.
Sub SaveTextToFile(textData As String, filePath As String)
    Dim handle As Integer

    ' Obtain an unused file number rather than hard-coding one.
    handle = FreeFile
    Open filePath For Output As #handle
    Print #handle, textData
    Close #handle
End Sub
原文地址:https://blog.csdn.net/weixin_43050480/article/details/144318729
免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!