自学内容网 自学内容网

VBA批量提取PDF内容的程序

VBA批量提取PDF内容的程序

' Extracts the text of every page of every PDF in the input folder and writes
' each page to its own text file in the output folder.
'
' Output files are named "<pdf file name>-<zero-based page index>.txt".
' Requires Adobe Acrobat (not just Reader) and a project reference to the
' Acrobat type library for the Acrobat.* declarations below.
'
' Text is read through the document's JavaScript bridge (GetJSObject), because
' the IAC page object does not expose a text-extraction method.
' Any run-time error is re-raised after the document and Acrobat are closed.
Sub ExtractPDFText()
    Dim pdfApp As Acrobat.AcroApp
    Dim pdfDoc As Acrobat.CAcroPDDoc
    Dim jso As Object
    Dim words() As String
    Dim txtData As String
    Dim pageIdx As Long
    Dim pageCount As Long
    Dim wordIdx As Long
    Dim wordCount As Long
    Dim inputFolder As String
    Dim fileName As String
    Dim filePath As String
    Dim outputFolder As String
    Dim outputFileName As String
    Dim errNum As Long
    Dim errSrc As String
    Dim errDesc As String

    On Error GoTo CleanFail

    ' Input and output folders; both must end with a backslash.
    inputFolder = "C:\path\to\your\pdf\files\" ' change to the folder holding the PDF files
    outputFolder = "C:\path\to\output\folder\" ' change to the folder for the text files

    ' Create the output folder if it does not exist yet.
    If Dir(outputFolder, vbDirectory) = vbNullString Then
        MkDir outputFolder
    End If

    ' Start Acrobat.
    Set pdfApp = CreateObject("AcroExch.App")
    pdfApp.Show

    ' Dir returns bare file names, so the folder is prepended for every file,
    ' and the pattern must include the folder to search the right location.
    fileName = Dir(inputFolder & "*.pdf")
    Do While fileName <> ""
        filePath = inputFolder & fileName

        Set pdfDoc = CreateObject("AcroExch.PDDoc")
        If pdfDoc.Open(filePath) Then
            Set jso = pdfDoc.GetJSObject
            ' GetJSObject may yield Nothing (e.g. JavaScript disabled in Acrobat).
            If Not jso Is Nothing Then
                pageCount = pdfDoc.GetNumPages
                For pageIdx = 0 To pageCount - 1
                    ' Collect the words of the page; bStrip = False keeps the
                    ' trailing whitespace and punctuation of each word so a
                    ' plain join reconstructs readable text.
                    wordCount = jso.getPageNumWords(pageIdx)
                    If wordCount > 0 Then
                        ReDim words(0 To wordCount - 1)
                        For wordIdx = 0 To wordCount - 1
                            words(wordIdx) = jso.getPageNthWord(pageIdx, wordIdx, False)
                        Next wordIdx
                        txtData = Join(words, "")
                    Else
                        txtData = ""
                    End If

                    outputFileName = outputFolder & fileName & "-" & pageIdx & ".txt"
                    SaveTextToFile txtData, outputFileName
                Next pageIdx
            End If
            Set jso = Nothing
            pdfDoc.Close
        End If
        Set pdfDoc = Nothing

        ' Next PDF file in the input folder.
        fileName = Dir()
    Loop

CleanExit:
    ' Best-effort cleanup; failures here must not mask the original error.
    On Error Resume Next
    Set jso = Nothing
    If Not pdfDoc Is Nothing Then
        pdfDoc.Close
        Set pdfDoc = Nothing
    End If
    If Not pdfApp Is Nothing Then
        pdfApp.Exit
        Set pdfApp = Nothing
    End If
    On Error GoTo 0
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc
    Exit Sub

CleanFail:
    ' Remember the error, release Acrobat, then re-raise it from CleanExit.
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Resume CleanExit
End Sub
 
' Returns the file name portion of a path, i.e. everything after the last
' backslash. A path without any backslash is returned unchanged.
Function GetFilenameFromPath(filePath As String) As String
    Dim lastSeparator As Long

    lastSeparator = InStrRev(filePath, "\")
    GetFilenameFromPath = Mid$(filePath, lastSeparator + 1)
End Function
 
' Writes textData to the file at filePath, replacing any existing content.
' Print # appends a line break after the text.
' If opening or writing fails, the file handle is closed and the original
' error is re-raised to the caller.
Sub SaveTextToFile(textData As String, filePath As String)
    Dim fileNum As Integer
    Dim isOpen As Boolean
    Dim errNum As Long
    Dim errSrc As String
    Dim errDesc As String

    On Error GoTo CleanFail

    fileNum = FreeFile()
    Open filePath For Output As #fileNum
    isOpen = True
    Print #fileNum, textData
    Close #fileNum
    isOpen = False
    Exit Sub

CleanFail:
    ' Capture the error before cleanup so it can be re-raised unchanged.
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    If isOpen Then Close #fileNum
    Err.Raise errNum, errSrc, errDesc
End Sub


原文地址:https://blog.csdn.net/weixin_43050480/article/details/144318729

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!