代码如下:
Dim FilesList(1 To 99999, 1 To 1) '在主SUB代码外定义数组及位置变量,为跨SUB调用
Dim FilesList_i As Integer
Dim FS As Object
Dim If_Sub As String
Sub 遍历文件() '主SUB
Dim Path_todo As String
Path_todo = InputBox("输入待处理目录路径", "路径录入", "e:\test") '输入目录
If_Sub = InputBox("是否遍历子文件夹【0→否;1→是】", "是否遍历", 1) '是否遍历的记录
Set FS = CreateObject("Scripting.FileSystemObject")
Call GetAllFiles(Path_todo) 'Call 遍历文件过程
If FilesList_i = 0 Then Exit Sub '若遍历后发现没有任何文件,就直接退出'
Call Tables_to_DOC 'Call 提取表格数据过程
End Sub
Sub GetAllFiles(ByVal RecepPath As String)'获取全部文件sub
Dim Mainfolder, SubFolder, File_Currentfolder As Object '分别为当前主目录,子目录,主目录下文件列表
On Error Resume Next '一些系统文件夹可能导致代码报错,可跳过
Set Mainfolder = FS.getfolder(RecepPath)
For Each File_Currentfolder In Mainfolder.Files '在当前主目录下所有Files中遍历
FilesList_i = FilesList_i + 1 '存储用数组中位置偏移
If Right(RecepPath, 1) <> "\" Then '避免出现格式不完整的目录路径
RecepPath = RecepPath & "\"
End If
FilesList(FilesList_i, 1) = RecepPath & File_Currentfolder.Name '用数组记录文件路径和完整文件名
Next
If Int(If_Sub) <> 1 Then Exit Sub '若不需要获取子目录中文件,就不需要递归'
For Each SubFolder In Mainfolder.SubFolders
Call GetAllFiles(SubFolder.Path) '递归调用
Next
End Sub
Sub Tables_to_DOC()'提取数据sub
Dim WordDOC, CurDOC As Object 'DOC文件对象
Dim TableCount, Table_i As Integer '当前DOC中表格个数,表格序数
Dim r, c, i, CellsR, CellsC As Integer '提取表格数据时需要的行号变量,列号变量,记录用数组的位置变量,Excel中的行列序号
Set WordDOC = CreateObject("word.application")
CellsR = 1
CellsC = 1 '提取后的数据在Excel中从A1单元格开始记录
For i = 1 To UBound(FilesList) - LBound(FilesList) + 1 '遍历数组,只针对DOC和DOCX处理
If Right(UCase(FilesList(i, 1)), 4) = ".DOC" Or Right(UCase(FilesList(i, 1)), 4) = "DOCX" Then '检查扩展名是否为Word文件
Set CurDOC = WordDOC.documents.Open(FilesList(i, 1))
WordDOC.Visible = False
TableCount = WordDOC.ActiveDocument.tables.Count '记录当前DOC中表格个数
For Table_i = 1 To TableCount
CellsC = 0 '每一张word表格在xls中的位置从第一列开始
For r = 1 To WordDOC.ActiveDocument.tables(Table_i).Rows.Count
CellsC = 0 '每一行也从xls的第一列开始存放
For c = 1 To WordDOC.ActiveDocument.tables(Table_i).Columns.Count
On Error Resume Next
CellsC = CellsC + 1
Cells(CellsR, CellsC).Value = WordDOC.ActiveDocument.tables(Table_i).Cell(r, c).Range.Text 'Word表格的内容通过该方法获取
Cells(CellsR, CellsC).Value = Left(Cells(CellsR, CellsC).Value, Len(Cells(CellsR, CellsC).Value) - 1) '去除获取内容末尾的黑点
Next c
CellsR = CellsR + 1
Next r
Next Table_i
CurDOC.Close '关闭已复制完表格的DOC
End If
Next i '寻找下一DOC文档
End Sub