【发布时间】:2020-01-14 21:18:57
【问题描述】:
我是 stackoverflow 的新手,也是 VBA 编码的新手。在我的工作中,我们以 Ms Word 的形式提供了发货数据,这不是很有用。我找到了一种使用 VBA 传输数据的方法,并拥有一个功能齐全的代码。但是,数据集包含数十万条记录。我尝试用 20 万条记录运行一个月的数据,花了 5 天时间。只是想知道我的代码中是否有任何可以改进以加快进程的内容。我试过关闭屏幕更新、事件、计算,但效果不大。提前感谢您的帮助。
Sub Word_to_Excel()
Dim FName As String, FD As FileDialog
Dim wdApp As Object
Dim wdDoc As Object
Dim WDR, WDCheck, ShipmentID As Object
Dim ExR As Range
Dim file
Dim Path As String
Dim ImportDate As Object
Dim ImportValue As String
Dim ShipmentIDcheck As String
Dim objResult
Set objShell = CreateObject("WScript.Shell")
Set ExR = Selection ' current location in Excel Sheet
' Select Folder containing word documents
Set FD = Application.FileDialog(msoFileDialogFolderPicker)
FD.Show
FName = FD.SelectedItems(1)
file = Dir(FName & "\*.docx")
Set wdApp = CreateObject("Word.Application")
' Open word document in the folder, run macro, close it and open the next word document until there are none left
Do While file <> ""
wdApp.Documents.Open Filename:=FName & "\" & file
wdApp.ActiveWindow.ActivePane.View.Type = 1
wdApp.Visible = True
' Once the word doc is open, go to beginning of document and search for CTY/SITE/SORT:
wdApp.Selection.HomeKey Unit:=6
wdApp.Selection.Find.ClearFormatting
wdApp.Selection.Find.Execute "CTY/SITE/SORT:"
Set WDCheck = wdApp.Selection
' If "CTY/SITE/SORT:" is found, then look for Shipment ID
Do While WDCheck = "CTY/SITE/SORT:"
' Find first shipment
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.MoveDown Unit:=5, Count:=11
wdApp.Selection.MoveRight Unit:=1, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=11, Extend:=1
Set ShipmentID = wdApp.Selection
ShipmentIDcheck = Replace(ShipmentID, " ", "")
' Transfer information from Word to Excel for a Shipment ID and go to the next one.
' Shipment ID should be a string that is 11 characters long
' If Shipment ID no longer exist, go to next page by searching for the next CTY/SITE/SORT:
Do While Len(Trim(ShipmentIDcheck)) = 11
i = i + 1
ExR(i, 1) = file
ExR(i, 2) = ShipmentIDcheck
' Consignee Name
wdApp.Selection.MoveUp Unit:=5, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=12
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 3) = Trim(WDR)
' Importer Name
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 8) = Trim(WDR)
' Shipper Name
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 13) = Trim(WDR)
' Quantity
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=10, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 19) = Trim(WDR)
' Weight
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=12, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 20) = Trim(WDR)
' Value
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=12, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 21) = Trim(WDR)
' Broker
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=11, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 23) = Trim(WDR)
' Consignee Street
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.MoveDown Unit:=5, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=13
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 4) = Trim(WDR)
' Importer Street
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 9) = Trim(WDR)
' Shipper Street
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 14) = Trim(WDR)
' Description
wdApp.Selection.MoveRight Unit:=1, Count:=8
wdApp.Selection.MoveRight Unit:=1, Count:=40, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 18) = Trim(WDR)
' Consignee City
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.MoveDown Unit:=5, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=13
wdApp.Selection.MoveRight Unit:=1, Count:=13, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 5) = Trim(WDR)
' Consignee Province
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=2, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 6) = Trim(WDR)
' Consignee Postal
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=6, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 7) = Trim(WDR)
' Importer City
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=13, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 10) = Trim(WDR)
' Importer Province
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=2, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 11) = Trim(WDR)
' Importer Postal
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=6, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 12) = Trim(WDR)
' Shipper City
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=13, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 15) = Trim(WDR)
' Shipper Province
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=2, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 16) = Trim(WDR)
' Shipper Postal
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=6, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 17) = Trim(WDR)
' Country of Origin
wdApp.Selection.MoveRight Unit:=1, Count:=29
wdApp.Selection.MoveRight Unit:=1, Count:=21, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 22) = Trim(WDR)
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.MoveDown Unit:=5, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=11, Extend:=1
Set ShipmentID = wdApp.Selection
' Remove spaces from selection. Selection is then used to check if it is a shipment ID.
' If it is, then data for that shipment ID is transferred. If not, macro will go to the next page in the Word Doc.
ShipmentIDcheck = Replace(ShipmentID, " ", "")
ActiveCell.Offset(1).Select
Loop
'Simulate keyboard press "NUMLOCK" to prevent screen from locking
objResult = objShell.SendKeys("{NUMLOCK}")
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.Find.ClearFormatting
wdApp.Selection.Find.Execute "CTY/SITE/SORT:"
Set WDCheck = wdApp.Selection
Loop
wdApp.ActiveDocument.Close SaveChanges:=wdDoNotSaveChanges
ActiveWorkbook.Save
file = Dir()
Loop
wdApp.Quit
MsgBox "Data extraction completed at:" & vbNewLine & Format(Now, "mmmm d, yyyy hh:mm AM/PM")
End Sub
这就是数据集的格式。每天有多个 word 文档包含该数据集的页面和页面。每页的发货数量各不相同。但格式始终相同。 word文档中没有表格,只有用空格分隔的文本。 CTY/SITE/SORT:每个页面都是独一无二的,我用它作为锚点。如果宏找到它,那么它会向下 11 行并获取第一个货件 ID 和其他信息。然后它会检查下一个货件 ID。如果不存在,则转到下一页并重复该过程。
REPORT NUM : ABC1234 OPERATIONS SYSTEM PAGE NUM: 2
CTY/SITE/SORT: CA 00123 SUMMARY CARGO RUN TIME: 07:33:43
SORT DATE : INBOUND - SCAN RUN DATE: 01AUG19
OPER ID : ABC123
MVMT: 12345678 MVMT DT: 01AUG19 MAWB: PROD TYP: DTY TYP: IMP CTY: EXP CTY: BL TYP:
COURIER REMISSION MANIFEST EXPORT SITE: US 12345
GCCN ID: EXPECTED SHPTS: EXPECTED PKGS: EXPECTED WEIGHT:
CUSTOMS NUM CONSIGNEE NAME IMPORTER NAME SHIPPER NAME CSA QTY WGT(LBS) VALUE BROKER
SHIPMENT ID DESCRIPTION (CAD) CTRY OF ORIGIN
JOHN SMITH ABC COMPANY XYZ COMPANY 1 1.1 1000.00 UNCONSIGNED
ABC12345678 123 MAIN STREET 345 RANDOM ROAD UNIVERSITY OF WASHINGTO BICYCLE PARTS
VANCOUVER BC V1A1A1 VANCOUVER BC V2B1B2 SEATTLE WA 981234 US
JOHN SMITH ABC COMPANY XYZ COMPANY 1 1.1 1000.00 UNCONSIGNED
ABC12345678 123 MAIN STREET 345 RANDOM ROAD UNIVERSITY OF WASHINGTO BICYCLE PARTS
VANCOUVER BC V1A1A1 VANCOUVER BC V2B1B2 SEATTLE WA 981234 US
JOHN SMITH ABC COMPANY XYZ COMPANY 1 1.1 1000.00 UNCONSIGNED
ABC12345678 123 MAIN STREET 345 RANDOM ROAD UNIVERSITY OF WASHINGTO BICYCLE PARTS
VANCOUVER BC V1A1A1 VANCOUVER BC V2B1B2 SEATTLE WA 981234 US
JOHN SMITH ABC COMPANY XYZ COMPANY 1 1.1 1000.00 UNCONSIGNED
ABC12345678 123 MAIN STREET 345 RANDOM ROAD UNIVERSITY OF WASHINGTO BICYCLE PARTS
VANCOUVER BC V1A1A1 VANCOUVER BC V2B1B2 SEATTLE WA 981234 US
TOTAL FOR DUTY TYPE COURIER REMISSION
TOTAL SHIPMENTS: 4
TOTAL PACKAGES : 4
TOTAL WEIGHT : 70.9 LBS
TOTAL VALUES : 4000.00
* * *
我使用以下代码清理数据集并将它们排列为每行一条记录,每行由一个段落分隔(谢谢,macropod)。由于数据被排列成由空格分隔的列,因此我可以将其保存为 .txt 文件并将其导入到 Excel 中。现在的挑战是将代码应用于文件夹中的所有文档,并为每个文档生成一个 .txt 文件。或者如果代码可以将清理后的 .docx 文件中的所有数据合并到一个 .txt 文件中,那就更好了。
Sub CleanWordDoc()
Application.ScreenUpdating = False
Dim p As Long, StrOut As String
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Wrap = wdFindContinue
.Forward = True
.Format = False
.MatchWildcards = True
.Text = "REPORT NUM : * CTRY OF ORIGIN^13" 'Clean header on each page
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "[ ]{2,}ACTUAL SHP TOTAL*[ ]{20,}^13^m" 'Clean footer on some pages
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "[ ]{2,}TOTAL FOR DUTY*[ ]{20,}^13^m" 'Clean more footers
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "REPORT NUM :*SUMMARY*[\*] [\*][ ]{20,}^13" 'Clean last page
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "^m^13" ' Clean all page breaks
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "^13^13" ' Clean empty paragraphs
.Replacement.Text = "^13"
.Execute Replace:=wdReplaceAll
.Text = "<[ ]{1,}^13" ' Clean spaces and paragraphs at the beginning of doc
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "(*)^13(*)^13(*^13)" ' Combine 3 paragraphs into one
.Replacement.Text = "\1 \2 \3"
.Execute Replace:=wdReplaceAll
End With
End With
Application.ScreenUpdating = True
End Sub
这就是清理后的 .docx 的样子(有数百条记录):
12345678900 ABC COMPANY DEF COMPANY XYZ COMPANY 1 1.1 123.45 AAABROKER A0B12345LFD ABC ADDRESS DEF ADDRESS XYZ ADDRESS BICYCLE PARTS VANCOUVER BC V1A1A1 MARKHAM ON L1L1L1 SHENZHEN 512323 CN
98765432100 ABC COMPANY DEF COMPANY XYZ COMPANY 1 1.1 123.45 AAABROKER A0B12345LFD ABC ADDRESS DEF ADDRESS XYZ ADDRESS BICYCLE PARTS VANCOUVER BC V1A1A1 MARKHAM ON L1L1L1 SHENZHEN 512323 CN
【问题讨论】:
-
如果你的代码可以运行但速度很慢,那么它更适合Code Review。
-
好的,我会在那里发帖。谢谢
-
问题是您在循环的每次迭代中多次写入 Excel。建议您使用数组来收集每个循环迭代中的信息 (
Redim Preserve) 以累积每一位信息。然后,您可以一次性写入信息。使用具有范围的数组进行研究。 -
如果不知道(例如)您的数据被拆分了多少个文件,或者数据在 Word 中的确切格式,很难提出有用的建议。对于这个过程来说,5 天肯定太长了。
-
我添加了一个关于如何在 word 中格式化数据的示例。在一个月内,数据通常被拆分为 100 多个 word 文档,每个 word 文档可以包含 500 页数据。 SmileyFtW,我将研究使用数组和范围。谢谢。