这应该足够快(在我的机器上,一个 18MB 的文件需要 8 秒,但我只复制数据,我不对其进行重组 - 如果您不进行计算而只对内容进行重新排序,那么您应该得到相同的结果种表现)。即使行数/列数不适合电子表格,它也能正常工作。
TODO:它有点长,但您应该能够 (a) 复制粘贴 (b) 更改文件名和 (c) 修改操作数据函数以满足您的需要。其余代码是一堆可重用的实用程序函数,您不需要更改。
我不确定使用 VBA 是否可以更快 - 如果您需要更快,您应该考虑另一种语言。通常,Java 或 C# 中的相同代码会更短,因为它们已经有标准库来读取/写入文件等,而且速度也会更快。
Option Explicit
Public Sub doIt()
Dim sourceFile As String
Dim destinationFile As String
Dim data As Variant
Dim result As Variant
sourceFile = "xxxxxxx"
destinationFile = "xxxxxxx"
data = getDataFromFile(sourceFile, ",")
If Not isArrayEmpty(data) Then
result = manipulateData(data)
writeToCsv result, destinationFile, ","
Else
MsgBox ("Empty file")
End If
End Sub
Function manipulateData(sourceData As Variant) As Variant
Dim result As Variant
Dim i As Long
Dim j As Long
Dim k As Long
Dim m As Long
'redim the result array to the right size - here I only copy so same size as source
ReDim result(1 To UBound(sourceData, 1), 1 To UBound(sourceData, 2)) As Variant
For i = LBound(sourceData, 1) To UBound(sourceData, 1)
For j = LBound(sourceData, 2) To UBound(sourceData, 2)
k = i 'k to be defined - here I only copy data
m = j 'm to be defined - here I only copy data
result(k, m) = sourceData(i, j)
Next j
Next i
manipulateData = result
End Function
Public Sub writeToCsv(parData As Variant, parFileName As String, parDelimiter As String)
If getArrayNumberOfDimensions(parData) <> 2 Then Exit Sub
Dim i As Long
Dim j As Long
Dim fileNum As Long
Dim locLine As String
Dim locCsvString As String
fileNum = FreeFile
If Dir(parFileName) <> "" Then Kill (parFileName)
Open parFileName For Binary Lock Read Write As #fileNum
For i = LBound(parData, 1) To UBound(parData, 1)
locLine = ""
For j = LBound(parData, 2) To UBound(parData, 2)
If IsError(parData(i, j)) Then
locLine = locLine & "#N/A" & parDelimiter
Else
locLine = locLine & parData(i, j) & parDelimiter
End If
Next j
locLine = Left(locLine, Len(locLine) - 1)
If i <> UBound(parData, 1) Then locLine = locLine & vbCrLf
Put #fileNum, , locLine
Next i
error_handler:
Close #fileNum
End Sub
Public Function isArrayEmpty(parArray As Variant) As Boolean
'Returns false if not an array or dynamic array that has not been initialised (ReDim) or has been erased (Erase)
If IsArray(parArray) = False Then isArrayEmpty = True
On Error Resume Next
If UBound(parArray) < LBound(parArray) Then isArrayEmpty = True: Exit Function Else: isArrayEmpty = False
End Function
Public Function getArrayNumberOfDimensions(parArray As Variant) As Long
'Returns the number of dimension of an array - 0 for an empty array.
Dim i As Long
Dim errorCheck As Long
If isArrayEmpty(parArray) Then Exit Function 'returns 0
On Error GoTo FinalDimension
'Visual Basic for Applications arrays can have up to 60000 dimensions
For i = 1 To 60001
errorCheck = LBound(parArray, i)
Next i
'Not supposed to happen
getArrayNumberOfDimensions = 0
Exit Function
FinalDimension:
getArrayNumberOfDimensions = i - 1
End Function
Private Function getDataFromFile(parFileName As String, parDelimiter As String, Optional parExcludeCharacter As String = "") As Variant
'parFileName is supposed to be a delimited file (csv...)
'parDelimiter is the delimiter, "," for example in a comma delimited file
'Returns an empty array if file is empty or can't be opened
'number of columns based on the line with the largest number of columns, not on the first line
'parExcludeCharacter: sometimes csv files have quotes around strings: "XXX" - if parExcludeCharacter = """" then removes the quotes
Dim locLinesList() As Variant
Dim locData As Variant
Dim i As Long
Dim j As Long
Dim locNumRows As Long
Dim locNumCols As Long
Dim fso As Variant
Dim ts As Variant
Const REDIM_STEP = 10000
Set fso = CreateObject("Scripting.FileSystemObject")
On Error GoTo error_open_file
Set ts = fso.OpenTextFile(parFileName)
On Error GoTo unhandled_error
'Counts the number of lines and the largest number of columns
ReDim locLinesList(1 To 1) As Variant
i = 0
Do While Not ts.AtEndOfStream
If i Mod REDIM_STEP = 0 Then
ReDim Preserve locLinesList(1 To UBound(locLinesList, 1) + REDIM_STEP) As Variant
End If
locLinesList(i + 1) = Split(ts.ReadLine, parDelimiter)
j = UBound(locLinesList(i + 1), 1) 'number of columns
If locNumCols < j Then locNumCols = j
If j = 13 Then
j = j
End If
i = i + 1
Loop
ts.Close
locNumRows = i
If locNumRows = 0 Then Exit Function 'Empty file
ReDim locData(1 To locNumRows, 1 To locNumCols + 1) As Variant
'Copies the file into an array
If parExcludeCharacter <> "" Then
For i = 1 To locNumRows
For j = 0 To UBound(locLinesList(i), 1)
If Left(locLinesList(i)(j), 1) = parExcludeCharacter Then
If Right(locLinesList(i)(j), 1) = parExcludeCharacter Then
locLinesList(i)(j) = Mid(locLinesList(i)(j), 2, Len(locLinesList(i)(j)) - 2) 'If locTempArray = "", Mid returns ""
Else
locLinesList(i)(j) = Right(locLinesList(i)(j), Len(locLinesList(i)(j)) - 1)
End If
ElseIf Right(locLinesList(i)(j), 1) = parExcludeCharacter Then
locLinesList(i)(j) = Left(locLinesList(i)(j), Len(locLinesList(i)(j)) - 1)
End If
locData(i, j + 1) = locLinesList(i)(j)
Next j
Next i
Else
For i = 1 To locNumRows
For j = 0 To UBound(locLinesList(i), 1)
locData(i, j + 1) = locLinesList(i)(j)
Next j
Next i
End If
getDataFromFile = locData
Exit Function
error_open_file: 'returns empty variant
unhandled_error: 'returns empty variant
End Function