你的要求是一个很大的要求,所以我将给出一些指示和起始代码。我的代码应该写出所有的表格,但你会想尝试得到你想要的格式。有效地选择元素当然有足够的逻辑,这应该会有所帮助。 * 由于时间限制,我没有测试过使用该类循环所有检索到的 id,但已经测试了个别情况和所有 id 的检索。
获取初始案例链接和 ID:
我可能会使用一个函数返回一个包含链接和 ID 的数组。如果您提取 id,它们可以通过我在下面显示的 XMLHTTP 请求。
网址是https://crashviewer.nhtsa.dot.gov/LegacyCDS/Search
Public Function GetLinksAndIds(ByVal URL) As Variant
Dim ie As InternetExplorer, i As Long
Set ie = New InternetExplorer
With ie
.Visible = True
.navigate2 URL
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("btnSubmit1").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Dim caseLinks As Object, id As String, newURL As String
Set caseLinks = .document.querySelectorAll("[href*='CaseID=']")
Dim linksAndIds()
ReDim linksAndIds(1 To caseLinks.Length, 1 To 2)
For i = 0 To caseLinks.Length - 1
linksAndIds(i + 1, 1) = caseLinks.item(i)
linksAndIds(i + 1, 2) = Replace$(caseLinks.item(i), "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?xsl=main.xsl&CaseID=", vbNullString)
Next
.Quit
End With
GetLinksAndIds = linksAndIds
End Function
返回值示例:
对于每种情况 - 使用 XMLHTTP:
我很想避免使用 IE 并使用 XMLHTTP request(使用打印选项的 url 编码查询字符串返回更具可读性的页面版本)。尽管我已经使用 css 选择器进行了解析,但您可以将响应读入 MSXML2.DOMDocument60 并使用 XPath 进行查询。您可以将 caseid 连接到 URL。
Option Explicit
Public Sub GetTables()
Dim sResponse As String, html As HTMLDocument, clipboard As Object, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?ViewPage&xsl=Case.xsl&tab=Crash&form=CaseForm&baseNode=&vehnum=-1&occnum=-1&pos=-1&pos2=-1&websrc=true&title=Crash%20Overview%20-%20Summary&caseid=112007272&year=&fullimage=false", False '<==concatenate caseid into URL
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = .responseText
End With
Set html = New HTMLDocument
html.body.innerHTML = sResponse
Dim tables As Object, i As Long
Set tables = html.querySelectorAll("table")
For i = 0 To tables.Length - 1
clipboard.SetText tables.item(i).outerHTML
clipboard.PutInClipboard
ws.Cells(LastRow(ws) + 2, 1).PasteSpecial
Next
End Sub
'https://www.rondebruin.nl/win/s9/win005.htm '<< Function below modified from here
Public Function LastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
LastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function
使用一个类来保存 xmlhttp 对象会是什么样子(未经测试):
类 clsHTTP:
Option Explicit
Private http As Object
Private Sub Class_Initialize()
Set http = CreateObject("MSXML2.XMLHTTP")
End Sub
Public Function GetString(ByVal URL As String) As String
Dim sResponse As String
With http
.Open "GET", URL, False
.send
sResponse = .responseText
End With
End Function
标准模块 1:
Option Explicit
Public Sub GetTables()
Dim sResponse As String, html As HTMLDocument, clipboard As Object, ws As Worksheet
Dim initialLinksURL As String, http As clsHTTP, i As Long, j As Long, newURL As String
Set http = New clsHTTP
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Set html = New HTMLDocument
initialLinksURL = "https://crashviewer.nhtsa.dot.gov/LegacyCDS/Search"
Dim linksAndIds()
linksAndIds = GetLinksAndIds(initialLinksURL)
For i = LBound(linksAndIds, 2) To UBound(linksAndIds, 2)
newURL = "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?ViewPage&xsl=Case.xsl&tab=Crash&form=CaseForm&baseNode=&vehnum=-1&occnum=-1&pos=-1&pos2=-1&websrc=true&title=Crash%20Overview%20-%20Summary&caseid=" & linksAndIds(i, 2) & "&year=&fullimage=false"
html.body.innerHTML = http.GetString(newURL)
Dim tables As Object
Set tables = html.querySelectorAll("table")
For j = 0 To tables.Length - 1
clipboard.SetText tables.item(j).outerHTML
clipboard.PutInClipboard
ws.Cells(LastRow(ws) + 2, 1).PasteSpecial
Next
Next
End Sub
'https://www.rondebruin.nl/win/s9/win005.htm
Public Function LastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
LastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function
Public Function GetLinksAndIds(ByVal URL) As Variant
Dim ie As InternetExplorer, i As Long
Set ie = New InternetExplorer
With ie
.Visible = True
.navigate URL
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("btnSubmit1").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Dim caseLinks As Object, id As String, newURL As String
Set caseLinks = .document.querySelectorAll("[href*='CaseID=']")
Dim linksAndIds()
ReDim linksAndIds(1 To caseLinks.Length, 1 To 2)
For i = 0 To caseLinks.Length - 1
linksAndIds(i + 1, 1) = caseLinks.item(i)
linksAndIds(i + 1, 2) = Replace$(caseLinks.item(i), "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?xsl=main.xsl&CaseID=", vbNullString)
Next
.Quit
End With
GetLinksAndIds = linksAndIds
End Function
所有 Internet Explorer 选项:
Option Explicit
Public Sub GetTables()
Dim sResponse As String, html As HTMLDocument, clipboard As Object, ws As Worksheet
Dim initialLinksURL As String, i As Long, j As Long, newURL As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Set html = New HTMLDocument
initialLinksURL = "https://crashviewer.nhtsa.dot.gov/LegacyCDS/Search"
Dim ie As InternetExplorer, caseLinks As Object
Set ie = New InternetExplorer
With ie
.Visible = True
.Navigate2 initialLinksURL
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("btnSubmit1").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Set caseLinks = .document.querySelectorAll("[href*='CaseID=']")
Dim linksAndIds()
ReDim linksAndIds(1 To caseLinks.Length, 1 To 2)
For i = 0 To caseLinks.Length - 1
linksAndIds(i + 1, 1) = caseLinks.item(i)
linksAndIds(i + 1, 2) = Replace$(caseLinks.item(i), "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?xsl=main.xsl&CaseID=", vbNullString)
Next
For i = LBound(linksAndIds, 2) To 2 ' UBound(linksAndIds, 2)
newURL = "https://crashviewer.nhtsa.dot.gov/nass-cds/CaseForm.aspx?ViewPage&xsl=Case.xsl&tab=Crash&form=CaseForm&baseNode=&vehnum=-1&occnum=-1&pos=-1&pos2=-1&websrc=true&title=Crash%20Overview%20-%20Summary&caseid=" & linksAndIds(i, 2) & "&year=&fullimage=false"
.Navigate2 newURL
While .Busy Or .readyState < 4: DoEvents: Wend
Dim tables As Object
Set tables = .document.querySelectorAll("table")
For j = 0 To tables.Length - 1
clipboard.SetText tables.item(j).outerHTML
clipboard.PutInClipboard
ws.Cells(LastRow(ws) + 2, 1).PasteSpecial
Next
Next
.Quit
End With
End Sub
'https://www.rondebruin.nl/win/s9/win005.htm
Public Function LastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
LastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function