【发布时间】:2018-10-24 16:13:05
【问题描述】:
我正在构建我的第一个 VBA 代码来从网站上抓取数据。我可以打开该站点并通过一个按钮导航以在屏幕上获取正确的数据,但我很难引用正确的表格进行循环。我想访问一个嵌入式表“活动”。为此,我从here 那里得到了关于如何循环遍历表格和提取信息并嵌入到我的代码中的答案。以下是三个通过错误的区域。
这些是否有关联(尤其是查询 B&C)?有人有什么想法吗?
非常感谢!
--------解决方案代码(来自下面的 QHarr 答案)-------------
注意:需要参考(VBE > 工具 > 参考并添加参考): 微软互联网控制 Microsoft HTML 对象库
Public Sub GetTable()
Dim IE As InternetExplorer, ele As Object, clipboard As Object, hTable As htmlTable, t As Date, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
Const MAX_WAIT_SEC As Long = 20
Set IE = New InternetExplorer
With IE
.Visible = True
.navigate "https://na3.docusign.net/Member/EmailStart.aspx?a=59595fcb-34be-4375-b880-a0be581d0f37&r=f6d28b49-e66d-4fa4-a7e9-69c2c741fde5"
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
DoEvents
On Error Resume Next
Set ele = .Document.querySelector("[data-qa='show-history']")
'On Error GoTo 0 'I removed this line as it was throwing an error as soon as the 'Show-history' element loaded.
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While ele Is Nothing
If ele Is Nothing Then Exit Sub
ele.Click
While .Busy Or .readyState < 4: DoEvents: Wend
Set hTable = .Document.querySelector("[data-qa='history-dialog-audit-logs']")
''**********************************************************************
'' Loop table and write out method. This method uses the sub WriteTable
Application.ScreenUpdating = False
WriteTable hTable, 1, ws
Application.ScreenUpdating = True
''**********************************************************************
.Quit
End With
End Sub
Public Sub WriteTable(ByVal hTable As htmlTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long
r = startRow
With ws
Set tRow = hTable.getElementsByTagName("tr")
For Each tr In tRow
Set tCell = tr.getElementsByTagName("td")
c = 1
For Each td In tCell
.Cells(r, c).Value = td.innerText
c = c + 1
Next td
r = r + 1
Next tr
End With
End Sub
------原始查询码-----
查询 A: 页面加载时出现需要对象的错误,如果我继续执行脚本,该错误就会消失,所以我认为处理加载时间有问题吗?它发生在“循环”代码完成后:
With objIE
.Visible = True
.navigate WebSite
Do While .Busy Or .readyState <> 4
DoEvents
Loop
.document.querySelector("[data-qa='show-history']").Click
查询 B: 我在这一行收到另一个 object required 错误,我也可以继续过去:
For Each ele In objIE.document.getElementById("activity").getElementsByTagName("tr")
查询 C: 我在下一行收到一个下标超出范围的错误,并且无法继续进行
Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
Sub googlesearch3()
Set objIE = CreateObject("InternetExplorer.Application")
WebSite = "websiteurl"
With objIE
.Visible = True
.navigate WebSite
Do While .Busy Or .readyState <> 4
DoEvents
Loop
.document.querySelector("[data-qa='show-history']").Click
End With
'within the 'history-dialog-audit-logs' tabe, loop and extract data
'we will output data to excel, starting on row 1
y = 1
'look at all the 'tr' elements in the 'table' with id 'myTable',
'and evaluate each, one at a time, using 'ele' variable
For Each ele In objIE.document.getElementById("activity").getElementsByTagName("tr")
'show the text content of 'tr' element being looked at
Debug.Print ele.textContent
'each 'tr' (table row) element contains 4 children ('td') elements
'put text of 1st 'td' in col A
Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
'put text of 2nd 'td' in col B
Sheets("Sheet1").Range("B" & y).Value = ele.Children(1).textContent
'put text of 3rd 'td' in col C
Sheets("Sheet1").Range("C" & y).Value = ele.Children(2).textContent
'put text of 4th 'td' in col D
Sheets("Sheet1").Range("D" & y).Value = ele.Children(3).textContent
'increment row counter by 1
y = y + 1
'repeat until last ele has been evaluated
Next
'check if word 'completed' is mentoined anwhere, if so update 'Status' to 'Completed' and search for text.
'Find "signed the envelope" and show all text before this until you find <td?. Stop after one occurance
'store text in 'LastSigned'string
'find "sent an invitation to" and show all text before this until you find <td>. Stop after one occurance
'store text in 'CurrentlyWith' sting
Set IE = Nothing
End Sub
补充: 我已经尝试了答案here,但是 DIM 语句不起作用...
【问题讨论】:
标签: html excel vba web-scraping