【发布时间】:2016-09-08 23:07:33
【问题描述】:
下面的宏应该填充网站上的搜索参数并从结果中抓取数据 - 131 页,通常每页 5 行数据。为了测试,我注释掉了循环,所以它只有 1 页。
在 Excel 工作表中填写以下数据列:公司名称、联系人姓名、联系人电子邮件、位置、电话 (BH)、手机、服务。
我只有电话 (BH) 和手机有问题。原因是公司有时只有电话 (BH) 或移动电话,而不是两者兼有。
我循环的方式(5 个公司名称,然后是 5 个联系人姓名等)需要检测所有数据,否则我不知道这些数据应该在哪一行。
Option Explicit
Private Sub CommandButton1_Click()
Dim htmlele As IHTMLElement
Dim Link As String
Dim ie As Object
Dim page As Integer
Dim companyresult As Long, nameresult As Long, emailresult As Long, locationresult As Long, servicesresult As Long
Application.ScreenUpdating = False
Link = "http://bdav.org.au/find"
Set ie = CreateObject("InternetExplorer.Application")
ie.Navigate Link
'ie.Visible = True
Do Until ie.ReadyState = 4 And ie.Busy = False
DoEvents
Loop
For Each htmlele In ie.Document.getElementsByTagName("input")
If htmlele.Name = "postcode" Then htmlele.Value = "3000"
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("input")
If htmlele.Name = "dist" Then htmlele.Value = "99"
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("input")
If htmlele.Type = "submit" Then htmlele.Click
Next htmlele
Do Until ie.ReadyState = 4 And ie.Busy = False
DoEvents
Loop
'Do
page = page + 1
Link = "http://bdav.org.au/find/" & page
Do Until ie.ReadyState = 4 And ie.Busy = False
DoEvents
Loop
With ThisWorkbook.Worksheets("results")
For Each htmlele In ie.Document.getElementsByTagName("h2")
companyresult = companyresult + 1
.Range("a" & companyresult + 1).Value = htmlele.innerHTML
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("a")
If Left(htmlele.href, 7) = "mailto:" Then
nameresult = nameresult + 1
.Range("b" & nameresult + 1).Value = htmlele.innerHTML
End If
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("a")
If Left(htmlele.href, 7) = "mailto:" Then
emailresult = emailresult + 1
.Range("c" & emailresult + 1).Value = Mid(htmlele.href, 8)
End If
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("label")
If htmlele.innerHTML = "Location:" Then
locationresult = locationresult + 1
.Range("d" & locationresult + 1).Value = htmlele.NextSibling.innerHTML
End If
Next htmlele
For Each htmlele In ie.Document.getElementsByTagName("label")
If htmlele.innerHTML = "Services:" Then
servicesresult = servicesresult + 1
If Right(htmlele.NextSibling.innerHTML, 4) = "<br>" Then
htmlele.NextSibling.innerHTML = Left(htmlele.NextSibling.innerHTML, Len(htmlele.NextSibling.innerHTML) - 4)
End If
.Range("g" & servicesresult + 1).Value = Replace(htmlele.NextSibling.innerHTML, "<br>", vbLf)
End If
Next htmlele
End With
'Loop
ie.Quit
Set ie = Nothing
Application.ScreenUpdating = True
End Sub
【问题讨论】:
标签: vba web-scraping