【问题标题】:VBA to get custom data in excel [closed]VBA在excel中获取自定义数据[关闭]
【发布时间】:2018-08-26 04:28:24
【问题描述】:

我正在尝试完全按照此视频中的内容进行操作。 https://www.youtube.com/watch?v=MswEPIFTEVU

【问题讨论】:

标签: excel vba web-scraping


【解决方案1】:

您可以改用XMLHTTP request 来获取数据,这要快得多。这假设您的链接位于从单元格 A1 开始的 A 列工作表 1 中。 它获取电话号码并将它们返回到链接旁边的 B 列。 如果您有大量链接,则此方法比您正在考虑的方法更有效。

Option Explicit
Public Sub GetInfo()
    Dim i As Long, html As HTMLDocument, links(), http As Object, results(), URL As String
    Dim lastRow As Long
    With Worksheets("Sheet1")
        lastRow = .Cells(.Rows.Count, "A").End(xlUp).Row 'URL are in column A sheet 1, starting at A1

        If lastRow = 1 Then
            ReDim links(1, 1): links(1, 1) = .Range("A1")
        Else
            links = .Range("A1:A" & lastRow).Value
        End If

        ReDim results(0 To UBound(links, 1) - 1)
        Set http = CreateObject("MSXML2.XMLHTTP")

        For i = LBound(links) To UBound(links)
            If InStr(links(i, 1), "http") > 0 Then
                URL = links(i, 1)
                Set html = GetHTMLDoc(http, URL)
                results(i - 1) = GetNumber(html)
                Set html = Nothing
            End If
        Next
        .Range("B1").Resize(UBound(results, 1) + 1, 1) = Application.WorksheetFunction.Transpose(results)
    End With

End Sub
Public Function GetHTMLDoc(ByVal http As Object, ByVal URL As String) As HTMLDocument
    Dim html As New HTMLDocument, sResponse As String
    With http
        .Open "GET", URL, False
        .send
        sResponse = StrConv(.responseBody, vbUnicode)
    End With

    html.body.innerHTML = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))

    Set GetHTMLDoc = html
End Function
Public Function GetNumber(ByVal html As HTMLDocument) As String
    Dim arr() As String
    arr = Split(html.querySelector(".container").innerText, Chr$(10))
    GetNumber = Replace$(arr(8), "Number : +", vbNullString)
End Function

你可以重写如下得到城市、名称、运营商

Option Explicit
Public Sub GetInfo()
    Dim i As Long, html As HTMLDocument, links(), http As Object, results(), URL As String
    Dim lastRow As Long
    With Worksheets("Sheet1")
        lastRow = .Cells(.Rows.Count, "A").End(xlUp).Row 'URL are in column A sheet 1, starting at A1

        If lastRow = 1 Then
            ReDim links(1, 1): links(1, 1) = .Range("A1")
        Else
            links = .Range("A1:A" & lastRow).Value
        End If

        ReDim results(0 To UBound(links, 1) - 1)
        Set http = CreateObject("MSXML2.XMLHTTP")

        For i = LBound(links) To UBound(links)
            If InStr(links(i, 1), "http") > 0 Then
                URL = links(i, 1)
                Set html = GetHTMLDoc(http, URL)
                results(i - 1) = GetNumber(html)
                Set html = Nothing
            End If
        Next

        For i = LBound(results) To UBound(results)
            .Cells(i + 1, 2).Resize(1, UBound(results(i)) + 1) = results(i)
        Next
    End With

End Sub
Public Function GetHTMLDoc(ByVal http As Object, ByVal URL As String) As HTMLDocument
    Dim html As New HTMLDocument, sResponse As String
    With http
        .Open "GET", URL, False
        .send
        sResponse = StrConv(.responseBody, vbUnicode)
    End With

    html.body.innerHTML = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))

    Set GetHTMLDoc = html
End Function
Public Function GetNumber(ByVal html As HTMLDocument) As Variant
    Dim arr() As String, outputArr(0 To 2)
    arr = Split(html.querySelector(".container").innerText, Chr$(10))
    outputArr(0) = Replace$(arr(2), "Name : ", vbNullString)
    outputArr(1) = Replace$(arr(10), "Carrier : ", vbNullString)
    outputArr(2) = Replace$(arr(14), "City : ", vbNullString)
    GetNumber = outputArr
End Function

如果由于某种原因位置匹配不起作用,您可以使用以下版本的函数GetNumber

Public Function GetNumber(ByVal html As HTMLDocument) As Variant
    Dim arr() As String, outputArr(0 To 2), i As Long
    arr = Split(html.querySelector(".container").innerText, Chr$(10))
    For i = LBound(arr) To UBound(arr)
    Select Case True
        Case InStr(arr(i), "Name") > 0
            outputArr(0) = Replace$(arr(2), "Name : ", vbNullString)
        Case InStr(arr(i), "Carrier") > 0
            outputArr(1) = Replace$(arr(10), "Carrier : ", vbNullString)
        Case InStr(arr(i), "City") > 0
            outputArr(2) = Replace$(arr(14), "City : ", vbNullString)
        End Select
    Next i
    GetNumber = outputArr
End Function

【讨论】:

  • 看起来很奇怪,我打心底里感谢您的回复,但似乎网站没有返回结果,因为流量很大。但它现在不工作,应该在网站开始工作时工作。但现在它只给我编号,我想要 B、C 和 D 列中的“姓名”、“承运人”和“城市”。我想这就是它的工作原理?
  • 一会儿。所以你根本不想要这个号码?
  • 我有数字,这就是我在列 A 中创建 url 的方式。最重要的是需要知道名称和“城市”。
  • 你好 Qharr,在一些有联系人的图片或显示图片的 url 中,它给了我性别、地区、电子邮件,而不是姓名、运营商和城市。就像这个一样。 smsprank.ga/true/pro.php?no=7414814525任何想法为什么?我注意到这种情况仅发生在带有 DP 的 url 中,并且网站每天只允许获取 120 个数据,这没关系,但你可以修改它,以便它给出 DP 联系人的姓名、运营商和城市,而不是性别、地区、电子邮件。请记住 url 有 DP 和非 DP 联系人。
  • outputArr(0) = Replace$(arr(0), "Name : ", vbNullString) outputArr(1) = Replace$(arr(8), "Carrier : ", vbNullString) outputArr( 2) = Replace$(arr(12), "City : ", vbNullString) 对我有用,似乎我必须首先对整个结果进行分类以查找不良数据(与 DP 联系的数据),然后使用新代码重新扫描。跨度>
猜你喜欢
  • 1970-01-01
  • 1970-01-01
  • 2019-02-08
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
相关资源
最近更新 更多