【问题标题】:VBA - HTML scraping problemsVBA - HTML抓取问题
【发布时间】:2017-10-15 22:51:30
【问题描述】:

我正在尝试从网站https://www.rbauction.com/heavy-equipment-auctions 抓取拍卖数据。我目前的尝试是使用以下代码将网站的 HTML 提取到 VBA 中,然后对其进行解析并仅保留我想要的项目(拍卖名称、天数、项目数)。

Sub RBA_Auction_Scrape()

    Dim S_Sheet As Worksheet
    Dim Look_String As String
    Dim Web_HTML As String
    Dim HTTP_OBJ As New MSXML2.XMLHTTP60

    On Error GoTo ERR_LABEL:
    Set S_Sheet = ActiveWorkbook.ActiveSheet
    Web_HTML = ""
    HTTP_OBJ.Open "GET", "https://www.rbauction.com/heavy-equipment auctions", False
    HTTP_OBJ.Send
    On Error Resume Next
    Select Case HTTP_OBJ.Status
        Case 0: Web_HTML = HTTP_OBJ.responseText
        Case 200: Web_HTML = HTTP_OBJ.responseText
        Case Else: GoTo ERR_LABEL
    End Select

    Debug.Print Web_HTML

End Sub

它成功地提取了数据,但包含所有拍卖名称和规模的“即将举行的重型设备拍卖”部分没有被拉入 VBA。一般来说,我对 HTML 不是很好,但我希望有人能提供解决方案,或者至少解释一下当我搜索被拉入 VBA 的网站 HTML 时,找不到我想要的文章。

【问题讨论】:

    标签: json vba excel web-scraping xmlhttprequest


    【解决方案1】:

    https://www.rbauction.com/heavy-equipment-auctions 提供的链接的网页源 HTML 不包含必要的数据,它使用 AJAX。 https://www.rbauction.com 网站有一个可用的 API。响应以 JSON 格式返回。导航页面 e. G。在 Chrome 中,然后打开开发者工具窗口 (F12)、网络选项卡,重新加载 (F5) 页面并检查记录的 XHR。最相关的数据是 URL https://www.rbauction.com/rba-api/calendar/v1?e1=true 返回的 JSON 字符串:

    您可以使用下面的 VBA 代码来检索上述信息。 JSON.bas 模块导入VBA 项目进行JSON 处理。

    Option Explicit
    
    Sub Test_www_rbauction_com()
    
        Const Transposed = False ' Output option
    
        Dim sResponse As String
        Dim vJSON
        Dim sState As String
        Dim i As Long
        Dim aRows()
        Dim aHeader()
    
        ' Retrieve JSON data
        XmlHttpRequest "GET", "https://www.rbauction.com/rba-api/calendar/v1?e1=true", "", "", "", sResponse
        ' Parse JSON response
        JSON.Parse sResponse, vJSON, sState
        If sState <> "Object" Then
            MsgBox "Invalid JSON response"
            Exit Sub
        End If
        ' Pick core data
        vJSON = vJSON("auctions")
        ' Extract selected properties for each item
        For i = 0 To UBound(vJSON)
            Set vJSON(i) = ExtractKeys(vJSON(i), Array("eventId", "name", "date", "itemCount"))
            DoEvents
        Next
        ' Convert JSON structure to 2-d arrays for output
        JSON.ToArray vJSON, aRows, aHeader
        ' Output
        With ThisWorkbook.Sheets(1)
            .Cells.Delete
            If Transposed Then
                Output2DArray .Cells(1, 1), WorksheetFunction.Transpose(aHeader)
                Output2DArray .Cells(1, 2), WorksheetFunction.Transpose(aRows)
            Else
                OutputArray .Cells(1, 1), aHeader
                Output2DArray .Cells(2, 1), aRows
            End If
            .Columns.AutoFit
        End With
        MsgBox "Completed"
    
    End Sub
    
    Sub XmlHttpRequest(sMethod As String, sUrl As String, arrSetHeaders, sFormData, sRespHeaders As String, sContent As String)
    
        Dim arrHeader
    
        'With CreateObject("Msxml2.ServerXMLHTTP")
        '    .SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
        With CreateObject("MSXML2.XMLHTTP")
            .Open sMethod, sUrl, False
            If IsArray(arrSetHeaders) Then
                For Each arrHeader In arrSetHeaders
                    .SetRequestHeader arrHeader(0), arrHeader(1)
                Next
            End If
            .send sFormData
            sRespHeaders = .GetAllResponseHeaders
            sContent = .responseText
        End With
    
    End Sub
    
    Function ExtractKeys(oSource, aKeys, Optional oDest = Nothing) As Object
    
        Dim vKey
    
        If oDest Is Nothing Then Set oDest = CreateObject("Scripting.Dictionary")
        For Each vKey In aKeys
            If oSource.Exists(vKey) Then
                If IsObject(oSource(vKey)) Then
                    Set oDest(vKey) = oSource(vKey)
                Else
                    oDest(vKey) = oSource(vKey)
                End If
            End If
        Next
        Set ExtractKeys = oDest
    
    End Function
    
    Sub OutputArray(oDstRng As Range, aCells As Variant)
    
        With oDstRng
            .Parent.Select
            With .Resize(1, UBound(aCells) - LBound(aCells) + 1)
                .NumberFormat = "@"
                .Value = aCells
            End With
        End With
    
    End Sub
    
    Sub Output2DArray(oDstRng As Range, aCells As Variant)
    
        With oDstRng
            .Parent.Select
            With .Resize( _
                    UBound(aCells, 1) - LBound(aCells, 1) + 1, _
                    UBound(aCells, 2) - LBound(aCells, 2) + 1)
                .NumberFormat = "@"
                .Value = aCells
            End With
        End With
    
    End Sub
    

    我的输出如下:

    顺便说一句,类似的方法适用于in other answers

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2010-10-28
      • 1970-01-01
      • 1970-01-01
      • 2022-01-08
      • 2016-05-19
      • 2016-10-04
      相关资源
      最近更新 更多