【问题标题】:Extract data locations from map从地图中提取数据位置
【发布时间】:2017-09-27 13:10:23
【问题描述】:

我想从地图中提取数据,然后获取并存储所有充电站处于特定状态的位置。 (例如:https://www.plugshare.com/

如何做到这一点?我不介意使用任何编程语言,但哪一种最适合这个应用程序?

【问题讨论】:

    标签: json vba google-maps web-scraping xmlhttprequest


    【解决方案1】:

    您可以使用 XHR 直接从 https://www.plugshare.com 检索数据。您必须研究一下网站如何收集数据。对于任何动态加载的数据,您只需检查网页所做的 XHR,找到包含相关数据的数据,制作相同的 XHR(站点是否提供 API)并解析响应。导航页面 e. G。在 Chrome 中,然后打开开发者工具窗口 (F12)、网络选项卡,重新加载 F5 页面并检查列表中的 XHR。

    有一个对 URL https://www.plugshare.com/api/locations/region?... 的请求,它返回具有指定坐标的矩形视口区域中的充电站的纬度、经度和其他信息。您可以找到如下所示的 URL、查询参数和一些必要的标头:

    响应为 JSON 格式:

    您需要在请求中添加基本授权标头。要检索凭据,请转到 Sources 选项卡,为包含 https://www.plugshare.com/api/locations/region 的 URL 添加 XHR 断点,重新加载 F5 页面,当页面在 XHR 上暂停时,逐帧遵循调用堆栈:

    跳过属于New Relic 功能的任何NREUMnrWrapper 对象。单击漂亮打印 {} 以格式化源。搜索 e. G。 BasicAuthorizationsetRequestHeader 在源中,对于这种特殊情况,第一个匹配项在 https://www.plugshare.com/js/main.js?_=1 中找到:

    点击地图上的一个电台,你会看到另一个 XHR 出现,其 URL 类似于 https://www.plugshare.com/api/locations/[id],其中包含该电台的详细信息,如下所示:

    响应也是 JSON 格式:

    您还可以从https://www.plugshare.com/api/stations/[id] 等 URL 获取电台数据。

    您可以使用下面的 VBA 代码来检索上述信息。 JSON.bas模块导入VBA项目进行JSON处理。

    Option Explicit
    
    Sub Test_www_plugshare_com()
    
        Const Transposed = False ' Output option
        Const Detailed = True ' Scrape option
    
        Dim sResponse As String
        Dim aQryHds()
        Dim oQuery As Object
        Dim sQuery As String
        Dim vRegionJSON
        Dim sState As String
        Dim aResult()
        Dim i As Long
        Dim vLocationJSON
        Dim aRows()
        Dim aHeader()
    
        ' Retrieve auth token
        XmlHttpRequest "GET", "https://www.plugshare.com/js/main.js?_=1", "", "", "", sResponse
        With RegExMatches(sResponse, "var s\=""(Basic [^""]*)"";")  ' var s="Basic *";
            If .Count > 0 Then
                aQryHds = Array( _
                    Array("Authorization", .Item(0).SubMatches(0)), _
                    Array("Accept", "application/json") _
                )
            Else
                MsgBox "Can't retrieve auth token"
                Exit Sub
            End If
        End With
        ' Set query parameters
        Set oQuery = CreateObject("Scripting.Dictionary")
        With oQuery
            .Add "minimal", "1"
            .Add "count", "500"
            .Add "latitude", "19.697593650121235"
            .Add "longitude", "-155.06529816792295"
            .Add "spanLng", "0.274658203125"
            .Add "spanLat", "0.11878815323507652"
            .Add "access", "1,3"
            .Add "outlets", "[{""connector"":1},{""connector"":2},{""connector"":3},{""connector"":4},{""connector"":5},{""connector"":6,""power"":0},{""connector"":6,""power"":1},{""connector"":7},{""connector"":8},{""connector"":9},{""connector"":10},{""connector"":11},{""connector"":12},{""connector"":13},{""connector"":14},{""connector"":15}]"
            .Add "fast", "add"
        End With
        sQuery = EncodeQueryParams(oQuery)
        ' Retrieve a list of stations for the viewport
        XmlHttpRequest "GET", "https://www.plugshare.com/api/locations/region?" & sQuery, aQryHds, "", "", sResponse
        ' Parse JSON response
        JSON.Parse sResponse, vRegionJSON, sState
        If sState <> "Array" Then
            MsgBox "Invalid JSON response"
            Exit Sub
        End If
        ' Populate result array
        ReDim aResult(UBound(vRegionJSON))
        ' Extract selected properties from parsed JSON
        For i = 0 To UBound(aResult)
            Set aResult(i) = ExtractKeys(vRegionJSON(i), Array("id", "name", "latitude", "longitude"))
            DoEvents
        Next
        If Detailed Then
            ' Populate result array with detailed info for each location
            For i = 0 To UBound(aResult)
                ' Retrieve detailed info for each location
                XmlHttpRequest "GET", "https://www.plugshare.com/api/locations/" & aResult(i)("id"), aQryHds, "", "", sResponse
                ' Parse JSON response
                JSON.Parse sResponse, vLocationJSON, sState
                If sState = "Object" Then
                    ' Extract selected properties from parsed JSON
                    Set aResult(i) = ExtractKeys(vLocationJSON, Array("reverse_geocoded_address", "hours", "phone", "description"), aResult(i))
                End If
                DoEvents
            Next
        End If
        ' Convert resulting array to arrays for output
        JSON.ToArray aResult, aRows, aHeader
        ' Output
        With ThisWorkbook.Sheets(1)
            .Cells.Delete
            If Transposed Then
                Output2DArray .Cells(1, 1), WorksheetFunction.Transpose(aHeader)
                Output2DArray .Cells(1, 2), WorksheetFunction.Transpose(aRows)
            Else
                OutputArray .Cells(1, 1), aHeader
                Output2DArray .Cells(2, 1), aRows
            End If
            .Columns.AutoFit
        End With
        MsgBox "Completed"
    
    End Sub
    
    Sub XmlHttpRequest(sMethod As String, sUrl As String, arrSetHeaders, sFormData, sRespHeaders As String, sContent As String)
    
        Dim arrHeader
    
        'With CreateObject("Msxml2.ServerXMLHTTP")
        '    .SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
        With CreateObject("MSXML2.XMLHTTP")
            .Open sMethod, sUrl, False
            If IsArray(arrSetHeaders) Then
                For Each arrHeader In arrSetHeaders
                    .SetRequestHeader arrHeader(0), arrHeader(1)
                Next
            End If
            .send sFormData
            sRespHeaders = .GetAllResponseHeaders
            sContent = .responseText
        End With
    
    End Sub
    
    Function RegExMatches(sText, sPattern, Optional bGlobal = True, Optional bMultiLine = True, Optional bIgnoreCase = True) As Object
    
        With CreateObject("VBScript.RegExp")
            .Global = bGlobal
            .MultiLine = bMultiLine
            .IgnoreCase = bIgnoreCase
            .Pattern = sPattern
            Set RegExMatches = .Execute(sText)
        End With
    
    End Function
    
    Function EncodeQueryParams(oParams As Object) As String
    
        Dim aParams
        Dim i As Long
    
        aParams = oParams.Keys()
        For i = 0 To UBound(aParams)
            aParams(i) = EncodeUriComponent((aParams(i))) & "=" & EncodeUriComponent((oParams(aParams(i))))
        Next
        EncodeQueryParams = Join(aParams, "&")
    
    End Function
    
    Function EncodeUriComponent(strText As String) As String
    
        Static objHtmlfile As Object
    
        If objHtmlfile Is Nothing Then
            Set objHtmlfile = CreateObject("htmlfile")
            objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
        End If
        EncodeUriComponent = objHtmlfile.parentWindow.encode(strText)
    
    End Function
    
    Function ExtractKeys(oSource, aKeys, Optional oTarget = Nothing) As Object
    
        Dim vKey
    
        If oTarget Is Nothing Then Set oTarget = CreateObject("Scripting.Dictionary")
        For Each vKey In aKeys
            If oSource.Exists(vKey) Then
                If IsObject(oSource(vKey)) Then
                    Set oTarget(vKey) = oSource(vKey)
                Else
                    oTarget(vKey) = oSource(vKey)
                End If
            End If
        Next
        Set ExtractKeys = oTarget
    
    End Function
    
    Sub OutputArray(oDstRng As Range, aCells As Variant)
    
        With oDstRng
            .Parent.Select
            With .Resize(1, UBound(aCells) - LBound(aCells) + 1)
                .NumberFormat = "@"
                .Value = aCells
            End With
        End With
    
    End Sub
    
    Sub Output2DArray(oDstRng As Range, aCells As Variant)
    
        With oDstRng
            .Parent.Select
            With .Resize( _
                    UBound(aCells, 1) - LBound(aCells, 1) + 1, _
                    UBound(aCells, 2) - LBound(aCells, 2) + 1)
                .NumberFormat = "@"
                .Value = aCells
            End With
        End With
    
    End Sub
    

    如果您有很多要输出的项目以防止应用程序挂起,请更改为 Const Detailed = False,因为 XHR 处于同步模式。我指定视口坐标的输出如下:

    顺便说一句,类似的方法适用于in other answers

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 2019-08-04
      • 2020-06-26
      • 1970-01-01
      • 2021-09-26
      • 2021-09-26
      • 1970-01-01
      • 2011-11-18
      相关资源
      最近更新 更多