【发布时间】:2017-09-27 13:10:23
【问题描述】:
我想从地图中提取数据,然后获取并存储所有充电站处于特定状态的位置。 (例如:https://www.plugshare.com/)
如何做到这一点?我不介意使用任何编程语言,但哪一种最适合这个应用程序?
【问题讨论】:
标签: json vba google-maps web-scraping xmlhttprequest
我想从地图中提取数据,然后获取并存储所有充电站处于特定状态的位置。 (例如:https://www.plugshare.com/)
如何做到这一点?我不介意使用任何编程语言,但哪一种最适合这个应用程序?
【问题讨论】:
标签: json vba google-maps web-scraping xmlhttprequest
您可以使用 XHR 直接从 https://www.plugshare.com 检索数据。您必须研究一下网站如何收集数据。对于任何动态加载的数据,您只需检查网页所做的 XHR,找到包含相关数据的数据,制作相同的 XHR(站点是否提供 API)并解析响应。导航页面 e. G。在 Chrome 中,然后打开开发者工具窗口 (F12)、网络选项卡,重新加载 F5 页面并检查列表中的 XHR。
有一个对 URL https://www.plugshare.com/api/locations/region?... 的请求,它返回具有指定坐标的矩形视口区域中的充电站的纬度、经度和其他信息。您可以找到如下所示的 URL、查询参数和一些必要的标头:
响应为 JSON 格式:
您需要在请求中添加基本授权标头。要检索凭据,请转到 Sources 选项卡,为包含 https://www.plugshare.com/api/locations/region 的 URL 添加 XHR 断点,重新加载 F5 页面,当页面在 XHR 上暂停时,逐帧遵循调用堆栈:
跳过属于New Relic 功能的任何NREUM 和nrWrapper 对象。单击漂亮打印 {} 以格式化源。搜索 e. G。 Basic、Authorization 或 setRequestHeader 在源中,对于这种特殊情况,第一个匹配项在 https://www.plugshare.com/js/main.js?_=1 中找到:
点击地图上的一个电台,你会看到另一个 XHR 出现,其 URL 类似于 https://www.plugshare.com/api/locations/[id],其中包含该电台的详细信息,如下所示:
响应也是 JSON 格式:
您还可以从https://www.plugshare.com/api/stations/[id] 等 URL 获取电台数据。
您可以使用下面的 VBA 代码来检索上述信息。 将JSON.bas模块导入VBA项目进行JSON处理。
Option Explicit
Sub Test_www_plugshare_com()
Const Transposed = False ' Output option
Const Detailed = True ' Scrape option
Dim sResponse As String
Dim aQryHds()
Dim oQuery As Object
Dim sQuery As String
Dim vRegionJSON
Dim sState As String
Dim aResult()
Dim i As Long
Dim vLocationJSON
Dim aRows()
Dim aHeader()
' Retrieve auth token
XmlHttpRequest "GET", "https://www.plugshare.com/js/main.js?_=1", "", "", "", sResponse
With RegExMatches(sResponse, "var s\=""(Basic [^""]*)"";") ' var s="Basic *";
If .Count > 0 Then
aQryHds = Array( _
Array("Authorization", .Item(0).SubMatches(0)), _
Array("Accept", "application/json") _
)
Else
MsgBox "Can't retrieve auth token"
Exit Sub
End If
End With
' Set query parameters
Set oQuery = CreateObject("Scripting.Dictionary")
With oQuery
.Add "minimal", "1"
.Add "count", "500"
.Add "latitude", "19.697593650121235"
.Add "longitude", "-155.06529816792295"
.Add "spanLng", "0.274658203125"
.Add "spanLat", "0.11878815323507652"
.Add "access", "1,3"
.Add "outlets", "[{""connector"":1},{""connector"":2},{""connector"":3},{""connector"":4},{""connector"":5},{""connector"":6,""power"":0},{""connector"":6,""power"":1},{""connector"":7},{""connector"":8},{""connector"":9},{""connector"":10},{""connector"":11},{""connector"":12},{""connector"":13},{""connector"":14},{""connector"":15}]"
.Add "fast", "add"
End With
sQuery = EncodeQueryParams(oQuery)
' Retrieve a list of stations for the viewport
XmlHttpRequest "GET", "https://www.plugshare.com/api/locations/region?" & sQuery, aQryHds, "", "", sResponse
' Parse JSON response
JSON.Parse sResponse, vRegionJSON, sState
If sState <> "Array" Then
MsgBox "Invalid JSON response"
Exit Sub
End If
' Populate result array
ReDim aResult(UBound(vRegionJSON))
' Extract selected properties from parsed JSON
For i = 0 To UBound(aResult)
Set aResult(i) = ExtractKeys(vRegionJSON(i), Array("id", "name", "latitude", "longitude"))
DoEvents
Next
If Detailed Then
' Populate result array with detailed info for each location
For i = 0 To UBound(aResult)
' Retrieve detailed info for each location
XmlHttpRequest "GET", "https://www.plugshare.com/api/locations/" & aResult(i)("id"), aQryHds, "", "", sResponse
' Parse JSON response
JSON.Parse sResponse, vLocationJSON, sState
If sState = "Object" Then
' Extract selected properties from parsed JSON
Set aResult(i) = ExtractKeys(vLocationJSON, Array("reverse_geocoded_address", "hours", "phone", "description"), aResult(i))
End If
DoEvents
Next
End If
' Convert resulting array to arrays for output
JSON.ToArray aResult, aRows, aHeader
' Output
With ThisWorkbook.Sheets(1)
.Cells.Delete
If Transposed Then
Output2DArray .Cells(1, 1), WorksheetFunction.Transpose(aHeader)
Output2DArray .Cells(1, 2), WorksheetFunction.Transpose(aRows)
Else
OutputArray .Cells(1, 1), aHeader
Output2DArray .Cells(2, 1), aRows
End If
.Columns.AutoFit
End With
MsgBox "Completed"
End Sub
Sub XmlHttpRequest(sMethod As String, sUrl As String, arrSetHeaders, sFormData, sRespHeaders As String, sContent As String)
Dim arrHeader
'With CreateObject("Msxml2.ServerXMLHTTP")
' .SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
With CreateObject("MSXML2.XMLHTTP")
.Open sMethod, sUrl, False
If IsArray(arrSetHeaders) Then
For Each arrHeader In arrSetHeaders
.SetRequestHeader arrHeader(0), arrHeader(1)
Next
End If
.send sFormData
sRespHeaders = .GetAllResponseHeaders
sContent = .responseText
End With
End Sub
Function RegExMatches(sText, sPattern, Optional bGlobal = True, Optional bMultiLine = True, Optional bIgnoreCase = True) As Object
With CreateObject("VBScript.RegExp")
.Global = bGlobal
.MultiLine = bMultiLine
.IgnoreCase = bIgnoreCase
.Pattern = sPattern
Set RegExMatches = .Execute(sText)
End With
End Function
Function EncodeQueryParams(oParams As Object) As String
Dim aParams
Dim i As Long
aParams = oParams.Keys()
For i = 0 To UBound(aParams)
aParams(i) = EncodeUriComponent((aParams(i))) & "=" & EncodeUriComponent((oParams(aParams(i))))
Next
EncodeQueryParams = Join(aParams, "&")
End Function
Function EncodeUriComponent(strText As String) As String
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
End If
EncodeUriComponent = objHtmlfile.parentWindow.encode(strText)
End Function
Function ExtractKeys(oSource, aKeys, Optional oTarget = Nothing) As Object
Dim vKey
If oTarget Is Nothing Then Set oTarget = CreateObject("Scripting.Dictionary")
For Each vKey In aKeys
If oSource.Exists(vKey) Then
If IsObject(oSource(vKey)) Then
Set oTarget(vKey) = oSource(vKey)
Else
oTarget(vKey) = oSource(vKey)
End If
End If
Next
Set ExtractKeys = oTarget
End Function
Sub OutputArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize(1, UBound(aCells) - LBound(aCells) + 1)
.NumberFormat = "@"
.Value = aCells
End With
End With
End Sub
Sub Output2DArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
UBound(aCells, 1) - LBound(aCells, 1) + 1, _
UBound(aCells, 2) - LBound(aCells, 2) + 1)
.NumberFormat = "@"
.Value = aCells
End With
End With
End Sub
如果您有很多要输出的项目以防止应用程序挂起,请更改为 Const Detailed = False,因为 XHR 处于同步模式。我指定视口坐标的输出如下:
顺便说一句,类似的方法适用于in other answers。
【讨论】: