【问题标题】:Downloading table from web from multiple pages从多个页面从 Web 下载表格
【发布时间】:2019-04-30 09:18:23
【问题描述】:

我正在尝试通过网络从多个链接下载表格。

Sub test()
    cnt = 0

    For i = 2 To 5
        temp = Cells(i, 1)

        lnk = Right(temp, Len(temp) - WorksheetFunction.Find("?", temp))
        ActiveWorkbook.Queries.Add Name:="Table 0", Formula:= _
            "let" & Chr(13) & "" & Chr(10) & "    Source = Web.Page(Web.Contents(""https://www.nseindia.com/marketinfo/sym_map/symbolMapping.jsp?"" & lnk))," & Chr(13) & "" & Chr(10) & "    Data0 = Source{0}[Data]," & Chr(13) & "" & Chr(10) & "    #""Changed Type"" = Table.TransformColumnTypes(Data0,{{""CALLS Chart"", type text}, {""CALLS OI"", type text}, {""CALLS Chng in OI"", type text}, {""CALLS Volume"", t" & _
            "ype text}, {""CALLS IV"", type text}, {""CALLS LTP"", type text}, {""CALLS Net Chng"", type text}, {""CALLS Bid Qty"", type text}, {""CALLS Bid Price"", type text}, {""CALLS Ask Price"", type text}, {""CALLS Ask Qty"", type text}, {""Strike Price"", type number}, {""PUTS Bid Qty"", type text}, {""PUTS Bid Price"", type text}, {""PUTS Ask Price"", type text}, {""PUTS" & _
            " Ask Qty"", type text}, {""PUTS Net Chng"", type text}, {""PUTS LTP"", type text}, {""PUTS IV"", type text}, {""PUTS Volume"", type text}, {""PUTS Chng in OI"", type text}, {""PUTS OI"", type text}, {""PUTS Chart"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & "    #""Changed Type"""
        ActiveWorkbook.Worksheets.Add
        With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
            "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 0"";Extended Properties=""""" _
            , Destination:=Range("$A$1")).QueryTable
            .CommandType = xlCmdSql
            .CommandText = Array("SELECT * FROM [Table 0]")
            .RowNumbers = False
            .FillAdjacentFormulas = False
            .PreserveFormatting = True
            .RefreshOnFileOpen = False
            .BackgroundQuery = True
            .RefreshStyle = xlInsertDeleteCells
            .SavePassword = False
            .SaveData = True
            .AdjustColumnWidth = True
            .RefreshPeriod = 0
            .PreserveColumnInfo = True
            .ListObject.DisplayName = "Table" & cnt
            .Refresh BackgroundQuery:=False
            ActiveWorkbook.Queries("Table 0").Delete
            cnt = cnt + 1
        End With
    Next
End Sub

我得到

当我通过录制宏获得此代码时,我被困在使网页链接动态化的过程中。

ActiveWorkbook.Queries.Add Name:="Table 0", Formula:= _
        "let" & Chr(13) & "" & Chr(10) & "    Source = Web.Page(Web.Contents(""https://www.nseindia.com/marketinfo/sym_map/symbolMapping.jsp?symbol=BAJAJ-AUTO&instrument=OPTSTK&date=-&segmentLink=17""))," & Chr(13) & "" & Chr(10) & "    Data0 = Source{0}[Data]," & Chr(13) & "" & Chr(10) & "    #""Changed Type"" = Table.TransformColumnTypes(Data0,{{""CALLS Chart"", type text}, {""CALLS OI"", type text}, {""CALLS Chng in OI"", type text}, {""CALLS Volume"", t" & _
        "ype text}, {""CALLS IV"", type text}, {""CALLS LTP"", type text}, {""CALLS Net Chng"", type text}, {""CALLS Bid Qty"", type text}, {""CALLS Bid Price"", type text}, {""CALLS Ask Price"", type text}, {""CALLS Ask Qty"", type text}, {""Strike Price"", type number}, {""PUTS Bid Qty"", type text}, {""PUTS Bid Price"", type text}, {""PUTS Ask Price"", type text}, {""PUTS" & _
        " Ask Qty"", type text}, {""PUTS Net Chng"", type text}, {""PUTS LTP"", type text}, {""PUTS IV"", type text}, {""PUTS Volume"", type text}, {""PUTS Chng in OI"", type text}, {""PUTS OI"", type text}, {""PUTS Chart"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & "    #""Changed Type"""

供您参考的链接:

  1. https://www.nseindia.com/marketinfo/sym_map/symbolMapping.jsp?symbol=BAJAJ-AUTO&instrument=OPTSTK&date=-&segmentLink=17
  2. https://www.nseindia.com/marketinfo/sym_map/symbolMapping.jsp?symbol=BAJAJFINSV&instrument=OPTSTK&date=-&segmentLink=17
  3. https://www.nseindia.com/marketinfo/sym_map/symbolMapping.jsp?symbol=BAJFINANCE&instrument=OPTSTK&date=-&segmentLink=17

【问题讨论】:

  • 你想要的行为是什么?您要创建多个表,还是覆盖/替换现有的Table 0
  • 我想为网页中的每个表格创建新工作表。参考:链接 1:nseindia.com/marketinfo/sym_map/… 参考:链接2:nseindia.com/marketinfo/sym_map/…
  • 我有 100 多页用于下载表格!!感谢您抽出宝贵时间:)
  • 下载后我不需要查询表0。因此,我在这里删除ActiveWorkbook.Queries("Table 0").Delete
  • @jsheeran 更新了错误快照,请检查..

标签: html excel vba web-scraping


【解决方案1】:

您可以将 XMLHTTP 请求视为一种快速检索方法。我假设链接位于名为Links 的工作表的 A 列中,从第 1 行开始。

你会调整你的范围

Application.Transpose(ws.Range("A1:A3").Value)

确保包含所有链接。

我使用symbol 来确定要写入的工作表。我使用@Rory 稍微修改过的函数来测试工作表是否已经存在,如果不存在,我创建它。这假设符号不会在 URL 中重复,否则您需要为工作表命名选择独特的东西。

我使用 #octable 的 css id 选择器通过它的 id 来定位表。

Option Explicit    
Public Sub Test()
    Dim sResponse As String, html As HTMLDocument, links(), hTable As HTMLTable
    Dim symbol As String, i As Long, ws As Worksheet, wsTemp As Worksheet
    Set ws = ThisWorkbook.Worksheets("Links")
    links = Application.Transpose(ws.Range("A1:A3").Value)

    With CreateObject("MSXML2.XMLHTTP")
        For i = LBound(links) To UBound(links)
            If InStr(links(i), "http") > 0 Then
                .Open "GET", links(i), False
                .send
                sResponse = StrConv(.responseBody, vbUnicode)

                Set html = New HTMLDocument
                With html
                    .body.innerHTML = sResponse
                    Set hTable = .querySelector("#octable")
                End With
                symbol = Split(Split(links(i), "symbol=")(1), "&")(0)
                If Not WorksheetExists(symbol) Then
                    Set wsTemp = ThisWorkbook.Worksheets.Add
                    wsTemp.NAME = symbol
                Else
                    Set wsTemp = ThisWorkbook.Worksheets(symbol)
                End If
                If Not hTable Is Nothing Then
                    wsTemp.UsedRange.ClearContents
                    wsTemp.Cells(1, 1) = "CALLS": wsTemp.Cells(1, 13) = "PUTS"
                    WriteTable hTable, 2, wsTemp
                End If
            End If
        Next
    End With
End Sub
Public Sub WriteTable(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)

    If ws Is Nothing Then Set ws = ActiveSheet

    Dim tSection As Object, tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, C As Long, tBody As Object
    r = startRow
    With ws
        Dim headers As Object, header As Object, columnCounter As Long
        Set headers = hTable.getElementsByTagName("th")
        For Each header In headers
            columnCounter = columnCounter + 1
            If columnCounter > 3 Then
            .Cells(startRow, columnCounter - 3) = header.innerText
            End If
        Next header
        startRow = startRow + 1
        Set tBody = hTable.getElementsByTagName("tbody")
        For Each tSection In tBody               'HTMLTableSection
            Set tRow = tSection.getElementsByTagName("tr") 'HTMLTableRow
            For Each tr In tRow
                r = r + 1
                Set tCell = tr.getElementsByTagName("td")
                C = 1
                For Each td In tCell             'DispHTMLElementCollection
                    .Cells(r, C).Value = td.innerText 'HTMLTableCell
                    C = C + 1
                Next td
            Next tr
        Next tSection
    End With
End Sub

Public Function WorksheetExists(ByVal sName As String) As Boolean  '<== @Rory
    WorksheetExists = Evaluate("ISREF('" & sName & "'!A1)")
End Function

参考资料(VBE > 工具 > 参考资料):

  1. Microsoft HTML 对象库

【讨论】:

  • 完美!!非常感谢:)
  • 我收到运行时错误'- 2146697211 800c0005 )':系统找不到指定的资源。你能告诉我为什么吗?
  • 它只发生在长期运行中吗?它是否继续在几个链接上工作?
  • 是的。有什么办法解决这个问题吗?
  • 请问哪个 URL 失败了?
猜你喜欢
  • 2021-09-24
  • 1970-01-01
  • 2022-06-29
  • 2020-09-06
  • 2022-10-18
  • 2012-06-13
  • 2012-04-27
  • 2018-02-26
  • 1970-01-01
相关资源
最近更新 更多