【问题标题】:How do I extract an entire table that contain scroll down in page如何提取包含向下滚动页面的整个表格
【发布时间】:2020-10-06 17:08:38
【问题描述】:

为什么当我使用 .getElementsByTagName 方法提取表格时,它没有提取表格中包含的所有数据?请注意,这是在页面https://finance.yahoo.com/quote/MGLU3.SA/history?p=MGLU3.SA 中向下滚动。

Public Sub getHistoricCotation()

    Dim mainURL As String
    Dim elem As Object, tRow As Object
    Dim S, R, C
    Dim initial_date As String, final_date As String
    Dim stock As String

    initial_date = DateDiff("s", "1/1/1970 00:00:00", ufHistorico.txtDtInicial) + 86400
    final_date = DateDiff("s", "1/1/1970 00:00:00", ufHistorico.txtDtFinal) + 86400
    stock = ufHistorico.cbAcoes.Text

    mainURL = "https://finance.yahoo.com/quote/" & stock & "/history?period1=" & initial_date & "&period2=" & final_date & "&interval=1d&filter=history&frequency=1d"

    With CreateObject("WinHttp.WinHttpRequest.5.1")
        .Open "GET", mainURL, False

        strCookie = .getAllResponseHeaders
        strCookie = Split(Split(strCookie, "Cookie:")(1), ";")(0)

        .Open "GET", mainURL, False
        .setRequestHeader "Cookie", strCookie
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
        .send

        S = .responseText
    End With

    With CreateObject("htmlfile")
        .body.innerHTML = S
        For Each elem In .getElementsByTagName("tr")
            For Each tRow In elem.Cells
                C = C + 1: Cells(R + 1, C) = tRow.innerText
            Next tRow
            C = 0: R = R + 1
        Next elem
    End With

End Sub

【问题讨论】:

    标签: excel vba web-scraping


    【解决方案1】:

    您的脚本可以解析的部分是静态的,但它无法解析的部分是动态生成的。但是,好消息是该表的所有内容都可以在某些脚本标记内的页面源中使用。我创建了一个脚本来从那里挖出所需的部分。您现在要做的就是使用任何 json 转换器或正则表达式处理内容。

    这是它从那里抓取所有相关数据的方式:

    Sub FetchHistoricalPrice()
        Const mainUrl$ = "https://finance.yahoo.com/quote/MGLU3.SA/history?p=MGLU3.SA"
        Dim S$, Elem As Object
    
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", mainUrl, False
            .send
            S = .responseText
        End With
    
        With CreateObject("VBScript.RegExp")
            .Global = True
            .MultiLine = True
    
            .Pattern = "HistoricalPriceStore[\s\S]+prices[^[]+(.*?]),"
            Set Elem = .Execute(S)
            If Elem.Count > 0 Then
                Debug.Print Elem(0).SubMatches(0)
            End If
        End With
    End Sub
    

    【讨论】:

      猜你喜欢
      • 2017-05-17
      • 2015-01-19
      • 1970-01-01
      • 1970-01-01
      • 2011-01-21
      • 2021-01-12
      • 2012-02-05
      • 2019-11-04
      • 2020-10-03
      相关资源
      最近更新 更多