【问题标题】:Cannot exclude tags which are inside a <li> tag with vba(无法使用 VBA 排除 <li> 标签内的其他标签)
【发布时间】:2020-01-08 18:48:23
【问题描述】:

我有几个类似下面的页面

https://www.skroutz.gr/s/2195774/Western-Digital-Blue-3-5-1TB-7200rpm.html

我想用 VBA 从页面中提取价格、库存情况和卖家名称。当我尝试以下代码时:

' Fragment from the question: `ie` is not declared here
' (presumably an InternetExplorer instance created earlier - confirm).
ie.Navigate "https://www.skroutz.gr/s/2195774/Western-Digital-Blue-3-5-1TB-7200rpm.html"

' Spin until the browser is no longer busy and ReadyState is 4.
Do While ie.Busy = True Or ie.ReadyState <> 4: DoEvents: Loop

Dim NodeList As Object, currentItem As Long
Dim outputString As String
' Select every element carrying both the "card" and "js-product-card" classes.
Set NodeList = ie.Document.querySelectorAll(".card.js-product-card")
With ActiveSheet
    ' Append the full innerText of each matched node, each preceded by a line break.
    ' Because the whole node's text is taken, everything inside it ends up in the output.
    For currentItem = 0 To NodeList.Length - 1
        outputString = outputString & vbCrLf & NodeList.Item(currentItem).innerText
    Next currentItem
    ' The combined text is written into a single cell (row 2, column 6).
    ' Note: Trim$ strips only leading/trailing spaces, not the leading vbCrLf.
    .Cells(2, 6) = Trim$(outputString)
End With

我得到的是

“li” 标签内的全部数据。如何排除所有其他数据,只保留我想要的部分?

【问题讨论】:

    标签: excel vba web-scraping


    【解决方案1】:

    该方案通过循环滚动页面来加载完整的商品列表,然后使用不同的 CSS 选择器分别定位所需的信息(卖家、最终价格、库存情况)

    Option Explicit
    ' Opens the product page in Internet Explorer, scrolls until a final price
    ' has been found for every product card (or MAX_WAIT_SEC elapses), then
    ' writes seller name, final price and availability to columns A:C of "Sheet1".
    '
    ' Uses early binding (`New InternetExplorer`), so the project needs a
    ' reference that provides that class (normally "Microsoft Internet Controls").
    Public Sub GetInfo()
        Dim ie As New InternetExplorer, i As Long
        Const MAX_WAIT_SEC As Long = 20
    
        With ie
            .Visible = True
            .Navigate2 "https://www.skroutz.gr/s/2195774/Western-Digital-Blue-3-5-1TB-7200rpm.html"
    
            While .Busy Or .readyState < 4: DoEvents: Wend
    
            Dim finalPrices As Object, sellers As Object, availability As Object
            Dim products As Object, deadline As Date, rowCount As Long
            Set products = .document.querySelectorAll(".card.js-product-card")
    
            ' Use a wall-clock deadline instead of Timer: Timer returns a Single
            ' (seconds since midnight), does not belong in a Date variable, and
            ' wraps at midnight, which would defeat the timeout.
            deadline = DateAdd("s", MAX_WAIT_SEC, Now)
            Do
                DoEvents
                ' Scroll one viewport down so further cards get their prices rendered.
                .document.parentWindow.execScript "window.scrollBy(0, window.innerHeight);", "javascript"
                ' Give the page a moment to react BEFORE re-counting the prices,
                ' so the exit test sees the state produced by this scroll.
                Application.Wait Now + TimeSerial(0, 0, 1)
                Set finalPrices = .document.querySelectorAll(".card.js-product-card span.final-price")
                If Now > deadline Then Exit Do
            Loop Until finalPrices.Length = products.Length
    
            Set sellers = .document.querySelectorAll(".card.js-product-card .shop.cf a[title]")
            Set availability = .document.querySelectorAll(".card.js-product-card span.availability")
    
            ' After a timeout the three lists may differ in length; indexing past
            ' the end of one yields Nothing and fails on .innerText, so only
            ' iterate over the rows that all three lists can supply.
            ' NOTE(review): if the lengths differ, rows may not line up per product.
            rowCount = sellers.Length
            If finalPrices.Length < rowCount Then rowCount = finalPrices.Length
            If availability.Length < rowCount Then rowCount = availability.Length
    
            With ThisWorkbook.Worksheets("Sheet1")
                For i = 0 To rowCount - 1
                    ' Read the title attribute explicitly (the selector requires it)
                    ' rather than assigning the element object and relying on its
                    ' default member.
                    .Cells(i + 1, 1) = sellers.item(i).getAttribute("title")
                    .Cells(i + 1, 2) = finalPrices.item(i).innerText
                    .Cells(i + 1, 3) = availability.item(i).innerText
                Next
            End With
            .Quit
        End With
    End Sub
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2019-06-15
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2015-02-21
      • 2022-11-13
      相关资源
      最近更新 更多