【问题标题】:Scrape product specification value抓取产品规格值
【发布时间】:2020-04-19 21:13:29
【问题描述】:

我正在尝试使用 VBA 从亚马逊抓取产品规格。 要抓取的 HTML 页面:https://www.amazon.in/dp/B01FXJI1OY

我的两大要求是: 1)拆分产品标题以获得某些规格 2)从页面上列出的要点(bullet points,简称 BP)中获取其余规格

我想到的解决方案(如果您认为有更好的方法,请提出建议): 使用文本标识符(即规格值或规格值之后的文本):

我当前的代码能够获取产品标题。它还获取与存储在单元格 (2,2) 中的值匹配的项目符号。请帮助我如何使用标识符获取规格的值(对于某些规格,例如保修的月/年,这是多个规格):

' Scrapes an Amazon product page through Internet Explorer automation and
' writes to the "Crawler" sheet:
'   column 3         - the product title (element id "productTitle")
'   columns 5, 6, .. - every feature bullet whose text contains the keyword
'                      stored in cell B2 (case-insensitive match)
'
' Application settings changed for speed are always restored and the hidden
' IE instance is always closed, even when an error occurs.
Sub GetchDetails()
    Const READYSTATE_COMPLETE As Long = 4
    Const BULLET_LIST_CLASS As String = "a-unordered-list a-vertical a-spacing-none"

    Dim IE As Object            ' InternetExplorer.Application (late bound)
    Dim HTMLDoc As Object
    Dim itm As Object
    Dim bullet As Object
    Dim url As String
    Dim keyword As String
    Dim errMsg As String
    Dim i As Long
    Dim sh As Worksheet
    Dim rw As Range

    Application.ScreenUpdating = False
    Application.DisplayAlerts = False
    Application.EnableEvents = False

    ' Report failures instead of silently skipping them with Resume Next.
    On Error GoTo Fail

    Set sh = ThisWorkbook.Sheets("Crawler")
    sh.Activate

    ' rw was declared but never assigned, so every write through rw.Row failed.
    ' NOTE(review): assumes results belong on the row that holds the keyword
    ' (row 2) - adjust if a different output row is intended.
    Set rw = sh.Rows(2)
    keyword = CStr(sh.Cells(2, 2).Value)

    Set IE = CreateObject("InternetExplorer.Application")
    ' IE.Visible = True
    url = "https://amazon.in/dp/B01FXJI1OY"

    IE.Navigate2 url
    Do While IE.Busy Or IE.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    ' Give late-running page scripts a moment before reading the DOM.
    Application.Wait Now + TimeValue("0:00:01")
    Set HTMLDoc = IE.document

    ' "Option Compare Text" is only legal at module level, so it was removed
    ' from the procedure body; the match below is case-insensitive on its own.

    Set itm = HTMLDoc.getElementById("productTitle")
    If Not itm Is Nothing Then
        sh.Cells(rw.Row, 3).Value = Trim$(itm.innerText)
    End If

    Set itm = HTMLDoc.getElementsByClassName(BULLET_LIST_CLASS)(0)
    If Not itm Is Nothing Then
        i = 0
        For Each bullet In itm.getElementsByTagName("li")
            ' InStr treats the keyword literally; Like would interpret
            ' characters such as * ? # [ in the keyword as wildcards.
            If InStr(1, bullet.innerText, keyword, vbTextCompare) > 0 Then
                sh.Cells(rw.Row, 5 + i).Value = bullet.innerText
                i = i + 1
            End If
        Next bullet
    End If

CleanUp:
    ' Best-effort shutdown of the browser; a failure here must not block
    ' restoring the Application settings.
    On Error Resume Next
    If Not IE Is Nothing Then IE.Quit
    Set IE = Nothing
    On Error GoTo 0

    Application.EnableEvents = True
    Application.DisplayAlerts = True
    Application.ScreenUpdating = True

    If Len(errMsg) > 0 Then MsgBox errMsg, vbExclamation
    Exit Sub

Fail:
    errMsg = "GetchDetails failed (" & Err.Number & "): " & Err.Description
    Resume CleanUp
End Sub

【问题讨论】:

    标签: excel vba scrape


    【解决方案1】:

    我在想......像这样......开始。当然,您可以根据需要对其进行修改。

    ' Loads the Amazon product page in Internet Explorer and lists on "Sheet1"
    ' every element whose class attribute is exactly
    ' "a-unordered-list a-vertical a-spacing-none":
    '   column A - the class name
    '   column B - the element's inner text
    ' Existing contents of "Sheet1" are cleared first.
    Sub WebImport()
        Const READYSTATE_COMPLETE As Long = 4
        Const TARGET_CLASS As String = "a-unordered-list a-vertical a-spacing-none"

        ' The original declared objIE but used IE, leaving IE undeclared.
        Dim IE As Object
        Dim itm As Object
        Dim URL As String
        Dim RowCount As Long

        Set IE = CreateObject("InternetExplorer.Application")
        IE.Visible = True

        URL = "https://amazon.in/dp/B01FXJI1OY"

        ' Wait for the site to fully load. Busy alone can turn False before
        ' the document is ready, so readyState is checked as well.
        IE.Navigate2 URL
        Do While IE.Busy Or IE.readyState <> READYSTATE_COMPLETE
            DoEvents
        Loop

        RowCount = 1

        With Sheets("Sheet1")
            .Cells.ClearContents

            For Each itm In IE.document.all
                If itm.className = TARGET_CLASS Then
                    .Range("A" & RowCount).Value = itm.className
                    .Range("B" & RowCount).Value = itm.innerText
                    RowCount = RowCount + 1
                End If
            Next itm
        End With

        ' Close the browser so repeated runs do not leave IE instances behind.
        IE.Quit
        Set IE = Nothing
    End Sub
    

    结果:

    【讨论】:

      猜你喜欢
      • 2013-01-05
      • 1970-01-01
      • 1970-01-01
      • 2015-12-19
      • 2015-03-24
      • 1970-01-01
      • 1970-01-01
      • 2015-09-02
      相关资源
      最近更新 更多