【问题标题】:How can I retrieve Amazon's keyword/phrase suggestions from the search bar如何从搜索栏中检索亚马逊的关键字/短语建议
【发布时间】:2019-09-21 05:57:24
【问题描述】:

以下是我找到并修改过的一些代码,用来尝试从亚马逊的搜索栏中捕获关键字/短语建议。我对网络抓取的概念非常陌生,所以我知道这里提供的代码可能效率很低,也未必有效。我已经从 F12 DOM Explorer 和 Network 窗口手动捕获了一些数据。如果最好的答案是网络抓取,我需要 Excel VBA 形式的代码。我在下面的一些图像中看到,Network 窗口中某些请求的内容类型是“application/json”,Initiator/Type 是“XMLHttpRequest”,但这些请求只在与“https://completion.amazon.com”建立连接并完成身份验证之后才出现。如果这是可行的路线,我不知道如何发出这些请求。非常感谢任何帮助。

到目前为止,我已经尝试通过代码中的脚本以编程方式触发搜索栏,但看不到任何效果。仅仅把关键字“粘贴”到搜索栏中并附加一个空格,不会产生关键字建议;但是,手动在搜索栏中键入则可以。如果我键入关键字,然后对下拉建议选择“检查元素”,就会生成显示建议内容的动态 HTML(此时我可以得到我需要的内容)。但我一直无法通过代码达到这一步。

' Click handler: opens amazon.com in Internet Explorer, puts a keyword followed by
' a space into the search box, and prints whatever suggestion elements are present
' in the page to the Immediate window.
' The early-bound types below (InternetExplorer, IHTMLElementCollection, ...) need
' references to "Microsoft Internet Controls" and "Microsoft HTML Object Library".
Private Sub CommandButton1_Click()

Dim MyURL As String

Dim AASearchRank As Workbook
Dim AAws As Worksheet
Dim HTMLDoc As Object               'late-bound document, as in the original (it was an undeclared Variant)
Dim InputSearch As HTMLInputTextElement
Dim InputSearchOrder As Object      'search text box, only used to check that the page has one
Dim elems As IHTMLElementCollection
Dim TDelement As HTMLTableCell
Dim elems2 As IHTMLElementCollection
Dim TDelement2 As HTMLDivElement
'Dim TDelement2 As HTMLInputTextElement

Dim InputSearchButton As HTMLInputButtonElement
Dim IE As InternetExplorer

Dim x As Long                       'worksheet row index; Long so it cannot overflow on large sheets

MyURL = "https://www.amazon.com/"
Set IE = New InternetExplorer
With IE
    .Silent = True
    .Navigate MyURL
    .Visible = True
    Do
        DoEvents
    Loop Until .ReadyState = READYSTATE_COMPLETE
End With
Set HTMLDoc = IE.Document

Set AASearchRank = Application.ThisWorkbook
Set AAws = AASearchRank.Worksheets("Sheet2")

Set InputSearchButton = HTMLDoc.getElementById("nav-search-submit-text")
Set InputSearchOrder = HTMLDoc.getElementById("twotabsearchtextbox")

'Only click when both the text box and the button were found; the original
'tested the text box but dereferenced the button.
If Not InputSearchOrder Is Nothing And Not InputSearchButton Is Nothing Then
    InputSearchButton.Click
    Do
        DoEvents
    Loop Until IE.ReadyState = READYSTATE_COMPLETE
    'The click may have loaded a new page, so refresh the document reference.
    Set HTMLDoc = IE.Document
End If

x = 2
If AAws.Range("D" & x).Value = "" Then
    Do Until AAws.Range("B" & x) = ""
        Set InputSearch = HTMLDoc.getElementById("twotabsearchtextbox")
        If InputSearch Is Nothing Then
            Debug.Print "Search box 'twotabsearchtextbox' not found; stopping."
            Exit Do
        End If
        InputSearch.Focus
        'When a keyword is typed in the search bar with a 'space' after, it invokes the suggestions I'm looking for.
        InputSearch.Value = "Travel "
        'InputSearch.Value = AAws.Range("C" & x) & " "

        InputSearch.Focus

        'Here I was trying to invoke some script to see if it had any effect on the search bar drop down
        HTMLDoc.parentWindow.execScript "window.navmet.push({key:'UpNav',end:+new Date(),begin:window.navmet.tmp});"
        HTMLDoc.parentWindow.execScript "window.navmet.push({key:'Search',end:+new Date(),begin:window.navmet.tmp});"
        HTMLDoc.parentWindow.execScript "window.navmet.push({key:'NavBar',end:+new Date(),begin:window.navmet.main});"

        'NOTE(review): this only waits for the page itself; it presumably does not
        'wait for the suggestion drop-down to be populated - confirm.
        Do
            DoEvents
        Loop Until IE.ReadyState = READYSTATE_COMPLETE
        'Application.Wait (Now + TimeValue("0:00:05"))

        Set elems2 = HTMLDoc.getElementsByClassName("nav-issFlyout nav-flyout")
        For Each TDelement2 In elems2
            'Debug statements strictly for learning what each option/query returns
            Debug.Print TDelement2.innerText
            Debug.Print TDelement2.className
            Debug.Print TDelement2.dataFld

            Debug.Print TDelement2.innerHTML
            Debug.Print TDelement2.outerText
            Debug.Print TDelement2.outerHTML
            Debug.Print TDelement2.parentElement.className
            Debug.Print TDelement2.tagName
            Debug.Print TDelement2.ID
        Next

        'Once the searchbar is populated, and the drop down list provides suggestions,
        'the below code will give me what I want. If there's an easier solution,
        'I'm all for it
        Set elems = HTMLDoc.getElementsByClassName("s-suggestion")
        For Each TDelement In elems
            If Left(TDelement.ID, 6) = "issDiv" Then
                Debug.Print TDelement.innerText
                Debug.Print TDelement.ID
            End If
        Next
        x = x + 1
    Loop
End If

End Sub

一个理想的解决方案是通过调用搜索栏动态 HTML 或通过亚马逊的补全站点来获取这些建议的关键字,但似乎这可能不对公众开放。感谢您提供的任何帮助,并为任何发布缺陷提前道歉。

【问题讨论】:

    标签: html excel vba dom web-scraping


    【解决方案1】:

    您可以在网络选项卡中找到一个 API 调用。它返回一个 JSON 字符串,您可以用 JSON 解析器解析它以获取建议。我使用 jsonconverter.bas:下载后将其添加到项目中,然后转到 VBE > Tools > References > 添加对 Microsoft Scripting Runtime 的引用。

    url 本身是一个查询字符串,即它由不同的参数构成。比如有一个limit参数,其值为11,指定返回的建议数。您可能能够更改和/或删除其中的一些。下面,我将SEARCH_TERM 常量连接到查询字符串中,以表示您的搜索值(将输入到搜索框中的值)。

    我不知道是否有任何参数是基于时间的(即会随时间过期——自从您发布问题以来,我已经发出了许多请求,都没有出现问题)。如果有,可能需要先对亚马逊搜索页面发出一次 GET 请求,以提取必要的基于时间的值。

    # Query-string parameters of the suggestions request, listed as (name, value)
    # pairs. The values are the ones captured from one browser session.
    # NOTE(review): it is unconfirmed whether any of them (session-id, request-id,
    # '_') expire over time.
    params = (
        ('session-id', '141-0042012-2829544'),
        ('customer-id', ''),
        ('request-id', '7E7YCB7AZZM1HQEZF2G1'),
        ('page-type', 'Search'),
        ('lop', 'en_US'),
        ('site-variant', 'desktop'),
        ('client-info', 'amazon-search-ui'),
        ('mid', 'ATVPDKIKX0DER'),
        ('alias', 'aps'),
        ('b2b', '0'),
        ('fresh', '0'),
        ('ks', '76'),
        ('prefix', 'TRAVEL'),  # the search value, i.e. what would be typed into the search box
        ('event', 'onKeyPress'),
        ('limit', '11'),  # number of suggestions to return
        ('fb', '1'),
        ('suggestion-type', ['KEYWORD', 'WIDGET']),  # list value: the parameter is repeated once per entry in the URL
        ('_', '1556820864750')  # NOTE(review): looks like a millisecond timestamp - confirm
    )
    

    VBA:

    Option Explicit
    ' Requests keyword suggestions for SEARCH_TERM from Amazon's completion endpoint
    ' and prints the "value" of every returned suggestion to the Immediate window.
    ' Requires the JsonConverter module (jsonconverter.bas) in the project and a
    ' reference to Microsoft Scripting Runtime (VBE > Tools > References).
    ' Raises a run-time error when the server does not answer with HTTP 200.
    Public Sub GetTable()
        Dim json As Object, suggestion As Object
        Const SEARCH_TERM As String = "TRAVEL"    ' value that would be typed into the search box
        With CreateObject("MSXML2.XMLHTTP")
            ' Synchronous GET (third argument False); SEARCH_TERM is spliced into the "prefix" parameter.
            .Open "GET", "https://completion.amazon.com/api/2017/suggestions?session-id=141-0042012-2829544" & _
            "&customer-id=&request-id=7E7YCB7AZZM1HQEZF2G1&page-type=Search&lop=en_US&site-variant=" & _
            "desktop&client-info=amazon-search-ui&mid=ATVPDKIKX0DER&alias=aps&b2b=0&fresh=0&ks=76&" & _
            "prefix=" & SEARCH_TERM & "&event=onKeyPress&limit=11&fb=1&suggestion-type=KEYWORD&suggestion-type=" & _
            "WIDGET&_=1556820864750", False
            .setRequestHeader "User-Agent", "Mozilla/5.0"
            .send
            ' Fail with a clear message instead of handing an error page to the JSON parser.
            If .Status <> 200 Then
                Err.Raise vbObjectError + 513, "GetTable", _
                          "Suggestion request failed: HTTP " & .Status & " " & .statusText
            End If
            Set json = JsonConverter.ParseJson(.responseText)("suggestions")
        End With
        For Each suggestion In json
            Debug.Print suggestion("value")
        Next
    End Sub
    

    【讨论】:

    • QHarr - 非常感谢您的快速回复。当我尝试执行您的代码时,我得到了“WriteTxtFile .responseText”行的“未定义子或函数”。
    • 抱歉 - 我忘记包含“Option Explicit”声明......现在我已经加上了,却得到“JsonConverter”的“未定义的变量”错误。
    • 您是否通过链接将 jsonconverter.bas 安装到您的项目中?
    • 有任何问题请告诉我
    • 我很抱歉 - 我试图感谢您的回答,但它不会让我!
    猜你喜欢
    • 2013-01-06
    • 2014-05-28
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2014-01-02
    • 2016-06-30
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多