【发布时间】:2021-02-10 22:00:53
【问题描述】:
您好,我正在尝试从本网站获取产品名称(Cohiba Robusto)、产品尺寸(单支雪茄、3 支装、25 支箱)和价格(33.65 英镑、90 英镑、730 英镑):https://www.jjfox.co.uk/cohiba-robusto-621.html
我想得到这样的东西:
我正在使用下面的代码,它给出了一个错误(“对象变量或未设置变量”)。
将不胜感激。
Sub getproducts()
Sheets("JJFox").Select
Dim oHtml As HTMLDocument
Dim oElement As Object
Dim Elements As IHTMLElementCollection
Dim Document As HTMLDocument
Set oHtml = New HTMLDocument
'Cells(1, 6) = Time()
lastrow = ActiveSheet.Cells(Rows.Count, "A").End(xlUp).Row
cnt = lastrow + 1
counter1 = cnt
Dim gg As String
gg = "https://www.jjfox.co.uk/cohiba-robusto-621.html"
Dim objHTTP As New WinHttp.WinHttpRequest
url = gg
objHTTP.Open "POST", url, False
objHTTP.setRequestHeader "Content-Type", "application/json"
objHTTP.send ("{""key"":null,""from"":""me@me.com"",""to"":null,""cc"":null,""bcc"":null,""date"":null,""subject"":""My Subject"",""body"":null,""attachments"":null}")
oHtml.body.innerHTML = objHTTP.responseText
'Cells(rw, 2) = oHtml.getElementsByTagName("description").innerText
' If Not .Document.querySelector("button[aria-label='Close']") Is Nothing Then
' .Document.querySelector("button[aria-label='Close']").Click
' End If
txttitle = oHtml.getElementsByClassName("productcart")(0).innerText
txttitlehtml = oHtml.getElementsByClassName("packsize")(0).innerHTML
txttitle = Mid(txttitle, 1, InStr(1, txttitle, Chr(10)))
'Debug.Print txttitlehtml
'txttitle2 = oHtml.getElementsByClassName("price")(0).innerText
Dim Text As String
Text = GetHTML(gg)
starts = InStr(1, Text, "spConfig =")
endS = InStr(starts + 1, Text, "spConfig")
If starts = 0 Then
Cells(counter1, 1) = txttitle
Cells(counter1, 2) = "Single"
starts = InStr(starts + 1, Text, "productPrice")
endl = InStr(starts + 1, Text, ",")
Cells(counter1, 3) = Val(Mid(Text, starts + 14, endl - (starts + 14)))
Cells(counter1, 4) = "JJFox"
Cells(counter1, 5) = Now()
Cells(counter1, 7) = gg ' link to the page
counter1 = counter1 + 1
Else
Text = Mid(Text, starts, endS - starts)
'Debug.Print Text
'find how many pack options are avaialble
myTxt = Text
countTxt = "label"
bb = (Len(myTxt) - Len(replace(myTxt, countTxt, ""))) / Len(countTxt) - 1
'End find////////////////////////////////////
varlabel = "class=" & Chr(34) & "label" & Chr(34)
starts = InStr(1, Text, "label") + 1
Text = Mid(Text, starts, Len(Text))
For i = 1 To bb
starts = InStr(1, Text, "label")
If InStr(starts, Text, "label") Then
'Show the element's properties
Cells(counter1, 1) = txttitle
Cells(counter1, 2) = Mid(Text, starts + 8, InStr(starts, Text, " \") - (starts + 8))
starts = InStr(starts + 1, Text, "oldPrice")
endl = InStr(starts + 1, Text, ",")
Cells(counter1, 3).FormulaR1C1 = Val(Mid(Text, starts + 11, endl - (starts + 11)))
'Debug.Print Val(Mid(Text, startS + chrs, 6))
Cells(counter1, 4) = "JJFox"
Cells(counter1, 5) = Now()
starts = starts + 1
Text = Mid(Text, starts, Len(Text))
Cells(counter1, 7) = gg ' link to the page
counter1 = counter1 + 1
End If
Next i
End If
'Cells(2, 6) = Time()
End Sub
Function GetHTML(url As String) As String
With CreateObject("MSXML2.ServerXMLHTTP.6.0")
.Open "GET", url, False
.send
GetHTML = .responseText
End With
End Function
【问题讨论】:
标签: excel vba web-scraping