【问题标题】:VBA scraping tr onclick(VBA 抓取 tr 的 onclick)
【发布时间】:2019-06-21 00:14:22
【问题描述】:

以下是 html/javascript 代码:我想抓取 tr 标签 onclick 属性中的 id。如何读取 onClick 属性并取得其末尾的 id?下面的代码来自一个需要登录的动态网站中的表格,所有行都带有超链接,并按 odd/even(奇偶)类交替排列,如下所示。感谢您的帮助,谢谢!

<!-- Sample data rows of the report table as posted in the question.
     Each row's onClick handler navigates to custlist.php; the custm_id
     query parameter at the end of that URL is the value to be scraped. -->
<!-- NOTE(review): the ** around 13857 looks like emphasis markup from the
     post rather than part of the real attribute value; confirm against the
     live page. -->
<tr class='odd' onmouseover='mov(this)' onMouseOut="mou(this, 'odd')" onClick='location.href="custlist.php?rptname=Report&custm_id=**13857**"'> 
<td align='right'>&nbsp;1.&nbsp;</td>
<td nowrap style='text-align:;'>&nbsp;Company&nbsp;</td>
<td nowrap style='text-align:;'>&nbsp;blah blah&nbsp;</td>
<td nowrap style='text-align:;'>&nbsp; bbb&nbsp;</td>
<td nowrap style='text-align:right;'>&nbsp;1,084,771.10&nbsp;</td>
<td nowrap style='text-align:right;'>&nbsp;1,060,787.10&nbsp;</td>
<td nowrap style='text-align:right;'>&nbsp;1,203,000.00&nbsp;</td>
<td nowrap style='text-align:right;'>&nbsp;30,233.90&nbsp;</td>
<td nowrap style='text-align:left;'>&nbsp;&nbsp;</td>
</tr>
<!-- Second row (class 'even'). The excerpt is cut off after four cells:
     this row has no closing </tr> and is followed by a bare <tr>. -->
<tr  class='even'  onmouseover='mov(this)' onMouseOut="mou(this, 'even')" onClick='location.href="custlist.php?rptname=report&custm_id=22012"'>
<td align='right'>&nbsp;2.&nbsp;</td>
<td nowrap style='text-align:;'>&nbsp;T3 bbhj &nbsp;</td>
<td nowrap style='text-align:;'>&nbsp;hhht &nbsp;</td>
<td nowrap style='text-align:right;'>&nbsp;720,260.00&nbsp;</td>


<tr>

*在 25 行数据之后,页面会再输出一段表头行(header)的代码:

            <tr class='header'>

  <th width='20px'>&nbsp;</th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=1&desc=ASC&perpage=ALL' class='sorter'>
              CUSTOMER</a>
          &nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=2&desc=ASC&perpage=ALL' class='sorter'>
              CUSTOMER AGENT</a>
          &nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=3&desc=ASC&perpage=ALL' class='sorter'>
              ACCT MANAGERS</a>
          &nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=4&desc=ASC&perpage=ALL' class='sorter'>
              TOTAL</a>
          &nbsp;<img src='images/up_arrow.gif'>&nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=5&desc=ASC&perpage=ALL' class='sorter'>
              BALANCE</a>
          &nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=6&desc=ASC&perpage=ALL' class='sorter'>
              CREDIT LIMIT</a>
          &nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=7&desc=ASC&perpage=ALL' class='sorter'>
              CREDIT AVAILABLE</a>
          &nbsp;


      </th>


      <th nowrap>&nbsp;



          <a href='/fats/custlist.php?srid=4969&sort=8&desc=ASC&perpage=ALL' class='sorter'>
              NOTES</a>
          &nbsp;


      </th>

到目前为止我的 VBA 代码:

'Logs in to the brokerage site, pastes the customer report table onto the
'active sheet starting at A1, then writes the custm_id taken from each data
'row's onclick attribute into column J, starting at J2.
'
'Requires project references to "Microsoft Internet Controls"
'(InternetExplorer, READYSTATE_COMPLETE) and "Microsoft Forms 2.0 Object
'Library" (DataObject).
Sub GetCreditLimit()

    Dim ieApp As Object
    Dim ieDoc As Object
    Dim ieTable As Object
    Dim clip As DataObject

    Dim rowNodes As Object      'static node list of <tr> elements carrying a custm_id
    Dim onclickText As String   'raw text of one row's onclick attribute
    Dim custId As String        'id extracted from onclickText
    Dim i As Long               'index into rowNodes
    Dim written As Long         'number of ids already written to column J

    'create a new instance of ie
    Set ieApp = New InternetExplorer

    'you don't need this, but it's good for debugging
    ieApp.Visible = True

    'assume we're not logged in and just go directly to the login page
    ieApp.Navigate "https://brokerage.suntecktts.com/agents/login"
    Do While ieApp.Busy: DoEvents: Loop
    Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop

    Set ieDoc = ieApp.Document

    'fill in the login form - View Source from your browser to get the control names
    With ieDoc.forms(0)
        .agent_login.Value = "username"
        .agent_password.Value = "password"
        .submit
    End With
    Do While ieApp.Busy: DoEvents: Loop
    Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop

    'now that we're in, go to the page we want
    ieApp.Navigate "https://brokerage.suntecktts.com/fats/custlist.php?srid=8897&page=1&perpage=ALL"
    Do While ieApp.Busy: DoEvents: Loop
    Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop

    'get the table based on the table's id
    Set ieDoc = ieApp.Document
    Set ieTable = ieDoc.all.Item("general-report-wrapper")

    'copy the table's html to the clipboard and paste it to the sheet
    If Not ieTable Is Nothing Then
        Set clip = New DataObject
        clip.SetText "<html>" & ieTable.outerHTML & "</html>"
        clip.PutInClipboard

        ActiveSheet.Range("A1").Select
        ActiveSheet.PasteSpecial "Unicode Text"
        Rows("1:1").Select
    End If

    'The onclick handler lives on the <tr>, not on its <td> cells, so select
    'the rows directly. The attribute selector skips header rows and any
    'other row without a custm_id, and avoids depending on a table id.
    Set rowNodes = ieDoc.querySelectorAll("tr[onClick*='custm_id']")

    For i = 0 To rowNodes.Length - 1
        onclickText = rowNodes.Item(i).getAttribute("onclick") & vbNullString

        If InStr(1, onclickText, "custm_id=") > 0 Then
            'the id runs from "custm_id=" up to the closing double quote of the URL
            custId = Split(Split(onclickText, "custm_id=")(1), Chr$(34))(0)

            'one id per row going down from J2
            ActiveSheet.Range("J2").Offset(written, 0).Value = custId
            written = written + 1
        End If
    Next i

    'close 'er up
    ieApp.Quit
    Set ieApp = Nothing

End Sub

【问题讨论】:

  • @whitney,当用户单击该 <tr> 行时,页面会立即跳转到下一页(custlist.php)。您可以在 custlist.php 的页面加载事件中,从查询字符串里取得 custm_id
  • 请分享您编写此代码的尝试并解释您遇到的问题?此外,请确保您的 html 格式正确且不要更改它。
  • @QHarr 到目前为止,我添加了我的 vba 代码...从 onClick 获取 id 的最后部分我被卡住了。
  • @QHarr 我想通了,请参阅下面的答案...非常感谢!非常感谢您的帮助! :)

标签: javascript html excel vba web-scraping


【解决方案1】:

您的 HTML 格式似乎不正确。对于格式正确的 HTML,您可以组合使用 css 选择器来定位元素:用类选择器定位 tr 的 odd 类,再用带 contains(*)修饰符的"属性 = 值"选择器,要求 onclick 属性值必须包含子字符串 custm_id。然后提取 onclick 属性值字符串,并使用 Split 取出 id。假设使用 Internet Explorer 作为访问方式:

'Print the custm_id of the first odd-class row whose onclick mentions custm_id.
Dim onclickText As String
Dim afterKey As String

onclickText = ie.document.querySelector("tr.odd[onClick*='custm_id']").getAttribute("onclick")

'everything after "custm_id=", then cut at the closing double quote
afterKey = Split(onclickText, "custm_id=")(1)
Debug.Print Split(afterKey, Chr$(34))(0)

所有ID

'Print the custm_id of every row whose onclick mentions custm_id.
Dim rowNodes As Object
Dim idx As Long
Dim onclickText As String
Dim afterKey As String

Set rowNodes = ie.document.querySelectorAll("tr[onClick*='custm_id']")

idx = 0
Do While idx <= rowNodes.Length - 1
    onclickText = rowNodes.Item(idx).getAttribute("onclick")

    'everything after "custm_id=", then cut at the closing double quote
    afterKey = Split(onclickText, "custm_id=")(1)
    Debug.Print Split(afterKey, Chr$(34))(0)

    idx = idx + 1
Loop

【讨论】:

【解决方案2】:

我想通了...见下面的代码:

'Logs in to the brokerage site, pastes the customer report table onto the
'active sheet starting at A1, then writes the custm_id taken from each data
'row's onclick attribute into column J, starting at J2.
'
'Requires project references to "Microsoft Internet Controls"
'(InternetExplorer, READYSTATE_COMPLETE) and "Microsoft Forms 2.0 Object
'Library" (DataObject).
Sub GetCredit()

    Dim ieApp As InternetExplorer
    Dim ieDoc As Object
    Dim ieTable As Object
    Dim clip As DataObject

    Dim ids As Object           'static node list of <tr> elements carrying a custm_id
    Dim i As Long               'index into ids
    Dim onclickText As String   'raw onclick text (named to avoid shadowing VBA's Val)
    Dim custId As String        'id extracted from onclickText

    Set ieApp = New InternetExplorer
    ieApp.Visible = True
    ieApp.Navigate "https://brokerage.suntecktts.com/agents/login"
    Do While ieApp.Busy: DoEvents: Loop
    Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop

    Set ieDoc = ieApp.Document

    'fill in the login form - View Source from your browser to get the control names
    With ieDoc.forms(0)
        .agent_login.Value = "username" 'id
        .agent_password.Value = "password" 'password
        .submit
    End With
    Do While ieApp.Busy: DoEvents: Loop
    Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop

    ieApp.Navigate "https://brokerage.suntecktts.com/fats/custlist.php?srid=3691&page=1&perpage=ALL"
    Do While ieApp.Busy: DoEvents: Loop
    Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop

    'get the table based on the table's id
    Set ieDoc = ieApp.Document
    Set ieTable = ieDoc.all.Item("general-report-wrapper")

    'copy the table's html to the clipboard and paste it to the sheet
    If Not ieTable Is Nothing Then
        Set clip = New DataObject
        clip.SetText "<html>" & ieTable.outerHTML & "</html>"
        clip.PutInClipboard

        ActiveSheet.Range("A1").Select
        ActiveSheet.PasteSpecial "Unicode Text"
        Rows("1:1").Select
    End If

    'No AutoFill of column J here: J2 holds nothing yet at this point, and
    'the loop below writes every cell explicitly.

    'rows whose onclick handler carries a custm_id (header rows are skipped)
    Set ids = ieDoc.querySelectorAll("tr[onClick*='custm_id']")
    Debug.Print ids.Length

    For i = 0 To ids.Length - 1
        onclickText = ids.Item(i).getAttribute("onclick") & vbNullString
        Debug.Print onclickText

        If InStr(1, onclickText, "custm_id=") > 0 Then
            'the id runs from "custm_id=" up to the closing double quote of the URL
            custId = Split(Split(onclickText, "custm_id=")(1), Chr$(34))(0)
            Debug.Print custId

            'write the id itself (not the whole handler) next to row i of the data
            ActiveSheet.Range("J2").Offset(i, 0).Value = custId
        End If
    Next i

    'close 'er up
    ieApp.Quit
    Set ieApp = Nothing

End Sub

【讨论】:

  • @QHarr 是的,你的答案...忽略之前的评论,它确实会运行所有 id,但我需要删除重复的标题...。你能帮我吗?
  • @QHarr 是的,当它导入到 excel 时,表格会重复标题....如何删除它们?我现在会接受你的回答。非常感谢您的帮助。
  • 您正在使用剪贴板,其中可能包含重复的标题。可选做法:在粘贴之前修改 html,删除该标题;如果可行,在工作表中删除含有某个标题的行(如果您知道其中一个标题值,可以用 .Find 定位);或者不使用剪贴板,改用循环写出表格,并在循环中排除重复的标题。您能把表格的 html 贴到 pastebin.com 上吗(我指的是出问题的那一部分)?
猜你喜欢
相关资源
最近更新 更多
热门标签