【问题标题】:How to get the url of current tab in selenium?如何在 Selenium 中获取当前标签页的网址?
【发布时间】:2015-11-09 01:25:10
【问题描述】:

一切正常,但每次点击后我都无法提取新标签页的网址。我错过了什么?我已经尝试了所有可能的方法——current_url、response.url、getCurrentUrl——似乎都不起作用。在这种情况下,最好的解决方案是什么?

import scrapy
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import Selector
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from selenium import webdriver
from urlparse import urljoin
import time
from selenium.webdriver.common.keys import Keys

class CompItem(scrapy.Item):
    """Scrapy item holding the fields collected for one product."""

    # Product name; declared here but not filled in by the spider below.
    model_name = scrapy.Field()
    # Product link; declared here but not filled in by the spider below.
    model_link = scrapy.Field()
    # URL read from the browser after a product is opened in a new tab.
    url = scrapy.Field()

class criticspider(CrawlSpider):
    """Spider that drives Firefox through Selenium over the Paytm mobiles listing.

    For the first four product links it opens the link with Ctrl+Return,
    tries to read the URL of the resulting tab into ``CompItem["url"]`` and
    then closes the tab again.
    """
    name = "paytm_l"
    allowed_domains = ["paytm.com"]
    start_urls = ["https://paytm.com/shop/g/electronics/mobile-accessories/mobiles"]


    def __init__(self, *args, **kwargs):
        """Initialise the spider and start the Firefox WebDriver session."""
        super(criticspider, self).__init__(*args, **kwargs)
        self.download_delay = 0.25
        self.browser = webdriver.Firefox()

        # Element lookups wait up to 2 seconds before failing.
        self.browser.implicitly_wait(2)

    def parse_start_url(self, response):
        """Load the start page in Firefox and yield one item per product link.

        ``response`` is only used for its ``url``; everything else is read
        through ``self.browser``.
        """
        self.browser.get(response.url)
        #sites = response.xpath('//div[@class="single-review"]/div[@class="review-header"]')
        # Raise the implicit wait for element lookups to 30 seconds.
        self.browser.implicitly_wait(30)

        # NOTE(review): `items` is never used below.
        items = []
        # Fixed pause; presumably gives the script-rendered listing time to load.
        time.sleep(20)
    #   for i in range(0,200):
            #self.browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")


        # NOTE(review): `sel` and `sites` are computed but never used below.
        sel = Selector(text=self.browser.page_source)
        sites = sel.xpath('//div[contains(@class,"overflow-hidden")]')

        # NOTE(review): a single item is created here and the same object is
        # yielded on every loop iteration.
        item = CompItem()

        # r = 1..4 selects the r-th <li> of the hard-coded absolute XPath.
        for r in range(1,5):


                    #item['model_name'] = site.xpath('.//p[contains(@ng-if,"applyLimit")]/text()')
                    button = self.browser.find_element_by_xpath("/html/body/div[5]/div[5]/div/div[5]/div[3]/ul/li[%d]/a"%r)
                    # Remember the handle of the listing window so it can be re-selected later.
                    main_window = self.browser.current_window_handle
                    # Ctrl+Return on the link: presumably opens it in a new tab.
                    button.send_keys(Keys.CONTROL + Keys.RETURN)
                    # Ctrl+Tab: presumably brings the new tab to the front.
                    self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.TAB)
                    time.sleep(5)
                    # Ctrl+L / Ctrl+C: presumably focus the address bar and copy it;
                    # the copied text is never read back by this code.
                    self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'l')
                    self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'c')

                    # NOTE(review): in Selenium's Python bindings `current_url` is a
                    # property, so the trailing () calls the returned string and
                    # raises TypeError. The driver has also not been re-focused
                    # since the tab was opened.
                    item["url"]=self.browser.current_url()
                    time.sleep(10)
                    self.browser.switch_to_window(main_window)
                    time.sleep(10)

                    # Ctrl+W: presumably closes the tab that currently has focus.
                    self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'w')
                    self.browser.switch_to_window(main_window)

                    #item['model_link'] = site.xpath('//a[contains(@class,"{"na": !productClasses(product)}"]/@href').extract()[0]

                    yield item

【问题讨论】:

    标签: python selenium xpath selenium-webdriver scrapy


    【解决方案1】:

    通过将驱动程序的焦点重新切换到选项卡上,我得以获取其网址。另外,current_url 是属性而不是方法,读取时不要加括号。

    import scrapy
    from scrapy.contrib.spiders import CrawlSpider, Rule
    from scrapy.selector import Selector
    from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
    from selenium import webdriver
    from urlparse import urljoin
    import time
    from selenium.webdriver.common.keys import Keys
    
    class CompItem(scrapy.Item):
        """Scrapy item holding the fields collected for one product."""

        # Product name; declared here but not filled in by the spider below.
        model_name = scrapy.Field()
        # Product link; declared here but not filled in by the spider below.
        model_link = scrapy.Field()
        # URL read from the browser after a product is opened in a new tab.
        url = scrapy.Field()
    
    class criticspider(CrawlSpider):
        """Record the URL of each product opened from the Paytm mobiles listing.

        The listing page is loaded in a Selenium-driven Firefox. Each of the
        first four product links is opened with Ctrl+Return, the URL reported
        by the driver is stored in a fresh ``CompItem`` and the tab is closed.
        """

        name = "paytm_l"
        allowed_domains = ["paytm.com"]
        start_urls = ["https://paytm.com/shop/g/electronics/mobile-accessories/mobiles"]

        # Absolute XPath of the n-th (1-based) product link on the listing page.
        product_link_xpath = "/html/body/div[5]/div[5]/div/div[5]/div[3]/ul/li[%d]/a"

        def __init__(self, *args, **kwargs):
            """Initialise the spider and start the Firefox WebDriver session."""
            super(criticspider, self).__init__(*args, **kwargs)
            self.download_delay = 0.25
            self.browser = webdriver.Firefox()
            # Element lookups wait up to 2 seconds before failing.
            self.browser.implicitly_wait(2)

        def closed(self, reason):
            """Quit Firefox when Scrapy closes the spider.

            Without this hook the browser started in ``__init__`` is never
            shut down and keeps running after the crawl has finished.
            """
            self.browser.quit()

        def parse_start_url(self, response):
            """Open the first four product links and yield one item per link.

            ``response`` is only used for its ``url``; the page itself is
            loaded and inspected through ``self.browser``.

            Yields:
                CompItem: a new item per product with ``url`` set to the
                driver's ``current_url`` after the product was opened.
            """
            self.browser.get(response.url)
            # Raise the implicit wait for element lookups to 30 seconds.
            self.browser.implicitly_wait(30)
            # Fixed pause; presumably gives the script-rendered listing time to load.
            time.sleep(20)

            for r in range(1, 5):
                button = self.browser.find_element_by_xpath(self.product_link_xpath % r)
                main_window = self.browser.current_window_handle

                # Ctrl+Return on the link, then Ctrl+Tab: presumably opens the
                # product in a new tab and brings that tab to the front.
                button.send_keys(Keys.CONTROL + Keys.RETURN)
                self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.TAB)
                time.sleep(5)

                # Re-focus the driver on the window handle before reading the
                # URL. NOTE(review): this relies on the new tab sharing the
                # handle of the original window - confirm for the driver and
                # browser versions in use.
                self.browser.switch_to_window(main_window)

                # A new item per product: yielding one shared, mutated instance
                # would make every yielded item refer to the same object.
                item = CompItem()
                # current_url is a property, not a method - no parentheses.
                item["url"] = self.browser.current_url
                time.sleep(20)

                # Ctrl+W: presumably closes the tab that currently has focus.
                self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'w')
                self.browser.switch_to_window(main_window)

                yield item
    

    【讨论】:

      猜你喜欢
      • 2020-12-15
      • 1970-01-01
      • 1970-01-01
      • 2012-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多