【问题标题】:Not able to set proxy in selenium Python(无法在 selenium Python 中设置代理)
【发布时间】:2021-10-10 21:28:33
【问题描述】:

我有以下代码:

from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains

from selenium.webdriver.common.proxy import Proxy, ProxyType

# Manual proxy configuration: the same host:port is given for HTTP, FTP and SSL traffic.
proxy = Proxy({
    'proxyType': ProxyType.MANUAL,
    'httpProxy': '192.156.1.1:33',
    'ftpProxy': '192.156.1.1:33',
    'sslProxy': '192.156.1.1:33',
    'noProxy': '' # set this value as desired
    })
# Page that is loaded to check which IP address the browser is using.
url = 'http://www.expressvpn.com/what-is-my-ip'
# Location of the geckodriver executable.
driver_path = 'C:\\Users\\user\\geckodriver.exe'

# Start Firefox through geckodriver with the proxy settings, then open the page.
browser = Firefox(executable_path = driver_path, proxy = proxy)
browser.get(url)

由于某种原因,每次我检查 IP 时,它显示的都是我的真实 IP 而不是代理 IP。为什么会这样?您能否告知如何才能让代理生效?代码有问题吗?

【问题讨论】:

    标签: python python-3.x selenium proxy


    【解决方案1】:

    我研究了这个问题,注意到在 geckodriver 中,代理是通过 WebDriver capabilities 里的 proxy configurations 来设置的。

    我在测试中使用了这些来源的代理信息。

    免费代理列表:

    请允许我指出,使用免费代理 IP 地址可能会带来很大问题。这些类型的代理因存在连接问题而臭名昭著,例如与延迟相关的超时。此外,这些站点也可能是断断续续的,这意味着它们可以随时关闭。有时这些网站会被滥用,因此可能会被屏蔽。

    下面的代码使用了 selenium 的 DesiredCapabilities。

    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options
    from selenium.webdriver.firefox.options import FirefoxProfile
    from selenium.webdriver.firefox.options import DesiredCapabilities
    
    # Command-line switches passed to the browser at start-up.
    firefox_options = Options()
    firefox_options.add_argument("--disable-infobars")
    firefox_options.add_argument("--disable-extensions")
    firefox_options.add_argument("--disable-popup-blocking")

    # Custom User-Agent. The preference name must be 'general.useragent.override'
    # for Firefox to apply it.
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
    firefox_options.set_preference('general.useragent.override', user_agent)

    # Work on a copy so the shared DesiredCapabilities.FIREFOX dict is not mutated.
    firefox_capabilities = DesiredCapabilities.FIREFOX.copy()

    # Proxy settings travel in the 'proxy' capability. Only 'sslProxy' is set
    # here, so this covers HTTPS requests; add 'httpProxy' to cover plain-HTTP
    # URLs as well.
    firefox_capabilities['proxy'] = {
        "proxyType": "MANUAL",
        "sslProxy": '34.95.40.165:3128',
    }

    driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options, desired_capabilities=firefox_capabilities)

    # Page that reports the IP address the browser appears to come from.
    URL = 'http://www.expressvpn.com/what-is-my-ip'

    driver.get(URL)
    

    你也可以这样做:

    from selenium import webdriver
    from selenium.webdriver.common.proxy import Proxy
    from selenium.webdriver.firefox.options import Options
    from selenium.webdriver.firefox.options import FirefoxProfile
    from selenium.webdriver.firefox.options import DesiredCapabilities
    
    # Command-line switches passed to the browser at start-up.
    firefox_options = Options()
    firefox_options.add_argument("--disable-infobars")
    firefox_options.add_argument("--disable-extensions")
    firefox_options.add_argument("--disable-popup-blocking")

    # Custom User-Agent. The preference name must be 'general.useragent.override'
    # for Firefox to apply it.
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
    firefox_options.set_preference('general.useragent.override', user_agent)

    # Work on a copy so the shared DesiredCapabilities.FIREFOX dict is not mutated.
    firefox_capabilities = DesiredCapabilities.FIREFOX.copy()

    # Describe the proxy with selenium's Proxy helper and let it write itself
    # into the capabilities dict.
    firefox_proxies = Proxy()
    firefox_proxies.ssl_proxy = '143.110.148.15:8080'
    firefox_proxies.add_to_capabilities(firefox_capabilities)

    driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
                               desired_capabilities=firefox_capabilities)

    # Page that reports the IP address the browser appears to come from.
    URL = 'http://www.expressvpn.com/what-is-my-ip'

    driver.get(URL)
    

    也可以使用 Python 包 http_request_randomizer 获取代理 IP 地址,再将其传递给 geckodriver。

    import random
    import logging
    from selenium import webdriver
    from selenium.webdriver.common.proxy import Proxy
    from selenium.webdriver.firefox.options import Options
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.firefox.options import FirefoxProfile
    from selenium.webdriver.firefox.options import DesiredCapabilities
    from http_request_randomizer.requests.proxy.ProxyObject import Protocol
    from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
    
    # Obtain a list of HTTPS proxies
    # Suppress the console debugging output by setting the log level
    req_proxy = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)

    # Obtain a random single proxy from the list of proxy addresses
    # (random.sample returns a one-element list, hence the [0] below)
    random_proxy = random.sample(req_proxy.get_proxy_list(), 1)

    # Command-line switches passed to the browser at start-up.
    firefox_options = Options()
    firefox_options.add_argument("--disable-infobars")
    firefox_options.add_argument("--disable-extensions")
    firefox_options.add_argument("--disable-popup-blocking")

    # Custom User-Agent. The preference name must be 'general.useragent.override'
    # for Firefox to apply it.
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
    firefox_options.set_preference('general.useragent.override', user_agent)

    # Work on a copy so the shared DesiredCapabilities.FIREFOX dict is not mutated.
    firefox_capabilities = DesiredCapabilities.FIREFOX.copy()

    # add the random proxy to firefox_capabilities
    firefox_proxies = Proxy()
    firefox_proxies.ssl_proxy = random_proxy[0].get_address()
    firefox_proxies.add_to_capabilities(firefox_capabilities)

    driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
                               desired_capabilities=firefox_capabilities)

    try:
        # print proxy IP for testing
        print(random_proxy[0].get_address())
        # example output:
        # 93.183.250.200:53281

        URL = 'http://www.expressvpn.com/what-is-my-ip'
        driver.get(URL)

    except TimeoutException as e:
        # The page did not load in time; report it and close the browser.
        print("A Page load Timeout Occurred.")
        driver.quit()
    

    如前所述,免费代理可能有多个问题。下面的代码展示了如何使用proxy judge 来检查单个代理的状态。

    import random
    import logging
    from time import sleep
    from random import randint
    from proxy_checking import ProxyChecker
    from http_request_randomizer.requests.proxy.ProxyObject import Protocol
    from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
    
    
    def random_ssl_proxy_address():
        """Return the address of one randomly chosen HTTPS proxy.

        Builds a ``RequestProxy`` restricted to ``Protocol.HTTPS``, draws a
        single entry from its proxy list and returns that entry's
        ``get_address()`` value.
        """
        # log_level=logging.ERROR suppresses the console debugging output
        proxy_source = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)
        candidates = proxy_source.get_proxy_list()

        # random.sample(..., 1) yields a one-element list; unpack its only item
        (chosen,) = random.sample(candidates, 1)
        return chosen.get_address()
    
    
    def get_proxy_address():
        """Return a random HTTPS proxy address that passes the proxy check.

        A candidate is drawn with ``random_ssl_proxy_address()`` and checked
        with ``ProxyChecker.check_proxy``. If the result's ``'status'`` entry
        is falsy or missing, the candidate is discarded and another one is
        tried after a short random pause. The call blocks until a candidate
        passes, so it always returns an address (never ``None``).
        """
        while True:
            proxy_address = random_ssl_proxy_address()
            checker = ProxyChecker()
            proxy_judge = checker.check_proxy(proxy_address)

            if proxy_judge.get('status'):
                return proxy_address

            print('Looking for a valid proxy address.')

            # this sleep timer is helping with some timeout issues
            # that were happening when querying
            sleep(randint(5, 10))
    
    
    # Fetch a proxy address that passed the check and show it.
    random_ssl_proxy = get_proxy_address()
    print(f'Valid proxy address: {random_ssl_proxy}')
    # example output:
    # Valid proxy address: 98.116.152.143:3128
    

    请注意,我使用的 proxy_checking 包没有任何内置的错误处理,因此您必须自行添加一些代码来捕获可能出现的错误。

    【讨论】:

    • 由于某种原因,当我尝试更改 IP 时,我不断收到以下错误:WebDriverException: Message: Reached error page: about:neterror?e=netTimeout&u=https%3A//www.expressvpn.com/what-is-my-ip&c=UTF-8&d=The%20server%20at%20www.expressvpn.com%20is%20taking%20too%20long%20to%20respond. 虽然第一次工作。尝试更改用户代理但仍然遇到相同的错误。有什么想法吗?
    • 您使用的是哪个代理 IP 地址?
    • 我不明白。请提供详细信息。
    • 在您分享的第二个代码块中,我将 ip 从143.110.148.15:8080 更改为123.110.148.15:8080
    • 哪个代理服务的 IP 为 123.110.148.15?
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2018-02-07
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多