import requests
from lxml import etree
# Page whose "ip_list" table is scraped for proxy candidates.
url = 'http://www.xicidaili.com/'

# Browser-like User-Agent sent with every request made by this script.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}

# Bound the request: without a timeout, requests waits indefinitely on a
# server that accepts the connection but never answers.
response = requests.get(url, headers=headers, timeout=10)

# Parse the page and select every <tr> that is a direct child of the table
# whose id is "ip_list".
html_ele = etree.HTML(response.text)
tr_list = html_ele.xpath('//table[@id="ip_list"]/tr')
# Proxies that answered the probe with HTTP 200. Initialised once, before the
# loop, so results accumulate over the whole scan instead of being reset for
# every row.
normal_proxies = []

# Running number of the candidate currently being probed (1-based when
# printed). Also kept outside the loop so that it actually advances.
count = 0

# Page fetched through each candidate proxy to decide whether it works.
check_url = 'http://www.baidu.com'

for tr_ele in tr_list:
    # The text of the 2nd and 3rd cells is used as the proxy's IP and port.
    ip_cells = tr_ele.xpath('./td[2]/text()')
    port_cells = tr_ele.xpath('./td[3]/text()')

    # Rows without text in either cell (presumably the header row) carry no
    # usable address, so skip them.
    if not ip_cells or not port_cells:
        continue

    # Strip surrounding whitespace so stray padding in the scraped cells
    # cannot corrupt the "ip:port" address.
    proxies = ip_cells[0].strip() + ':' + port_cells[0].strip()

    count += 1
    print("第%s个。。" % count)

    try:
        check_response = requests.get(
            check_url,
            headers=headers,
            # Requests expects proxy URLs to include the scheme.
            proxies={"http": "http://" + proxies},
            timeout=1,
        )
    except (requests.RequestException, ValueError):
        # Connection failures, timeouts and malformed scraped addresses all
        # mean the candidate is unusable; report it and move on.
        print("该代理IP无效:", proxies)
        continue

    if check_response.status_code == 200:
        print("该代理IP可用:", proxies)
        normal_proxies.append(proxies)
    else:
        print("该代理IP不可用:", proxies)