zongdidi

spider:

# -*- coding: utf-8 -*-
import scrapy
from collectip.items import CollectipItem

class XiciSpider(scrapy.Spider):
name = \'xici\'
allowed_domains = [\'xicidaili.com\']
start_urls = [\'http://www.xicidaili.com\']

def start_requests(self):
reqs = []

for i in range(1,3):
req = scrapy.Request("http://www.xicidaili.com/nn/%s"%i)
reqs.append(req)

return reqs

def parse(self, response):
ip_list = response.xpath(\'//table[@id="ip_list"]\')
print(ip_list)

trs = ip_list[0].xpath(\'tr\')

items = []

for ip in trs[1:]:
pre_item = CollectipItem()

pre_item[\'IP\'] = ip.xpath(\'td[2]/text()\')[0].extract()

pre_item[\'PORT\'] = ip.xpath(\'td[3]/text()\')[0].extract()

pre_item[\'POSITION\'] = ip.xpath(\'string(td[4])\')[0].extract().strip()

pre_item[\'TYPE\'] = ip.xpath(\'td[6]/text()\')[0].extract()

pre_item[\'SPEED\'] = ip.xpath(\'td[8]/div[@class="bar"]/@title\').re(\'\d{0,2}\.\d{0,}\')[0]

pre_item[\'LAST_CHECK_TIME\'] = ip.xpath(\'td[10]/text()\')[0].extract()

items.append(pre_item)


return items

分类:

技术点:

相关文章: