# lizhihoublog
import csv

import requests
from lxml import etree

def city_page(base_url):
    """Fetch the postcode index page and scrape every region linked from it.

    Requests ``base_url + 'post/'``, walks every cell of the table whose id
    is ``quanguo`` and, for each cell that holds a link, calls ``post_code``
    with the link target appended to ``base_url`` and the link text.

    Args:
        base_url: Site root. ``'post/'`` and each link target are appended
            to it by plain string concatenation.
    """
    url = base_url + 'post/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0',
    }
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.request('get', url=url, headers=headers, timeout=10)
    # Decode the body as GBK up front.  Previously the link text was
    # re-encoded as ISO-8859-1 and decoded as GBK afterwards, which only
    # works if requests happened to decode the body as ISO-8859-1.
    response.encoding = 'gbk'
    page_data = etree.HTML(response.text)

    for row in page_data.xpath("//table[@id='quanguo']//tr"):
        for cell in row.xpath(".//td"):
            hrefs = cell.xpath("./a/@href")
            names = cell.xpath("./a/text()")
            # Some cells (e.g. the trailing one) contain no link.  Skip them
            # instead of re-using the previous cell's values, which would
            # scrape the same region twice or fail on the very first cell.
            if not hrefs or not names:
                continue
            post_code(base_url + hrefs[0], names[0])


def post_code(base_url, dirname):
    """Scrape one region's postcode table and append its rows to a CSV file.

    Requests ``base_url``, decodes the body as GB2312, and reads every
    ``<tr bgcolor="#ffffff">`` that is a direct child of a ``<table>``.  The
    text of each ``<td>`` in such a row becomes one CSV field; cells whose
    text is a lone non-breaking space are dropped, and rows left with no
    fields are not written.

    Args:
        base_url: Full URL of the region page to fetch.
        dirname: Region name; the output file is ``'./邮编' + dirname + '.csv'``
            and is opened in append mode.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0',
    }
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.request("get", url=base_url, headers=headers, timeout=10)
    # Set the decoding explicitly before reading response.text.
    response.encoding = 'gb2312'
    page_data = etree.HTML(response.text)

    rows = []
    for row in page_data.xpath("//table/tr[@bgcolor='#ffffff']"):
        texts = (cell.xpath("string()") for cell in row.xpath("./td"))
        # '\xa0' (a lone non-breaking space) marks a blank cell.
        fields = [text for text in texts if text != '\xa0']
        if fields:
            rows.append(fields)

    # Nothing scraped: do not create or touch the output file.
    if not rows:
        return

    # Open once and emit real CSV records (one per line, properly quoted)
    # rather than concatenated list reprs.  newline='' is what the csv
    # module requires so it can control line endings itself.
    with open('./邮编' + dirname + '.csv', 'a', encoding='utf-8', newline='') as f:
        csv.writer(f).writerows(rows)



if __name__ == '__main__':
    # Index page:      https://www.ip138.com/post/
    # Example region:  https://www.ip138.com/10/   (Beijing postcode URL)
    base_url = 'https://www.ip138.com/'
    city_page(base_url)

 

分类:

技术点:

相关文章:

  • 2021-08-08
  • 2022-12-23
  • 2022-12-23
  • 2021-06-04
  • 2021-12-26
  • 2022-12-23
  • 2022-12-23
猜你喜欢
  • 2021-12-26
  • 2021-12-31
  • 2022-01-26
  • 2021-12-26
  • 2021-12-19
相关资源
相似解决方案