yangyang-1127
# Scrape listing titles from the 58.com Suzhou second-hand housing page and
# save them, one per line, to ./58.txt (also echoing each title to stdout).
import requests
from lxml import etree

if __name__ == '__main__':
    # Fetch the page source.
    url = 'https://su.58.com/ershoufang/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36'
    }
    # A timeout keeps the script from hanging forever if the server stalls;
    # requests applies no timeout by default.
    page_text = requests.get(url=url, headers=headers, timeout=10).text
    tree = etree.HTML(page_text)

    # Parse the data: collect the <li> elements of the listing.
    # Note: the class attribute value inside the XPath must use double quotes.
    li_list = tree.xpath('//ul[@class="house-list-wrap"]/li')

    # The context manager guarantees the file is flushed and closed even if
    # parsing or writing fails part-way through.
    with open('./58.txt', 'w', encoding='utf-8') as fp:
        for li in li_list:
            # Parse each item relative to its own <li> element.
            titles = li.xpath('./div[2]/p/span/text()')
            if not titles:
                # Some <li> entries may not carry a title span; skip them
                # instead of crashing with IndexError.
                continue
            title = titles[0]
            print(title)
            fp.write(title + '\n')

分类:

技术点:

相关文章: