from bs4 import BeautifulSoup
import sys,os,requests,pymongo,time
from lxml import etree
def get_fenlei(url,headers):
"""
获取妹子图的分类链接,并创建相对应的分类文件夹
:param url:
:param headers:
:return:
"""
response = requests.get(url,headers=headers).text
soup = BeautifulSoup(response,'lxml')
fenlei_url = soup.select('#menu-nav > li > a')
list1 = []
print(sys.path)
for i in fenlei_url:
fen_lei_lianjie = i['href']
if ''.join(fen_lei_lianjie).split('/')[3] =='all':
continue
elif ''.join(fen_lei_lianjie).split('/')[3] != '':
fenlei_name = i.get_text()
print(fenlei_name,fen_lei_lianjie,'首页')
get_fenlei_xia(fen_lei_lianjie,fenlei_name)
list1.append(fen_lei_lianjie)
def get_fenlei_xia(fen_lei_lianjie,fenlei_name):
"""
:param fen_lei_lianjie:
:param fenlei_name:
:return:
"""
print('{}{}'.format(fen_lei_lianjie,'<><><><><><>'))
response = requests.get(fen_lei_lianjie,headers=headers).text
html = etree.HTML(response)
fenye_page = html.xpath('/html/body/div[2]/div[1]/div[2]/nav/div/a[4]')
page_list = []
if fenye_page != []:
for i in fenye_page:
page_shu = i.xpath('./text()')[0]
page_url = i.xpath('./@href')[0]
for ia in range(1,int(page_shu)+1):
fenlei_url = '{}/{}/{}/{}/'.format('https://www.mzitu.com/',''.join(page_url).split('/')[3],'page',ia)
page_list.append(fenlei_url)
else:
print('{}'.format('没有数据11111'))
fenye_page2 = html.xpath('//*[@
main()
相关文章: