在PY文件中:

from scrapy.selector import Selector
from scrapy.http import HtmlResponse

# Fetch the mm131 mobile front page and hand every distinct ``data-img``
# JPEG URL found inside its <article class="post"> blocks to ``baocun``.
#
# NOTE(review): ``requests``, ``re``, ``os``, ``time``, the output path prefix
# ``wz`` and the helper ``baocun(url, path)`` are not defined in this snippet;
# presumably they are provided earlier in the file -- confirm.
url = "https://m.mm131.net/"
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(url, timeout=30)
r.encoding = 'gbk'  # switch to utf-8 if the page is served in that encoding
body = r.text

# Parse the document once and reuse the selector for both queries.
page = Selector(text=body)
tx = page.xpath('//h2[@class="mm-title"]/text()').extract()
# The original re-ran this query with the malformed XPath '//article[@]/div',
# which raises before anything is downloaded; only the valid query is kept.
tx1 = page.xpath('//article[@class="post"]/div').extract()

# The regexes run over the string form of the extracted fragment list.
# The dot before "jpg" is escaped so it only matches a literal '.'.
fragments = str(tx1)
tx2 = re.findall(r'src="(http.*?\.jpg)"', fragments)
tx4 = re.findall(r'data-img="(http.*?\.jpg)"', fragments)
tx3 = re.findall(r'href="(.*?)"', fragments)

# dict.fromkeys drops duplicate URLs while keeping page order, so repeated
# runs process the images in a stable sequence.
for aa in dict.fromkeys(tx4):
    parts = aa.split('/')
    adir = parts[-2]  # second-to-last URL segment, used as the folder name
    name = parts[-1]  # last URL segment (the file name)
    time.sleep(3)  # pause between requests to avoid hammering the server

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    target_dir = wz + adir + "/"
    os.makedirs(target_dir, exist_ok=True)

    # The folder name is prefixed to the file name, as in the original layout.
    fname = target_dir + adir + name
    print(fname)
    baocun(aa, fname)
    

  

相关文章:

  • 2021-06-28
  • 2022-12-23
  • 2021-10-26
  • 2022-12-23
  • 2022-12-23
  • 2021-06-07
  • 2022-12-23
  • 2022-12-23
猜你喜欢
  • 2022-12-23
  • 2021-06-05
  • 2022-12-23
  • 2020-04-07
  • 2022-12-23
  • 2022-12-23
  • 2021-11-01
相关资源
相似解决方案