longwhite

网站:www.minimp4.com

# coding=utf-8
import requests
from lxml import etree
class Minimpe_moves(object):
    """Scraper that collects movie titles from www.minimp4.com listing pages.

    Each listing page links to individual movie pages; the title of every
    linked movie is printed and appended to ``movies.txt``.
    """

    # Seconds to wait for each HTTP response; without a timeout a stalled
    # connection would block the crawl forever.
    REQUEST_TIMEOUT = 10

    def Getmovies(self, page):
        """Fetch one listing page and save the title of every movie it links to.

        Args:
            page: Page number substituted into the listing URL's ``page``
                query parameter.
        """
        url = 'http://www.minimp4.com/movie/?page={}'.format(page)
        html = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        htmml = etree.HTML(html.text)  # parse the listing page
        href = htmml.xpath('//div[@class="meta"]/h1/a/@href')

        for url_moves in href:
            html1 = requests.get(url_moves, timeout=self.REQUEST_TIMEOUT)
            htmml1 = etree.HTML(html1.text)
            # Extract the movie title.
            movie_name = htmml1.xpath('//div[@class="movie-meta"]/h1/text()')
            # The leading actors can be scraped the same way with
            # '//div[@class="movie-meta"]/p[@id="casts"]/a/text()'.

            if not movie_name:
                # The XPath matched nothing on this page; there is no title to save.
                continue

            print(movie_name)
            self.saveMovies(movie_name)

    # Decorator syntax sugar: saveMovies is a static method.
    @staticmethod
    def saveMovies(data):
        """Append the first entry of *data* as one line to ``movies.txt``.

        Args:
            data: Sequence of strings (the XPath result list); only the first
                item is written. An empty sequence is ignored.
        """
        if not data:
            return
        with open('movies.txt', 'a', encoding='utf-8') as f:
            f.write(data[0] + '\n')




# Runs only when this file is executed directly; skipped when it is imported.
if __name__ == "__main__":
    minimp4 = Minimpe_moves()

    # Crawl listing pages 1 through 10 (range(11) would also request page 0).
    for n in range(1, 11):
        minimp4.Getmovies(n)

上图是各部电影的主演。基本上每部电影的各项信息都能爬取下来,只需修改对应的 XPath 即可。

 

 

 

分类:

技术点:

相关文章:

  • 2022-12-23
  • 2021-02-26
  • 2021-02-26
  • 2021-05-12
  • 2021-10-05
  • 2021-10-05
  • 2021-08-01
猜你喜欢
  • 2021-07-14
  • 2022-12-23
  • 2021-10-21
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
相关资源
相似解决方案