1、爬取豆瓣正热映电影
xpath应用
import requests
from scrapy.selector import Selector

# Request headers: a desktop Chrome User-Agent plus a Douban Referer.
# NOTE(review): presumably sent so the site serves the regular page to a
# non-browser client -- confirm whether both headers are still required.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
    "Referer": "https://movie.douban.com/",
}

# "Now playing" listing page (the URL path names Guangzhou).
url = "https://movie.douban.com/cinema/nowplaying/guangzhou/"

# An explicit timeout keeps the script from blocking indefinitely when the
# server does not answer; requests applies no timeout by default.
response = requests.get(url=url, headers=headers, timeout=10)

selector = Selector(text=response.text)

# Locate the <ul class="lists"> inside <div id="nowplaying">, then take its
# direct <li> children.
ul = selector.xpath("//div[@id='nowplaying']//ul[@class='lists']")
li_list = ul.xpath("./li")

# One dict per <li>. Most values are read from data-* attributes on the <li>
# itself; extract_first('') yields an empty string when nothing matches.
movies = []
for li in li_list:
    title = li.xpath("@data-title").extract_first('')  # title
    data_score = li.xpath("@data-score").extract_first('')  # rating
    data_release = li.xpath("@data-release").extract_first('')  # release year
    data_duration = li.xpath("@data-duration").extract_first('')  # running time
    data_director = li.xpath("@data-director").extract_first('')  # director
    data_actors = li.xpath("@data-actors").extract_first('')  # actors
    # Promotional image: src of the first <img> found anywhere under the <li>.
    data_img = li.xpath(".//img/@src").extract_first('')

    movie = {
        "title": title,
        "data_score": data_score,
        "data_release": data_release,
        "data_duration": data_duration,
        "data_director": data_director,
        "data_actors": data_actors,
        "data_img": data_img,
    }
    movies.append(movie)