"""Scrape the movie board at maoyan.com/board/4 and print one dict per movie.

The board is fetched page by page through its ``offset`` query parameter,
each page is parsed with a regular expression, and every parsed record is
printed to stdout.
"""
import re

import requests

BOARD_URL = 'http://maoyan.com/board/4'
PAGE_SIZE = 10   # step of the ``offset`` query parameter between pages
PAGE_COUNT = 10  # number of pages requested by the script

# Browser-like User-Agent sent with every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'}

# One match per <dd> entry. Groups: rank, title, actors, release time,
# integer part of the score, fractional part of the score.
# Raw strings keep ``\d`` / ``\s`` as regex escapes instead of (invalid)
# string escapes; re.S lets ``.`` span the newlines inside an entry.
_MOVIE_PATTERN = re.compile(
    r'<dd>.*?board-index.*?>(\d+)</i>.*?name"><a'
    r'.*?>(.*?)</a>.*?star">\s+主演:(.*?)\s+</p>.*?releasetime">上映时间:(.*?)</p>'
    r'.*?integer">(.*?)</i>.*?fraction">(.*?)</i>',
    re.S,
)


def get_html(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The decoded response text, or ``None`` when the request fails
        (connection error, timeout, or a 4xx/5xx status).
    """
    try:
        r = requests.get(url, headers=HEADERS, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException as exc:
        # Only request failures are handled here; KeyboardInterrupt and
        # genuine programming errors are allowed to propagate.
        print('request to {} failed: {}'.format(url, exc))
        return None
    return r.text


def parse_html(text, info_list):
    """Extract movie records from one board page and append them to *info_list*.

    Args:
        text: HTML source of a board page.
        info_list: List that receives one dict per matched entry, with the
            keys ``movie``, ``rank``, ``actors``, ``time`` and ``score``
            (all values are strings; ``score`` is the integer and
            fractional parts concatenated).

    Returns:
        None. *info_list* is modified in place; nothing is appended when
        the page contains no matching entry.
    """
    for rank, movie, actors, release_time, integer, fraction in \
            _MOVIE_PATTERN.findall(text):
        info_list.append({
            'movie': movie,
            'rank': rank,
            'actors': actors,
            'time': release_time,
            'score': integer + fraction,
        })


def main():
    """Download every board page, collect the records and print them."""
    info_list = []
    for page in range(PAGE_COUNT):
        path = BOARD_URL + '?offset=' + str(page * PAGE_SIZE)
        txt = get_html(path)
        if txt:  # skip pages that failed to download
            parse_html(txt, info_list)
    for info in info_list:
        print(info)


if __name__ == '__main__':
    main()