"""Fetch the Douban Top 250 page and print the movie fields found by a regex.

For every match in the downloaded HTML, one dict is printed with the keys
``id``, ``title``, ``rat`` and ``comment_num`` (all values are strings).
"""
import re

import requests

url = 'https://movie.douban.com/top250'

# NOTE(review): Douban reportedly rejects the default python-requests
# User-Agent, so a browser-like one is sent -- confirm against the live site.
headers = {'User-Agent': 'Mozilla/5.0'}

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of silently parsing an
# error page and printing nothing.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
urlcontent = response.text

# Regular expression
#
# Implementation steps:
#   1. Match each field individually first.
#   2. Concatenate the per-field patterns.
#   3. Capture everything with the combined pattern.
#
# Per-field patterns:
#   id:          .*?(\d+)</em.*?
#   title:       title.*?>(.*?)<
#   rat:         .*?average.*?(\d+\.\d).*?
#   comment_num: .*?(\d+)人.*
#
# The pattern is a raw string so that ``\d`` reaches the regex engine
# unchanged, and the dot in the rating is escaped so it only matches a
# literal decimal point. re.S lets ``.`` span the newlines between fields.
com = re.compile(
    r'.*?(\d+)</em.*?title.*?>(.*?)<.*?average.*?(\d+\.\d).*?(\d+)人',
    re.S,
)

ret = com.finditer(urlcontent)
for i in ret:
    print({
        'id': i.group(1),
        'title': i.group(2),
        'rat': i.group(3),
        'comment_num': i.group(4),
    })