#coding:utf-8 import urllib2 import os import re def dow(url): return urllib2.urlopen(url).read() str=dow(\'http://theater.mtime.com/China_Beijing/\') lst=re.findall(\'\d+家影院上映\d+场\',str) url = \'http://theater.mtime.com/China_Beijing\' req = urllib2.Request(url,headers={\'User-Agent\' : "Magic Browser"}) webpage = urllib2.urlopen(req) strw = webpage.read() #print strw tg_start = strw.find(\'hotplaySvList = [\') #print tg_start#开始 if tg_start == -1: print \'not find start tag\' os._exit(0) tmp = strw[tg_start:-1] tg_end = tmp.find(\';\') if tg_end == -1 : print \'not find end tag\' os._exit(0) tmp = tmp[len(\'hotplaySvList = [\'):tg_end] tar_ls = tmp.split("},{") dict_film = {} i=0 for t0 in tar_ls: ls_t = t0.split(\',\') id = ls_t[0].split(\':\')[-1].strip() film = ls_t[-1].split(\'"\')[-2].strip() print id,film,lst[i] i=i+1 dict_film[id] = film print len(dict_film)