doublekai
#coding:utf-8
import urllib2
import os
import re
def dow(url):
    """Download *url* and return the raw response body.

    The request is made with urllib2's default headers. Any exception
    raised by ``urllib2.urlopen`` or by reading the response propagates
    to the caller.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # Release the underlying socket even when read() fails.
        response.close()
# Scrape the Beijing listings page for the per-film captions of the form
# "<N>家影院上映<M>场" (N cinemas showing M screenings).
listing_html = dow('http://theater.mtime.com/China_Beijing/')
lst = re.findall(r'\d+家影院上映\d+场', listing_html)

# Fetch the page again, this time with an explicit User-Agent header; the
# embedded `hotplaySvList` JavaScript array is read from this copy.
# NOTE(review): the page is downloaded twice; presumably a single fetch
# would serve both purposes -- confirm before merging the two requests.
url = 'http://theater.mtime.com/China_Beijing'
req = urllib2.Request(url, headers={'User-Agent': "Magic Browser"})
webpage = urllib2.urlopen(req)
try:
    strw = webpage.read()
finally:
    webpage.close()

START_TAG = 'hotplaySvList = ['

tg_start = strw.find(START_TAG)
if tg_start == -1:
    # SystemExit writes the message to stderr and exits with status 1;
    # os._exit() would skip flushing stdio and report success.
    raise SystemExit('not find start tag')

# The array literal runs from just after the start tag to the first ';'.
body_start = tg_start + len(START_TAG)
tg_end = strw.find(';', body_start)
if tg_end == -1:
    raise SystemExit('not find end tag')
body = strw[body_start:tg_end]

# An empty array ("hotplaySvList = [];") contains no '{' and yields no films.
if '{' in body:
    tar_ls = body.split("},{")
else:
    tar_ls = []

# Maps film id (value of the first field of each record) to the film title
# (last double-quoted value of the record's final field).
dict_film = {}
for index, record in enumerate(tar_ls):
    fields = record.split(',')
    film_id = fields[0].split(':')[-1].strip()
    film = fields[-1].split('"')[-2].strip()
    # The caption list can be shorter than the film list; fall back to an
    # empty caption instead of raising IndexError.
    showing = lst[index] if index < len(lst) else ''
    print('%s %s %s' % (film_id, film, showing))
    dict_film[film_id] = film

print(len(dict_film))

 

分类:

技术点:

相关文章: