Traceback (most recent call last):
  File "androidmarket82.py", line 108, in <module>
    main()
  File "androidmarket82.py", line 54, in main
    pattern=re.compile('<label >(.+?)</label>)</div>')#版本号
  File "/usr/lib/python2.7/re.py", line 190, in compile
    return _compile(pattern, flags)
  File "/usr/lib/python2.7/re.py", line 242, in _compile
    raise error, v # invalid expression
sre_constants.error: unbalanced parenthesis



 

#!/usr/bin/env python
#-*- coding: utf-8  -*-
import requests
import os,sys 
import time
import MySQLdb
import re
# Running counter used to name the icon files saved under "androidmarket/".
num = 0

# De-duplicated list of app detail-page URLs collected from the listing pages.
dataresult = []
# Number of listing pages requested and the pause (seconds) after a failed request.
_LIST_PAGES = 51
_RETRY_DELAY = 30

# Directory that receives one icon file per scraped app.
_ICON_DIR = "androidmarket"

# Links to app detail pages on a listing page.
_APP_LINK_RE = re.compile(
    '''<a target='_blank' title=".+?" href="(.+?)">.+?</a></span>''')

# (column, pattern) pairs in INSERT order; the first match of each is stored.
_FIELD_PATTERNS = (
    ('name', re.compile('<label >(.+?)</label>')),
    # version number -- the original pattern had a stray ')' after </label>,
    # which made re.compile raise "unbalanced parenthesis".
    ('version', re.compile('<label >(.+?)</label></div>')),
    ('developer', re.compile('<span class="d_u_line"><label >(.+?)</label>')),
    # NOTE(review): the release-date pattern is identical to the name pattern,
    # so both columns receive the same text -- confirm the intended markup.
    ('pubtime', re.compile('<label >(.+?)</label>')),
    # file size
    ('filesize', re.compile('<span class="font14"><label >(.+?)</label>')),
    # supported firmware
    ('support', re.compile(
        '<span class="font14 d_gj_line"><label >(.+?)</label>')),
    # category
    ('classify', re.compile('<span >(.+?)</span></a></span></div>')),
)

# Introduction text.
# NOTE(review): this matches every <label > element on the page, so one row is
# inserted per label -- confirm whether a narrower pattern was intended.
_INTRO_RE = re.compile(r'<label >([\s\S]*?)</label>')

# Raw "src" attribute text (quotes included) of the app icon.
_ICON_RE = re.compile(
    r' <div class="appTitle clearfix">[\s\S]*?<img src=(.+?)/>')

# Eight columns for eight placeholders.  The original listed
# "classifyintroduction" as a single column, leaving seven columns for eight
# values.  NOTE(review): column names "classify"/"introduction" are assumed
# from that run-together name -- confirm against the table schema.
_INSERT_SQL = (
    "insert into androidmarket"
    "(name,version,developer,pubtime,filesize,support,classify,introduction) "
    "values(%s,%s,%s,%s,%s,%s,%s,%s)")


def _fetch(url):
    """Return the body of ``url``, or None when the request fails.

    On failure the error is printed and the function sleeps for
    ``_RETRY_DELAY`` seconds before returning; the request is not retried.
    """
    try:
        return requests.get(url).content
    except requests.RequestException as e:
        print(e)
        time.sleep(_RETRY_DELAY)
        return None


def _collect_app_urls():
    """Fill the global ``dataresult`` with unique app URLs from the listings."""
    global dataresult
    for k in range(_LIST_PAGES):
        # NOTE(review): everything after '#' is a URL fragment, which HTTP
        # clients normally do not send to the server, so each iteration
        # probably fetches the same page -- confirm the real paging URL.
        url = "http://apk.hiapk.com/apps#" + str(k) + "_1_0_0_0_0_0"
        print(url)
        page = _fetch(url)
        if page is None:
            continue
        dataresult = list(set(dataresult + _APP_LINK_RE.findall(page)))
        print(len(dataresult))


def _scrape_app(url, conn, cursor):
    """Scrape one app page: insert its rows and save its icon.

    The page is skipped (with a printed notice) when it cannot be fetched or
    when any field in ``_FIELD_PATTERNS`` has no match, instead of aborting
    the whole run with an IndexError.
    """
    global num
    print(url)
    page = _fetch(url)
    if page is None:
        return

    fields = []
    for column, pattern in _FIELD_PATTERNS:
        found = pattern.findall(page)
        if not found:
            print("skipping %s: no match for %s" % (url, column))
            return
        print(found[0])
        fields.append(found[0])

    for intro in _INTRO_RE.findall(page):
        values = tuple(fields) + (intro.replace('<br />', ' '),)
        try:
            cursor.execute(_INSERT_SQL, values)
            conn.commit()
        except MySQLdb.Error as e:
            # Best effort: report the failed row and carry on with the next.
            print(e)

    # Keep the last icon that downloaded successfully for *this* page only,
    # so a failure never writes bytes left over from a previous app.
    icon = None
    for src in _ICON_RE.findall(page):
        print(src)
        # Drop the leading quote and the two trailing characters of the
        # captured attribute text to obtain the bare URL.
        body = _fetch(src[1:-2])
        if body is not None:
            icon = body
    if icon is None:
        return

    # Binary mode, and the handle is closed as soon as the icon is written.
    with open(_ICON_DIR + "/" + str(num), "wb") as out:
        out.write(icon)
    num = num + 1
    print(num)


def main():
    """Scrape app metadata from apk.hiapk.com into MySQL and save app icons.

    Steps:
      1. Connect to the local ``addressbookdb`` database; print the error and
         exit with status 1 if that fails.
      2. Collect app detail-page URLs into the global ``dataresult``.
      3. Append the number of collected URLs to ``androidmarket.txt``.
      4. For every URL, insert the extracted fields into the
         ``androidmarket`` table and write the icon to ``androidmarket/<num>``.

    The cursor and connection are always closed, even if scraping raises.
    """
    try:
        conn = MySQLdb.connect(host='localhost', user='root', passwd='123456',
                               db='addressbookdb', charset="utf8")
        conn.query("set names utf8")
    except MySQLdb.Error as e:
        print(e)
        sys.exit(1)

    cursor = conn.cursor()
    try:
        _collect_app_urls()

        with open("androidmarket.txt", "a+") as f:
            f.write(str(len(dataresult)))
        print(len(dataresult))

        # The icon directory must exist before the first icon is written.
        if not os.path.isdir(_ICON_DIR):
            os.makedirs(_ICON_DIR)

        for url in dataresult:
            _scrape_app(url, conn, cursor)
    finally:
        cursor.close()
        conn.close()
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()


 



 

相关文章:

  • 2022-03-08
  • 2021-09-13
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2021-09-27
  • 2022-12-23
  • 2021-06-13
猜你喜欢
  • 2021-10-16
  • 2022-12-23
  • 2022-03-02
  • 2022-12-23
  • 2021-07-02
  • 2021-09-14
相关资源
相似解决方案