"""Scrape listing pages 1-100 of cs.lianjia.com/ershoufang into the ``rsf`` table.

For every listing found on a page, the house code, title, the "|"-separated
``houseInfo`` fields, the total price and the link are stored as one row in
the sqlite database ``test.db``. The ``rsf`` table must already exist.
"""
import sqlite3

import requests
from bs4 import BeautifulSoup

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/64.0.3282.140 Safari/537.36',
}

# Placeholders let sqlite3 do the quoting, so scraped text containing quotes
# can neither break the statement nor inject SQL.
INSERT_SQL = (
    "insert into rsf(nid,dz,fx,dx,cx,zx,dt,jg,title,url)"
    "values(?,?,?,?,?,?,?,?,?,?)"
)


def _split_house_info(parts):
    """Map the split ``houseInfo`` text onto the six columns dz, fx, dx, cx, zx, dt.

    Six parts are used as-is; five parts get the string 'None' as the sixth
    column; any other count yields six empty strings.
    """
    if len(parts) == 6:
        return tuple(parts)
    if len(parts) == 5:
        return tuple(parts) + ('None',)
    return ('',) * 6


conn = sqlite3.connect("test.db")
c = conn.cursor()
# One HTTP session for the whole run instead of a new one per page.
req = requests.session()
try:
    for num in range(1, 101):
        url = "https://cs.lianjia.com/ershoufang/pg%s/" % num
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept from the original script - confirm it is really needed.
        response = req.get(url, headers=HEADERS, verify=False)
        page = BeautifulSoup(response.text, 'lxml')

        listing_container = page.find(class_='sellListContent')
        if listing_container is None:
            # The page did not contain the expected listing container;
            # skip it rather than crash on None.
            print("page %s: no 'sellListContent' element found, skipping" % num)
            continue

        for item in listing_container.find_all(class_='clear LOGCLICKDATA'):
            data_id = item.find(class_="noresultRecommend").get('data-housecode')
            href = item.find(class_="noresultRecommend img ").get('href')
            title = item.find(class_="title").get_text()
            adress = item.find(class_="houseInfo").get_text().split("|")
            jage = item.find(class_="totalPrice").get_text()

            dz, fx, dx, cx, zx, dt = _split_house_info(adress)
            if len(adress) < 5:
                # Same diagnostic output as before: the (empty) fields are
                # printed when the houseInfo text has too few parts.
                print(dz, fx, dx, cx, zx, dt)

            c.execute(
                INSERT_SQL,
                (int(data_id), dz, fx, dx, cx, zx, dt, jage, title, href),
            )
        # Persist each page's rows once the page has been fully processed.
        conn.commit()
finally:
    # Release the network and database handles even if a page fails.
    req.close()
    conn.close()
sqlite3 读取数据
import sqlite3

# Open the database that holds the ``rsf`` table.
connection = sqlite3.connect("test.db")
db_cursor = connection.cursor()

# Query data from sqlite3: walk every row and report those whose price
# (the number in front of '万' in column 7) lies strictly between 30 and 50.
matches = 0
for record in db_cursor.execute("SELECT * from rsf"):
    price = float(record[7].split('万')[0])
    if not (30.0 < price < 50.0):
        continue
    matches += 1
    # Running match number, columns 1 and 3, the parsed price, and the
    # second-to-last column.
    print(matches, record[1], record[3], price, record[-2])

connection.close()