想看看老家的房价,所以写了个脚本,分享给大家:
import time

import requests
from bs4 import BeautifulSoup

from mysql import close_db

# Seconds to wait on a page request before giving up; without a timeout
# requests.get() can block forever on a stalled server.
REQUEST_TIMEOUT = 10


@close_db
def write_db(con, param):
    """Insert one scraped listing into the ``house`` table.

    The ``close_db`` decorator passes ``con`` as the first argument, so
    callers supply only ``param``.

    Args:
        con: Object whose ``execute(sql, params)`` runs the statement.
        param: Six values in the column order of the INSERT statement below.

    Errors raised by ``con.execute`` are printed and swallowed so that one
    bad row does not stop the crawl.
    """
    # NOTE(review): the column is spelled "adress"; presumably this matches
    # the table schema -- confirm before renaming it.
    sql = ("insert into house(adress, total_price, avg_price, area, title, url) "
           "VALUES (%s,%s,%s,%s,%s,%s)")
    try:
        con.execute(sql, param)
    except Exception as e:
        print(e)


def _page_url(page):
    """Return the listing URL for the 1-based page number ``page``."""
    if page == 1:
        return 'https://ty.lianjia.com/loupan/rs/'
    return 'https://ty.lianjia.com/loupan/pg' + str(page)


def _parse_listing(li):
    """Extract one listing's fields from its ``<li>`` tag.

    Returns a list ordered to match the INSERT columns in ``write_db``:
    address, total price, average price, area, title, url. Raises
    ``AttributeError``/``KeyError`` when an expected tag or attribute is
    missing from the markup.
    """
    # Residential complex name
    title = li.find('div', class_='resblock-name').find('a').text
    # Complex address, with line breaks stripped
    address = li.find('div', class_='resblock-location').text.replace('\n', '')
    # Detail-page link: the first anchor's href appended to the site prefix
    title_url = 'https://ty.fang.lianjia.com' + li.find('a').attrs['href']
    price_box = li.find('div', class_='resblock-price')
    # Average price
    avg_price = (price_box.find('div', class_='main-price')
                 .find('span', class_='number').text)
    # Floor area, with line breaks stripped
    square_metre = li.find('div', class_='resblock-area').text.replace('\n', '')
    # Total price
    total_price = price_box.find('div', class_='second').text
    return [address, total_price, avg_price, square_metre, title, title_url]


def main(page_max=24):
    """Crawl the listing pages and store every listing through ``write_db``.

    Args:
        page_max: Number of the last page to fetch; pages 1..page_max are
            requested in order.

    A page that cannot be fetched, or that lacks the listing container, is
    reported and skipped instead of aborting the whole crawl.
    """
    # Send a browser-like User-Agent header (Chrome on Windows).
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                             '(KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
    for i in range(1, int(page_max) + 1):
        print("第几页:" + str(i))
        try:
            res = requests.get(_page_url(i), headers=headers,
                               timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(e)
            continue
        soup = BeautifulSoup(res.text, 'html.parser')
        listing_box = soup.find('ul', class_='resblock-list-wrapper')
        if listing_box is None:
            # find() returns None when the container is absent; chaining
            # .find_all() on it would raise AttributeError and end the run.
            print("no listing container on page " + str(i))
            continue
        for li in listing_box.find_all('li'):
            # Best effort per listing: report the problem and move on.
            try:
                write_db(_parse_listing(li))
                # NOTE(review): this pause runs after every stored listing,
                # not once per page request -- confirm that is intended.
                time.sleep(5)
            except Exception as e:
                print(e)


if __name__ == '__main__':
    main()
装饰器是自己封装的,还不太成熟,没有关闭数据库连接:
import atexit
import functools

import pymysql

# Connection settings for the database.
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment or a config file.
config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'passwd': 'root',
    'db': 'test',
}
# One connection and one cursor are opened at import time and shared by
# every function decorated with close_db.
db = pymysql.connect(**config)
con = db.cursor()


def _shutdown():
    """Close the shared cursor and connection if the connection is still open."""
    if db.open:
        con.close()
        db.close()


# Nothing else in this module closes the connection, so release it when the
# interpreter exits.
atexit.register(_shutdown)


def close_db(func):
    """Decorate ``func`` so it receives the shared cursor and is committed.

    The wrapped function is called as ``func(con, *args, **kw)``, so callers
    omit the cursor argument. After ``func`` returns, the transaction is
    committed and ``func``'s return value is passed through. If ``func``
    raises, the transaction is rolled back and the exception is re-raised,
    so partial writes are not committed by a later successful call.

    Despite its name, the decorator does not close the connection.
    """
    @functools.wraps(func)
    def wrapper(*args, **kw):
        try:
            r = func(con, *args, **kw)
        except Exception:
            db.rollback()
            raise
        db.commit()
        return r
    return wrapper