yuabnfa

想看看老家的房价,所以写了个脚本,分享给大家

import requests
import time
from bs4 import BeautifulSoup
# 写入数据库
from mysql import close_db


@close_db
def write_db(con, param):
    """Insert one listing row into the ``house`` table.

    The ``close_db`` decorator supplies ``con`` (the cursor), so callers
    pass only ``param``.

    Args:
        con: Cursor object exposing ``execute(sql, params)``.
        param: Six values in column order: adress, total_price, avg_price,
            area, title, url.

    Any exception raised by ``execute`` is printed and suppressed, so a bad
    row does not interrupt the caller.
    """
    statement = (
        "insert into house(adress, total_price, avg_price, area, title, url) "
        "VALUES (%s,%s,%s,%s,%s,%s)"
    )
    try:
        con.execute(statement, param)
    except Exception as err:
        print(err)

# 主方法
def main():
    """Scrape new-home listings from ty.lianjia.com and store them.

    Fetches listing pages 1 through 24, parses every ``<li>`` found under
    the ``resblock-list-wrapper`` list and hands
    ``[address, total_price, avg_price, area, title, url]`` to ``write_db``.
    The function sleeps 5 seconds after each ``write_db`` call. A listing
    that fails to parse is printed and skipped; a page without the listing
    container is reported and skipped.
    """
    # Send a desktop Chrome User-Agent with every request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                             '(KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
    page_max = 24
    for i in range(1, page_max + 1):
        print("第几页:" + str(i))
        # The first page has its own URL; later pages use the /pgN form.
        if i == 1:
            house = 'https://ty.lianjia.com/loupan/rs/'
        else:
            house = 'https://ty.lianjia.com/loupan/pg' + str(i)
        res = requests.get(house, headers=headers)
        soup = BeautifulSoup(res.text, 'html.parser')
        wrapper = soup.find('ul', class_='resblock-list-wrapper')
        if wrapper is None:
            # find() returns None when the element is absent; skip this page
            # instead of aborting the whole crawl with an AttributeError.
            print("listing container not found on page " + str(i))
            continue
        for li in wrapper.find_all('li'):
            try:
                # Development (residential block) name.
                title = li.find('div', class_='resblock-name').find('a').text
                # Address, with embedded newlines removed.
                address = li.find('div', class_='resblock-location').text
                address = address.replace('\n', '')
                # Detail link: href of the first <a> in the item.
                # NOTE(review): the prefix host (ty.fang.lianjia.com) differs
                # from the crawled host (ty.lianjia.com) - confirm it is intended.
                title_url = li.find('a').attrs['href']
                title_url = 'https://ty.fang.lianjia.com' + title_url
                # Average price.
                price_box = li.find('div', class_='resblock-price')
                avg_price = price_box.find('div', class_='main-price') \
                    .find('span', class_='number').text
                # Floor area, with embedded newlines removed.
                square_metre = li.find('div', class_='resblock-area').text
                square_metre = square_metre.replace('\n', '')
                # Total price.
                total_price = price_box.find('div', class_='second').text
                # Order must match the column list used by write_db.
                param = [address, total_price, avg_price, square_metre, title, title_url]
                write_db(param)
                time.sleep(5)
            except Exception as e:
                # Best effort: report the failing listing and move on.
                print(e)


# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()

装饰器是自己封装的,还不太成熟,没有关闭数据库连接

import functools

import pymysql

# Connection settings for the MySQL server.
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment or a config file kept out of version control.
config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'passwd': 'root',
    'db': 'test'
}
# Open one module-level connection and cursor at import time; close_db
# passes `con` to the functions it wraps and commits on `db`.
db = pymysql.connect(**config)
con = db.cursor()


def close_db(func):
    """Decorator that injects the shared cursor and commits after the call.

    The wrapped function is invoked as ``func(con, *args, **kw)``, where
    ``con`` is the module-level cursor, so callers omit the cursor argument.
    On success the module-level connection ``db`` is committed and the
    function's return value is passed through. If the function raises, the
    pending transaction is rolled back and the exception is re-raised, so
    partial writes cannot be picked up by a later commit.

    Note: despite its name, this decorator does not close the connection.
    """
    @functools.wraps(func)
    def wrapper(*args, **kw):
        try:
            r = func(con, *args, **kw)
        except Exception:
            # Discard whatever the failed call left pending, then propagate.
            db.rollback()
            raise
        db.commit()
        return r

    return wrapper

 

分类:

技术点:

相关文章: