from bs4 import BeautifulSoup
import requests
import time
# Shared accumulator: get_link_from() appends every detail-page URL it finds
# to this list and returns it.
urls = []
def get_link_from(page_number, timeout=10):
    """Collect listing detail-page URLs from the search result pages.

    Fetches search pages 1 through ``page_number - 1`` (``page_number`` is an
    exclusive upper bound, exactly as in ``range``) and reads the
    ``detailurl`` attribute of every element matching
    ``div.result_btm_con.lodgeunitname``.

    Args:
        page_number: Exclusive upper bound of the page numbers to fetch.
        timeout: Value passed to ``requests.get`` as its ``timeout``, in
            seconds. Without a timeout a stalled server would block the
            crawl indefinitely.

    Returns:
        The module-level ``urls`` list with the newly found URLs appended.
        The list is shared, so a second call adds to the results of the
        first rather than starting over.
    """
    for each_number in range(1, page_number):
        list_view = 'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(each_number)
        wb_data = requests.get(list_view, timeout=timeout)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        for link in soup.select('div.result_btm_con.lodgeunitname'):
            detail_url = link.get('detailurl')
            # The attribute lookup yields None when 'detailurl' is absent;
            # storing None would make the later requests.get(url) call fail.
            if detail_url:
                urls.append(detail_url)
    return urls
def print_gender(class_name):
    """Translate a host-icon CSS class name into a gender label.

    Returns "女" for "member_girl_ico", "男" for "member_girl_icol", and
    None for any other value. Despite the name, nothing is printed.
    """
    # NOTE(review): the class mapped to "男" also contains "girl" -- confirm
    # the exact class names against the site's markup.
    known_classes = (
        ("member_girl_ico", "女"),
        ("member_girl_icol", "男"),
    )
    for css_class, label in known_classes:
        if class_name == css_class:
            return label
    return None
def _select_first(soup, selector):
    """Return the first element matching *selector*, or None if none match."""
    matches = soup.select(selector)
    return matches[0] if matches else None


def _first_text(soup, selector):
    """Return the text of the first match for *selector*, or None if absent."""
    node = _select_first(soup, selector)
    return node.text if node is not None else None


def get_item_info(page_number, timeout=10):
    """Fetch every listing found by ``get_link_from`` and print its details.

    For each detail page a dict with the title, address, price, picture URL,
    host name and host gender is built and printed. A field whose element is
    missing from the page is reported as None instead of aborting the whole
    crawl with an IndexError/TypeError.

    Args:
        page_number: Passed straight to ``get_link_from``.
        timeout: Value passed to ``requests.get`` as its ``timeout``, in
            seconds, so a stalled server cannot hang the crawl.
    """
    for url in get_link_from(page_number):
        # get_link_from may hand back an empty entry when a listing had no
        # 'detailurl' attribute; there is nothing to fetch for it.
        if not url:
            continue

        wb_data = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(wb_data.text, 'lxml')

        address = _first_text(soup, 'div.pho_info > p > span')
        picture = _select_first(soup, '#curBigImage')

        gender_icon = _select_first(soup, 'div.w_240 > h6 > span')
        icon_classes = gender_icon.get('class') if gender_icon is not None else None
        gender_class = icon_classes[0] if icon_classes else None

        data = {
            # The trailing space in this key is kept so printed output stays
            # the same as before.
            'title ': _first_text(soup, 'div.pho_info > h4'),
            'address': address.strip(' ') if address is not None else None,
            'price': _first_text(soup, '#pricePart > div.day_l > span'),
            'pic': picture.get('src') if picture is not None else None,
            'host_name': _first_text(soup, 'div.w_240 > h6'),
            # Translate the icon class into a gender label; keep the raw class
            # name when print_gender does not recognise it so no data is lost.
            'host_gender': print_gender(gender_class) or gender_class,
        }
        print(data)
# Run the crawl only when executed as a script, so importing this module does
# not trigger network requests. The argument is an exclusive upper bound:
# 14 means search pages 1 through 13.
if __name__ == "__main__":
    get_item_info(14)
