# dws-love-jfl-1314
from bs4 import BeautifulSoup
import requests
import time

# Module-level accumulator: get_link_from() appends to this list and returns it.
urls = []


def get_link_from(page_number):
    """Collect listing detail URLs from the bj.xiaozhu.com search pages.

    Search pages ``1 .. page_number - 1`` are fetched; ``page_number`` is an
    exclusive upper bound, so ``get_link_from(14)`` reads pages 1-13.  Every
    element matching ``div.result_btm_con.lodgeunitname`` contributes the
    value of its ``detailurl`` attribute.

    Args:
        page_number: Exclusive upper bound of the search page numbers.

    Returns:
        The module-level ``urls`` list, extended with the URLs found.
        Elements without a ``detailurl`` attribute and URLs already present
        in ``urls`` are skipped, so calling this function repeatedly does
        not produce duplicates.

    Raises:
        requests.RequestException: If a search page cannot be fetched
            (including when the request times out).
    """
    seen = set(urls)
    for each_number in range(1, page_number):
        list_view = 'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(each_number)
        # A timeout keeps an unresponsive server from hanging the script.
        wb_data = requests.get(list_view, timeout=10)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        for link in soup.select('div.result_btm_con.lodgeunitname'):
            detail_url = link.get('detailurl')
            # .get() returns None when the attribute is absent; such an
            # entry cannot be fetched later, so it is dropped here.
            if not detail_url or detail_url in seen:
                continue
            seen.add(detail_url)
            urls.append(detail_url)
    return urls

def print_gender(class_name):
    """Map a class name to a gender label.

    Despite the name, nothing is printed; the label is returned.

    Args:
        class_name: Class-name string to translate (presumably the CSS class
            of the host's gender icon -- confirm against the caller).

    Returns:
        ``"女"`` (female) for ``"member_girl_ico"``, ``"男"`` (male) for
        ``"member_girl_icol"``, and ``None`` for any other value.
    """
    if class_name == "member_girl_ico":
        return "女"
    # NOTE(review): this key is mapped to male but still contains "girl" and
    # ends in a letter "l" -- it looks like a typo; confirm the real class
    # name against the page markup before relying on it.
    if class_name == "member_girl_icol":
        return "男"
    return None

def _select_first(soup, selector):
    """Return the first element matching *selector*, or None if none match."""
    matches = soup.select(selector)
    return matches[0] if matches else None


def _text_of(tag):
    """Return the text of *tag*, or None when *tag* is None."""
    return tag.text if tag is not None else None


def get_item_info(page_number):
    """Fetch every listing found by ``get_link_from`` and print its details.

    For each URL returned by ``get_link_from(page_number)`` the page is
    downloaded and parsed, and a dict with the title, address, price,
    picture URL, host name and host gender is printed.  A field whose
    element is missing from the page is reported as ``None`` instead of
    aborting the whole run with an ``IndexError``.

    Args:
        page_number: Passed through unchanged to ``get_link_from``.

    Returns:
        None. The result of each listing is written with ``print``.

    Raises:
        requests.RequestException: If a listing page cannot be fetched
            (including when the request times out).
    """
    urls = get_link_from(page_number)
    for url in urls:
        # Skip entries without a usable URL; requests.get would fail on them.
        if not url:
            continue

        # A timeout keeps an unresponsive server from hanging the script.
        wb_data = requests.get(url, timeout=10)
        soup = BeautifulSoup(wb_data.text, 'lxml')

        address_text = _text_of(_select_first(soup, 'div.pho_info > p > span'))
        picture_tag = _select_first(soup, '#curBigImage')

        gender_tag = _select_first(soup, 'div.w_240 > h6 > span')
        gender_classes = gender_tag.get('class') if gender_tag is not None else None
        gender_class = gender_classes[0] if gender_classes else None
        # Translate the class name with print_gender; fall back to the raw
        # class name when it is not one of the names print_gender knows.
        host_gender = print_gender(gender_class) or gender_class

        data = {
            # NOTE(review): this key has a trailing space; it is kept as is
            # so the printed output does not change.
            'title ': _text_of(_select_first(soup, 'div.pho_info > h4')),
            'address': address_text.strip(' ') if address_text is not None else None,
            'price': _text_of(_select_first(soup, '#pricePart > div.day_l > span')),
            'pic': picture_tag.get('src') if picture_tag is not None else None,
            'host_name': _text_of(_select_first(soup, 'div.w_240 > h6')),
            'host_gender': host_gender,
        }
        print(data)


# Run the scraper at module load. 14 is passed to get_link_from as an
# exclusive page bound, so search pages 1-13 are read.
get_item_info(14)

 


# 分类 (category):

# 技术点 (techniques):

# 相关文章 (related articles):