from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import TimeoutException
from pyquery import PyQuery as pq
import re
from config import *
import pymongo

# Module-level handles shared by every function below.
client = pymongo.MongoClient(MONGO_URL)
db = client[MONGO_DB]
browser = webdriver.Chrome()

# Explicit wait helper bound to the browser, constructed with a timeout of 10.
wait = WebDriverWait(browser, 10)


def search():
    """Open Taobao, search for 'xiaomi' and scrape the first result page.

    Returns:
        The text of the pager's ``div.total`` element; ``main`` extracts the
        page count from it.

    On ``TimeoutException`` the whole search is restarted by calling this
    function again. There is no cap on the number of retries.
    """
    try:
        browser.get('https://www.taobao.com')
        input_ = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#q'))
        )
        submit = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '#J_TSearchForm > div.search-button > button'))
        )

        input_.send_keys('xiaomi')
        submit.click()

        total = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.total'))
        )
        get_products()
        return total.text
    except TimeoutException:
        return search()


def next_page(page_num):
    """Jump to result page ``page_num`` through the pager form and scrape it.

    Args:
        page_num: Page number typed into the pager's input box.

    On ``TimeoutException`` the jump is retried by calling this function
    again. There is no cap on the number of retries.
    """
    try:
        input_ = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.form > input'))
        )
        submit = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.form > span.btn.J_Submit'))
        )
        input_.clear()
        input_.send_keys(page_num)
        submit.click()
        # Scrape only once the highlighted pager item shows the requested page.
        wait.until(EC.text_to_be_present_in_element(
            (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > ul > li.item.active > span'), str(page_num)))
        get_products()
    except TimeoutException:
        next_page(page_num)


def get_products():
    """Parse every product on the current page and store each one in MongoDB.

    Waits for at least one item to be present, then parses the page source
    with PyQuery. Each product dict is printed and passed to
    ``save_to_mongo``.
    """
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#mainsrp-itemlist .items .item')))
    html = browser.page_source
    doc = pq(html)
    items = doc('#mainsrp-itemlist .items .item').items()
    for item in items:
        product = {
            'image': item.find('.pic .img').attr('src'),
            'price': item.find('.price').text(),
            # Drops the last three characters of the deal-count text
            # (presumably a fixed textual suffix -- confirm against the page).
            'deal': item.find('.deal-cnt').text()[:-3],
            'title': item.find('.title').text(),
            'shop': item.find('.shop').text(),
            'location': item.find('.location').text(),
        }
        print(product)
        save_to_mongo(product)


def save_to_mongo(result):
    """Insert ``result`` into the ``MONGO_TABLE`` collection (best effort).

    Args:
        result: Product dict to store.

    Failures are reported on stdout together with the exception and are not
    re-raised, so one bad document does not stop the crawl.
    """
    try:
        # insert_one replaces Collection.insert, which was deprecated in
        # PyMongo 3.0 and removed in PyMongo 4.0.
        db[MONGO_TABLE].insert_one(result)
        print('success save to mongodb', result)
    except Exception as exc:
        print('error to mongo', exc)


def main():
    """Run the crawl: search, walk every result page, then close the browser.

    Raises:
        ValueError: If the pager text returned by ``search`` has no digits.
    """
    try:
        total = search()
        match = re.search(r'(\d+)', total)
        if match is None:
            raise ValueError('no page count found in pager text: {!r}'.format(total))
        total = int(match.group(1))
        # Page 1 is scraped by search(); the upper bound is inclusive so the
        # last page is visited too.
        for i in range(2, total + 1):
            next_page(i)
    finally:
        # Close the browser window even when the crawl fails part-way.
        browser.close()


if __name__ == '__main__':
    main()
config.py
# MongoDB settings pulled in by the scraper through ``from config import *``.
MONGO_URL = 'localhost'  # passed to pymongo.MongoClient
MONGO_DB = 'taobao'  # database name
MONGO_TABLE = 'product'  # collection that receives the scraped products
运行结果:
数据库: