import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request.
_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.36'
    )
}

# (field, tag name, CSS class) for each element scraped from a listing page:
# title block, publication info, rating, and one-line quote.
_FIELD_SELECTORS = (
    ('name', 'div', 'pl2'),
    ('info', 'p', 'pl'),
    ('rate', 'span', 'rating_nums'),
    ('inq', 'span', 'inq'),
)


def _field_of(tag):
    """Return the field name that *tag* supplies, or None if it is not scraped."""
    classes = tag.get('class') or ()
    if isinstance(classes, str):
        # Some parsers keep ``class`` as one string instead of a list.
        classes = classes.split()
    for field, tag_name, css_class in _FIELD_SELECTORS:
        if tag.name == tag_name and css_class in classes:
            return field
    return None


def _collect_books(soup):
    """Group the scraped elements into one dict per book, in page order.

    Every title block starts a new book; each following info/rating/quote
    element is attached to the most recent title. Pairing this way (instead
    of zipping four independent page-wide lists) keeps the books aligned when
    one of them lacks a field such as the quote.
    """
    books = []
    # NOTE(review): assumes each book's title block precedes its other fields
    # in document order -- confirm against the live page markup.
    for tag in soup.find_all(lambda t: _field_of(t) is not None):
        field = _field_of(tag)
        if field == 'name':
            books.append({'name': tag.find('a')['title']})
        elif books and field not in books[-1]:
            # Keep only the first match per field; ignore strays before the
            # first title or after a book is already complete.
            books[-1][field] = tag.get_text()
    return books


def parse_html(num, timeout=10):
    """Fetch one Top-250 listing page and return its books as formatted text.

    Args:
        num: Value of the ``start`` query parameter (offset of the first book).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A string with one block per book (title, info, rating, quote) followed
        by a separator line. Fields missing from the page are left empty.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status (via ``raise_for_status``).
    """
    response = requests.get(
        f'https://book.douban.com/top250?start={num}',
        headers=_HEADERS,
        timeout=timeout,
    )
    # Fail loudly rather than parsing an error page into an empty result.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    entries = []
    for book in _collect_books(soup):
        name = book['name']
        info = book.get('info', '')
        rate = book.get('rate', '')
        inq = book.get('inq', '')
        entries.append(
            f'书名:{name}\n作者:{info}\n评分:{rate}\n简介:{inq}\n=======================\n'
        )
    return ''.join(entries)


def main():
    """Scrape all ten listing pages and write the combined text to a file."""
    data = ''.join(parse_html(start) for start in range(0, 250, 25))
    filename = '豆瓣图书Top250.txt'
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(data)


if __name__ == '__main__':
    main()