Ajax,全称为Asynchronous JavaScript and XML,即异步的JavaScript和XML。它不是一门编程语言,而是利用JavaScript在保证页面不被刷新、页面链接不改变的情况下与服务器交换数据并更新部分网页的技术。
对于传统的网页,如果想更新其内容,那么必须要刷新整个页面,但有了Ajax,便可以在页面不被全部刷新的情况下更新其内容。在这个过程中,页面实际上是在后台与服务器进行了数据交互,获取到数据之后,再利用JavaScript改变网页,这样网页内容就会更新了。
可以到W3School上体验几个示例来感受一下:http://www.w3school.com.cn/ajax/ajax_xmlhttprequest_send.asp
下面以微博为例,通过请求其移动端的Ajax接口(https://m.weibo.cn/api/container/getIndex)抓取某个用户第1到10页的微博,并将结果存入MySQL,代码如下:
#! /usr/bin/env python
# coding: utf-8
"""Scrape a user's Weibo posts by calling the Ajax endpoint behind m.weibo.cn."""

import time

import requests
from pyquery import PyQuery as pq
import pymysql
from pymongo import MongoClient


# Headers sent with every API request; 'X-Requested-With' marks it as an Ajax call.
headers = {
    'Host': 'm.weibo.cn',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': 'https://m.weibo.cn/u/2830678474',
}


def get_page(page):
    """Request one page of the timeline from the Weibo Ajax API.

    :param page: page number sent as the ``page`` query parameter.
    :return: the decoded JSON body when the server answers with status 200,
        otherwise ``None`` (non-200 status, connection error or timeout).
    """
    params = {
        'type': 'uid',
        'value': '2830678474',
        'containerid': '1076032830678474',
        'page': page,
    }
    url = 'https://m.weibo.cn/api/container/getIndex'
    try:
        # Without a timeout a stalled connection would block the script forever.
        res = requests.get(url, headers=headers, params=params, timeout=10)
    except (requests.ConnectionError, requests.Timeout) as e:
        print('Error', e.args)
        return None
    if res.status_code == 200:
        return res.json()
    return None


def parse_page(json, page):
    """Yield one dict per post contained in an API response.

    :param json: decoded response as returned by ``get_page``; a falsy value
        yields nothing.
    :param page: page number the response belongs to.
    :return: generator of dicts with the keys ``id``, ``text``, ``attitudes``,
        ``comments`` and ``reposts`` (in that order).
    """
    if not json:
        return
    # Tolerate responses where 'data' or 'cards' is missing or null.
    cards = (json.get('data') or {}).get('cards') or []
    for index, item in enumerate(cards):
        # The second card of the first page is deliberately skipped
        # (presumably it is not a regular post -- TODO confirm).
        if page == 1 and index == 1:
            continue
        mblog = item.get('mblog')
        if not mblog:
            # A card without an 'mblog' payload has no post fields to read.
            continue
        yield {
            'id': mblog.get('id'),
            # The post text is HTML; keep only its plain-text content.
            'text': pq(mblog.get('text')).text(),
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            'reposts': mblog.get('reposts_count'),
        }


# Store a record in MySQL
def save_mysql(result):
    """Insert one post dict into the MySQL table ``weibo``.

    Column names are taken from the dict keys; the values are passed as query
    parameters. A new connection is opened for every call and is always
    closed again, even when the insert fails.

    :param result: dict mapping column names to values.
    """
    table = 'weibo'
    keys = ', '.join(result.keys())
    values = ', '.join(['%s'] * len(result))
    sql = 'INSERT INTO {table}({keys}) VALUES ({values})'.format(table=table, keys=keys, values=values)
    db = pymysql.connect(host='127.0.0.1', user='root', password='123456', port=3306, db='spiders', charset="utf8")
    try:
        cursor = db.cursor()
        try:
            if cursor.execute(sql, tuple(result.values())):
                print('Successful')
                db.commit()
        except Exception as e:
            print('Failed', e.args)
            db.rollback()
    finally:
        db.close()


# Store a record in MongoDB
def save_to_mongo(result):
    """Insert one post dict into the ``weibo`` collection of the ``weibo`` database.

    :param result: document to insert.
    """
    client = MongoClient()
    db = client['weibo']
    collection = db['weibo']
    # Collection.insert() was removed in PyMongo 4; insert_one() replaces it.
    if collection.insert_one(result).acknowledged:
        print('Saved to Mongo')


if __name__ == '__main__':
    for page in range(1, 11):
        page_json = get_page(page)
        for result in parse_page(page_json, page):
            print(result)
            save_mysql(result)
        # NOTE(review): the pasted source lost its indentation; the pause is
        # applied once per page here -- confirm against the original script.
        time.sleep(1)