1,分析url,获取热门电影的url
豆瓣采用的是 ajax 动态加载技术,我们可以直接找到返回 json 数据的 url
url = 'https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=330&page_start=0'
1,先获取到json数据
def get_data():
    """Fetch the "hot movies" JSON listing from Douban and return its rows.

    Sends one GET request to the ``search_subjects`` endpoint and reads the
    ``subjects`` array of the JSON response.

    Returns:
        list[dict]: One dict per entry of ``subjects``, in response order,
        with the keys ``movie_rate``, ``movie_name`` and ``movie_url``
        (taken from the entry's ``rate``, ``title`` and ``url`` fields).

    Raises:
        requests.exceptions.RequestException: On connection failure, on
            timeout, or when the server answers with a 4xx/5xx status.
        ValueError: If the response body is not valid JSON.
        KeyError: If ``subjects`` or one of the per-movie fields is missing.
    """
    # The ``tag`` value is the percent-encoded UTF-8 form of "热门".
    url = (
        'https://movie.douban.com/j/search_subjects'
        '?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend'
        '&page_limit=330&page_start=0'
    )
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
    }
    # Without a timeout ``requests`` waits indefinitely for a stalled server.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    subjects = response.json()['subjects']
    return [
        {
            'movie_rate': movie['rate'],
            'movie_name': movie['title'],
            'movie_url': movie['url'],
        }
        for movie in subjects
    ]
2,保存到数据库
def sava_data(data):
    """Insert movie rows into the ``t_movie`` table of the ``douban`` database.

    Opens a connection to the MySQL server at 127.0.0.1:3306 as ``root``,
    inserts every row in one ``executemany`` call and commits.

    Args:
        data: Sequence of dicts, each providing the keys ``movie_rate``,
            ``movie_name`` and ``movie_url`` used by the named placeholders
            of the insert statement.

    Raises:
        pymysql.MySQLError: If connecting, inserting or committing fails.
            The connection is closed in that case as well.
    """
    config = {
        'host': '127.0.0.1',
        'port': 3306,
        'user': 'root',
        'password': '',
        'database': 'douban',
        'charset': 'utf8',
    }
    sql = '''
    insert into t_movie
    (movie_rate, movie_name, movie_url)
    values
    (%(movie_rate)s,%(movie_name)s,%(movie_url)s)
    '''
    conn = pymysql.connect(**config)
    try:
        # The cursor context manager closes the cursor even on failure.
        with conn.cursor() as cursor:
            cursor.executemany(sql, data)
        # Commit only after every row was sent successfully.
        conn.commit()
    finally:
        # Always release the connection, also when the insert raised.
        conn.close()
3,创建数据库
先创建数据库和数据表,然后再插入数据,建表语句为:
-- Table that stores the scraped movies, one row per movie.
create table t_movie(
-- Surrogate key, generated automatically on insert.
id int primary key auto_increment,
-- Rating, stored as text (filled from the entry's 'rate' field).
movie_rate varchar(200),
-- Title (filled from the entry's 'title' field).
movie_name varchar(200),
-- Link (filled from the entry's 'url' field).
movie_url varchar(200)
) engine=Innodb charset utf8;
4,完整代码
import json
import requests
import pymysql
def get_data():
    """Fetch the "hot movies" JSON listing from Douban and return its rows.

    Sends one GET request to the ``search_subjects`` endpoint and reads the
    ``subjects`` array of the JSON response.

    Returns:
        list[dict]: One dict per entry of ``subjects``, in response order,
        with the keys ``movie_rate``, ``movie_name`` and ``movie_url``
        (taken from the entry's ``rate``, ``title`` and ``url`` fields).

    Raises:
        requests.exceptions.RequestException: On connection failure, on
            timeout, or when the server answers with a 4xx/5xx status.
        ValueError: If the response body is not valid JSON.
        KeyError: If ``subjects`` or one of the per-movie fields is missing.
    """
    # The ``tag`` value is the percent-encoded UTF-8 form of "热门".
    url = (
        'https://movie.douban.com/j/search_subjects'
        '?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend'
        '&page_limit=330&page_start=0'
    )
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
    }
    # Without a timeout ``requests`` waits indefinitely for a stalled server.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    subjects = response.json()['subjects']
    return [
        {
            'movie_rate': movie['rate'],
            'movie_name': movie['title'],
            'movie_url': movie['url'],
        }
        for movie in subjects
    ]
def sava_data(data):
    """Insert movie rows into the ``t_movie`` table of the ``douban`` database.

    Opens a connection to the MySQL server at 127.0.0.1:3306 as ``root``,
    inserts every row in one ``executemany`` call and commits.

    Args:
        data: Sequence of dicts, each providing the keys ``movie_rate``,
            ``movie_name`` and ``movie_url`` used by the named placeholders
            of the insert statement.

    Raises:
        pymysql.MySQLError: If connecting, inserting or committing fails.
            The connection is closed in that case as well.
    """
    config = {
        'host': '127.0.0.1',
        'port': 3306,
        'user': 'root',
        'password': '',
        'database': 'douban',
        'charset': 'utf8',
    }
    sql = '''
    insert into t_movie
    (movie_rate, movie_name, movie_url)
    values
    (%(movie_rate)s,%(movie_name)s,%(movie_url)s)
    '''
    conn = pymysql.connect(**config)
    try:
        # The cursor context manager closes the cursor even on failure.
        with conn.cursor() as cursor:
            cursor.executemany(sql, data)
        # Commit only after every row was sent successfully.
        conn.commit()
    finally:
        # Always release the connection, also when the insert raised.
        conn.close()
if __name__ == '__main__':
    # Script entry point: scrape the listing first, then persist the rows.
    movies = get_data()
    sava_data(movies)
运行完后查看数据库