【发布时间】:2019-01-03 00:28:59
【问题描述】:
我的脚本是刮 NHL-API,它应该刮掉从 2017020001 到 2017021271 一个赛季的所有比赛。我刚刚完成它并意识到只有最后一场比赛被附加到每个相应的列表中.所以在我的例子中,只有 2017021271 场比赛,而不是之前的 1270 场比赛。
我的代码看起来像这样,我做错了什么?
我知道这与我的临界延迟循环有关,但我不知道如何解决它。感谢您的理解!
#Importing Libraries
import numpy as np
import pandas as pd
import requests
import json
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
#Create Empty lists
player_id = {}
goalie_id = {}
person = []
position = []
skaterstats = []
goalie_person=[]
goalie_position=[]
goalie_stats=[]
team = []
team_goals = []
matchid = []
#Connect to NHL-API
for game_id in range(2017020001, 2017020100, 1):
url = 'https://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id)
r = requests.get(url)
game_data = r.json()
#Get Keys for Players/Goalies
for homeaway in ['home','away']:
player_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('skaters')
player_id[homeaway] = player_dict
for homeaway in ['home','away']:
goalie_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('goalies')
goalie_id[homeaway] = goalie_dict
#Get PlayerStats/TeamStats
for homeaway in player_id:
for playerID in player_id[homeaway]:
play_dict_teamname = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('team').get('name')
play_dict_teamgoals = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('teamStats').get('teamSkaterStats').get('goals')
play_dict_gameid = game_data.get('gamePk')
play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('person')
play_dict_position = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('position')
play_dict_skaterstats = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('stats').get('skaterStats')
#Append TeamStats to Empty list
team.append(play_dict_teamname)
team_goals.append(play_dict_teamgoals)
matchid.append(play_dict_gameid)
#Append PlayerStats to Empty list
person.append(play_dict_person)
position.append(play_dict_position)
if play_dict_skaterstats:
skaterstats.append(play_dict_skaterstats)
if not play_dict_skaterstats:
play_dict_skaterstats = {}
play_dict_skaterstats['timeOnIce'] = None
play_dict_skaterstats['assists'] = None
play_dict_skaterstats['goals'] = None
play_dict_skaterstats['shots'] = None
play_dict_skaterstats['hits'] = None
play_dict_skaterstats['powerPlayGoals'] = None
play_dict_skaterstats['powerPlayAssists'] = None
play_dict_skaterstats['penaltyMinutes'] = None
play_dict_skaterstats['faceOffPct'] = None
play_dict_skaterstats['faceOffWins'] = None
play_dict_skaterstats['faceoffTaken'] = None
play_dict_skaterstats['takeaways'] = None
play_dict_skaterstats['giveaways'] = None
play_dict_skaterstats['shortHandedGoals'] = None
play_dict_skaterstats['shortHandedAssists'] = None
play_dict_skaterstats['blocked'] = None
play_dict_skaterstats['plusMinus'] = None
play_dict_skaterstats['evenTimeOnIce'] = None
play_dict_skaterstats['powerPlayTimeOnIce'] = None
play_dict_skaterstats['shortHandedTimeOnIce'] = None
skaterstats.append(play_dict_skaterstats)
#Get GoalieStats
for homeaway in goalie_id:
for goalieID in goalie_id[homeaway]:
play_dict_teamname = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('team').get('name')
play_dict_teamgoals = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('teamStats').get('teamSkaterStats').get('goals')
play_dict_gameid = game_data.get('gamePk')
goalie_dict_person = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('person')
goalie_dict_position = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('position')
goalie_dict_stats = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('stats').get('goalieStats')
#Append GoalieStats to Empty list
goalie_person.append(goalie_dict_person)
goalie_position.append(goalie_dict_position)
if goalie_dict_stats:
goalie_stats.append(goalie_dict_stats)
#Append TeamStats to Empty list
team.append(play_dict_teamname)
team_goals.append(play_dict_teamgoals)
matchid.append(play_dict_gameid)
#Create DataFrames for all lists
df_person = pd.DataFrame(person)
df_position = pd.DataFrame(position)
df_skaterstats = pd.DataFrame(skaterstats)
df_team = pd.DataFrame(team)
df_teamgoals = pd.DataFrame(team_goals)
df_gameID = pd.DataFrame(matchid)
df_goalie_per = pd.DataFrame(goalie_person)
df_goalie_pos = pd.DataFrame(goalie_position)
df_goalie_stats = pd.DataFrame(goalie_stats)
【问题讨论】:
-
请重新格式化您问题中的代码,Python 需要适当的缩进。
-
并且请将示例减少到演示问题的最少代码量。
-
@MichaelButscher 我已经在笔记本中完成了这个,所以这正是它的意图,在发布之前检查。很抱歉不能提供更多帮助
-
@mkrieger1 据我所知,整个代码就是问题所在,也许我在描述问题时没有说清楚?如果没有,请告诉我,我会努力做得更好,谢谢。
-
任何引用你的变量 game_data 的东西都需要在你的循环中。所以 game_data = r.json() 之后的所有内容都需要缩进。
标签: python python-2.7 loops web-scraping