【问题标题】:How to fix loops so that all scraped data gets appended to their lists?如何修复循环,以便所有抓取的数据都附加到它们的列表中?
【发布时间】:2019-01-03 00:28:59
【问题描述】:

我的脚本用于抓取 NHL-API 的数据,它应该抓取一个赛季中从 2017020001 到 2017021271 的所有比赛。我刚写完就发现,只有最后一场比赛被附加到了各个对应的列表中。所以在我的例子中,列表里只有编号为 2017021271 的那一场比赛,而没有它之前的 1270 场比赛。

我的代码看起来像这样,我做错了什么?

我知道这与我的循环缩进(嵌套)有关,但我不知道如何解决它。感谢您的理解!

#Importing Libraries 
import numpy as np
import pandas as pd
import requests
import json
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder

#Create Empty lists
# Lookup of skater / goalie IDs keyed by 'home' and 'away'.
player_id = {}
goalie_id = {}

# One entry is meant to be collected per skater.
person = []
position = []
skaterstats = []

# One entry is meant to be collected per goalie.
goalie_person=[]
goalie_position=[]
goalie_stats=[]

# Team name, team goals and game ID; appended in both the skater and the goalie section.
team = []
team_goals = []
matchid = []

#Connect to NHL-API
# NOTE(review): game_data is rebound on every iteration and nothing else is
# inside this loop body, so once the loop finishes only the response of the
# last game_id is left for the code below to read.
for game_id in range(2017020001, 2017020100, 1):
    url = 'https://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id)
    r = requests.get(url)
    game_data = r.json()

#Get Keys for Players/Goalies
# Store the 'skaters' ID list of each side under its 'home'/'away' key.
for homeaway in ['home','away']:
    player_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('skaters')
    player_id[homeaway] = player_dict

# Same for the 'goalies' ID list of each side.
for homeaway in ['home','away']:
    goalie_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('goalies')
    goalie_id[homeaway] = goalie_dict 

#Get PlayerStats/TeamStats
# NOTE(review): as posted, the statements after the inner `for` header are not
# indented beneath it, which is not valid Python; presumably the notebook had
# them nested inside the inner loop -- confirm against the original.
for homeaway in player_id:
    for playerID in player_id[homeaway]:
    play_dict_teamname = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('team').get('name')
    play_dict_teamgoals = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('teamStats').get('teamSkaterStats').get('goals')                
    play_dict_gameid = game_data.get('gamePk')

    # Player records are looked up under the key 'ID' + <player id>.
    play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('person')
    play_dict_position = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('position')
    play_dict_skaterstats = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('stats').get('skaterStats')

     #Append TeamStats to Empty list
    team.append(play_dict_teamname)
    team_goals.append(play_dict_teamgoals)
    matchid.append(play_dict_gameid)

    #Append PlayerStats to Empty list
    person.append(play_dict_person)
    position.append(play_dict_position)
    if play_dict_skaterstats: 
        skaterstats.append(play_dict_skaterstats)
    # No skaterStats entry: build a placeholder with every stat set to None.
    if not play_dict_skaterstats:
        play_dict_skaterstats = {}
        play_dict_skaterstats['timeOnIce'] = None
        play_dict_skaterstats['assists'] = None
        play_dict_skaterstats['goals'] = None
        play_dict_skaterstats['shots'] = None
        play_dict_skaterstats['hits'] = None
        play_dict_skaterstats['powerPlayGoals'] = None
        play_dict_skaterstats['powerPlayAssists'] = None
        play_dict_skaterstats['penaltyMinutes'] = None
        play_dict_skaterstats['faceOffPct'] = None
        play_dict_skaterstats['faceOffWins'] = None
        play_dict_skaterstats['faceoffTaken'] = None
        play_dict_skaterstats['takeaways'] = None
        play_dict_skaterstats['giveaways'] = None
        play_dict_skaterstats['shortHandedGoals'] = None
        play_dict_skaterstats['shortHandedAssists'] = None
        play_dict_skaterstats['blocked'] = None
        play_dict_skaterstats['plusMinus'] = None
        play_dict_skaterstats['evenTimeOnIce'] = None
        play_dict_skaterstats['powerPlayTimeOnIce'] = None
        play_dict_skaterstats['shortHandedTimeOnIce'] = None

# NOTE(review): posted at column 0, i.e. outside every loop, so it would run
# only once; presumably it was meant to append the placeholder built in the
# `if not` branch above -- confirm.
skaterstats.append(play_dict_skaterstats)

#Get GoalieStats
# NOTE(review): same indentation problem as in the skater section above.
for homeaway in goalie_id:
    for goalieID in goalie_id[homeaway]:
    play_dict_teamname = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('team').get('name')
    play_dict_teamgoals = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('teamStats').get('teamSkaterStats').get('goals')                
    play_dict_gameid = game_data.get('gamePk')

    # The chained assignments below also rebind play_dict_person each time.
    goalie_dict_person = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('person')
    goalie_dict_position = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('position')
    goalie_dict_stats = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('stats').get('goalieStats')

    #Append GoalieStats to Empty list
    goalie_person.append(goalie_dict_person)
    goalie_position.append(goalie_dict_position)
    # Stats are appended only when present, so goalie_stats can end up shorter
    # than goalie_person / goalie_position.
    if goalie_dict_stats: 
        goalie_stats.append(goalie_dict_stats)

    #Append TeamStats to Empty list
    team.append(play_dict_teamname)
    team_goals.append(play_dict_teamgoals)
    matchid.append(play_dict_gameid)

#Create DataFrames for all lists
# Each collected list becomes its own DataFrame.
df_person = pd.DataFrame(person)
df_position = pd.DataFrame(position)
df_skaterstats = pd.DataFrame(skaterstats)

df_team = pd.DataFrame(team)
df_teamgoals = pd.DataFrame(team_goals)
df_gameID = pd.DataFrame(matchid)

df_goalie_per = pd.DataFrame(goalie_person)
df_goalie_pos = pd.DataFrame(goalie_position)
df_goalie_stats = pd.DataFrame(goalie_stats)

【问题讨论】:

  • 请重新格式化您问题中的代码,Python 需要适当的缩进。
  • 并且请将示例减少到演示问题的最少代码量。
  • @MichaelButscher 我已经在笔记本中完成了这个,所以这正是它的意图,在发布之前检查。很抱歉不能提供更多帮助
  • @mkrieger1 据我所知,整个代码就是问题所在,也许我在描述问题时没有说清楚?如果没有,请告诉我,我会努力做得更好,谢谢。
  • 任何引用你的变量 game_data 的东西都需要在你的循环中。所以 game_data = r.json() 之后的所有内容都需要缩进。

标签: python python-2.7 loops web-scraping


【解决方案1】:

修复了缩进以在初始 for 循环中包含您需要的内容。看看这是否解决了您的问题:

#Importing Libraries 
import numpy as np
import pandas as pd
import requests
import json
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder

#Create Empty lists
# Skater / goalie ID lists of the game currently being processed, keyed by
# 'home' and 'away'. They are overwritten for every game.
player_id = {}
goalie_id = {}

# Skater rows: person, position and skaterstats receive exactly one entry per
# skater, so the three lists stay index-aligned.
person = []
position = []
skaterstats = []

# Goalie rows.
goalie_person = []
goalie_position = []
goalie_stats = []

# Team name, team goals and game ID. One entry is appended for every skater
# row and for every goalie row (skaters of a game first, then its goalies).
team = []
team_goals = []
matchid = []

# Columns of the placeholder row used for a skater without 'skaterStats'.
SKATER_STAT_KEYS = (
    'timeOnIce',
    'assists',
    'goals',
    'shots',
    'hits',
    'powerPlayGoals',
    'powerPlayAssists',
    'penaltyMinutes',
    'faceOffPct',
    'faceOffWins',
    'faceoffTaken',
    'takeaways',
    'giveaways',
    'shortHandedGoals',
    'shortHandedAssists',
    'blocked',
    'plusMinus',
    'evenTimeOnIce',
    'powerPlayTimeOnIce',
    'shortHandedTimeOnIce',
)

#Connect to NHL-API
# Everything that reads game_data lives inside this loop; otherwise only the
# last downloaded game would ever be processed.
for game_id in range(2017020001, 2017020100, 1):
    url = 'https://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id)
    # A timeout keeps one stalled request from hanging the whole scrape, and
    # raise_for_status() reports an HTTP error instead of parsing its body.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    game_data = r.json()

    # Resolve the parts shared by every lookup of this game once.
    teams = game_data.get('liveData').get('boxscore').get('teams')
    play_dict_gameid = game_data.get('gamePk')

    #Get Keys for Players/Goalies
    for homeaway in ['home', 'away']:
        player_dict = teams.get(homeaway).get('skaters')
        player_id[homeaway] = player_dict

        goalie_dict = teams.get(homeaway).get('goalies')
        goalie_id[homeaway] = goalie_dict

    #Get PlayerStats/TeamStats
    for homeaway in player_id:
        team_box = teams.get(homeaway)
        play_dict_teamname = team_box.get('team').get('name')
        play_dict_teamgoals = team_box.get('teamStats').get('teamSkaterStats').get('goals')
        players = team_box.get('players')

        for playerID in player_id[homeaway]:
            # Player records are stored under the key 'ID' + <player id>.
            player = players.get('ID' + str(playerID))
            play_dict_person = player.get('person')
            play_dict_position = player.get('position')
            play_dict_skaterstats = player.get('stats').get('skaterStats')

            #Append TeamStats to Empty list
            team.append(play_dict_teamname)
            team_goals.append(play_dict_teamgoals)
            matchid.append(play_dict_gameid)

            #Append PlayerStats to Empty list
            person.append(play_dict_person)
            position.append(play_dict_position)
            if not play_dict_skaterstats:
                # No stats for this skater: use an all-None placeholder row.
                play_dict_skaterstats = dict.fromkeys(SKATER_STAT_KEYS)
            # Appended once per skater, inside the inner loop, so that
            # skaterstats lines up with person and position.
            skaterstats.append(play_dict_skaterstats)

    #Get GoalieStats
    for homeaway in goalie_id:
        team_box = teams.get(homeaway)
        play_dict_teamname = team_box.get('team').get('name')
        play_dict_teamgoals = team_box.get('teamStats').get('teamSkaterStats').get('goals')
        players = team_box.get('players')

        for goalieID in goalie_id[homeaway]:
            goalie = players.get('ID' + str(goalieID))
            goalie_dict_person = goalie.get('person')
            goalie_dict_position = goalie.get('position')
            goalie_dict_stats = goalie.get('stats').get('goalieStats')

            #Append GoalieStats to Empty list
            goalie_person.append(goalie_dict_person)
            goalie_position.append(goalie_dict_position)
            # NOTE(review): stats are appended only when present, so
            # goalie_stats can be shorter than goalie_person/goalie_position.
            if goalie_dict_stats:
                goalie_stats.append(goalie_dict_stats)

            #Append TeamStats to Empty list
            team.append(play_dict_teamname)
            team_goals.append(play_dict_teamgoals)
            matchid.append(play_dict_gameid)

#Create DataFrames for all lists
df_person = pd.DataFrame(person)
df_position = pd.DataFrame(position)
df_skaterstats = pd.DataFrame(skaterstats)

df_team = pd.DataFrame(team)
df_teamgoals = pd.DataFrame(team_goals)
df_gameID = pd.DataFrame(matchid)

df_goalie_per = pd.DataFrame(goalie_person)
df_goalie_pos = pd.DataFrame(goalie_position)
df_goalie_stats = pd.DataFrame(goalie_stats)

【讨论】:

  • 非常感谢您的帮助,将立即尝试!
  • 完美地工作,而且肯定会更快。谢谢!
  • 酷。看起来您在合并这些数据帧方面还有一些工作要做,但希望这能让您顺利上路。你明白我的意思吗?原来获取 game_data 的是一个独立的循环:程序要等这个循环把所有比赛都请求完之后,才会进入后面的循环,而此时 game_data 里只剩最后一场比赛的数据,因此只会得到最后一场比赛。
  • Nono,其余的代码已经完成,在 API 的一页上测试和编码了 12 个小时,现在当我去获取所有内容时,那部分没有完成它的工作.再次感谢您!
  • 这是我构建的第二个模型,我只是把数据全部抓取下来并输入神经网络,以预测谁将获胜。第一个模型用的是比赛数据,这个模型用的是球员数据,之后我会想办法把它们结合起来(虽然还不知道该怎么做),希望能获得更高的准确率!现在是 70%,真的很有趣
猜你喜欢
  • 2019-04-15
  • 1970-01-01
  • 1970-01-01
  • 2014-10-24
  • 2018-02-01
  • 1970-01-01
  • 2020-07-26
  • 2021-11-29
  • 2016-04-22
相关资源
最近更新 更多