PythonNFL2022数据分析

作者 by aigle / 2021-11-30 / 暂无评论

第七组 NFL数据分析

组员：张英成、张志鹏、高跃瑛、唐惠琳
贡献度：25%、25%、25%、25%

网页版报告请访问https://xidians.com/z.html

一、数据读取

完成人 张志鹏
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

导入相关包


# Load the five CSV inputs into DataFrames. The paths are hard-coded
# absolute paths; adjust them if the data lives somewhere else.
players_df=pd.read_csv('/21秋/数据挖掘/橄榄球比赛/players.csv')
games_df = pd.read_csv('/21秋/数据挖掘/橄榄球比赛/games.csv')
plays_df = pd.read_csv('/21秋/数据挖掘/橄榄球比赛/plays.csv')
# Tracking rows; later cells filter them by gameId / playId.
tracking_2018_df = pd.read_csv('/21秋/数据挖掘/橄榄球比赛/tracking2018.csv')
nflscouting_df = pd.read_csv('/21秋/数据挖掘/橄榄球比赛/PFFScoutingData.csv')

导入数据文件


games_df.head()  # preview the first rows of games.csv

nflscouting_df.head()  # preview the first rows of PFFScoutingData.csv

tracking_2018_df.head()  # preview the first rows of tracking2018.csv

players_df.head()  # preview the first rows of the players table

# Per-column summary of the players table.
# NOTE(review): explore_numerical_types is defined further down in this
# file, so its definition must have been executed before this call runs.
explore_numerical_types(players_df)

二、数据统计

完成人 高跃瑛 唐惠琳 张英成

柱状图

完成人 高跃瑛
# 5x2 grid of overview charts for the plays table: histograms for the
# numeric columns, bar charts of value counts for the categorical ones.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(15, 20))
ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8, ax9, ax10 = axes.ravel()

# Row 1: kick length and return yardage (rows without a return are dropped).
plays_df['kickLength'].plot.hist(bins=50, title='Kick length', grid=True, ax=ax1)
return_yards = plays_df['kickReturnYardage'].dropna()
return_yards.plot.hist(bins=50, title='Return result (yds)', grid=True, ax=ax2)

# Row 2: net play result and yards to go.
plays_df['playResult'].plot.hist(bins=50, title='Play result (yds)', grid=True, ax=ax3)
plays_df['yardsToGo'].plot.hist(bins=20, title='Yards to go at play start', grid=True, ax=ax4)

# Row 3: penalty yardage and the ten most frequent penalty codes.
plays_df['penaltyYards'].plot.hist(title='Penalty yards', grid=True, ax=ax5)
top_penalty_codes = plays_df['penaltyCodes'].value_counts().head(10)
top_penalty_codes.plot.bar(title='Penalty codes (top 10)', ax=ax6)

# Row 4: special-teams play type and result.
play_type_counts = plays_df['specialTeamsPlayType'].value_counts()
play_type_counts.plot.bar(title='Play type', ax=ax7)
play_result_counts = plays_df['specialTeamsResult'].value_counts()
play_result_counts.plot.bar(title='Play result breakdown', ax=ax8)

# Row 5: pass results (non-null only) and yardline of the play.
pass_result_counts = plays_df['passResult'].dropna().value_counts()
pass_result_counts.plot.bar(title='Pass result breakdown', ax=ax9)
plays_df['yardlineNumber'].plot.hist(bins=20, title='Where plays happen (yardline #)', grid=True, ax=ax10)

plt.tight_layout()


分别生成 Kick length、Return result (yds)、Play result (yds)、Yards to go at play start、Penalty yards、Penalty codes (top 10)、Play type、Play result breakdown、Pass result breakdown、Where plays happen (yardline #) 共十幅图（数值列为直方图，类别列为柱状图）。

def explore_numerical_types(df):
    """Build a per-column summary table for ``df``.

    The result is indexed by the column names of ``df``. Every column gets
    its dtype, non-null count, null count and number of unique values.
    Numeric columns additionally get min, max, mean, median and standard
    deviation; for non-numeric columns (text, booleans, ...) those five
    statistics are left as NaN.

    Args:
        df: The DataFrame to summarise.

    Returns:
        A DataFrame with one row per column of ``df`` and the columns
        'Data Type', 'Count', 'Null Values', 'Unique Values', 'Min', 'Max',
        'Average', 'Median' and 'St. Dev.'.
    """
    df_types = pd.DataFrame(df.dtypes, columns=['Data Type'])

    # Select the numeric columns by dtype. Matching the dtype objects
    # against the strings 'object'/'bool' with isin() is not a reliable
    # filter and lets text columns reach mean()/std(), which fails on them.
    numeric_df = df.select_dtypes(include='number')

    # Statistics that are defined for every column.
    df_types['Count'] = df.count()
    df_types['Null Values'] = df.isnull().sum()
    df_types['Unique Values'] = df.nunique()

    # Statistics for numeric columns only; assignment aligns on the column
    # name, so every other row ends up as NaN.
    df_types['Min'] = numeric_df.min()
    df_types['Max'] = numeric_df.max()
    df_types['Average'] = numeric_df.mean()
    df_types['Median'] = numeric_df.median()
    df_types['St. Dev.'] = numeric_df.std()
    return df_types
# Turn the 'height' strings into a numeric column. Values containing a '-'
# are split into two parts and combined as first * 12 + second; values
# without a '-' are kept unchanged (presumably already in inches — TODO confirm).
check = players_df['height'].str.split('-', expand=True)
check.columns = ['first', 'second']
# Only rows that actually had a second part are recomputed.
check.loc[(check['second'].notnull()), 'first'] = check[check['second'].notnull()]['first'].astype(np.int16) * 12 + check[check['second'].notnull()]['second'].astype(np.int16)
players_df['height'] = check['first']
players_df['height'] = players_df['height'].astype(np.float32)
# Divide by 12 (inches -> feet, assuming the values are inches at this point).
players_df['height'] /= 12
# NOTE(review): presumably a feet -> metres conversion, but one metre is
# 3.28084 ft, not 3.288399. The value counts listed after this block were
# produced with 3.288399 — confirm before changing the constant.
players_df['height']/=3.288399
players_df
players_df["height"].value_counts()# counts of each distinct height value (the original comment said "weight")

1.900621    394
1.875279    361
1.925963    322
1.849938    316
1.824596    295
1.951304    290
1.799254    241
1.773913    158
1.976646    152
1.748571     75
2.001987     54
1.723230     33
2.027329     22
1.697888      7
2.052671      6
1.672546      6
Name: height, dtype: int64
# One 12-bin histogram per player attribute, each on its own 10x6 figure.
for column, title in (
    ('height', 'Height Distribution'),
    ('weight', 'weight Distribution'),
):
    plt.figure(figsize=(10, 6))
    ax = sns.histplot(players_df[column], bins=12)
    ax.set_title(title)

饼图

完成人 唐惠琳
# Side-by-side pie charts: share of plays per down and per quarter.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 15))
down_counts = plays_df['down'].value_counts()
quarter_counts = plays_df['quarter'].value_counts()
down_counts.plot.pie(title='Down when plays happen', ax=ax1)
quarter_counts.plot.pie(title='Quarter when plays happen', ax=ax2)
plt.tight_layout()


生成Down when plays happen、Quarter when plays happen饼图

对发球和回攻阵型分析

完成人 唐惠琳
# Keep only the plays whose special-teams play type is 'Kickoff'.
kickoffs_df = plays_df[plays_df['specialTeamsPlayType'].eq('Kickoff')]
kickoffs_df.head()  # preview the kickoff plays

# Of those, keep only the kickoffs whose result is 'Return'.
kickoff_returns_df = kickoffs_df[kickoffs_df['specialTeamsResult'].eq('Return')]
kickoff_returns_df.head()  # preview the returned kickoffs

# How often each kick type occurs among returned kickoffs.
kickoff_returns_df['kickType'].value_counts()

D 2309

P 200

O 124

Q 123

F 109

K 47

S 9

Name: kickType, dtype: int64

绘制开球回攻盒线图

完成人 唐惠琳
# Horizontal box plots of kickoff return yardage, grouped first by kick
# type and then by return formation. The automatic x label and subplot
# title are cleared on each plot.
for group_col, fig_size in (
    ('kickType', (14, 6)),
    ('kickoffReturnFormation', (14, 8)),
):
    grouped_view = kickoff_returns_df[[group_col, 'kickReturnYardage']]
    ax = grouped_view.boxplot(by=group_col, figsize=fig_size, vert=False)
    ax.set_xlabel(None)
    ax.set_title(None)

散点图

完成人 张英成
# Scatter plot of kickoff return yardage against kick hang time.
kickoff_returns_df.plot.scatter(x='hangTime', y='kickReturnYardage', figsize=(14,6), title="Kickoff Return Yards vs. Kick Hang Time (s)")

# NOTE(review): p_s and p_sx are not defined anywhere in the visible source;
# the indexing below requires 2-D arrays, with column 0 plotted on x and
# column 1 on y. The code that builds them appears to be missing — confirm.
plt.plot(p_s[:,0], p_s[:,1],'rs',  markersize = 4, label='speed')
plt.plot(p_sx[:,0], p_sx[:,1],'gs', markersize = 4, label='speed_x')
plt.legend(loc='upper right')

三、数据展示

完成人 张英成 张志鹏
# Lookup table from position abbreviation to its full name.
player_position = {
    "WR": "Wide Receiver",
    "CB": "Cornerback",
    "RB": "Running Back",
    "TE": "Tight End",
    "OLB": "Outside Linebacker",
    "QB": "Quarterback",
    "FS": "Free Safety",
    "LB": "Linebacker",
    "SS": "Strong Safety",
    "ILB": "Inside Linebacker",
    "DE": "Defensive End",
    "DB": "Defensive Back",
    "MLB": "Middle Linebacker",
    "DT": "Defensive Tackle",
    "FB": "Fullback",
    "P": "Punter",
    "LS": "Long snapper",
    "S": "Safety",
    "K": "Kicker",
    "HB": "Running back",
    "NT": "Nose Tackle",
}
# Per-column summary of the tracking table.
explore_numerical_types(tracking_2018_df)

# Pick the tracking rows of one play: playId 36 in game 2018123000.
mask_playId = tracking_2018_df['playId'].eq(36)
mask_gameId = tracking_2018_df['gameId'].eq(2018123000)
df_track = tracking_2018_df[mask_playId & mask_gameId]
df_track  # show the selected rows

绘制比赛全体球手运动轨迹图

# Trajectory plots: for each selection below, draw the (x, y) tracking
# points on a fresh 12x8 figure, one plot call per team, legend removed.
# Authors: Zhang Yingcheng (selections 1, 2 and 4), Zhang Zhipeng (selection 3).
for selection in (
    'gameId == 2018091001',                          # every row of one game
    'gameId == 2018091609 and position == "WR"',     # rows with position "WR" in one game
    'gameId == 2018091001 and playId == 4033',       # a single play of one game
    'gameId == 2018091609 and position == "CB"',     # rows with position "CB" in one game
):
    fig, ax = plt.subplots(figsize=(12, 8))
    selected_rows = tracking_2018_df.query(selection)
    selected_rows.groupby('team').plot(x='x', y='y', ax=ax, style='.')
    plt.legend().remove()

# Draw one line per distinct displayName in the selected play, labelled
# with that name. Author: Zhang Zhipeng.
for display in df_track['displayName'].unique():
    player_rows = df_track[df_track['displayName'] == display]
    plt.plot(player_rows['x'], player_rows['y'], label=display)
plt.legend()
plt.show()

评论已关闭