作者 by aigle / 2021-11-30 / 暂无评论
| 组员 | 张英成 | 张志鹏 | 高跃瑛 | 唐惠琳 |
|---|---|---|---|---|
| 贡献度 | 25% | 25% | 25% | 25% |
网页版报告请访问https://xidians.com/z.html
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt  # import the required packages
# Load the CSV inputs into DataFrames. All files live in one directory, so the
# directory is named once instead of being repeated in every path.
DATA_DIR = '/21秋/数据挖掘/橄榄球比赛'
players_df = pd.read_csv(f'{DATA_DIR}/players.csv')
games_df = pd.read_csv(f'{DATA_DIR}/games.csv')
plays_df = pd.read_csv(f'{DATA_DIR}/plays.csv')
tracking_2018_df = pd.read_csv(f'{DATA_DIR}/tracking2018.csv')
nflscouting_df = pd.read_csv(f'{DATA_DIR}/PFFScoutingData.csv')
games_df.head()  # preview the first rows of games.csv
nflscouting_df.head()  # preview the first rows of PFFScoutingData.csv
tracking_2018_df.head()  # preview the first rows of tracking2018.csv
players_df.head()  # preview the first rows of the player data
# NOTE(review): explore_numerical_types is defined further down in this file;
# run top to bottom as a plain script, this call would come before the def —
# confirm the intended cell order.
explore_numerical_types(players_df)  # summary table for the player data
# 5x2 grid of overview charts for the plays table.
fig, axes = plt.subplots(5, 2, figsize=(15, 20))
((ax1, ax2), (ax3, ax4), (ax5, ax6), (ax7, ax8), (ax9, ax10)) = axes

# Histograms: (values, target axes, extra keyword arguments for plot.hist).
hist_specs = (
    (plays_df['kickLength'], ax1, dict(bins=50, title='Kick length')),
    (plays_df['kickReturnYardage'].dropna(), ax2,
     dict(bins=50, title='Return result (yds)')),
    (plays_df['playResult'], ax3, dict(bins=50, title='Play result (yds)')),
    (plays_df['yardsToGo'], ax4,
     dict(bins=20, title='Yards to go at play start')),
    (plays_df['penaltyYards'], ax5, dict(title='Penalty yards')),
    (plays_df['yardlineNumber'], ax10,
     dict(bins=20, title='Where plays happen (yardline #)')),
)
for values, target_ax, options in hist_specs:
    values.plot.hist(grid=True, ax=target_ax, **options)

# Bar charts of category frequencies: (counts, target axes, title).
bar_specs = (
    (plays_df['penaltyCodes'].value_counts().head(10), ax6,
     'Penalty codes (top 10)'),
    (plays_df['specialTeamsPlayType'].value_counts(), ax7, 'Play type'),
    (plays_df['specialTeamsResult'].value_counts(), ax8,
     'Play result breakdown'),
    (plays_df['passResult'].dropna().value_counts(), ax9,
     'Pass result breakdown'),
)
for counts, target_ax, chart_title in bar_specs:
    counts.plot.bar(title=chart_title, ax=target_ax)

plt.tight_layout()
分别生成 Kick length、Return result (yds)、Play result (yds)、Yards to go at play start、Penalty yards、Penalty codes (top 10)、Play type、Play result breakdown、Pass result breakdown、Where plays happen (yardline #) 的直方图与柱状图。
def explore_numerical_types(df):
    """Build a per-column summary table for ``df``.

    Args:
        df: The ``pandas.DataFrame`` to describe.

    Returns:
        A ``DataFrame`` indexed by the column names of ``df`` with the
        columns ``Data Type``, ``Count``, ``Null Values``, ``Unique Values``,
        ``Min``, ``Max``, ``Average``, ``Median`` and ``St. Dev.``. The last
        five are filled for numeric columns only and are NaN for every
        other column.
    """
    df_types = df.dtypes.to_frame('Data Type')

    # Let pandas decide what counts as numeric instead of comparing dtype
    # objects against strings; this keeps text and boolean columns out of
    # the mean/median/std calls below.
    numerical = df.select_dtypes(include='number')

    df_types['Count'] = df.count()
    df_types['Null Values'] = df.isnull().sum()
    df_types['Unique Values'] = df.nunique()

    # These assignments align on the column names, so rows that belong to
    # non-numeric columns are left as NaN.
    df_types['Min'] = numerical.min()
    df_types['Max'] = numerical.max()
    df_types['Average'] = numerical.mean()
    df_types['Median'] = numerical.median()
    df_types['St. Dev.'] = numerical.std()
    return df_types
# Split the height strings on '-'. Rows that contain a dash end up with a
# non-null second part; rows without one keep their original value in 'first'.
check = players_df['height'].str.split('-', expand=True)
check.columns = ['first', 'second']

# For the dashed rows, combine both parts into one number: first * 12 + second.
has_second_part = check['second'].notnull()
leading = check.loc[has_second_part, 'first'].astype(np.int16)
trailing = check.loc[has_second_part, 'second'].astype(np.int16)
check.loc[has_second_part, 'first'] = leading * 12 + trailing

players_df['height'] = check['first'].astype(np.float32)
# NOTE(review): dividing by 12 presumably turns inches into feet, and 3.288399
# looks like a feet-per-metre factor, but the usual value is 3.28084 — confirm
# before relying on the resulting heights.
players_df['height'] /= 12
players_df['height'] /= 3.288399
players_df
players_df["height"].value_counts()  # number of players per height value
1.900621 394
1.875279 361
1.925963 322
1.849938 316
1.824596 295
1.951304 290
1.799254 241
1.773913 158
1.976646 152
1.748571 75
2.001987 54
1.723230 33
2.027329 22
1.697888 7
2.052671 6
1.672546 6
Name: height, dtype: int64
# One 12-bin histogram per player attribute, each on its own figure.
for column, chart_title in (('height', 'Height Distribution'),
                            ('weight', 'weight Distribution')):
    plt.figure(figsize=(10, 6))
    ax = sns.histplot(players_df[column], bins=12)
    ax.set_title(chart_title)

# Side-by-side pie charts of how often each down and each quarter occurs.
fig, pie_axes = plt.subplots(1, 2, figsize=(15, 15))
ax1, ax2 = pie_axes
down_counts = plays_df['down'].value_counts()
quarter_counts = plays_df['quarter'].value_counts()
down_counts.plot.pie(title='Down when plays happen', ax=ax1)
quarter_counts.plot.pie(title='Quarter when plays happen', ax=ax2)
plt.tight_layout()

生成Down when plays happen、Quarter when plays happen饼图
# Plays whose specialTeamsPlayType is 'Kickoff'.
is_kickoff = plays_df['specialTeamsPlayType'].eq('Kickoff')
kickoffs_df = plays_df.loc[is_kickoff]
kickoffs_df.head()  # preview the kickoff plays

# Kickoffs whose specialTeamsResult is 'Return'.
was_returned = kickoffs_df['specialTeamsResult'].eq('Return')
kickoff_returns_df = kickoffs_df.loc[was_returned]
kickoff_returns_df.head()  # preview the returned kickoffs

kickoff_returns_df['kickType'].value_counts()  # returned kickoffs per kick type
P 200
O 124
Q 123
F 109
K 47
S 9
Name: kickType, dtype: int64
# Horizontal box plots of return yardage, grouped first by kick type and then
# by return formation; the per-axes x label and title are cleared on each.
for group_col, fig_size in (('kickType', (14, 6)),
                            ('kickoffReturnFormation', (14, 8))):
    grouped_view = kickoff_returns_df[[group_col, 'kickReturnYardage']]
    ax = grouped_view.boxplot(by=group_col, figsize=fig_size, vert=False)
    ax.set_xlabel(None)
    ax.set_title(None)

# Scatter plot of return yardage against kick hang time.
kickoff_returns_df.plot.scatter(
    x='hangTime',
    y='kickReturnYardage',
    figsize=(14, 6),
    title="Kickoff Return Yards vs. Kick Hang Time (s)",
)

# Plot column 1 against column 0 of p_s (red squares) and of p_sx (green
# squares) on the current axes, with a legend in the upper-right corner.
# NOTE(review): p_s and p_sx are not defined anywhere in the visible part of
# this file; presumably they are 2-D arrays built in a cell that was not
# included — confirm before running.
plt.plot(p_s[:,0], p_s[:,1],'rs', markersize = 4, label='speed')
plt.plot(p_sx[:,0], p_sx[:,1],'gs', markersize = 4, label='speed_x')
plt.legend(loc='upper right')

# Lookup table from position abbreviation to its full name.
# NOTE(review): "HB" maps to "Running back" (lower-case "b") while "RB" maps to
# "Running Back", and "Long snapper" is not title-cased like the other values —
# confirm whether the differing spellings are intentional.
player_position = dict(
    WR="Wide Receiver",
    CB="Cornerback",
    RB="Running Back",
    TE="Tight End",
    OLB="Outside Linebacker",
    QB="Quarterback",
    FS="Free Safety",
    LB="Linebacker",
    SS="Strong Safety",
    ILB="Inside Linebacker",
    DE="Defensive End",
    DB="Defensive Back",
    MLB="Middle Linebacker",
    DT="Defensive Tackle",
    FB="Fullback",
    P="Punter",
    LS="Long snapper",
    S="Safety",
    K="Kicker",
    HB="Running back",
    NT="Nose Tackle",
)
explore_numerical_types(tracking_2018_df)  # summary table for the 2018 tracking data

# Keep only the tracking rows with gameId 2018123000 and playId 36.
mask_playId = tracking_2018_df['playId'].eq(36)
mask_gameId = tracking_2018_df['gameId'].eq(2018123000)
df_track = tracking_2018_df[mask_playId & mask_gameId]
df_track  # show the selected rows

def _plot_tracks(selection):
    """Scatter the (x, y) positions of the tracking rows matching ``selection``.

    The rows are grouped by ``team`` and every group is drawn as dots on one
    shared 12x8 axes; the legend is removed afterwards.

    Args:
        selection: Query string passed to ``tracking_2018_df.query``.

    Returns:
        The ``(fig, ax)`` pair the points were drawn on.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    per_team = tracking_2018_df.query(selection).groupby('team')
    per_team.plot(x='x', y='y', ax=ax, style='.')
    plt.legend().remove()
    return fig, ax


# Every tracked position in game 2018091001.
# Author: Zhang Yingcheng
fig, ax = _plot_tracks('gameId == 2018091001')

# Rows with position "WR" in game 2018091609.
# Author: Zhang Yingcheng
fig, ax = _plot_tracks('gameId == 2018091609 and position == "WR"')

# A single play (playId 4033) of game 2018091001.
# Author: Zhang Zhipeng
fig, ax = _plot_tracks('gameId == 2018091001 and playId == 4033')

# Rows with position "CB" in game 2018091609.
# Author: Zhang Yingcheng
fig, ax = _plot_tracks('gameId == 2018091609 and position == "CB"')

# Draw one labelled line per displayName in the selected play (df_track).
# Author: Zhang Zhipeng
for display in df_track['displayName'].unique():
    # Filter once per name and reuse the rows for both coordinates.
    player_track = df_track[df_track['displayName'] == display]
    plt.plot(player_track['x'], player_track['y'], label=display)
plt.legend()
plt.show()

评论已关闭