This report analyzes several statistical measures of the NFL across the 2012–2024 seasons. We will analyze how running backs perform relative to their draft position, the top 20 quarterbacks during this period, the best way to punch in a touchdown in goal-to-go situations, how the quarterback landscape has changed over this time, and how the distribution of yardage has changed.
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import pandas as pd
import matplotlib.patches as mpatches
# Load the offensive stats table; `df` is the base frame the other analysis
# cells in this notebook read from.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
df = pd.read_csv(r"C:\Users\jeffr\Documents\yearly_player_stats_offense.csv")

# Keep only the columns needed to compare draft slot with career production.
df_draft_vs_prod = df[['player_name', 'position', 'draft_ovr',
                       'career_passing_yards', 'career_receiving_yards',
                       'career_rushing_yards']]
df_draft_vs_prod_RB = df_draft_vs_prod[df_draft_vs_prod['position'] == 'RB'].reset_index(drop=True)

# The source table appears to hold one row per player per season (it has a
# `season` column), so plotting df_draft_vs_prod_RB directly would draw the same
# back once for every season he played. Collapse to one point per player using
# his highest recorded career rushing total.
# Rows without a draft slot are dropped explicitly; they have no x-position
# and could not be drawn anyway.
# NOTE(review): grouping on name + draft slot assumes that pair identifies a
# player -- confirm, or switch to a player-id column if the data has one.
rb_career = (
    df_draft_vs_prod_RB
    .dropna(subset=['draft_ovr'])
    .groupby(['player_name', 'draft_ovr'], as_index=False)['career_rushing_yards']
    .max()
)

# Create the scatter plot
fig, ax = plt.subplots(figsize=(18, 10))
ax.scatter(rb_career['draft_ovr'], rb_career['career_rushing_yards'])
ax.set_title('Running Back Career Yards by Draft Position', fontsize=18)
ax.set_xlabel('Overall Draft Position', fontsize=14)
ax.set_ylabel('Total Career Rushing Yds', fontsize=14)
# Thousands separators on the yardage axis.
ax.yaxis.set_major_formatter(
    ticker.FuncFormatter(lambda y, p: format(int(y), ','))
)
fig.tight_layout()
# Bare expression so a notebook cell renders the figure as its output.
fig
Running backs drafted in the first two rounds typically trend toward more career yardage. However, after the first two rounds, running backs drafted in rounds 3-6 have similar output.
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import pandas as pd
import matplotlib.patches as mpatches
# Build one career passing total per quarterback.
# `career_passing_yards` appears to be a running career figure carried on every
# season row (per-season yardage lives in the separate `passing_yards` column),
# so summing it across seasons counts the same yards many times. Take the
# largest value per player instead.
# NOTE(review): confirm the column semantics against the data dictionary, and
# re-check any narrative figures that were written against the summed chart.
df_QB = df[df['position'] == 'QB'][['player_name', 'position', 'career_passing_yards']].reset_index(drop=True)
df_QB = df_QB.groupby(['player_name'], as_index=False).agg(
    {'position': 'first', 'career_passing_yards': 'max'})

# Top 20 by career yards, then re-sorted ascending so the horizontal bar chart
# lists the leader at the top.
df_QB_top20 = df_QB.sort_values('career_passing_yards', ascending=False).head(20).reset_index(drop=True)
df_QB_top20 = df_QB_top20.sort_values('career_passing_yards', ascending=True)

# Colour each bar by whether it beats the mean of the top 20.
avg_passyds_Top20 = df_QB_top20['career_passing_yards'].mean()
colors = ['green' if yards > avg_passyds_Top20 else 'red'
          for yards in df_QB_top20['career_passing_yards']]

fig, ax = plt.subplots(figsize=(20, 12))
df_QB_top20.plot.barh(x='player_name', y='career_passing_yards', color=colors, ax=ax)
ax.set_xlabel('Career Passing Yards', fontsize=14)
ax.set_ylabel('Player Name', fontsize=14)
ax.set_title('Top 20 QBs by Career Passing Yards', fontsize=18)

# Replace the automatic single-series legend with one that explains the colours.
Above = mpatches.Patch(color='green', label='Above Average of Top 20')
Below = mpatches.Patch(color='red', label='Below Average of Top 20')
ax.legend(handles=[Above, Below])
ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, p: format(int(x), ',')))

# Print each bar's value just past its right-hand end.
for bar in ax.patches:
    value = bar.get_width()
    ax.text(value + 1000, bar.get_y() + bar.get_height() / 2,
            f"{value:,.0f}", va='center', fontsize=12)
plt.show()
Matt Stafford leads the pack with over 40,000 more yards than the next quarterback. Nine of the top 20 QBs exceed the top-20 average.
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import pandas as pd
import matplotlib.patches as mpatches
# League-wide goal-to-go attempts and touchdowns, totalled per season.
df_gtg = df[['season', 'pass_attempts_gtg', 'pass_touchdown_gtg', 'rush_attempts_gtg', 'rush_touchdown_gtg']].groupby(['season'], as_index=False).agg(
    {'pass_attempts_gtg': 'sum', 'pass_touchdown_gtg': 'sum', 'rush_attempts_gtg': 'sum', 'rush_touchdown_gtg': 'sum'})
# Season touchdown totals. Not used by this chart any more; the name is kept
# in case other cells read it.
df_tds_by_season = df[['season', 'pass_touchdown', 'receiving_touchdown', 'rush_touchdown']].groupby(['season'], as_index=False).agg(
    {'pass_touchdown': 'sum', 'receiving_touchdown': 'sum', 'rush_touchdown': 'sum'})

# One entry per plotted line:
# (column, legend label, colour, line style, vertical offset of the value labels).
# Attempts are dashed, touchdowns solid; passing labels sit below their line
# and rushing labels above it.
gtg_series = [
    ('pass_attempts_gtg', 'Passing Attempts (Goal to Go)', 'green', '--', -30),
    ('pass_touchdown_gtg', 'Passing TD (Goal to Go)', 'green', '-', -20),
    ('rush_attempts_gtg', 'Rush Attempts (Goal to Go)', 'blue', '--', 20),
    ('rush_touchdown_gtg', 'Rush TD (Goal to Go)', 'blue', '-', 20),
]

fig, ax = plt.subplots(figsize=(18, 10))
for column, label, color, linestyle, label_offset in gtg_series:
    # x and y both come from df_gtg so the points cannot drift out of
    # alignment with a separately built frame.
    ax.plot(df_gtg['season'], df_gtg[column], label=label,
            linewidth=3, linestyle=linestyle, color=color, marker='o')
    for x, y in zip(df_gtg['season'], df_gtg[column]):
        # ',.0f' keeps the labels whole numbers even if the sums are floats.
        ax.text(x, y + label_offset, f"{y:,.0f}", color=color, fontsize=10, ha='center')

ax.set_xlabel('Season', fontsize=18)
ax.set_ylabel('Attempts/TDs', fontsize=18)
ax.set_title('NFL Rushing and Passing Attempts vs TDs in Goal to Go (2012-2024)', fontsize=22)
ax.legend(fontsize=14)
# One tick per season rather than matplotlib's automatic spacing.
ax.set_xticks(df_gtg['season'])
plt.show()
Passing has a higher probability of producing a touchdown in goal-to-go situations. Despite rushing attempts being much higher in goal-to-go situations, the number of rushing and passing touchdowns is relatively similar.
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import pandas as pd
import matplotlib.patches as mpatches
# Passing yards per quarterback per season, ranked within each season
# (rank 1 = most yards; method='first' breaks ties by row order so ranks are unique).
df_qb_season = df[df['position'] == 'QB'][['season', 'player_name', 'passing_yards']].groupby(['season', 'player_name'], as_index=False).agg({'passing_yards': 'sum'})
df_qb_season['rank'] = df_qb_season.groupby('season')['passing_yards'].rank(method='first', ascending=False)

# Keep the five highest-ranked quarterbacks of every season.
df_qb_top5 = df_qb_season[df_qb_season['rank'] <= 5].copy()
df_qb_top5 = df_qb_top5.sort_values(['season', 'rank']).reset_index(drop=True)

# "Core" quarterbacks made the top five in at least three seasons; they get a
# coloured, labelled line. Everyone else is drawn in gray as background.
counts = df_qb_top5['player_name'].value_counts()
core_qbs = counts[counts >= 3].index
df_core = df_qb_top5[df_qb_top5['player_name'].isin(core_qbs)]
df_others = df_qb_top5[~df_qb_top5['player_name'].isin(core_qbs)]

fig, ax = plt.subplots(figsize=(18, 10))
for qb in df_others['player_name'].unique():
    qb_data = df_others[df_others['player_name'] == qb].sort_values('season')
    # A marker is required here: a quarterback with a single top-five season
    # is a one-point series, and a marker-less line through one point draws
    # nothing, which would leave that season showing fewer than five entries.
    ax.plot(qb_data['season'], qb_data['rank'],
            color='lightgray', linewidth=1, alpha=0.6,
            marker='o', markersize=4)
for qb in df_core['player_name'].unique():
    qb_data = df_core[df_core['player_name'] == qb].sort_values('season')
    ax.plot(qb_data['season'], qb_data['rank'],
            marker='o', linewidth=2.5, alpha=0.9, label=qb)

seasons = sorted(df_qb_top5['season'].unique())
rank = sorted(df_qb_top5['rank'].unique())
# Rank 1 belongs at the top of the chart.
ax.invert_yaxis()
ax.set_xlabel('Season', fontsize=14)
ax.set_ylabel('Rank (1 = Most Passing Yards)', fontsize=14)
ax.set_title('Top 5 QBs by Passing Yards Each Season (Bump Chart)', fontsize=18)
ax.set_xticks(seasons)
ax.set_yticks(rank)
# Legend sits outside the axes, to the right, so it does not cover the lines.
ax.legend(title="QBs", fontsize=10, loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
From 2012 to 2018, some of the older QBs were consistently in the top 5. However, after 2019, the younger quarterbacks took over the top rankings. Tom Brady, Matt Ryan, Drew Brees, Peyton Manning, Philip Rivers, and Ben Roethlisberger were all consistently in the top 5 before 2018. However, due to retirements, declining performance, and new talent joining the league, they fell out of the top 5 after 2018. New QBs like Patrick Mahomes, Joe Burrow, and Jared Goff took the top spots post-2018.
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import pandas as pd
import matplotlib.patches as mpatches
# Total yardage of each type per season.
df_yds_by_season = df[['season', 'passing_yards', 'rushing_yards', 'receiving_yards', 'total_yards']].groupby(['season'], as_index=False).agg({
    'passing_yards': 'sum', 'rushing_yards': 'sum', 'receiving_yards': 'sum', 'total_yards': 'sum'})
# Transpose so seasons run along the x-axis and yardage types form the rows.
df_heat = df_yds_by_season.set_index('season').T

plt.figure(figsize=(18, 6))
ax = sns.heatmap(df_heat, cmap="YlGnBu", annot=True, fmt=",.0f", linewidths=.5, cbar_kws={'label': ''})
# Thousands separators on the colour bar to match the cell annotations.
cbar = ax.collections[0].colorbar
cbar.ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, p: format(int(x), ',')))

# Build the row labels from the actual row order of df_heat. The rows are
# passing, rushing, receiving, total, so a hand-typed list that puts
# "Receiving" before "Rushing" attaches the wrong label to both rows.
yardage_labels = {
    'passing_yards': 'Passing Yards',
    'rushing_yards': 'Rushing Yards',
    'receiving_yards': 'Receiving Yards',
    'total_yards': 'Total Yards',
}
ax.set_yticklabels([yardage_labels[col] for col in df_heat.index], rotation=0)

plt.title("NFL Yardage Distribution by Season", fontsize=18)
plt.xlabel("Season", fontsize=14)
plt.ylabel("Type of Yardage", fontsize=14, labelpad=10)
plt.show()
The number of yards gained by teams in the NFL has increased significantly since 2012. After total yardage reached its peak in 2021, total yards have leveled out in recent years. Passing and receiving yards have always led the way, well above rushing yards.