The sample data is a .csv file containing some of the offensive statistics for the Jets and Giants. Here is what the raw data looks like once it has been read in.
library(tidyr)
library(dplyr)
library(zoo)
library(stringr)
library(scales)
# Read the raw .csv file. header = FALSE because the file's header is spread
# over its first two rows, so every row is read in as data (columns V1..V10).
theFile <- "nfl_stats.csv"
stats <- read.csv(
  theFile,
  header = FALSE,
  stringsAsFactors = FALSE
)
# NOTE(review): this deletes the input file from disk once it is loaded, so
# the script cannot be re-run without restoring the file -- confirm intended.
unlink(theFile)
# show the first rows of the raw import
head(stats)
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
1 Team Stats Year NA NA NA NA NA NA NA
2 2007 2008 2009 2010 2011 2012 2013 2014
3 Jets Total Passing Yds 3014 3303 2380 3242 3297 2891 2932 2946
4 Total Rushing Yards 1701 2004 2756 2374 1692 1896 2158 2280
5 Total First Downs 286 308 280 307 301 299 280 289
6 Touchdowns 26 48 37 39 45 31 27 27
# Tidy the raw import: name the columns, drop non-data rows, fill in the team
# names, normalise the stat labels and reshape from wide to long.

# add header row: the first two rows of the file together form the header
# ("Team", "Stats" in row 1 and the years in row 2), so the column names are
# set directly and both header rows are dropped below
header <- c("Team", "Stats", 2007:2014)
colnames(stats) <- header
# remove superfluous rows: the two header rows (1, 2) and row 7
# NOTE(review): row 7 is presumably the blank separator between the two
# teams -- confirm against the raw file
stats <- stats[-c(1, 2, 7), ]
# change all blank cells in team column to NA
stats$Team <- ifelse(stats$Team == "", NA, stats$Team)
# so I can repeat the team names in the column: carry the last seen team name
# forward. na.rm = FALSE keeps the result the same length as the data frame
# even if a leading cell is NA (na.rm = TRUE would drop it and the assignment
# would fail on a length mismatch)
stats$Team <- na.locf(stats$Team, na.rm = FALSE)
# change Yds to Yards
stats$Stats <- str_replace_all(stats$Stats, "Yds", "Yards")
# gather the data to create an observation for each team by year per stat;
# every column except Team and Stats is a year column
new_stats <- stats %>%
  gather(key = "Year", value = "Amount", -Team, -Stats)
# change all data to numeric
new_stats$Amount <- as.numeric(new_stats$Amount)
The data has been transformed from wide to long.
# preview the first rows of the long-format data
head(new_stats)
Team Stats Year Amount
1 Jets Total Passing Yards 2007 3014
2 Jets Total Rushing Yards 2007 1701
3 Jets Total First Downs 2007 286
4 Jets Touchdowns 2007 26
5 Giants Total Passing Yards 2007 3154
6 Giants Total Rushing Yards 2007 2148
Now, we can perform some analysis.
# For each team and year, add the combined yardage of the "Yards" stats
# (total_yards) and each stat's share of it (rate); keep only the rows whose
# share exceeds 66%, sorted by descending share and then by year.
yards <- new_stats %>%
  filter(grepl("Yards", Stats)) %>%
  group_by(Team, Year) %>%
  mutate(
    total_yards = sum(Amount),
    rate = Amount / total_yards
  ) %>%
  ungroup() %>%
  filter(rate > 0.66) %>%
  as.data.frame() %>%
  arrange(desc(rate), Year)
The following is every season in which passing or rushing accounted for more than 66% of a team's combined passing and rushing yards:
# print the filtered rows, ordered by descending rate and then Year
yards
Team Stats Year Amount total_yards rate
1 Giants Total Passing Yards 2011 4734 6161 0.7683818
2 Giants Total Passing Yards 2013 3588 4920 0.7292683
3 Giants Total Passing Yards 2014 4272 5875 0.7271489
4 Giants Total Passing Yards 2009 4019 5856 0.6863046
5 Giants Total Passing Yards 2012 3825 5687 0.6725866
6 Jets Total Passing Yards 2011 3297 4989 0.6608539