library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Create an Example
### Dataset used: NFL Elo from FiveThirtyEight.com (https://github.com/fivethirtyeight/data/tree/master/nfl-elo)
# Purpose: Analyze the game data and Elo ratings of the Houston Texans, the greatest team in the NFL.
#### Importing and Filtering NFL Elo data: filter and clean the data for the Houston Texans
# Read the FiveThirtyEight NFL Elo game log.
#
# Column types are declared explicitly instead of being guessed. readr guesses
# from the first rows only; columns that are empty there get typed as logical
# and every later non-empty value becomes a parsing failure (read as NA).
# cols_only() reads just the columns that parse cleanly and that the analysis
# uses, so the import is warning-free and the result no longer carries the
# unusable mis-typed columns (e.g. playoff).
nfl_import <- read_csv(
  "nfl_elo.csv",
  col_types = cols_only(
    date = col_date(format = ""),
    season = col_double(),
    neutral = col_double(),
    team1 = col_character(),
    team2 = col_character(),
    elo1_pre = col_double(),
    elo2_pre = col_double(),
    elo_prob1 = col_double(),
    elo_prob2 = col_double(),
    elo1_post = col_double(),
    elo2_post = col_double(),
    score1 = col_double(),
    score2 = col_double()
  )
)
## Parsed with column specification:
## cols(
## .default = col_logical(),
## date = col_date(format = ""),
## season = col_double(),
## neutral = col_double(),
## team1 = col_character(),
## team2 = col_character(),
## elo1_pre = col_double(),
## elo2_pre = col_double(),
## elo_prob1 = col_double(),
## elo_prob2 = col_double(),
## elo1_post = col_double(),
## elo2_post = col_double(),
## score1 = col_double(),
## score2 = col_double()
## )
## See spec(...) for full column specifications.
## Warning: 229980 parsing failures.
## row col expected actual file
## 1054 playoff 1/0/T/F/TRUE/FALSE c 'nfl_elo.csv'
## 1115 playoff 1/0/T/F/TRUE/FALSE c 'nfl_elo.csv'
## 1169 playoff 1/0/T/F/TRUE/FALSE c 'nfl_elo.csv'
## 1224 playoff 1/0/T/F/TRUE/FALSE c 'nfl_elo.csv'
## 1280 playoff 1/0/T/F/TRUE/FALSE c 'nfl_elo.csv'
## .... ....... .................. ...... .............
## See problems(...) for more details.
# Keep only the games Houston played in, as either team1 or team2.
hou_data <- nfl_import %>%
  filter(team1 == "HOU" | team2 == "HOU")

# Discard every column that holds no data at all for these games.
has_data <- colSums(!is.na(hou_data)) > 0
hou_data <- hou_data[, has_data]

# Remove any game that still has a missing value in a remaining column.
hou_clean_data <- drop_na(hou_data)

# Split by the side Houston appears on: team1 ("home") vs team2 ("away").
hou_home_data <- hou_clean_data %>% filter(team1 == "HOU")
hou_away_data <- hou_clean_data %>% filter(team2 == "HOU")
#### Get Texans-specific data
# Houston's own columns for the games where it is team1, plus the opponent's
# score. Columns are picked by name: matching on "1" and then selecting by
# position breaks silently as soon as another column name contains a "1".
home_stats <- select(
  hou_home_data,
  team1, elo1_pre, elo_prob1, elo1_post, score1, score2
)
# Label each game from Houston's point of view. Ties get their own label
# instead of being counted as losses; a missing score yields NA.
home_stats$result <- case_when(
  home_stats$score1 > home_stats$score2 ~ "W",
  home_stats$score1 < home_stats$score2 ~ "L",
  home_stats$score1 == home_stats$score2 ~ "T"
)

# Same extraction for the games where Houston is team2.
away_stats <- select(
  hou_away_data,
  team2, elo2_pre, elo_prob2, elo2_post, score2, score1
)
away_stats$result <- case_when(
  away_stats$score2 > away_stats$score1 ~ "W",
  away_stats$score2 < away_stats$score1 ~ "L",
  away_stats$score2 == away_stats$score1 ~ "T"
)

# Bring both sides onto a common set of column names so they can be stacked.
stats1 <- select(
  home_stats,
  preelo = elo1_pre, probability = elo_prob1, postelo = elo1_post, result
)
stats2 <- select(
  away_stats,
  preelo = elo2_pre, probability = elo_prob2, postelo = elo2_post, result
)
hou_stats <- bind_rows(stats1, stats2)

# Win probability against Houston's Elo before and after each game, coloured
# by result. The aesthetic is `colour`; `colours` is not recognised by ggplot2
# and was ignored, so the mapping is now declared once in ggplot().
ggplot(hou_stats, aes(preelo, probability, colour = result)) +
  geom_point() +
  ggtitle("Pre Game Elo vs Probability on Winning")
ggplot(hou_stats, aes(postelo, probability, colour = result)) +
  geom_point() +
  ggtitle("Post Game Elo vs Probability on Winning")