library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Create an Example
# Dataset used: NFL Elo from FiveThirtyEight.com (https://github.com/fivethirtyeight/data/tree/master/nfl-elo)
# Purpose: Analyze the game data and Elo ratings of the Houston Texans, the greatest team in the NFL.

#### Importing and Filtering NFL Elo data: filter and cleanse the data down to Houston Texans games

# Read the FiveThirtyEight NFL Elo file with an explicit column specification.
#
# Letting readr guess the types mis-typed the sparsely populated columns as
# logical and produced ~230k parsing failures (the transcript below was
# captured with that guessing in place), so the values in those columns were
# lost anyway. cols_only() reads just the columns that parsed cleanly, with
# the same types readr had inferred for them, which removes the warnings and
# keeps the import independent of how many rows readr samples for guessing.
nfl_import <- read_csv(
  "nfl_elo.csv",
  col_types = cols_only(
    date = col_date(format = ""),
    season = col_double(),
    neutral = col_double(),
    team1 = col_character(),
    team2 = col_character(),
    elo1_pre = col_double(),
    elo2_pre = col_double(),
    elo_prob1 = col_double(),
    elo_prob2 = col_double(),
    elo1_post = col_double(),
    elo2_post = col_double(),
    score1 = col_double(),
    score2 = col_double()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_logical(),
##   date = col_date(format = ""),
##   season = col_double(),
##   neutral = col_double(),
##   team1 = col_character(),
##   team2 = col_character(),
##   elo1_pre = col_double(),
##   elo2_pre = col_double(),
##   elo_prob1 = col_double(),
##   elo_prob2 = col_double(),
##   elo1_post = col_double(),
##   elo2_post = col_double(),
##   score1 = col_double(),
##   score2 = col_double()
## )
## See spec(...) for full column specifications.
## Warning: 229980 parsing failures.
##  row     col           expected actual          file
## 1054 playoff 1/0/T/F/TRUE/FALSE      c 'nfl_elo.csv'
## 1115 playoff 1/0/T/F/TRUE/FALSE      c 'nfl_elo.csv'
## 1169 playoff 1/0/T/F/TRUE/FALSE      c 'nfl_elo.csv'
## 1224 playoff 1/0/T/F/TRUE/FALSE      c 'nfl_elo.csv'
## 1280 playoff 1/0/T/F/TRUE/FALSE      c 'nfl_elo.csv'
## .... ....... .................. ...... .............
## See problems(...) for more details.
# Keep only the games in which Houston appears, as either team1 or team2.
hou_data <- nfl_import %>%
  filter(team1 == "HOU" | team2 == "HOU")

# Discard every column that holds no value at all for the Houston games,
# i.e. keep a column only if it has at least one non-missing entry.
hou_data <- hou_data[, colSums(!is.na(hou_data)) > 0]

# Remove any remaining game that still has a missing value somewhere.
hou_clean_data <- drop_na(hou_data)

# Split the cleaned games by the side Houston is listed on.
hou_home_data <- hou_clean_data %>%
  filter(team1 == "HOU")
hou_away_data <- hou_clean_data %>%
  filter(team2 == "HOU")

#### Get Texans-specific data

# Build the Texans-only Elo tables: one for games where Houston is team1
# (hou_home_data) and one for games where Houston is team2 (hou_away_data).
#
# The result label compares Houston's score with the opponent's score. Tied
# games are labelled "T" rather than being counted as losses, and a missing
# score leaves the result as NA.

# Houston is team1 here, so the "1" columns are the Texans' own figures;
# score2 (the opponent's score) is kept only to decide the result.
home_stats <- hou_home_data %>%
  select(contains("1"), score2) %>%
  mutate(
    result = case_when(
      score1 > score2 ~ "W",
      score1 < score2 ~ "L",
      score1 == score2 ~ "T"
    )
  )

# Houston is team2 here, so the "2" columns are the Texans' own figures;
# score1 (the opponent's score) is kept only to decide the result.
away_stats <- hou_away_data %>%
  select(contains("2"), score1) %>%
  mutate(
    result = case_when(
      score2 > score1 ~ "W",
      score2 < score1 ~ "L",
      score2 == score1 ~ "T"
    )
  )

# Reduce both tables to the same four columns under shared names so they can
# be stacked. Columns are picked by name, not by position, so the selection
# stays correct if the upstream column layout changes.
stats1 <- home_stats %>%
  select(
    preelo = elo1_pre,
    probability = elo_prob1,
    postelo = elo1_post,
    result
  )
stats2 <- away_stats %>%
  select(
    preelo = elo2_pre,
    probability = elo_prob2,
    postelo = elo2_post,
    result
  )

# Stack the two per-side tables (stats1, stats2) into one Texans table.
hou_stats <- bind_rows(stats1, stats2)

# Scatter plot of Houston's pre-game Elo against its win probability, with
# points coloured by game result. The colour mapping is declared once in the
# top-level aes() under its real name, `colour` (`colours` is not a ggplot2
# aesthetic), and geom_point() inherits it.
ggplot(hou_stats, aes(preelo, probability, colour = result)) +
  geom_point() +
  ggtitle("Pre Game Elo vs Probability on Winning")

# Same view, using Houston's post-game Elo on the x axis.
ggplot(hou_stats, aes(postelo, probability, colour = result)) +
  geom_point() +
  ggtitle("Post Game Elo vs Probability on Winning")