library(dplyr)

# Pre-Season Polls ----

# Load preseasonmatched.csv, preview its first rows, and show the rows whose
# Season column equals 2019.
preseason_polls <- read.csv(
  file = "/home/jcross/MarchMadness/data/preseasonmatched.csv"
)
head(preseason_polls)
filter(preseason_polls, Season == 2019)

# Dolphin Ratings ----

# Load the Dolphin ratings table from dolphinmatched.csv.
dolphin <- read.csv(
  file = "/home/jcross/MarchMadness/data/dolphinmatched.csv"
)

# Map the gap between two Dolphin ratings through the standard normal CDF.
#
# r1, r2: numeric ratings (vectors are handled elementwise).
# Returns a value in [0, 1]; equal ratings give 0.5. Used in this script as
# the chance that the team rated r1 beats the team rated r2.
predDolphin <- function(r1, r2) {
  rating_gap <- r1 - r2
  stats::pnorm(rating_gap)
}

# Preview the Dolphin table, then evaluate one pairing of ratings in both
# orderings (the two results sum to 1).
head(dolphin)
predDolphin(r1 = 1.992, r2 = 1.980)
predDolphin(r1 = 1.980, r2 = 1.992)

# 538’s Elo Ratings ----

# Load the FiveThirtyEight rating tables for the men's and women's fields,
# then preview the men's table.
fivethirtyeight_men <- read.csv(
  file = "/home/jcross/MarchMadness/data/fivethirtyeight_men_matched.csv"
)
fivethirtyeight_women <- read.csv(
  file = "/home/jcross/MarchMadness/data/fivethirtyeight_women_matched.csv"
)

head(fivethirtyeight_men)

# Logistic (base-10) curve on the difference between two 538 ratings.
#
# r1, r2: numeric ratings (vectors are handled elementwise).
# Returns a value in (0, 1); equal ratings give 0.5. The rating gap is
# rescaled by 30.464 / 400 before exponentiation, exactly as in the formula
# this script was written with.
pred538 <- function(r1, r2) {
  scaled_gap <- (r2 - r1) * 30.464 / 400
  odds_against <- 10^scaled_gap
  1 / (1 + odds_against)
}

# Example: if Villanova had played Virginia last year (both orderings).
pred538(r1 = 94.92, r2 = 93.07)
pred538(r1 = 93.07, r2 = 94.92)

# Disabled: would add a pred538 column to games.to.predict.538.
# games.to.predict.538 <- games.to.predict.538 %>%
#   mutate(pred538 = pred538(team_rating.x, team_rating.y))

# BPI Ratings ----

library(rvest); library(XML); library(dplyr)
# Load the team-name spelling table; its name_spelling column is the join key
# for the scraped BPI team names further down.
TeamSpellings.plus <- read.csv(
  file = "/home/jcross/MarchMadness/data/TeamSpellings.plus.csv"
)

# Scrape ESPN's men's college basketball BPI table. The listing is paginated:
# page 1 is served at the base URL and pages 2-8 at <base>/page/<i>.
bpi_url <- "http://www.espn.com/mens-college-basketball/bpi/_/view/bpi"

# Pull the ratings table out of one parsed BPI page. The script has always
# taken the second <table> node on the page, so that is kept here.
extract_bpi_table <- function(page) {
  tables <- page %>%
    html_nodes("table") %>%
    html_table()
  tables[[2]]
}

# Page 1 is required: a failure here is a real error and is not caught.
espnbpi <- read_html(bpi_url)
bpi <- extract_bpi_table(espnbpi)

# Pages 2-8 are best-effort. A page that cannot be fetched or parsed (for
# example because the listing has fewer pages) is reported with a warning and
# skipped, rather than aborting the run and silently losing the later pages.
bpi_pages <- lapply(2:8, function(i) {
  page_url <- paste0(bpi_url, "/page/", i)
  tryCatch(
    extract_bpi_table(read_html(page_url)),
    error = function(e) {
      warning(
        "Skipping BPI page ", i, " (", page_url, "): ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
})

# Bind everything once instead of growing `bpi` inside a loop.
bpi <- do.call(rbind, c(list(bpi), Filter(Negate(is.null), bpi_pages)))


library(stringr)
# Build a normalised team-name column (TEAMcleanup) from the scraped TEAM
# label so it can be joined against TeamSpellings.plus$name_spelling.
#
# Labels made up solely of upper-case letters keep only their first half
# (lower-cased); every other label is carried over unchanged at this step.
# NOTE(review): presumably the scraped label is the team name immediately
# followed by its abbreviation -- confirm against the live table.
bpi$TEAMcleanup <- ifelse(
  grepl("^[[:upper:]]+$", bpi$TEAM),
  tolower(substr(bpi$TEAM, 1, nchar(bpi$TEAM) / 2)),
  bpi$TEAM
)

# Drop any run of three or more capital letters, then trim and lower-case.
bpi$TEAMcleanup <- gsub("[A-Z]{3,}", "", bpi$TEAMcleanup) %>%
  str_trim() %>%
  tolower() %>%
  trimws()

# Remove rows whose cleaned name is the literal "team" (repeated header rows).
bpi <- filter(bpi, TEAMcleanup != "team")

# Split the scraped rows into those with and without a known spelling.
bpi.matched <- inner_join(
  bpi, TeamSpellings.plus,
  by = c("TEAMcleanup" = "name_spelling")
)
bpi.unmatched <- anti_join(
  bpi, TeamSpellings.plus,
  by = c("TEAMcleanup" = "name_spelling")
)

# Report match counts and list the names that still need a spelling entry.
nrow(bpi.matched)
nrow(bpi.unmatched)
head(bpi.unmatched)
unique(bpi.unmatched$TEAMcleanup)

head(bpi.matched)

# Map the gap between two BPI ratings through a normal CDF with mean 0 and
# standard deviation 10.5.
#
# r1, r2: numeric ratings (vectors are handled elementwise).
# Returns a value in [0, 1]; equal ratings give 0.5. Used in this script as
# the chance that the team rated r1 beats the team rated r2.
predBPI <- function(r1, r2) {
  rating_gap <- r1 - r2
  stats::pnorm(rating_gap, mean = 0, sd = 10.5)
}

# Example: Virginia against Duke this year (both orderings).
predBPI(r1 = 12.2, r2 = 10.8)
predBPI(r1 = 10.8, r2 = 12.2)