library(dplyr)
# Pre-Season Polls ----
# Read preseasonmatched.csv, preview its first rows, and print the rows whose
# Season column equals 2019.
preseason_polls <- read.csv(
  file = "/home/jcross/MarchMadness/data/preseasonmatched.csv"
)
head(preseason_polls)
filter(preseason_polls, Season == 2019)
# Dolphin Ratings ----
# Read dolphinmatched.csv into `dolphin`.
dolphin <- read.csv(
  file = "/home/jcross/MarchMadness/data/dolphinmatched.csv"
)
#' Standard-normal CDF of the difference between two ratings.
#'
#' Presumably the probability that the side rated `r1` beats the side rated
#' `r2` -- confirm against how the ratings are defined.
#'
#' @param r1 Numeric rating(s) of the first side.
#' @param r2 Numeric rating(s) of the second side.
#' @return `pnorm(r1 - r2)`; vectorised over both arguments.
predDolphin <- function(r1, r2) {
  rating_gap <- r1 - r2
  pnorm(rating_gap)
}
# Preview the ratings table, then evaluate predDolphin for one pair of ratings
# in both orders.
head(dolphin)
predDolphin(r1 = 1.992, r2 = 1.980)
predDolphin(r1 = 1.980, r2 = 1.992)
# 538's Elo Ratings ----
# Read the men's and women's FiveThirtyEight rating files, then preview the
# men's table.
fivethirtyeight_men <- read.csv(
  file = "/home/jcross/MarchMadness/data/fivethirtyeight_men_matched.csv"
)
fivethirtyeight_women <- read.csv(
  file = "/home/jcross/MarchMadness/data/fivethirtyeight_women_matched.csv"
)
head(fivethirtyeight_men)
#' Logistic (base-10) transform of the difference between two ratings.
#'
#' The rating difference `r2 - r1` is scaled by 30.464 / 400 before being used
#' as a power of ten. Presumably the 30.464 factor converts these ratings to an
#' Elo-like scale -- confirm against the source of the ratings.
#'
#' @param r1 Numeric rating(s) of the first side.
#' @param r2 Numeric rating(s) of the second side.
#' @return A value in (0, 1); 0.5 when the ratings are equal, above 0.5 when
#'   `r1` exceeds `r2`.
pred538 <- function(r1, r2) {
  exponent <- (r2 - r1) * 30.464 / 400
  1 / (1 + 10^exponent)
}
# Example: if Villanova had played Virginia last year (both orderings).
pred538(r1 = 94.92, r2 = 93.07)
pred538(r1 = 93.07, r2 = 94.92)
#games.to.predict.538 <- games.to.predict.538 %>% mutate(pred538 = pred538(team_rating.x, team_rating.y))
# BPI Ratings ----
library(rvest); library(XML); library(dplyr)
# Table of team-name spellings; joined against the scraped BPI team names
# further down via its `name_spelling` column.
TeamSpellings.plus <- read.csv("/home/jcross/MarchMadness/data/TeamSpellings.plus.csv")

# Scrape the ESPN BPI listing. The first page lives at the base URL and pages
# 2-8 under ".../page/<n>".
bpi_base_url <- "http://www.espn.com/mens-college-basketball/bpi/_/view/bpi"
bpi_page_urls <- c(bpi_base_url, paste0(bpi_base_url, "/page/", 2:8))

# Fetch every page with the same routine. A page that cannot be downloaded or
# parsed is skipped with a warning instead of aborting the whole script, so a
# missing trailing page no longer has to be "ignored" by hand.
bpi_pages <- lapply(bpi_page_urls, function(page_url) {
  tryCatch(
    {
      page_tables <- read_html(page_url) %>%
        html_nodes("table") %>%
        html_table()
      # The second <table> on each page is the one that is kept.
      page_tables[[2]]
    },
    error = function(e) {
      warning(
        "Skipping BPI page ", page_url, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
})

# Drop the pages that failed and fail loudly if nothing at all was retrieved.
bpi_pages <- Filter(Negate(is.null), bpi_pages)
if (length(bpi_pages) == 0) {
  stop("No BPI pages could be scraped from ", bpi_base_url, call. = FALSE)
}

# Stack all pages in one step rather than growing `bpi` inside a loop.
bpi <- do.call(rbind, bpi_pages)
library(stringr)
# Build a normalised, lower-case team name in `TEAMcleanup` for joining.

# Rows whose TEAM consists solely of upper-case letters: keep only the first
# half of the string, lower-cased.
all_caps <- grepl("^[[:upper:]]+$", bpi$TEAM)
caps_teams <- bpi$TEAM[all_caps]
bpi$TEAMcleanup <- bpi$TEAM
bpi$TEAMcleanup[all_caps] <- tolower(substr(caps_teams, 1, nchar(caps_teams) / 2))

# Strip any run of three or more capital letters, then trim and lower-case.
without_caps_runs <- gsub("[A-Z]{3,}", "", bpi$TEAMcleanup)
bpi$TEAMcleanup <- trimws(tolower(str_trim(without_caps_runs)))

# Drop rows whose cleaned name is the literal "team".
bpi <- filter(bpi, TEAMcleanup != "team")
# Match cleaned BPI names against the spelling table; keep the rows that found
# a spelling and, separately, the rows that did not.
spelling_key <- c("TEAMcleanup" = "name_spelling")
bpi.matched <- inner_join(bpi, TeamSpellings.plus, by = spelling_key)
bpi.unmatched <- anti_join(bpi, TeamSpellings.plus, by = spelling_key)

# Diagnostics: row counts, then a look at what failed to match and what matched.
nrow(bpi.matched)
nrow(bpi.unmatched)
head(bpi.unmatched)
unique(bpi.unmatched$TEAMcleanup)
head(bpi.matched)
#' Standard-normal CDF of a rating difference scaled by 10.5.
#'
#' Presumably the probability that the side rated `r1` beats the side rated
#' `r2`, with 10.5 acting as the assumed standard deviation -- confirm.
#'
#' @param r1 Numeric rating(s) of the first side.
#' @param r2 Numeric rating(s) of the second side.
#' @return `pnorm((r1 - r2) / 10.5)`; vectorised over both arguments.
predBPI <- function(r1, r2) {
  scaled_gap <- (r1 - r2) / 10.5
  pnorm(scaled_gap)
}
# Example: if Virginia faces Duke this year (both orderings).
predBPI(r1 = 12.2, r2 = 10.8)
predBPI(r1 = 10.8, r2 = 12.2)