# Load necessary libraries
library(dplyr)
library(data.table)
library(knitr)
# Read the saved model predictions
Results <- readRDS("saferesults.Rds")

# Read the slot and seed files (2018 season only) plus the team-name lookup
tourneyslots <- filter(fread("NCAATourneySlots.csv"), Season == 2018)
tourneySeeds <- filter(fread("NCAATourneySeeds.csv"), Season == 2018)
teams <- fread("teams.csv")

# Look up each seeded team's name by TeamID and keep only the columns needed
tourneySeeds <- select(
  left_join(tourneySeeds, teams, by = "TeamID"),
  Season, Seed, TeamID, TeamName
)

# Play-in winners are entered by hand: column 4 of the listed rows is
# overwritten with the seed label of the team that advanced.
# NOTE(review): rows and column are addressed by position -- presumably
# column 4 is WeakSeed; confirm against the slots file layout.
tourneyslots[5, 4] <- "W16b"
tourneyslots[10, 4] <- "W11a"
tourneyslots[18, 4] <- "X11b"
tourneyslots[29, 4] <- "Z16b"

# Complete bracket: resolve the strong seed first, then the weak seed, so the
# joined team columns receive the .x (strong) and .y (weak) suffixes.
bracket <- left_join(
  left_join(
    tourneyslots,
    select(tourneySeeds, -Season),
    by = c("StrongSeed" = "Seed")
  ),
  select(tourneySeeds, -Season),
  by = c("WeakSeed" = "Seed")
)
A team's predicted spread must differ from the sportsbook spread by more than 2 points for the model to give it a greater than 55% chance of winning ATS (against the spread), which is the minimum winning percentage needed to break even. The ModelChoice variable is “No choice” when the model prediction is within 2 points of the sportsbook spread; otherwise it names the team the model predicts has a > 55% chance of covering the spread.
# Append model predicted spread
# Left-join the model output onto the bracket by the (TeamName.x, TeamName.y)
# pair, bringing over only Prediction and Win_Prob from Results.
predictionbracket <- bracket %>%
left_join(select(Results, TeamName.x, TeamName.y, Prediction, Win_Prob), by = c('TeamName.x' = 'TeamName.x', 'TeamName.y'= 'TeamName.y'))
# Get the rows of lesser seeds
# These are the rows whose Prediction is still NA after the join, i.e. no
# Results row matched that team order (or the matched value was itself NA).
na_rows <- which(is.na(predictionbracket$Prediction))
# Impute prediction data from lesser seeds
# For every unmatched row with index below 37, look up the Results row that
# lists the same two teams in the opposite order, then store the negative of
# its column 9 and one minus its column 10.
# NOTE(review): all columns here are addressed by position. Presumably
# columns 6/8 of predictionbracket are TeamName.x/TeamName.y and columns 9/10
# are Prediction/Win_Prob, and columns 9/10 of Results hold the same two
# fields -- confirm against the actual column order of both objects.
# NOTE(review): the cutoff 37 presumably limits the loop to the first 36 rows
# (the same range the later 1:36 loops use) -- confirm.
for (i in na_rows[which(na_rows < 37)]) {
predictionbracket[i,9] <- -Results[which(predictionbracket[i,6] == Results$TeamName.y & predictionbracket[i,8] == Results$TeamName.x),9]
predictionbracket[i,10] <- 1 - Results[which(predictionbracket[i,6] == Results$TeamName.y & predictionbracket[i,8] == Results$TeamName.x),10]
}
# All Round 1 Spreads as of 12:30pm Thursday
# Sign convention: a positive number means the strong seed (the listed team)
# is favored by that many points; a negative number means it is the underdog.
# The lines are written as a named numeric vector (team -> line) and then
# unpacked into a two-column data frame: Team (character), Spread (numeric).
spreads <- local({
  line_by_team <- c(
    "St Bonaventure" = -3.5,
    "Long Island" = -4.5,
    "Arizona St" = 1,
    "NC Central" = -4.5,
    "Rhode Island" = 1.5,
    "Tennessee" = 11.5,
    "Gonzaga" = 13.5,
    "Kansas" = 14.5,
    "Duke" = 20,
    "Miami FL" = 2,
    "Ohio St" = 8,
    "Seton Hall" = 3,
    "Villanova" = 23,
    "Kentucky" = 5,
    "Houston" = 4,
    "Texas Tech" = 11,
    "Virginia Tech" = 2.5,
    "Arizona" = 8.5,
    "Michigan" = 10,
    "Florida" = 5.5,
    "Texas A&M" = 2.5,
    "Purdue" = 20.5,
    "Wichita St" = 11.5,
    "Cincinnati" = 14,
    "North Carolina" = 19.5,
    "Arkansas" = -1.5,
    "West Virginia" = 10.5,
    "Nevada" = 0,
    "Creighton" = 1,
    "Michigan St" = 14.5,
    "Xavier" = 19.5,
    "Auburn" = 9,
    "Virginia" = 21,
    "TCU" = 4,
    "Missouri" = -1.5,
    "Clemson" = 4.5
  )
  # unname() keeps the team names out of the data frame's row names
  data.frame(
    Team = names(line_by_team),
    Spread = unname(line_by_team),
    stringsAsFactors = FALSE
  )
})
# Append spreads on to bracket
# Join the sportsbook line for the strong-seed team (TeamName.x) and compute
# diff = model prediction minus sportsbook line, both expressed as points by
# which the strong seed is favored.
#
# Determine who model would bet on
#   diff >  2        -> the strong seed (TeamName.x)
#   diff < -2        -> the weak seed (TeamName.y)
#   -2 <= diff <= 2  -> "No choice"
#   diff is NA       -> NA (no prediction or no spread for that row)
# The rule is applied to every row at once, so it does not depend on a fixed
# row count and a missing diff yields NA instead of stopping the script.
predictionbracket <- predictionbracket %>%
  left_join(spreads, by = c("TeamName.x" = "Team")) %>%
  mutate(
    diff = Prediction - Spread,
    ModelChoice = case_when(
      diff > 2 ~ TeamName.x,
      diff < -2 ~ TeamName.y,
      !is.na(diff) ~ "No choice"
    )
  )
# Input Game Results Manually
# TODO: eventually scrape this data from the web instead of typing it in.
# Each entry is keyed by the team listed first and holds two scores in order:
# that team's score, then its opponent's score. The list is unpacked into a
# data frame with columns Team, Team.x.score and Team.y.score.
game_result <- local({
  final_scores <- list(
    "St Bonaventure" = c(65, 58),
    "Long Island" = c(61, 71),
    "Arizona St" = c(56, 60),
    "NC Central" = c(46, 64),
    "Rhode Island" = c(83, 78),
    "Tennessee" = c(73, 47),
    "Gonzaga" = c(68, 64),
    "Kansas" = c(76, 60),
    "Duke" = c(89, 67),
    "Miami FL" = c(62, 64),
    "Ohio St" = c(81, 73),
    "Seton Hall" = c(94, 83),
    "Villanova" = c(87, 61),
    "Kentucky" = c(78, 73),
    "Houston" = c(67, 65),
    "Texas Tech" = c(70, 60),
    "Virginia Tech" = c(83, 86),
    "Arizona" = c(68, 89),
    "Michigan" = c(61, 47),
    "Florida" = c(77, 62)
  )
  # unname() keeps the team names out of the data frame's row names
  data.frame(
    Team = names(final_scores),
    Team.x.score = unname(vapply(final_scores, function(s) s[1], numeric(1))),
    Team.y.score = unname(vapply(final_scores, function(s) s[2], numeric(1))),
    stringsAsFactors = FALSE
  )
})
# Append Game Results to Bracket
# Scores are matched on the strong-seed team name; games with no entered
# result keep NA in both score columns.
finalbracket <- left_join(
  predictionbracket,
  game_result,
  by = c("TeamName.x" = "Team")
)
# Determine who covered the spread
# The final margin is the strong seed's score minus the weak seed's score and
# is compared with Spread (points by which the strong seed was favored):
#   margin > Spread  -> the strong seed (TeamName.x) covered
#   margin < Spread  -> the weak seed (TeamName.y) covered
#   margin == Spread -> "Push"
#   no score entered, or no spread -> NA
# Every row is evaluated at once, so nothing depends on a fixed row count and
# a missing score or spread gives NA rather than an error. Exact equality is
# kept for the push test, as in the original comparison; the spreads entered
# in this script are whole or half points.
final_margin <- finalbracket$Team.x.score - finalbracket$Team.y.score
finalbracket$ATSWinner <- case_when(
  final_margin > finalbracket$Spread ~ finalbracket$TeamName.x,
  final_margin < finalbracket$Spread ~ finalbracket$TeamName.y,
  final_margin == finalbracket$Spread ~ "Push"
)
# Determine if Model Prediction was Correct, Incorrect, or Not Made
# Rules, in order of precedence (first match wins):
#   1. no ATS result yet, or the model made "No choice" -> NA
#   2. the game was a push                               -> "Push"
#   3. the model's pick covered                          -> "Correct"
#   4. the model's pick did not cover                    -> "Incorrect"
# A row with a missing ModelChoice falls through to NA (or "Push" for a
# pushed game) instead of stopping the script, and no row count is hard-coded.
finalbracket <- finalbracket %>%
  mutate(
    Accuracy = case_when(
      is.na(ATSWinner) | ModelChoice == "No choice" ~ NA_character_,
      ATSWinner == "Push" ~ "Push",
      ModelChoice == ATSWinner ~ "Correct",
      ModelChoice != ATSWinner ~ "Incorrect"
    )
  )
Note: The Prediction variable gives the number of points by which the StrongSeed is expected to beat the WeakSeed, so Prediction will be 3 if the StrongSeed team is favored by 3 points. This is the opposite sign of conventional wagering notation, which would write that as -3. Likewise, the Spread variable is not the conventional spread; it gives how many points the StrongSeed team is favored over the WeakSeed team by. The Win_Prob variable is the probability of the StrongSeed team winning the game straight up.
# TABLE OF ALL RESULTS
# Keep the reporting columns, drop bracket rows that have no strong-seed team
# name, and render the result as a table.
output_table <- finalbracket %>%
  select(
    StrongSeed, WeakSeed, TeamName.x, TeamName.y,
    Spread, Prediction, diff, Win_Prob, ModelChoice,
    Team.x.score, Team.y.score, ATSWinner, Accuracy
  ) %>%
  filter(!is.na(TeamName.x))
kable(output_table)
| StrongSeed | WeakSeed | TeamName.x | TeamName.y | Spread | Prediction | diff | Win_Prob | ModelChoice | Team.x.score | Team.y.score | ATSWinner | Accuracy |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| W11a | W11b | St Bonaventure | UCLA | -3.5 | 2.6 | 6.1 | 0.576 | St Bonaventure | 65 | 58 | St Bonaventure | Correct |
| W16a | W16b | Long Island | Radford | -4.5 | 0.9 | 5.4 | 0.525 | Long Island | 61 | 71 | Radford | Incorrect |
| X11a | X11b | Arizona St | Syracuse | 1.0 | 1.3 | 0.3 | 0.539 | No choice | 56 | 60 | Syracuse | NA |
| Z16a | Z16b | NC Central | TX Southern | -4.5 | 9.3 | 13.8 | 0.766 | NC Central | 46 | 64 | TX Southern | Incorrect |
| W01 | W16b | Villanova | Radford | 23.0 | 9.5 | -13.5 | 0.774 | Radford | 87 | 61 | Villanova | Incorrect |
| W02 | W15 | Purdue | CS Fullerton | 20.5 | 13.4 | -7.1 | 0.885 | CS Fullerton | NA | NA | NA | NA |
| W03 | W14 | Texas Tech | SF Austin | 11.0 | 11.4 | 0.4 | 0.828 | No choice | 70 | 60 | SF Austin | NA |
| W04 | W13 | Wichita St | Marshall | 11.5 | 4.3 | -7.2 | 0.623 | Marshall | NA | NA | NA | NA |
| W05 | W12 | West Virginia | Murray St | 10.5 | 1.1 | -9.4 | 0.532 | Murray St | NA | NA | NA | NA |
| W06 | W11a | Florida | St Bonaventure | 5.5 | 8.0 | 2.5 | 0.730 | Florida | 77 | 62 | Florida | Correct |
| W07 | W10 | Arkansas | Butler | -1.5 | -1.4 | 0.1 | 0.459 | No choice | NA | NA | NA | NA |
| W08 | W09 | Virginia Tech | Alabama | 2.5 | 9.2 | 6.7 | 0.765 | Virginia Tech | 83 | 86 | Alabama | Incorrect |
| X01 | X16 | Kansas | Penn | 14.5 | 6.8 | -7.7 | 0.696 | Penn | 76 | 60 | Kansas | Incorrect |
| X02 | X15 | Duke | Iona | 20.0 | 15.0 | -5.0 | 0.929 | Iona | 89 | 67 | Duke | Incorrect |
| X03 | X14 | Michigan St | Bucknell | 14.5 | 7.8 | -6.7 | 0.723 | Bucknell | NA | NA | NA | NA |
| X04 | X13 | Auburn | Col Charleston | 9.0 | -0.1 | -9.1 | 0.498 | Col Charleston | NA | NA | NA | NA |
| X05 | X12 | Clemson | New Mexico St | 4.5 | -0.3 | -4.8 | 0.492 | New Mexico St | NA | NA | NA | NA |
| X06 | X11b | TCU | Syracuse | 4.0 | 1.0 | -3.0 | 0.529 | Syracuse | NA | NA | NA | NA |
| X07 | X10 | Rhode Island | Oklahoma | 1.5 | 4.7 | 3.2 | 0.635 | Rhode Island | 83 | 78 | Rhode Island | Correct |
| X08 | X09 | Seton Hall | NC State | 3.0 | 1.1 | -1.9 | 0.531 | No choice | 94 | 83 | Seton Hall | NA |
| Y01 | Y16 | Virginia | UMBC | 21.0 | 3.5 | -17.5 | 0.599 | UMBC | NA | NA | NA | NA |
| Y02 | Y15 | Cincinnati | Georgia St | 14.0 | 7.1 | -6.9 | 0.703 | Georgia St | NA | NA | NA | NA |
| Y03 | Y14 | Tennessee | Wright St | 11.5 | 5.4 | -6.1 | 0.654 | Wright St | 73 | 47 | Tennessee | Incorrect |
| Y04 | Y13 | Arizona | Buffalo | 8.5 | -7.1 | -15.6 | 0.297 | Buffalo | 68 | 89 | Buffalo | Correct |
| Y05 | Y12 | Kentucky | Davidson | 5.0 | 1.7 | -3.3 | 0.548 | Davidson | 78 | 73 | Push | Push |
| Y06 | Y11 | Miami FL | Loyola-Chicago | 2.0 | 6.7 | 4.7 | 0.693 | Miami FL | 62 | 64 | Loyola-Chicago | Incorrect |
| Y07 | Y10 | Nevada | Texas | 0.0 | 6.2 | 6.2 | 0.678 | Nevada | NA | NA | NA | NA |
| Y08 | Y09 | Creighton | Kansas St | 1.0 | -1.1 | -2.1 | 0.468 | Kansas St | NA | NA | NA | NA |
| Z01 | Z16b | Xavier | TX Southern | 19.5 | 15.6 | -3.9 | 0.947 | TX Southern | NA | NA | NA | NA |
| Z02 | Z15 | North Carolina | Lipscomb | 19.5 | 13.1 | -6.4 | 0.878 | Lipscomb | NA | NA | NA | NA |
| Z03 | Z14 | Michigan | Montana | 10.0 | 5.2 | -4.8 | 0.650 | Montana | 61 | 47 | Michigan | Incorrect |
| Z04 | Z13 | Gonzaga | UNC Greensboro | 13.5 | 7.8 | -5.7 | 0.725 | UNC Greensboro | 68 | 64 | UNC Greensboro | Correct |
| Z05 | Z12 | Ohio St | S Dakota St | 8.0 | 8.9 | 0.9 | 0.756 | No choice | 81 | 73 | Push | NA |
| Z06 | Z11 | Houston | San Diego St | 4.0 | 5.1 | 1.1 | 0.646 | No choice | 67 | 65 | San Diego St | NA |
| Z07 | Z10 | Texas A&M | Providence | 2.5 | 2.3 | -0.2 | 0.565 | No choice | NA | NA | NA | NA |
| Z08 | Z09 | Missouri | Florida St | -1.5 | -11.0 | -9.5 | 0.186 | Florida St | NA | NA | NA | NA |
# OVERALL RECORD
# Count of each Accuracy outcome; NA rows are left out by table()'s default.
table(output_table[["Accuracy"]])
##
## Correct Incorrect Push
## 5 9 1
I didn’t even think about the play-in games, but I feel that my model should have been able to predict them fairly well. It turns out the model would have gone 1-2 there. Oh well.
My day 1 record was 4-7-1. I knew from the beginning that there is something wrong with my predictions for the top seeds, especially 1s and 2s, so I stayed away from following my model on any of those. Other Kagglers have had this same issue and have manually input values for those seeds, which is trouble I didn’t want to bother with until I had the rest of the model running well. I’ll focus on that at some point in the future.
So when eliminating the 1 & 2 seeds Villanova, Kansas, and Duke from the conversation and the “No Choice” games where my model prediction was within two points of the spread, the only games the model advised me to wager on were:
The model’s ATS record today on those games was 4-4-1. Overall (disregarding 1s and 2s), the model’s record for the tournament is at 5-6-1. Small sample size, so looking forward to seeing tomorrow’s results.
For Day 2, I will be continuing to disregard the 1 and 2 seeds. The model’s recommended wagers are:
# Day 2 Recommended Wagers
# Games with no ATS result recorded yet where the model's prediction differs
# from the spread by more than 2 points.
Day2 <- filter(output_table, is.na(ATSWinner), abs(diff) > 2)
kable(Day2)
| StrongSeed | WeakSeed | TeamName.x | TeamName.y | Spread | Prediction | diff | Win_Prob | ModelChoice | Team.x.score | Team.y.score | ATSWinner | Accuracy |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| W02 | W15 | Purdue | CS Fullerton | 20.5 | 13.4 | -7.1 | 0.885 | CS Fullerton | NA | NA | NA | NA |
| W04 | W13 | Wichita St | Marshall | 11.5 | 4.3 | -7.2 | 0.623 | Marshall | NA | NA | NA | NA |
| W05 | W12 | West Virginia | Murray St | 10.5 | 1.1 | -9.4 | 0.532 | Murray St | NA | NA | NA | NA |
| X03 | X14 | Michigan St | Bucknell | 14.5 | 7.8 | -6.7 | 0.723 | Bucknell | NA | NA | NA | NA |
| X04 | X13 | Auburn | Col Charleston | 9.0 | -0.1 | -9.1 | 0.498 | Col Charleston | NA | NA | NA | NA |
| X05 | X12 | Clemson | New Mexico St | 4.5 | -0.3 | -4.8 | 0.492 | New Mexico St | NA | NA | NA | NA |
| X06 | X11b | TCU | Syracuse | 4.0 | 1.0 | -3.0 | 0.529 | Syracuse | NA | NA | NA | NA |
| Y01 | Y16 | Virginia | UMBC | 21.0 | 3.5 | -17.5 | 0.599 | UMBC | NA | NA | NA | NA |
| Y02 | Y15 | Cincinnati | Georgia St | 14.0 | 7.1 | -6.9 | 0.703 | Georgia St | NA | NA | NA | NA |
| Y07 | Y10 | Nevada | Texas | 0.0 | 6.2 | 6.2 | 0.678 | Nevada | NA | NA | NA | NA |
| Y08 | Y09 | Creighton | Kansas St | 1.0 | -1.1 | -2.1 | 0.468 | Kansas St | NA | NA | NA | NA |
| Z01 | Z16b | Xavier | TX Southern | 19.5 | 15.6 | -3.9 | 0.947 | TX Southern | NA | NA | NA | NA |
| Z02 | Z15 | North Carolina | Lipscomb | 19.5 | 13.1 | -6.4 | 0.878 | Lipscomb | NA | NA | NA | NA |
| Z08 | Z09 | Missouri | Florida St | -1.5 | -11.0 | -9.5 | 0.186 | Florida St | NA | NA | NA | NA |
I will avoid the model’s picks in the games involving Purdue, Virginia, Cincinnati, Xavier, and UNC (the 1 and 2 seeds); for the remaining games the model advises taking:
Best bets in that crowd are Florida St, Murray St, and College of Charleston. I’ll be putting some $ on a few of those moneylines too.