Load Libraries and Data

# Load necessary libraries
library(dplyr)
library(data.table)
library(knitr)
# Model output: one row per matchup with the predicted spread and win probability
Results <- readRDS("saferesults.Rds")

# Bracket structure (slots and seeds), restricted to the 2018 season
tourneyslots <- filter(fread("NCAATourneySlots.csv"), Season == 2018)
tourneySeeds <- filter(fread('NCAATourneySeeds.csv'), Season == 2018)

# TeamID -> TeamName lookup
teams <- fread('teams.csv')

Create Bracket

# Look up each seeded team's name, then keep only the columns the bracket needs
seeds_with_names <- left_join(tourneySeeds, teams, by = 'TeamID')
tourneySeeds <- select(seeds_with_names, Season, Seed, TeamID, TeamName)

# Manually set the play-in winners.
# Each entry maps the StrongSeed of a first-round game to the play-in winner
# that fills its WeakSeed slot. Keying on StrongSeed (instead of hard-coded
# row/column positions) keeps this correct if the row order of the slots file
# ever changes.
playin_winners <- c(
    W01 = "W16b",
    W06 = "W11a",
    X06 = "X11b",
    Z01 = "Z16b"
)
playin_rows <- match(names(playin_winners), tourneyslots$StrongSeed)

# Fail loudly rather than silently building a bracket with unresolved play-ins
stopifnot(!anyNA(playin_rows))
tourneyslots$WeakSeed[playin_rows] <- unname(playin_winners)

# Complete bracket: resolve both seeds of every slot to a team.
# The seed lookup is joined twice, so its TeamID/TeamName columns come back
# suffixed .x (strong seed) and .y (weak seed).
seed_lookup <- select(tourneySeeds, -Season)
bracket <- left_join(tourneyslots, seed_lookup, by = c('StrongSeed' = 'Seed'))
bracket <- left_join(bracket, seed_lookup, by = c('WeakSeed' = 'Seed'))

Making Predictions ATS

A team needs a predicted spread more than 2 points beyond the sportsbook spread in order to have a greater than 55% chance of winning ATS (against the spread), which is the minimum winning percentage needed to break even. The ModelChoice variable is “No choice” if the model prediction is within 2 points (inclusive) of the sportsbook spread; otherwise it names the team the model predicts has a > 55% chance to cover the spread.

# Append model predicted spread and win probability for games that appear in
# Results as (TeamName.x = strong seed, TeamName.y = weak seed)
predictionbracket <- bracket %>%
    left_join(
        select(Results, TeamName.x, TeamName.y, Prediction, Win_Prob),
        by = c('TeamName.x' = 'TeamName.x', 'TeamName.y' = 'TeamName.y')
    )

# Get the rows that did not pick up a prediction from the join
na_rows <- which(is.na(predictionbracket$Prediction))

# Only rows where both teams are known can be looked up in Results
flip_rows <- na_rows[
    !is.na(predictionbracket$TeamName.x[na_rows]) &
        !is.na(predictionbracket$TeamName.y[na_rows])
]

# Impute those games from the reversed matchup in Results: the spread changes
# sign and the win probability becomes its complement.
# NOTE(review): the previous version read Results by column position (9 and 10);
# this assumes those positions were Prediction and Win_Prob -- confirm.
for (i in flip_rows) {
    reversed <- which(
        Results$TeamName.y == predictionbracket$TeamName.x[i] &
            Results$TeamName.x == predictionbracket$TeamName.y[i]
    )
    if (length(reversed) != 1) {
        # Leave the row as NA instead of failing on a zero- or multi-row lookup
        warning(
            "Expected exactly one reversed matchup in Results for bracket row ",
            i, " but found ", length(reversed),
            call. = FALSE
        )
        next
    }
    predictionbracket$Prediction[i] <- -Results$Prediction[reversed]
    predictionbracket$Win_Prob[i] <- 1 - Results$Win_Prob[reversed]
}
# All Round 1 spreads as of 12:30pm Thursday, one entry per game, named by the
# team the spread is quoted for.
# Sign convention: positive when that team is favored, negative when it is the
# underdog.
spread_by_team <- c(
    "St Bonaventure" = -3.5,
    "Long Island" = -4.5,
    "Arizona St" = 1,
    "NC Central" = -4.5,
    "Rhode Island" = 1.5,
    "Tennessee" = 11.5,
    "Gonzaga" = 13.5,
    "Kansas" = 14.5,
    "Duke" = 20,
    "Miami FL" = 2,
    "Ohio St" = 8,
    "Seton Hall" = 3,
    "Villanova" = 23,
    "Kentucky" = 5,
    "Houston" = 4,
    "Texas Tech" = 11,
    "Virginia Tech" = 2.5,
    "Arizona" = 8.5,
    "Michigan" = 10,
    "Florida" = 5.5,
    "Texas A&M" = 2.5,
    "Purdue" = 20.5,
    "Wichita St" = 11.5,
    "Cincinnati" = 14,
    "North Carolina" = 19.5,
    "Arkansas" = -1.5,
    "West Virginia" = 10.5,
    "Nevada" = 0,
    "Creighton" = 1,
    "Michigan St" = 14.5,
    "Xavier" = 19.5,
    "Auburn" = 9,
    "Virginia" = 21,
    "TCU" = 4,
    "Missouri" = -1.5,
    "Clemson" = 4.5
)

# Two-column lookup table; unname() keeps the team names out of the row names
spreads <- data.frame(
    Team = names(spread_by_team),
    Spread = unname(spread_by_team),
    stringsAsFactors = FALSE
)

# Attach each game's sportsbook spread via the strong-seed team name, then
# measure how far the model's prediction sits from that spread
predictionbracket <- left_join(
    predictionbracket,
    spreads,
    by = c('TeamName.x' = 'Team')
)
predictionbracket <- mutate(predictionbracket, diff = Prediction - Spread)

# Determine who the model would bet on.
# The model only takes a side when its prediction differs from the sportsbook
# spread by more than `ats_edge` points:
#   diff < -ats_edge  -> weak seed (TeamName.y) to cover
#   diff >  ats_edge  -> strong seed (TeamName.x) to cover
#   otherwise         -> "No choice"
# Rows without a diff (no teams / no spread yet) stay NA. Vectorised so it does
# not depend on the bracket having exactly 36 populated rows and does not fail
# on a missing diff.
ats_edge <- 2
predictionbracket$ModelChoice <- case_when(
    predictionbracket$diff < -ats_edge ~ predictionbracket$TeamName.y,
    predictionbracket$diff > ats_edge ~ predictionbracket$TeamName.x,
    abs(predictionbracket$diff) <= ats_edge ~ "No choice"
)

Game Results

# Final scores, entered by hand (TODO: scrape these from the web instead).
# Each entry is named by the team used as the join key and holds
# c(that team's score, opponent's score).
final_scores <- list(
    "St Bonaventure" = c(65, 58),
    "Long Island" = c(61, 71),
    "Arizona St" = c(56, 60),
    "NC Central" = c(46, 64),
    "Rhode Island" = c(83, 78),
    "Tennessee" = c(73, 47),
    "Gonzaga" = c(68, 64),
    "Kansas" = c(76, 60),
    "Duke" = c(89, 67),
    "Miami FL" = c(62, 64),
    "Ohio St" = c(81, 73),
    "Seton Hall" = c(94, 83),
    "Villanova" = c(87, 61),
    "Kentucky" = c(78, 73),
    "Houston" = c(67, 65),
    "Texas Tech" = c(70, 60),
    "Virginia Tech" = c(83, 86),
    "Arizona" = c(68, 89),
    "Michigan" = c(61, 47),
    "Florida" = c(77, 62)
)

# Flatten into one row per game; USE.NAMES = FALSE keeps default row names
game_result <- data.frame(
    Team = names(final_scores),
    Team.x.score = vapply(
        final_scores, function(score) score[1], numeric(1), USE.NAMES = FALSE
    ),
    Team.y.score = vapply(
        final_scores, function(score) score[2], numeric(1), USE.NAMES = FALSE
    ),
    stringsAsFactors = FALSE
)

# Append game results to the bracket (matched on the strong-seed team name);
# games without an entered result keep NA scores
finalbracket <- predictionbracket %>%
    left_join(game_result, by = c('TeamName.x' = 'Team'))

# Determine who covered the spread.
# final_margin is the strong seed's winning margin, i.e. the same orientation
# as Spread. The strong seed covers when it beats the spread, the weak seed
# covers when the margin falls short, and an exact match is a push. The spreads
# used here are whole or half points, so the equality test is exact.
# Games with no score (or no spread) stay NA. Vectorised so it does not depend
# on the bracket having exactly 36 populated rows.
final_margin <- finalbracket$Team.x.score - finalbracket$Team.y.score
finalbracket$ATSWinner <- case_when(
    final_margin < finalbracket$Spread ~ finalbracket$TeamName.y,
    final_margin > finalbracket$Spread ~ finalbracket$TeamName.x,
    final_margin == finalbracket$Spread ~ "Push"
)

Accuracy of Predictions

# Determine if the model prediction was Correct, Incorrect, a Push, or not made.
# Precedence (first match wins), same as the previous chain of overwrites:
#   1. game not played, or model made no pick ("No choice") -> NA
#   2. result landed exactly on the spread                  -> "Push"
#   3. model's pick covered                                  -> "Correct"
#   4. anything else                                         -> "Incorrect"
# Vectorised so a missing ModelChoice yields NA instead of an error and the
# result does not depend on the bracket having exactly 36 populated rows.
finalbracket$Accuracy <- case_when(
    is.na(finalbracket$ATSWinner) |
        is.na(finalbracket$ModelChoice) |
        finalbracket$ModelChoice == "No choice" ~ NA_character_,
    finalbracket$ATSWinner == "Push" ~ "Push",
    finalbracket$ModelChoice == finalbracket$ATSWinner ~ "Correct",
    TRUE ~ "Incorrect"
)

Final Results

Note: The Prediction variable outputs how many points the StrongSeed is expected to beat the WeakSeed by. So Prediction will be 3 if the StrongSeed team is favored by 3 points. This is the opposite of conventional wagering notation (-3). The Spread variable is not the conventional spread, but gives how many points the StrongSeed team is favored over the WeakSeed team by. The Win_Prob variable is the probability of the StrongSeed team winning the game straight up.

# TABLE OF ALL RESULTS
# Keep only slots whose strong-seed team is known, then pick the report
# columns in display order
known_games <- filter(finalbracket, !is.na(TeamName.x))
output_table <- select(
    known_games,
    StrongSeed, WeakSeed, TeamName.x, TeamName.y,
    Spread, Prediction, diff, Win_Prob, ModelChoice,
    Team.x.score, Team.y.score, ATSWinner, Accuracy
)
kable(output_table)
StrongSeed WeakSeed TeamName.x TeamName.y Spread Prediction diff Win_Prob ModelChoice Team.x.score Team.y.score ATSWinner Accuracy
W11a W11b St Bonaventure UCLA -3.5 2.6 6.1 0.576 St Bonaventure 65 58 St Bonaventure Correct
W16a W16b Long Island Radford -4.5 0.9 5.4 0.525 Long Island 61 71 Radford Incorrect
X11a X11b Arizona St Syracuse 1.0 1.3 0.3 0.539 No choice 56 60 Syracuse NA
Z16a Z16b NC Central TX Southern -4.5 9.3 13.8 0.766 NC Central 46 64 TX Southern Incorrect
W01 W16b Villanova Radford 23.0 9.5 -13.5 0.774 Radford 87 61 Villanova Incorrect
W02 W15 Purdue CS Fullerton 20.5 13.4 -7.1 0.885 CS Fullerton NA NA NA NA
W03 W14 Texas Tech SF Austin 11.0 11.4 0.4 0.828 No choice 70 60 SF Austin NA
W04 W13 Wichita St Marshall 11.5 4.3 -7.2 0.623 Marshall NA NA NA NA
W05 W12 West Virginia Murray St 10.5 1.1 -9.4 0.532 Murray St NA NA NA NA
W06 W11a Florida St Bonaventure 5.5 8.0 2.5 0.730 Florida 77 62 Florida Correct
W07 W10 Arkansas Butler -1.5 -1.4 0.1 0.459 No choice NA NA NA NA
W08 W09 Virginia Tech Alabama 2.5 9.2 6.7 0.765 Virginia Tech 83 86 Alabama Incorrect
X01 X16 Kansas Penn 14.5 6.8 -7.7 0.696 Penn 76 60 Kansas Incorrect
X02 X15 Duke Iona 20.0 15.0 -5.0 0.929 Iona 89 67 Duke Incorrect
X03 X14 Michigan St Bucknell 14.5 7.8 -6.7 0.723 Bucknell NA NA NA NA
X04 X13 Auburn Col Charleston 9.0 -0.1 -9.1 0.498 Col Charleston NA NA NA NA
X05 X12 Clemson New Mexico St 4.5 -0.3 -4.8 0.492 New Mexico St NA NA NA NA
X06 X11b TCU Syracuse 4.0 1.0 -3.0 0.529 Syracuse NA NA NA NA
X07 X10 Rhode Island Oklahoma 1.5 4.7 3.2 0.635 Rhode Island 83 78 Rhode Island Correct
X08 X09 Seton Hall NC State 3.0 1.1 -1.9 0.531 No choice 94 83 Seton Hall NA
Y01 Y16 Virginia UMBC 21.0 3.5 -17.5 0.599 UMBC NA NA NA NA
Y02 Y15 Cincinnati Georgia St 14.0 7.1 -6.9 0.703 Georgia St NA NA NA NA
Y03 Y14 Tennessee Wright St 11.5 5.4 -6.1 0.654 Wright St 73 47 Tennessee Incorrect
Y04 Y13 Arizona Buffalo 8.5 -7.1 -15.6 0.297 Buffalo 68 89 Buffalo Correct
Y05 Y12 Kentucky Davidson 5.0 1.7 -3.3 0.548 Davidson 78 73 Push Push
Y06 Y11 Miami FL Loyola-Chicago 2.0 6.7 4.7 0.693 Miami FL 62 64 Loyola-Chicago Incorrect
Y07 Y10 Nevada Texas 0.0 6.2 6.2 0.678 Nevada NA NA NA NA
Y08 Y09 Creighton Kansas St 1.0 -1.1 -2.1 0.468 Kansas St NA NA NA NA
Z01 Z16b Xavier TX Southern 19.5 15.6 -3.9 0.947 TX Southern NA NA NA NA
Z02 Z15 North Carolina Lipscomb 19.5 13.1 -6.4 0.878 Lipscomb NA NA NA NA
Z03 Z14 Michigan Montana 10.0 5.2 -4.8 0.650 Montana 61 47 Michigan Incorrect
Z04 Z13 Gonzaga UNC Greensboro 13.5 7.8 -5.7 0.725 UNC Greensboro 68 64 UNC Greensboro Correct
Z05 Z12 Ohio St S Dakota St 8.0 8.9 0.9 0.756 No choice 81 73 Push NA
Z06 Z11 Houston San Diego St 4.0 5.1 1.1 0.646 No choice 67 65 San Diego St NA
Z07 Z10 Texas A&M Providence 2.5 2.3 -0.2 0.565 No choice NA NA NA NA
Z08 Z09 Missouri Florida St -1.5 -11.0 -9.5 0.186 Florida St NA NA NA NA
# OVERALL RECORD
# Tally of graded picks; NA (no pick / game not played) is not counted
table(output_table[["Accuracy"]])
## 
##   Correct Incorrect      Push 
##         5         9         1

Discussion

Play-In Games

I didn’t even think about the play-in games, but I feel that my model should have been able to predict them fairly. Turns out the model would have gone 1-2 there, oh well.

Day 1

My Day 1 record was 4-7-1. I knew from the beginning that there is something wrong with my predictions for the top seeds, especially 1s and 2s, so I stayed away from following my model on any of those. Other Kagglers have had this same issue and have manually input values for them, which is trouble I didn’t want to bother with until I had the rest of the model running well. I’ll focus on that at some point in the future.

So when eliminating the 1 & 2 seeds Villanova, Kansas, and Duke from the conversation and the “No Choice” games where my model prediction was within two points of the spread, the only games the model advised me to wager on were:

  • URI -2.5 (W)
  • Wright St +12 (L)
  • UNC Greensboro +12.5 (W)
  • Miami FL -2 (L)
  • Davidson +5 (P)
  • Va Tech -2.5 (L)
  • Buffalo +9 (W)
  • Florida -5.5 (W)
  • Montana +11 (L)

The model’s ATS record today on those games was 4-4-1. Overall (disregarding 1s and 2s), the model’s record for the tournament is at 5-6-1. Small sample size, so looking forward to seeing tomorrow’s results.

For Day 2, I will be continuing to disregard the 1 and 2 seeds. The model’s recommended wagers are:

# Day 2 Recommended Wagers
# Games with no ATS result yet where the model is more than 2 points off the
# sportsbook spread
Day2 <- filter(output_table, is.na(ATSWinner), abs(diff) > 2)
kable(Day2)
StrongSeed WeakSeed TeamName.x TeamName.y Spread Prediction diff Win_Prob ModelChoice Team.x.score Team.y.score ATSWinner Accuracy
W02 W15 Purdue CS Fullerton 20.5 13.4 -7.1 0.885 CS Fullerton NA NA NA NA
W04 W13 Wichita St Marshall 11.5 4.3 -7.2 0.623 Marshall NA NA NA NA
W05 W12 West Virginia Murray St 10.5 1.1 -9.4 0.532 Murray St NA NA NA NA
X03 X14 Michigan St Bucknell 14.5 7.8 -6.7 0.723 Bucknell NA NA NA NA
X04 X13 Auburn Col Charleston 9.0 -0.1 -9.1 0.498 Col Charleston NA NA NA NA
X05 X12 Clemson New Mexico St 4.5 -0.3 -4.8 0.492 New Mexico St NA NA NA NA
X06 X11b TCU Syracuse 4.0 1.0 -3.0 0.529 Syracuse NA NA NA NA
Y01 Y16 Virginia UMBC 21.0 3.5 -17.5 0.599 UMBC NA NA NA NA
Y02 Y15 Cincinnati Georgia St 14.0 7.1 -6.9 0.703 Georgia St NA NA NA NA
Y07 Y10 Nevada Texas 0.0 6.2 6.2 0.678 Nevada NA NA NA NA
Y08 Y09 Creighton Kansas St 1.0 -1.1 -2.1 0.468 Kansas St NA NA NA NA
Z01 Z16b Xavier TX Southern 19.5 15.6 -3.9 0.947 TX Southern NA NA NA NA
Z02 Z15 North Carolina Lipscomb 19.5 13.1 -6.4 0.878 Lipscomb NA NA NA NA
Z08 Z09 Missouri Florida St -1.5 -11.0 -9.5 0.186 Florida St NA NA NA NA

Avoiding the games involving the 1 and 2 seeds (Purdue, Virginia, Cincinnati, Xavier, and UNC), the model advises taking:

  • Marshall +11.5
  • Murray St +10.5
  • Bucknell +14.5 (I’m not putting my own money on this, but will consider it an outcome to evaluate the model on)
  • Col. Charleston +9
  • New Mexico St +4.5
  • Syracuse +4
  • Kansas St +1 (I don’t like this personally, but also will evaluate as valid)
  • Nevada EVEN
  • Florida St -1.5

Best bets in that crowd are Florida St, Murray St, and College of Charleston. I’ll be putting some $ on a few of those moneylines too.