Overview

Inspired by Kaggle’s 2018 NCAA March Madness Contest, I created a prediction model for the results of the 2018 NCAA Men’s Basketball tournament against the spread and am keeping track of the results. This document will keep a running track of all the predictions and the model’s accuracy.

Getting Started

Helper Functions

There were seven tasks to be repeated in each round:

  1. Add the model’s predictions for each game on to the bracket
  2. Gather and add the sportsbook spread for each game
  3. Determine whether the model’s prediction differs enough from the sportsbook spread to choose a wager
  4. Gather and add the final scores from the games
  5. Determine the ATS winner of each game and the model’s accuracy
  6. Advance the game winners into the next round
  7. Create output tables

The eight functions below accomplish those tasks; the last task, creating output tables, is split across two functions.

# Predict outcomes and input into bracket
#
# Builds the game table for one tournament round by taking that round's rows
# from the global `bracket` and attaching the model's Prediction and Win_Prob
# from the global `PredictionData`.
#
# round: a single round number, 0 (play-in) through 6 (championship).
# Returns the selected bracket rows with Prediction and Win_Prob added.
# Stops with an explicit error when `round` is not one of 0-6.
predict_ncaa_round <- function(round){
    # Row ranges of the global bracket that hold each round's games
    round_rows <- list(
        "0" = c(1:4),
        "1" = c(5:36),
        "2" = c(37:52),
        "3" = c(53:60),
        "4" = c(61:64),
        "5" = c(65:66),
        "6" = 67
    )
    if (length(round) != 1) {
        stop("`round` must be a single value between 0 and 6", call. = FALSE)
    }
    rows <- round_rows[[as.character(round)]]
    if (is.null(rows)) {
        stop("`round` must be one of 0, 1, 2, 3, 4, 5, 6", call. = FALSE)
    }

    # Append model predicted spread for matchups stored as (TeamName.x, TeamName.y)
    bracket <- bracket[rows,] %>% 
        left_join(select(PredictionData, TeamName.x, TeamName.y, Prediction, Win_Prob), by = c('TeamName.x' = 'TeamName.x', 'TeamName.y'= 'TeamName.y'))  

    # Rows with no match: PredictionData may hold the pair in the opposite order
    na_rows <- which(is.na(bracket$Prediction))

    # Impute from the reversed pair: negate the spread, complement the win
    # probability.
    # NOTE(review): columns 7/8 of the joined table and 9/10 of PredictionData
    # are assumed to be Prediction/Win_Prob - confirm against the data layout.
    for (i in na_rows) {
        # Look the reversed matchup up once and reuse it for both columns
        match_row <- which(bracket[i, 5] == PredictionData$TeamName.y & bracket[i,6] == PredictionData$TeamName.x)

        bracket[i, 7] <- -PredictionData[match_row, 9]
        bracket[i, 8] <- 1 - PredictionData[match_row, 10]
    }

    bracket
}
# Appending Sportsbook Spreads
#
# round:    game table for one round (needs TeamName.x and Prediction).
# spreadDF: two-column table of (team, spread); the team is matched against
#           TeamName.x.
# Returns `round` with Spread and Diff (Prediction - Spread) added.
attachSpreads <- function(round, spreadDF){
    # Standardise the column names and make the spread numeric
    spreads <- setNames(spreadDF, c("Team", "Spread"))
    spreads[["Spread"]] <- as.numeric(spreads[["Spread"]])

    # Join on the first-listed team, then compare model and sportsbook
    with_spreads <- left_join(round, spreads, by = c('TeamName.x' = 'Team'))
    mutate(with_spreads, Diff = Prediction - Spread)
}
# Appending Game Results
#
# round:     game table for one round (needs TeamName.x).
# resultsDF: three-column table of (team, that team's score, opponent's
#            score); the team is matched against TeamName.x.
# Returns `round` with Team.x.score and Team.y.score added.
attachGameResults <- function(round, resultsDF){
    scores <- setNames(resultsDF, c("Team", "Team.x.score", "Team.y.score"))

    # Scores may arrive as character; make both numeric
    for (score_col in c("Team.x.score", "Team.y.score")) {
        scores[[score_col]] <- as.numeric(scores[[score_col]])
    }

    # Append Game Results to Bracket
    left_join(round, scores, by = c('TeamName.x' = 'Team'))
}
# Determine who model would bet on ATS
#
# bracket:   game table with TeamName.x, TeamName.y and Diff
#            (model prediction minus sportsbook spread).
# threshold: minimum absolute Diff, in points, required to make a pick
#            (default 2).
# Returns `bracket` with a character column ModelChoice:
#   TeamName.x   when Diff >  threshold
#   TeamName.y   when Diff < -threshold
#   "No choice"  when |Diff| <= threshold
#   NA           when Diff is missing
ATSchoice <- function(bracket, threshold = 2){
    if (!"Diff" %in% names(bracket)) {
        stop("`bracket` must contain a `Diff` column (see attachSpreads)", call. = FALSE)
    }

    gap <- bracket$Diff
    known <- !is.na(gap)

    # Vectorised so zero-row tables and NA gaps do not break an `if`
    choice <- rep(NA_character_, length(gap))
    choice[known & abs(gap) <= threshold] <- "No choice"

    pick_x <- known & gap > threshold
    pick_y <- known & gap < -threshold
    choice[pick_x] <- as.character(bracket$TeamName.x[pick_x])
    choice[pick_y] <- as.character(bracket$TeamName.y[pick_y])

    bracket$ModelChoice <- choice
    bracket
}
# Assessing Model Accuracy ATS
#
# bracket: game table with TeamName.x, TeamName.y, Spread, ModelChoice,
#          Team.x.score and Team.y.score.
# Returns `bracket` with two character columns added:
#   ATSWinner - TeamName.x if its margin beat the spread, TeamName.y if the
#               margin fell short, "Push" if it matched exactly, NA when a
#               score or the spread is missing.
#   Accuracy  - "Correct" / "Incorrect" / "Push" for games where the model
#               made a pick; NA when there was no pick ("No choice" or NA)
#               or no ATS result.
ATSresults <- function(bracket){
    required <- c("TeamName.x", "TeamName.y", "Spread", "ModelChoice",
                  "Team.x.score", "Team.y.score")
    missing_cols <- setdiff(required, names(bracket))
    if (length(missing_cols) > 0) {
        stop("`bracket` is missing column(s): ",
             paste(missing_cols, collapse = ", "), call. = FALSE)
    }

    n_games <- nrow(bracket)
    spread <- bracket$Spread
    # Margin from TeamName.x's point of view, the same orientation as Spread
    final_diff <- bracket$Team.x.score - bracket$Team.y.score

    # Determine ATS winner
    settled <- !is.na(final_diff) & !is.na(spread)
    x_covers <- settled & final_diff > spread
    y_covers <- settled & final_diff < spread
    ats_winner <- rep(NA_character_, n_games)
    ats_winner[x_covers] <- as.character(bracket$TeamName.x[x_covers])
    ats_winner[y_covers] <- as.character(bracket$TeamName.y[y_covers])
    ats_winner[settled & final_diff == spread] <- "Push"
    bracket$ATSWinner <- ats_winner

    # Determine if Model Prediction was Correct, Incorrect, or Not Made
    choice <- as.character(bracket$ModelChoice)
    graded <- !is.na(ats_winner) & !is.na(choice) & choice != "No choice"
    accuracy <- rep(NA_character_, n_games)
    accuracy[graded & choice == ats_winner] <- "Correct"
    accuracy[graded & choice != ats_winner] <- "Incorrect"
    # A push overrides Correct/Incorrect for games the model picked
    accuracy[graded & ats_winner == "Push"] <- "Push"
    bracket$Accuracy <- accuracy

    bracket
}
# Putting winners into next round
#
# x: game table for a finished round, with Slot, TeamName.x, TeamName.y,
#    Team.x.score and Team.y.score.
# Reads the global `bracket` and returns a modified copy in which each game's
# winner is written into the later game whose column 3 or 4 names this game's
# Slot. The caller must reassign the result to `bracket`.
# NOTE(review): columns 3 and 4 are presumably StrongSeed and WeakSeed -
# confirm against the slots file.
# Games with a missing score, or whose slot feeds no later game (for example
# the final), are skipped. A tie advances TeamName.y, as before.
advance_winners <- function(x){
    for (i in seq_len(nrow(x))){
        x_score <- x$Team.x.score[i]
        y_score <- x$Team.y.score[i]
        # No result yet: nothing to advance
        if (is.na(x_score) || is.na(y_score)) next

        slot <- x$Slot[i]
        if (x_score > y_score) {
            winner <- x$TeamName.x[i]
        } else {
            winner <- x$TeamName.y[i]
        }

        # Locate the later game fed by this slot: one (row, col) pair per hit
        hits <- which(bracket[,c(3:4)] == slot, arr.ind = TRUE)
        if (nrow(hits) == 0) next
        rownum <- hits[1, 1]
        colnum <- hits[1, 2]

        # A hit in the first of the two columns fills TeamName.x, else TeamName.y
        if (colnum == 1) {
            bracket$TeamName.x[rownum] <- winner
        } else {
            bracket$TeamName.y[rownum] <- winner
        }
    }
    bracket
}
# Create Prediction Table
#
# Renders the model-vs-sportsbook comparison for one round as a styled HTML
# table, with the largest disagreements (by absolute Diff) at the top.
predictionTable <- function(bracket){
    sign_note <- "A positive value in Prediction or Spread indicates TeamName.x being favored by that many points.  A negative value indicates TeamName.y being favored by that many points."

    bracket %>%
        select(StrongSeed, WeakSeed, TeamName.x, TeamName.y, Prediction, Spread, ModelChoice, Diff) %>%
        arrange(desc(abs(Diff))) %>%
        kable("html") %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 9) %>%
        footnote(general = sign_note)
}

# Create Results & Accuracy Table
#
# Renders one round's picks, final scores, ATS winners and graded accuracy as
# a styled HTML table, in the bracket's existing row order.
resultsTable <- function(bracket){
    bracket %>%
        select(StrongSeed, TeamName.x, WeakSeed, TeamName.y, Prediction, Spread, ModelChoice, Team.x.score, Team.y.score, ATSWinner, Accuracy) %>%
        kable("html") %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 9)
}

Gathering of Spreads from Internet

Throughout the tournament, spreads were gathered manually from the internet and put into these tables. The date and time at which each set of spreads was collected is recorded in the comment above its table. Spreads are positive if the strong seed (TeamName.x) is favored and negative if it is the underdog.

# Playin Spreads retroactively added after the games
# One (team, spread) pair per row. c() coerces the numbers to character, so
# both columns are strings here; attachSpreads() converts the spread to numeric
# and joins the team name against TeamName.x.
playinspreads <- data.frame(
    matrix(
        c(
"St Bonaventure", -3.5,
"Long Island", -4.5,
"Arizona St", 1,
"NC Central", -4.5), 
    ncol = 2, byrow = TRUE), stringsAsFactors = FALSE)

# Round 1 Spreads from Wednesday night
# One (team, spread) pair per row, stored as character; attachSpreads()
# converts the spread to numeric and joins the team name against TeamName.x.
round1spreads <- data.frame(
    matrix(
        c(
"Rhode Island", 1.5,
"Tennessee", 11.5,
"Gonzaga", 13.5,
"Kansas", 14.5,
"Duke", 20,
"Miami FL", 2,
"Ohio St", 8,
"Seton Hall", 3,
"Villanova", 23,
"Kentucky", 5,
"Houston", 4,
"Texas Tech", 11,
"Virginia Tech", 2.5,
"Arizona", 8.5,
"Michigan", 10,
"Florida", 5.5,
"Texas A&M", 2.5,
"Purdue", 20.5,
"Wichita St", 11.5,
"Cincinnati", 14,
"North Carolina", 19.5,
"Arkansas", -1.5,
"West Virginia", 10.5,
"Nevada", 0,
"Creighton", 1,
"Michigan St", 14.5,
"Xavier", 19.5,
"Auburn", 9,
"Virginia", 21,
"TCU", 4,
"Missouri", -1.5,
"Clemson", 4.5), 
    ncol = 2, byrow = TRUE), stringsAsFactors = FALSE)

# Round 2 Spreads from Saturday 11:30am
# One (team, spread) pair per row, stored as character; attachSpreads()
# converts the spread to numeric and joins the team name against TeamName.x.
round2spreads <- data.frame(
    matrix(
        c(
"Villanova", 11,
"Purdue", 3.5,
"Texas Tech", 0,
"Marshall", -11,
"Kansas", 4.5,
"Duke", 10,
"Michigan St", 9,
"Auburn", 1,
"UMBC", -12,
"Cincinnati", 7,
"Tennessee", 6,
"Buffalo", -7.5,
"Xavier", 4,
"North Carolina", 6,
"Michigan", 3,
"Gonzaga", 2.5), 
    ncol = 2, byrow = TRUE), stringsAsFactors = FALSE)

# Round 3 (Sweet 16) Spreads from 7:00pm Monday
# One (team, spread) pair per row, stored as character; attachSpreads()
# converts the spread to numeric and joins the team name against TeamName.x.
round3spreads <- data.frame(
    matrix(
        c(
"Villanova", 5.5,
"Purdue", 1.5,
"Kansas", 4.5,
"Duke", 11,
"Kansas St", -6,
"Nevada", 1.5,
"Florida St", -5.5,
"Texas A&M", -2.5), 
    ncol = 2, byrow = TRUE), stringsAsFactors = FALSE)

# All Round 4 Spreads from 7:00pm Monday
# NOTE(review): the timestamp is the same as the round 3 table's - confirm it
# was not copied over.
# One (team, spread) pair per row, stored as character; attachSpreads()
# converts the spread to numeric and joins the team name against TeamName.x.
round4spreads <- data.frame(
    matrix(
        c(
"Villanova", 7,
"Kansas", -3.5,
"Kansas St", 1.5,
"Florida St", -4.5), 
    ncol = 2, byrow = TRUE), stringsAsFactors = FALSE)

# All Round 5 Spreads from 8:30pm Thursday
# One (team, spread) pair per row, stored as character; attachSpreads()
# converts the spread to numeric and joins the team name against TeamName.x.
round5spreads <- data.frame(
    matrix(
        c(
"Villanova", 5,
"Loyola-Chicago", -5.5), 
    ncol = 2, byrow = TRUE), stringsAsFactors = FALSE)

# All Round 6 Spreads from Sunday at 10:30pm
# A single (team, spread) pair, stored as character; attachSpreads() converts
# the spread to numeric and joins the team name against TeamName.x.
round6spreads <- data.frame(
    matrix(
        c(
"Villanova", 7),
    ncol = 2, byrow = TRUE), stringsAsFactors = FALSE)

Collecting Game Results from Internet

Throughout the tournament, the game results were gathered manually from the internet and put into these tables. This task needs to eventually be automated.

# Playin Game Results 
# One row per game: (team, that team's score, opponent's score), stored as
# character. attachGameResults() names the scores Team.x.score / Team.y.score,
# converts them to numeric and joins the team name against TeamName.x.
playinresults <- data.frame(
    matrix(
        c(
"St Bonaventure", 65, 58, 
"Long Island", 61, 71,
"Arizona St", 56, 60,
"NC Central", 46, 64), ncol = 3, byrow = TRUE), stringsAsFactors = FALSE)

# Round 1 Game Results
# One row per game: (team, that team's score, opponent's score), stored as
# character. attachGameResults() names the scores Team.x.score / Team.y.score,
# converts them to numeric and joins the team name against TeamName.x.
round1results <- data.frame(
    matrix(
        c(
# NOTE(review): the next four rows repeat the play-in results verbatim. They
# only matter if one of these teams is a TeamName.x in round 1 - confirm they
# can be dropped.
"St Bonaventure", 65, 58, 
"Long Island", 61, 71,
"Arizona St", 56, 60,
"NC Central", 46, 64,
"Rhode Island", 83, 78,
"Tennessee", 73, 47,
"Gonzaga", 68, 64,
"Kansas", 76, 60,
"Duke", 89, 67,
"Miami FL", 62, 64,
"Ohio St", 81, 73,
"Seton Hall", 94, 83,
"Villanova", 87, 61,
"Kentucky", 78, 73,
"Houston", 67, 65,
"Texas Tech", 70, 60,
"Virginia Tech", 83, 86,
"Arizona", 68, 89,
"Michigan", 61, 47,
"Florida", 77, 62,
"Texas A&M", 73, 69,
"Purdue", 74, 48,
"Wichita St", 75, 81,
"Cincinnati", 68, 53,
"North Carolina", 84, 66,
"Arkansas", 62, 79,
"West Virginia", 85, 68,
"Nevada", 87, 83,
"Creighton", 59, 69,
"Michigan St", 82, 78,
"Xavier", 91, 72,
"Auburn", 62, 58,
"Virginia", 54, 74,
"TCU", 52, 57,
"Missouri", 54, 67,
"Clemson", 79, 68), ncol = 3, byrow = TRUE), stringsAsFactors = FALSE)

# Round 2 Game Results
# One row per game: (team, that team's score, opponent's score), stored as
# character; attachGameResults() converts the scores to numeric and joins the
# team name against TeamName.x.
round2results <- data.frame(
    matrix(
        c(
"Villanova", 81, 58,
"Texas Tech", 69, 66,
"Duke", 87, 62,
"Tennessee", 62, 63,
"Buffalo", 75, 95,
"Michigan", 64, 63,
"Gonzaga", 90, 84,
"Kansas", 83, 79,
"Purdue", 76, 73,
"Michigan St", 53, 55,
"North Carolina", 65, 86,
"Cincinnati", 73, 75,
"Auburn", 53, 84,
"UMBC", 43, 50,
"Xavier", 70, 75,
"Marshall", 71, 94),  
    ncol = 3, byrow = TRUE), stringsAsFactors = FALSE)

# Round 3 Game Results
# One row per game: (team, that team's score, opponent's score), stored as
# character; attachGameResults() converts the scores to numeric and joins the
# team name against TeamName.x.
round3results <- data.frame(
    matrix(
        c(
        "Villanova", 90, 78,
        "Purdue", 65, 78,
        "Kansas", 80, 76,
        "Duke", 69, 65,
        "Kansas St", 61, 58,
        "Nevada", 68, 69,
        "Florida St", 75, 60,
        "Texas A&M", 72, 99),  
    ncol = 3, byrow = TRUE), stringsAsFactors = FALSE)

# Round 4 Game Results
# One row per game: (team, that team's score, opponent's score), stored as
# character; attachGameResults() converts the scores to numeric and joins the
# team name against TeamName.x.
round4results <- data.frame(
    matrix(
        c(
        "Villanova", 71, 59,
        "Kansas", 85, 81,
        "Kansas St", 62, 78,
        "Florida St", 54, 58),  
    ncol = 3, byrow = TRUE), stringsAsFactors = FALSE)

# Round 5 Game Results
# One row per game: (team, that team's score, opponent's score), stored as
# character; attachGameResults() converts the scores to numeric and joins the
# team name against TeamName.x.
round5results <- data.frame(
    matrix(
        c(
        "Villanova", 95, 79,
        "Loyola-Chicago", 57, 69),
    ncol = 3, byrow = TRUE), stringsAsFactors = FALSE)


# Round 6 Game Results
# A single row: (team, that team's score, opponent's score), stored as
# character, in the layout attachGameResults() expects.
# NOTE(review): no visible code passes this table to attachGameResults() -
# confirm whether a round 6 results chunk is missing.
round6results <- data.frame(
    matrix(
        c(
        "Villanova", 79, 62),
    ncol = 3, byrow = TRUE), stringsAsFactors = FALSE)

Creating Initial 2018 Bracket

The prediction model is an XGBoost model with 5-fold cross validation. The cleaning and preparation of the data and the training and testing of that model are in a script which can be found here [not releasing at the moment]. The prediction data for all possible pairs of opponents are saved into the file “saferesults.Rds”. The prediction data is not Bayesian; that is, the model predictions are strictly based on pre-tournament performance.

# Load prediction data from model
# PredictionData is read by predict_ncaa_round(), which uses its TeamName.x,
# TeamName.y, Prediction and Win_Prob columns.
PredictionData <- readRDS("saferesults.Rds")
# Load the 2018 bracket data and team names
tourneyslots <- fread("NCAATourneySlots.csv") %>% filter(Season == 2018)
tourneySeeds <- fread('NCAATourneySeeds.csv') %>% filter(Season == 2018)
teams <- fread('teams.csv')

# Attach team names to seed info
tourneySeeds <- tourneySeeds %>% 
    left_join(teams, by = 'TeamID') %>% 
    select(Season, Seed, TeamID, TeamName)

# Manually set the play-in winners
# Overwrites column 4 of four slot rows with the seed code of the play-in
# winner. NOTE(review): the row numbers and the column position depend on the
# layout of NCAATourneySlots.csv after filtering - confirm if the file changes.
tourneyslots[5, 4] <- "W16b"
tourneyslots[10, 4] <- "W11a"
tourneyslots[18, 4] <- "X11b"
tourneyslots[29, 4] <- "Z16b"

# Complete Bracket
# Two joins on the seed code: the first adds the StrongSeed team, the second
# the WeakSeed team. Both add a TeamName column, so dplyr suffixes them as
# TeamName.x (strong seed) and TeamName.y (weak seed).
bracket <- tourneyslots %>% 
    left_join(select(tourneySeeds, -c(Season, TeamID)), by=c('StrongSeed'='Seed')) %>% 
    left_join(select(tourneySeeds, -c(Season, TeamID)), by=c('WeakSeed'='Seed')) 

Play-in Round

Model Predictions

First, we will append the model predicted spreads, the bookie spreads, and the differences between the two on to the bracket.

# Build the play-in table: model predicted spreads first, then the actual
# sportsbook spreads (which also adds Diff = Prediction - Spread)
playin <- predict_ncaa_round(round = 0) %>%
    attachSpreads(playinspreads)

A team needs to have a predicted spread differing by more than 2 points from the sportsbook spread in order to have greater than a 55% chance of winning ATS (against the spread), which is the minimum winning percentage needed to break even. In the ModelChoice variable, the model returns “No choice” if its prediction is within 2 points of the sportsbook spread; otherwise it returns the team it predicts has a greater than 55% chance of covering the spread.

# Add the team the model would bet on ATS (ModelChoice)
playin <- playin %>% ATSchoice()

# Output of Model Predictions ATS
playin %>% predictionTable()
StrongSeed WeakSeed TeamName.x TeamName.y Prediction Spread ModelChoice Diff
Z16a Z16b NC Central TX Southern 9.3 -4.5 NC Central 13.8
W11a W11b St Bonaventure UCLA 2.6 -3.5 St Bonaventure 6.1
W16a W16b Long Island Radford 0.9 -4.5 Long Island 5.4
X11a X11b Arizona St Syracuse 1.3 1.0 No choice 0.3
Note:
A positive value in Prediction or Spread indicates TeamName.x being favored by that many points. A negative value indicates TeamName.y being favored by that many points.

The model recommends: NC Central, St Bonaventure, and Long Island.

Play-in Game Results and Model Performance

# Append the final scores, then determine each game's ATS winner and grade
# the model's pick
playin <- playin %>%
    attachGameResults(playinresults) %>%
    ATSresults()

# Table of all outcomes
resultsTable(playin)
StrongSeed TeamName.x WeakSeed TeamName.y Prediction Spread ModelChoice Team.x.score Team.y.score ATSWinner Accuracy
W11a St Bonaventure W11b UCLA 2.6 -3.5 St Bonaventure 65 58 St Bonaventure Correct
W16a Long Island W16b Radford 0.9 -4.5 Long Island 61 71 Radford Incorrect
X11a Arizona St X11b Syracuse 1.3 1.0 No choice 56 60 Syracuse NA
Z16a NC Central Z16b TX Southern 9.3 -4.5 NC Central 46 64 TX Southern Incorrect
# Summary of results
# Counts each Accuracy label; table() leaves out NA by default, so games with
# no pick or no result are not counted.
table(playin$Accuracy)
## 
##   Correct Incorrect 
##         1         2

The model’s record was 1-2 ATS for the play-in games.

Round 1

Model Predictions

# Build the round 1 table in one pass: model predicted spreads, actual
# sportsbook spreads, then the team the model would bet on ATS
round1 <- predict_ncaa_round(1) %>%
    attachSpreads(round1spreads) %>%
    ATSchoice()

As in the play-in round, the model returns “No choice” if its prediction is within 2 points of the sportsbook spread. Otherwise, it returns the team it predicts has a greater than 55% chance of covering the spread.

There is something wrong with my predictions for the top seeds, especially 1s and 2s, so I stayed away from following my model on any of those. Other kagglers have had this same issue and have manually input values for them, some trouble I didn’t want to bother with until I had the rest of the model running well. I’ll focus on that at some point in the future. For now, I’m going to manually input “No Choice” for all 1 and 2 seeds of Villanova, Kansas, Duke, Purdue, Cincinnati, Xavier, UNC, and Virginia.

# Manually set 1 and 2 seeds to no choice
# Round 1 StrongSeed codes are a region letter followed by the two-digit seed
# ("W01", "X02", ...), so select the games by seed rather than by hard-coded
# row position; this stays correct if the row order of round1 changes.
top_seed_games <- substr(round1$StrongSeed, 2, 3) %in% c("01", "02")
round1$ModelChoice[top_seed_games] <- "No choice"
# Output of Model Predictions ATS
predictionTable(round1)
StrongSeed WeakSeed TeamName.x TeamName.y Prediction Spread ModelChoice Diff
Y01 Y16 Virginia UMBC 3.5 21.0 No choice -17.5
Y04 Y13 Arizona Buffalo -7.1 8.5 Buffalo -15.6
W01 W16b Villanova Radford 9.5 23.0 No choice -13.5
Z08 Z09 Missouri Florida St -11.0 -1.5 Florida St -9.5
W05 W12 West Virginia Murray St 1.1 10.5 Murray St -9.4
X04 X13 Auburn Col Charleston -0.1 9.0 Col Charleston -9.1
X01 X16 Kansas Penn 6.8 14.5 No choice -7.7
W04 W13 Wichita St Marshall 4.3 11.5 Marshall -7.2
W02 W15 Purdue CS Fullerton 13.4 20.5 No choice -7.1
Y02 Y15 Cincinnati Georgia St 7.1 14.0 No choice -6.9
X03 X14 Michigan St Bucknell 7.8 14.5 Bucknell -6.7
W08 W09 Virginia Tech Alabama 9.2 2.5 Virginia Tech 6.7
Z02 Z15 North Carolina Lipscomb 13.1 19.5 No choice -6.4
Y07 Y10 Nevada Texas 6.2 0.0 Nevada 6.2
Y03 Y14 Tennessee Wright St 5.4 11.5 Wright St -6.1
Z04 Z13 Gonzaga UNC Greensboro 7.8 13.5 UNC Greensboro -5.7
X02 X15 Duke Iona 15.0 20.0 No choice -5.0
X05 X12 Clemson New Mexico St -0.3 4.5 New Mexico St -4.8
Z03 Z14 Michigan Montana 5.2 10.0 Montana -4.8
Y06 Y11 Miami FL Loyola-Chicago 6.7 2.0 Miami FL 4.7
Z01 Z16b Xavier TX Southern 15.6 19.5 No choice -3.9
Y05 Y12 Kentucky Davidson 1.7 5.0 Davidson -3.3
X07 X10 Rhode Island Oklahoma 4.7 1.5 Rhode Island 3.2
X06 X11b TCU Syracuse 1.0 4.0 Syracuse -3.0
W06 W11a Florida St Bonaventure 8.0 5.5 Florida 2.5
Y08 Y09 Creighton Kansas St -1.1 1.0 Kansas St -2.1
X08 X09 Seton Hall NC State 1.1 3.0 No choice -1.9
Z06 Z11 Houston San Diego St 5.1 4.0 No choice 1.1
Z05 Z12 Ohio St S Dakota St 8.9 8.0 No choice 0.9
W03 W14 Texas Tech SF Austin 11.4 11.0 No choice 0.4
Z07 Z10 Texas A&M Providence 2.3 2.5 No choice -0.2
W07 W10 Arkansas Butler -1.4 -1.5 No choice 0.1
Note:
A positive value in Prediction or Spread indicates TeamName.x being favored by that many points. A negative value indicates TeamName.y being favored by that many points.

Round 1 Game Results and Model Performance

# Append the final scores, then determine each game's ATS winner and grade
# the model's pick
round1 <- round1 %>%
    attachGameResults(round1results) %>%
    ATSresults()

# Table of Outcomes in Round 1
resultsTable(round1)
StrongSeed TeamName.x WeakSeed TeamName.y Prediction Spread ModelChoice Team.x.score Team.y.score ATSWinner Accuracy
W01 Villanova W16b Radford 9.5 23.0 No choice 87 61 Villanova NA
W02 Purdue W15 CS Fullerton 13.4 20.5 No choice 74 48 Purdue NA
W03 Texas Tech W14 SF Austin 11.4 11.0 No choice 70 60 SF Austin NA
W04 Wichita St W13 Marshall 4.3 11.5 Marshall 75 81 Marshall Correct
W05 West Virginia W12 Murray St 1.1 10.5 Murray St 85 68 West Virginia Incorrect
W06 Florida W11a St Bonaventure 8.0 5.5 Florida 77 62 Florida Correct
W07 Arkansas W10 Butler -1.4 -1.5 No choice 62 79 Butler NA
W08 Virginia Tech W09 Alabama 9.2 2.5 Virginia Tech 83 86 Alabama Incorrect
X01 Kansas X16 Penn 6.8 14.5 No choice 76 60 Kansas NA
X02 Duke X15 Iona 15.0 20.0 No choice 89 67 Duke NA
X03 Michigan St X14 Bucknell 7.8 14.5 Bucknell 82 78 Bucknell Correct
X04 Auburn X13 Col Charleston -0.1 9.0 Col Charleston 62 58 Col Charleston Correct
X05 Clemson X12 New Mexico St -0.3 4.5 New Mexico St 79 68 Clemson Incorrect
X06 TCU X11b Syracuse 1.0 4.0 Syracuse 52 57 Syracuse Correct
X07 Rhode Island X10 Oklahoma 4.7 1.5 Rhode Island 83 78 Rhode Island Correct
X08 Seton Hall X09 NC State 1.1 3.0 No choice 94 83 Seton Hall NA
Y01 Virginia Y16 UMBC 3.5 21.0 No choice 54 74 UMBC NA
Y02 Cincinnati Y15 Georgia St 7.1 14.0 No choice 68 53 Cincinnati NA
Y03 Tennessee Y14 Wright St 5.4 11.5 Wright St 73 47 Tennessee Incorrect
Y04 Arizona Y13 Buffalo -7.1 8.5 Buffalo 68 89 Buffalo Correct
Y05 Kentucky Y12 Davidson 1.7 5.0 Davidson 78 73 Push Push
Y06 Miami FL Y11 Loyola-Chicago 6.7 2.0 Miami FL 62 64 Loyola-Chicago Incorrect
Y07 Nevada Y10 Texas 6.2 0.0 Nevada 87 83 Nevada Correct
Y08 Creighton Y09 Kansas St -1.1 1.0 Kansas St 59 69 Kansas St Correct
Z01 Xavier Z16b TX Southern 15.6 19.5 No choice 91 72 TX Southern NA
Z02 North Carolina Z15 Lipscomb 13.1 19.5 No choice 84 66 Lipscomb NA
Z03 Michigan Z14 Montana 5.2 10.0 Montana 61 47 Michigan Incorrect
Z04 Gonzaga Z13 UNC Greensboro 7.8 13.5 UNC Greensboro 68 64 UNC Greensboro Correct
Z05 Ohio St Z12 S Dakota St 8.9 8.0 No choice 81 73 Push NA
Z06 Houston Z11 San Diego St 5.1 4.0 No choice 67 65 San Diego St NA
Z07 Texas A&M Z10 Providence 2.3 2.5 No choice 73 69 Texas A&M NA
Z08 Missouri Z09 Florida St -11.0 -1.5 Florida St 54 67 Florida St Correct
# Summary of all tourney results
# Stack the play-in and round 1 tables, then count each Accuracy label;
# table() leaves out NA by default, so games with no pick are not counted.
entiretourney <- rbind(playin, round1)
table(entiretourney$Accuracy)
## 
##   Correct Incorrect      Push 
##        12         8         1

The model’s record was 11-6-1 ATS in round 1 action and is 12-8-1 ATS over the entire tournament. The model would have been 3-5 in the 1 and 2 seeded games.

One last interesting note is that a lot of 1 seeds seemed to not cover the first half spread. I heard this has some historical traction to it. Something to keep in mind next year.

# Recording Round 1 Winners into Round 2
# advance_winners() returns a copy of the global bracket with each round 1
# winner written into the game it feeds; reassigning it makes those matchups
# visible to predict_ncaa_round(2).
bracket <- advance_winners(round1)

Round 2

Model Predictions

The model’s picks for all round 2 games are in the ModelChoice column of the output table below; the rightmost column, Diff, is the model’s predicted spread minus the sportsbook spread.

# Build the round 2 table in one pass: model predicted spreads, actual
# sportsbook spreads, then the team the model would bet on ATS
round2 <- predict_ncaa_round(2) %>%
    attachSpreads(round2spreads) %>%
    ATSchoice()

# Output of model recommendations
predictionTable(round2)
StrongSeed WeakSeed TeamName.x TeamName.y Prediction Spread ModelChoice Diff
R1Y4 R1Y5 Buffalo Kentucky 2.2 -7.5 Buffalo 9.7
R1Y1 R1Y8 UMBC Kansas St -3.5 -12.0 UMBC 8.5
R1Z4 R1Z5 Gonzaga Ohio St 10.6 2.5 Gonzaga 8.1
R1W4 R1W5 Marshall West Virginia -4.6 -11.0 Marshall 6.4
R1X2 R1X7 Duke Rhode Island 3.8 10.0 Rhode Island -6.2
R1W2 R1W7 Purdue Butler 8.1 3.5 Purdue 4.6
R1Z1 R1Z8 Xavier Florida St -0.2 4.0 Florida St -4.2
R1X3 R1X6 Michigan St Syracuse 5.2 9.0 Syracuse -3.8
R1Z2 R1Z7 North Carolina Texas A&M 3.3 6.0 Texas A&M -2.7
R1W1 R1W8 Villanova Alabama 8.3 11.0 Alabama -2.7
R1X1 R1X8 Kansas Seton Hall 2.2 4.5 Seton Hall -2.3
R1Y2 R1Y7 Cincinnati Nevada 9.1 7.0 Cincinnati 2.1
R1X4 R1X5 Auburn Clemson -0.6 1.0 No choice -1.6
R1W3 R1W6 Texas Tech Florida -0.7 0.0 No choice -0.7
R1Z3 R1Z6 Michigan Houston 3.5 3.0 No choice 0.5
R1Y3 R1Y6 Tennessee Loyola-Chicago 5.8 6.0 No choice -0.2
Note:
A positive value in Prediction or Spread indicates TeamName.x being favored by that many points. A negative value indicates TeamName.y being favored by that many points.

The model is still picking against all #1 seeds, but no longer against all #2 seeds. So I’m going to start evaluating model on all recommendations, while still hesitant to wager on #1 seed games. The recommendations are:

  • Alabama +11
  • Purdue -3.5
  • Marshall +11
  • Seton Hall +4.5
  • URI +10
  • Syracuse +9
  • UMBC +12
  • Cincinnati -7
  • Buffalo +7.5
  • Florida St +4
  • Texas A&M +6
  • Gonzaga -2.5

Round 2 Game Results and Model Performance

# Append the final scores, then determine each game's ATS winner and grade
# the model's pick
round2 <- round2 %>%
    attachGameResults(round2results) %>%
    ATSresults()

# Table of round 2 results and outcomes
resultsTable(round2)
StrongSeed TeamName.x WeakSeed TeamName.y Prediction Spread ModelChoice Team.x.score Team.y.score ATSWinner Accuracy
R1W1 Villanova R1W8 Alabama 8.3 11.0 Alabama 81 58 Villanova Incorrect
R1W2 Purdue R1W7 Butler 8.1 3.5 Purdue 76 73 Butler Incorrect
R1W3 Texas Tech R1W6 Florida -0.7 0.0 No choice 69 66 Texas Tech NA
R1W4 Marshall R1W5 West Virginia -4.6 -11.0 Marshall 71 94 West Virginia Incorrect
R1X1 Kansas R1X8 Seton Hall 2.2 4.5 Seton Hall 83 79 Seton Hall Correct
R1X2 Duke R1X7 Rhode Island 3.8 10.0 Rhode Island 87 62 Duke Incorrect
R1X3 Michigan St R1X6 Syracuse 5.2 9.0 Syracuse 53 55 Syracuse Correct
R1X4 Auburn R1X5 Clemson -0.6 1.0 No choice 53 84 Clemson NA
R1Y1 UMBC R1Y8 Kansas St -3.5 -12.0 UMBC 43 50 UMBC Correct
R1Y2 Cincinnati R1Y7 Nevada 9.1 7.0 Cincinnati 73 75 Nevada Incorrect
R1Y3 Tennessee R1Y6 Loyola-Chicago 5.8 6.0 No choice 62 63 Loyola-Chicago NA
R1Y4 Buffalo R1Y5 Kentucky 2.2 -7.5 Buffalo 75 95 Kentucky Incorrect
R1Z1 Xavier R1Z8 Florida St -0.2 4.0 Florida St 70 75 Florida St Correct
R1Z2 North Carolina R1Z7 Texas A&M 3.3 6.0 Texas A&M 65 86 Texas A&M Correct
R1Z3 Michigan R1Z6 Houston 3.5 3.0 No choice 64 63 Houston NA
R1Z4 Gonzaga R1Z5 Ohio St 10.6 2.5 Gonzaga 90 84 Gonzaga Correct
# Summary of all tourney results
# Add round 2 to the running table, then count each Accuracy label; table()
# leaves out NA by default, so games with no pick are not counted.
entiretourney <- rbind(entiretourney, round2)
table(entiretourney$Accuracy)
## 
##   Correct Incorrect      Push 
##        18        14         1

In round 2, the model was 6-6 ATS. In total, the model is 18-14-1 through the first two rounds of the tournament for an overall success rate of 56.25%.

# Recording Round 2 Winners into Round 3
# advance_winners() returns a copy of the global bracket with each round 2
# winner written into the game it feeds; reassigning it makes those matchups
# visible to predict_ncaa_round(3).
bracket <- advance_winners(round2)

Round 3 - Sweet 16

Model Predictions

The model’s picks for all round 3 games are in the ModelChoice column of the output table below; the rightmost column, Diff, is the model’s predicted spread minus the sportsbook spread.

# Build the round 3 table in one pass: model predicted spreads, actual
# sportsbook spreads, then the team the model would bet on ATS
round3 <- predict_ncaa_round(3) %>%
    attachSpreads(round3spreads) %>%
    ATSchoice()

# Output of model recommendations
predictionTable(round3)
StrongSeed WeakSeed TeamName.x TeamName.y Prediction Spread ModelChoice Diff
R2X2 R2X3 Duke Syracuse 4.3 11.0 Syracuse -6.7
R2Y1 R2Y4 Kansas St Kentucky -2.3 -6.0 Kansas St 3.7
R2X1 R2X4 Kansas Clemson 8.2 4.5 Kansas 3.7
R2W1 R2W4 Villanova West Virginia 2.9 5.5 West Virginia -2.6
R2Y2 R2Y3 Nevada Loyola-Chicago -0.8 1.5 Loyola-Chicago -2.3
R2W2 R2W3 Purdue Texas Tech 0.6 1.5 No choice -0.9
R2Z1 R2Z4 Florida St Gonzaga -5.9 -5.5 No choice -0.4
R2Z2 R2Z3 Texas A&M Michigan -2.9 -2.5 No choice -0.4
Note:
A positive value in Prediction or Spread indicates TeamName.x being favored by that many points. A negative value indicates TeamName.y being favored by that many points.

The model recommendations for round 3 are not as strong as in previous rounds. The recommendations in order of strength are:

  • Syracuse +11
  • Kansas St +6
  • Kansas -4.5

Fringe picks:

  • West Virginia +5.5
  • Loyola-Chicago +1.5

Round 3 Game Results and Model Performance

# Append the final scores, then determine each game's ATS winner and grade
# the model's pick
round3 <- round3 %>%
    attachGameResults(round3results) %>%
    ATSresults()

# Table of round 3 results and outcomes
resultsTable(round3)
StrongSeed TeamName.x WeakSeed TeamName.y Prediction Spread ModelChoice Team.x.score Team.y.score ATSWinner Accuracy
R2W1 Villanova R2W4 West Virginia 2.9 5.5 West Virginia 90 78 Villanova Incorrect
R2W2 Purdue R2W3 Texas Tech 0.6 1.5 No choice 65 78 Texas Tech NA
R2X1 Kansas R2X4 Clemson 8.2 4.5 Kansas 80 76 Clemson Incorrect
R2X2 Duke R2X3 Syracuse 4.3 11.0 Syracuse 69 65 Syracuse Correct
R2Y1 Kansas St R2Y4 Kentucky -2.3 -6.0 Kansas St 61 58 Kansas St Correct
R2Y2 Nevada R2Y3 Loyola-Chicago -0.8 1.5 Loyola-Chicago 68 69 Loyola-Chicago Correct
R2Z1 Florida St R2Z4 Gonzaga -5.9 -5.5 No choice 75 60 Florida St NA
R2Z2 Texas A&M R2Z3 Michigan -2.9 -2.5 No choice 72 99 Michigan NA
# Summary of all tourney results
# Add round 3 to the running table, then count each Accuracy label; table()
# leaves out NA by default, so games with no pick are not counted.
entiretourney <- rbind(entiretourney, round3)
table(entiretourney$Accuracy)
## 
##   Correct Incorrect      Push 
##        21        16         1

The model went 3-2 ATS in the Sweet 16 and is now 21-16-1 throughout the tourney.

# Recording Round 3 Winners into Round 4
# advance_winners() returns a copy of the global bracket with each round 3
# winner written into the game it feeds; reassigning it makes those matchups
# visible to predict_ncaa_round(4).
bracket <- advance_winners(round3)

Round 4 - Elite 8

Model Predictions

The model’s picks for all round 4 games are in the ModelChoice column of the output table below; the rightmost column, Diff, is the model’s predicted spread minus the sportsbook spread.

# Build the round 4 table in one pass: model predicted spreads, actual
# sportsbook spreads, then the team the model would bet on ATS
round4 <- predict_ncaa_round(4) %>%
    attachSpreads(round4spreads) %>%
    ATSchoice()

# Output of model recommendations
predictionTable(round4)
StrongSeed WeakSeed TeamName.x TeamName.y Prediction Spread ModelChoice Diff
R3W1 R3W2 Villanova Texas Tech -5.1 7.0 Texas Tech -12.1
R3Y1 R3Y2 Kansas St Loyola-Chicago -1.9 1.5 Loyola-Chicago -3.4
R3Z1 R3Z2 Florida St Michigan -1.7 -4.5 Florida St 2.8
R3X1 R3X2 Kansas Duke -3.4 -3.5 No choice 0.1
Note:
A positive value in Prediction or Spread indicates TeamName.x being favored by that many points. A negative value indicates TeamName.y being favored by that many points.

The model recommendations for round 4 are Texas Tech, Loyola-Chicago, and Florida St.

Round 4 Game Results and Model Performance

# Append the final scores, then determine each game's ATS winner and grade
# the model's pick
round4 <- round4 %>%
    attachGameResults(round4results) %>%
    ATSresults()

# Table of results
resultsTable(round4)
StrongSeed TeamName.x WeakSeed TeamName.y Prediction Spread ModelChoice Team.x.score Team.y.score ATSWinner Accuracy
R3W1 Villanova R3W2 Texas Tech -5.1 7.0 Texas Tech 71 59 Villanova Incorrect
R3X1 Kansas R3X2 Duke -3.4 -3.5 No choice 85 81 Kansas NA
R3Y1 Kansas St R3Y2 Loyola-Chicago -1.9 1.5 Loyola-Chicago 62 78 Loyola-Chicago Correct
R3Z1 Florida St R3Z2 Michigan -1.7 -4.5 Florida St 54 58 Florida St Correct
# Summary of all tourney results
# Add round 4 to the running table, then count each Accuracy label; table()
# leaves out NA by default, so games with no pick are not counted.
entiretourney <- rbind(entiretourney, round4)
table(entiretourney$Accuracy)
## 
##   Correct Incorrect      Push 
##        23        17         1

The model went 2-1 ATS in the Elite 8 and is now 23-17-1 through the entire tourney for a 57.5% success rate. Considering the sample size of 40 games, a 95% confidence interval for the true model’s accuracy is between 42.2% and 72.8%.

# Recording Round 4 Winners into Round 5
# advance_winners() returns a copy of the global bracket with each round 4
# winner written into the game it feeds; reassigning it makes those matchups
# visible to predict_ncaa_round(5).
bracket <- advance_winners(round4)

Round 5 - Final 4

Model Predictions

The model’s picks for all round 5 games are in the ModelChoice column of the output table below; the rightmost column, Diff, is the model’s predicted spread minus the sportsbook spread.

# Build the round 5 table in one pass: model predicted spreads, actual
# sportsbook spreads, then the team the model would bet on ATS
round5 <- predict_ncaa_round(5) %>%
    attachSpreads(round5spreads) %>%
    ATSchoice()

# Output of model recommendations
predictionTable(round5)
StrongSeed WeakSeed TeamName.x TeamName.y Prediction Spread ModelChoice Diff
R4W1 R4X1 Villanova Kansas 1.9 5.0 Kansas -3.1
R4Y1 R4Z1 Loyola-Chicago Michigan -6.8 -5.5 No choice -1.3
Note:
A positive value in Prediction or Spread indicates TeamName.x being favored by that many points. A negative value indicates TeamName.y being favored by that many points.

The only model recommendation for the Final Four is Kansas.

Round 5 Game Results and Model Performance

# Append the final scores, then determine each game's ATS winner and grade
# the model's pick
round5 <- round5 %>%
    attachGameResults(round5results) %>%
    ATSresults()

# Table of results
resultsTable(round5)
StrongSeed TeamName.x WeakSeed TeamName.y Prediction Spread ModelChoice Team.x.score Team.y.score ATSWinner Accuracy
R4W1 Villanova R4X1 Kansas 1.9 5.0 Kansas 95 79 Villanova Incorrect
R4Y1 Loyola-Chicago R4Z1 Michigan -6.8 -5.5 No choice 57 69 Michigan NA
# Summary of all tourney results
# Add round 5 to the running table, then count each Accuracy label; table()
# leaves out NA by default, so games with no pick are not counted.
entiretourney <- rbind(entiretourney, round5)
table(entiretourney$Accuracy)
## 
##   Correct Incorrect      Push 
##        23        18         1

Kansas was incorrect and the model is now 23-18-1 through the tournament.

# Recording Round 5 Winners into Round 6
# advance_winners() returns a copy of the global bracket with each round 5
# winner written into the game it feeds; reassigning it makes the championship
# matchup visible to predict_ncaa_round(6).
bracket <- advance_winners(round5)

Round 6 - NCAA Championship

Model Predictions

# Build the championship table in one pass: model predicted spread, actual
# sportsbook spread, then the team the model would bet on ATS
round6 <- predict_ncaa_round(6) %>%
    attachSpreads(round6spreads) %>%
    ATSchoice()

# Output of model recommendations
predictionTable(round6)
StrongSeed WeakSeed TeamName.x TeamName.y Prediction Spread ModelChoice Diff
R5WX R5YZ Villanova Michigan 2.9 7 Michigan -4.1
Note:
A positive value in Prediction or Spread indicates TeamName.x being favored by that many points. A negative value indicates TeamName.y being favored by that many points.

The model’s recommendation for the NCAA Championship is Michigan. It says Villanova is about 3 points better, rather than the 7 point spread.

Round 6 Game Results and Model Performance

Michigan did not cover the spread, which is another loss for the model.

Over the course of the tournament, the model had a record of 23-19-1 ATS for 54.8% accuracy. That came in just under the 55% accuracy that was the objective of the model. I might revisit this to do further analysis and also will look for ways to improve the prediction model in next season’s tournament.