library(tidyr)
library(e1071)
library(tidyverse)
library(readxl)
library(lubridate)

Loading Dataset

getwd()
## [1] "/Users/lemonpie/Desktop/bet"
setwd("/Users/lemonpie/Desktop/bet")
bet <- read_excel("dataset.xlsx")
bet %>%
  head(10)
## # A tibble: 10 x 10
##    Sharp      SportTypeAbbr GameDate            GameWeek Away  Home  Pick   Line
##    <chr>      <chr>         <dttm>                 <dbl> <chr> <chr> <chr> <dbl>
##  1 Al Iannaz~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~  -9  
##  2 Ben Volin  NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Hous~   9  
##  3 Benjamin ~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Hous~   9  
##  4 Bob Glaub~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~  -9  
##  5 Brad Gagn~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~  -9.5
##  6 Brent Sob~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Hous~   9.5
##  7 Brian Cos~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~  -9.5
##  8 Bryan Dav~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~  -9.5
##  9 Calvin Wa~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~  -9.5
## 10 Case Keef~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Hous~   9.5
## # ... with 2 more variables: PickType <chr>, Result <chr>
summary(bet)
##     Sharp           SportTypeAbbr         GameDate                  
##  Length:8113        Length:8113        Min.   :2020-09-11 00:20:00  
##  Class :character   Class :character   1st Qu.:2020-09-20 17:00:00  
##  Mode  :character   Mode  :character   Median :2020-09-27 17:00:00  
##                                        Mean   :2020-10-05 22:49:19  
##                                        3rd Qu.:2020-10-04 17:00:00  
##                                        Max.   :2021-01-24 20:05:00  
##     GameWeek         Away               Home               Pick          
##  Min.   :1.000   Length:8113        Length:8113        Length:8113       
##  1st Qu.:2.000   Class :character   Class :character   Class :character  
##  Median :3.000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2.728                                                           
##  3rd Qu.:4.000                                                           
##  Max.   :4.000                                                           
##       Line             PickType            Result         
##  Min.   :-190.0000   Length:8113        Length:8113       
##  1st Qu.:  -6.0000   Class :character   Class :character  
##  Median :  -2.5000   Mode  :character   Mode  :character  
##  Mean   :  -0.1064                                        
##  3rd Qu.:   5.5000                                        
##  Max.   : 285.0000

Data Wrangling

#Create three different datasets to seperate Moneyline, Over&Under and ATS Bets

moneyline <- filter(bet, PickType == "Moneyline")

head(moneyline)
## # A tibble: 6 x 10
##   Sharp       SportTypeAbbr GameDate            GameWeek Away  Home  Pick   Line
##   <chr>       <chr>         <dttm>                 <dbl> <chr> <chr> <chr> <dbl>
## 1 Doc         NFL           2020-09-13 17:00:00        1 Seat~ Atla~ Atla~   115
## 2 John Fisher NFL           2020-09-13 17:00:00        1 Gree~ Minn~ Gree~   146
## 3 Mark Franco NFL           2020-09-13 17:00:00        1 Seat~ Atla~ Seat~  -112
## 4 Matt Blunt  NFL           2020-09-13 17:00:00        1 Seat~ Atla~ Seat~  -112
## 5 Matt Blunt  NFL           2020-09-13 17:00:00        1 Gree~ Minn~ Minn~  -135
## 6 Michael Bl~ NFL           2020-09-13 17:00:00        1 Gree~ Minn~ Minn~  -135
## # ... with 2 more variables: PickType <chr>, Result <chr>
over <- filter(bet, PickType == "Over")
under <- filter(bet, PickType == "Under")

#bind the over and under dataframes
overunder <- rbind(over, under)

head(overunder)
## # A tibble: 6 x 10
##   Sharp       SportTypeAbbr GameDate            GameWeek Away  Home  Pick   Line
##   <chr>       <chr>         <dttm>                 <dbl> <chr> <chr> <chr> <dbl>
## 1 Kyle Hunter NFL           2020-09-13 20:05:00        1 Los ~ Cinc~ Los ~  49.5
## 2 MIT Simula~ NFL           2020-09-13 20:05:00        1 Los ~ Cinc~ Los ~  49.5
## 3 Pat Hawkins NFL           2020-09-13 20:05:00        1 Los ~ Cinc~ Los ~  49.5
## 4 Bruce Mars~ NFL           2020-09-13 20:25:00        1 Tamp~ New ~ New ~  47.5
## 5 John Fisher NFL           2020-09-13 20:25:00        1 Tamp~ New ~ New ~  47.5
## 6 John Fisher NFL           2020-09-13 20:25:00        1 Ariz~ San ~ San ~  46  
## # ... with 2 more variables: PickType <chr>, Result <chr>
ats <- filter(bet, PickType == "ATS")

ats
## # A tibble: 7,929 x 10
##    Sharp      SportTypeAbbr GameDate            GameWeek Away  Home  Pick   Line
##    <chr>      <chr>         <dttm>                 <dbl> <chr> <chr> <chr> <dbl>
##  1 Al Iannaz~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~  -9  
##  2 Ben Volin  NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Hous~   9  
##  3 Benjamin ~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Hous~   9  
##  4 Bob Glaub~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~  -9  
##  5 Brad Gagn~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~  -9.5
##  6 Brent Sob~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Hous~   9.5
##  7 Brian Cos~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~  -9.5
##  8 Bryan Dav~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~  -9.5
##  9 Calvin Wa~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~  -9.5
## 10 Case Keef~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Hous~   9.5
## # ... with 7,919 more rows, and 2 more variables: PickType <chr>, Result <chr>
 ats_raw <- ats
#create column- did the sharps pick the hometeam in their bet?

ats$homepick = ifelse(ats$Home == ats$Pick, 1, 0)
ats %>% relocate(homepick, .before = Line)
## # A tibble: 7,929 x 11
##    Sharp   SportTypeAbbr GameDate            GameWeek Away  Home  Pick  homepick
##    <chr>   <chr>         <dttm>                 <dbl> <chr> <chr> <chr>    <dbl>
##  1 Al Ian~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~        1
##  2 Ben Vo~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Hous~        0
##  3 Benjam~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Hous~        0
##  4 Bob Gl~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~        1
##  5 Brad G~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~        1
##  6 Brent ~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Hous~        0
##  7 Brian ~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~        1
##  8 Bryan ~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~        1
##  9 Calvin~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Kans~        1
## 10 Case K~ NFL           2020-09-11 00:20:00        1 Hous~ Kans~ Hous~        0
## # ... with 7,919 more rows, and 3 more variables: Line <dbl>, PickType <chr>,
## #   Result <chr>
#change result column to 1,0 depending on Win or Loss

ats$result<-ifelse(ats$Result =="Win", 1 , 0)


#absolute value of spread line
ats$absline <- abs(ats$Line)


#favorites
ats$favorite<-ifelse(ats$Line < 0,1,0)


#make new column- change time to categorical values- 

#w1 <- as.POSIXct("2020-09-11 00:20:00")
#w1end <- as.POSIXct("2020-09-27 05:00:00")
#int <- new_interval(date1, date2)

#df[df$datetime %within% int,]

#The times seem to be off by four hours, possibly due to time zones as no NFL game starts at 12:20 AM, either way not used. 
#Mutated ats_raw 

df <- ats_raw
df2 <- df %>%
  mutate(Favorite = case_when(Pick == Home & Line < 0 ~ Home,
                              Pick == Home & Line > 0 ~ Away,
                              Pick == Away & Line < 0 ~ Away,
                              Pick == Away & Line > 0 ~ Home)) %>%
  select(-c(SportTypeAbbr, Line, PickType)) %>%
  mutate(FavoriteWin = case_when(Pick == Favorite & Result == "Win" ~ 1,
                                  Pick != Favorite & Result == "Loss" ~ 1,
                                  Pick == Favorite & Result == "Loss" ~ 0,
                                  Pick != Favorite & Result == "Win" ~ 0)) %>%
  select(-c(GameWeek, Result)) %>%
  mutate(Pick = case_when(Pick == Favorite ~ "Favorite", Pick != Favorite ~ "Non-Favorite")) %>%
  select(Sharp, GameDate, Home, Away, Favorite, FavoriteWin, Pick) %>%
  mutate(PickFav = case_when(Pick == "Favorite" ~ 1, Pick == "Non-Favorite" ~ 0)) %>%
  mutate(PickNonFav = case_when(Pick == "Non-Favorite" ~ 1, Pick == "Favorite" ~ 0)) %>%
  select(-Pick) %>%
  group_by(GameDate, Home, Away, Favorite, FavoriteWin) %>%
  summarize(PickFavSum = sum(PickFav), PickNonFavSum = sum(PickNonFav)) %>%
  mutate(PickTotal = PickFavSum + PickNonFavSum) %>%
  mutate(PercFav = 100 * PickFavSum/PickTotal) %>%
  select(-c(PickFavSum, PickNonFavSum))
## `summarise()` has grouped output by 'GameDate', 'Home', 'Away', 'Favorite'. You
## can override using the `.groups` argument.
#Create bins of 5% to look at distribution
df3 <- df2 %>% 
  # filter(FavoriteWin == 1) %>%
  filter(PercFav >= 50) %>%
  mutate(PercFav = case_when(PercFav >= 50 & PercFav < 55 ~ 50,
                             PercFav >= 55 & PercFav < 60 ~ 55,
                             PercFav >= 60 & PercFav < 65 ~ 60,
                             PercFav >= 65 & PercFav < 70 ~ 65,
                             PercFav >= 70 & PercFav < 75 ~ 70,
                             PercFav >= 75 & PercFav < 80 ~ 75,
                             PercFav >= 80 & PercFav < 85 ~ 80,
                             PercFav >= 85 & PercFav < 90 ~ 85,
                             PercFav >= 90 & PercFav < 95 ~ 90,
                             PercFav >= 95 & PercFav < 100 ~ 95)) %>%
  filter(PercFav >= 50)

df3 
## # A tibble: 65 x 7
## # Groups:   GameDate, Home, Away, Favorite [65]
##    GameDate            Home        Away   Favorite FavoriteWin PickTotal PercFav
##    <dttm>              <chr>       <chr>  <chr>          <dbl>     <dbl>   <dbl>
##  1 2020-09-11 00:20:00 Kansas City Houst~ Kansas ~           1        78      50
##  2 2020-09-13 17:00:00 Atlanta     Seatt~ Seattle            1        84      50
##  3 2020-09-13 17:00:00 Buffalo     New Y~ Buffalo            1        75      75
##  4 2020-09-13 17:00:00 Carolina    Las V~ Las Veg~           1        86      50
##  5 2020-09-13 17:00:00 New England Miami  New Eng~           1        94      50
##  6 2020-09-13 20:05:00 Cincinnati  Los A~ Los Ang~           0        33      75
##  7 2020-09-13 20:25:00 New Orleans Tampa~ New Orl~           1        91      55
##  8 2020-09-14 00:20:00 Los Angeles Dallas Dallas             0        83      55
##  9 2020-09-14 23:10:00 New York    Pitts~ Pittsbu~           1        82      70
## 10 2020-09-15 02:20:00 Denver      Tenne~ Tenness~           0        55      70
## # ... with 55 more rows
df4 <- df3 %>% group_by(PercFav) %>% summarize(avgwin = mean(FavoriteWin)*100, n = length(FavoriteWin))
  
  
df4
## # A tibble: 8 x 3
##   PercFav avgwin     n
##     <dbl>  <dbl> <int>
## 1      50   55.6     9
## 2      55   50      10
## 3      60   56.2    16
## 4      65   33.3    12
## 5      70   70      10
## 6      75   60       5
## 7      80    0       1
## 8      90  100       2
barplot(height = df4$avgwin, names.arg = df4$PercFav, beside=TRUE, 
     col="#69b3a2", xlab="5% bins of Sharps Taking the Favorite", ylab="% of Bet Hitting" , main = "As more sharps take the favorite, do teams cover more?")

There was only 1 game in the bin 80-85% of sharps taking the favorite, hence the misleading 0% in the graph. Positive relationship between more sharps taking the favorite and teams covering!

#Who were the best sharps?

gf <- glm(result ~ Sharp - 1, family = binomial, data = ats)
1/(1 + exp(-sort(gf$coefficients)))
##         SharpZach Reeder       SharpMIT Simulator    SharpJustin Terranova 
##             3.488403e-06             2.500000e-01             2.857143e-01 
##          SharpGold Sheet          SharpAlex Smart     SharpSpencer Schultz 
##             2.857143e-01             2.941176e-01             3.000000e-01 
##          SharpScott Bell         SharpZack Cimini        SharpTom Fornelli 
##             3.043478e-01             3.333333e-01             3.461538e-01 
##      SharpBarrett Sallee          SharpMatt Blunt       SharpMarc Lawrence 
##             3.461538e-01             3.500000e-01             3.500000e-01 
##       SharpMichael Black      SharpBruce Marshall         SharpBill Bender 
##             3.500000e-01             3.529412e-01             3.750000e-01 
##        SharpTed Holmlund           SharpMark Hale        SharpBrian Bitler 
##             3.809524e-01             3.809524e-01             3.846154e-01 
##      SharpCorby Davidson          SharpAndy Iskoe       SharpDan Harralson 
##             3.913043e-01             4.000000e-01             4.090909e-01 
##     SharpNick Shepkowski           SharpBen Volin        SharpJeff Hartman 
##             4.090909e-01             4.098361e-01             4.098361e-01 
##        SharpErik Sommers        SharpAidan Curran        SharpMatt Stagner 
##             4.130435e-01             4.137931e-01             4.222222e-01 
##         SharpDennis Dodd          SharpJerry Palm         SharpJoe Deleone 
##             4.230769e-01             4.230769e-01             4.230769e-01 
##           SharpBen Burns         SharpClay Travis         SharpJohn Halpin 
##             4.285714e-01             4.285714e-01             4.310345e-01 
##            SharpPat Lane           SharpMike Clay        SharpKegan Reneau 
##             4.333333e-01             4.354839e-01             4.375000e-01 
##       SharpPhil Harrison     SharpCynthia Frelund       SharpJoe Manniello 
##             4.390244e-01             4.406780e-01             4.426230e-01 
##      SharpBrian Costello          SharpNate Davis          SharpJohn Dixon 
##             4.426230e-01             4.426230e-01             4.444444e-01 
##        SharpJori Epstein   SharpChristopher Gates       SharpPaul Schwartz 
##             4.482759e-01             4.482759e-01             4.500000e-01 
##            SharpASA Wins      SharpGill Alexander          SharpMatt Fargo 
##             4.500000e-01             4.512195e-01             4.545455e-01 
##       SharpPFF Analytics       SharpTim Cowlishaw      SharpChip Patterson 
##             4.571429e-01             4.590164e-01             4.615385e-01 
##      SharpWarren Ludford         SharpSteve Serby          SharpJoe Nelson 
##             4.642857e-01             4.655172e-01             4.666667e-01 
##           SharpPaul Bovi   SharpMaster Tesfatsion        SharpWill Brinson 
##             4.666667e-01             4.666667e-01             4.677419e-01 
##           SharpVic Tafur           SharpTim Lynch       SharpHank Goldberg 
##             4.677419e-01             4.680851e-01             4.736842e-01 
##     SharpMark Cannizzaro       SharpLorenzo Reyes     SharpMichael Gehlken 
##             4.754098e-01             4.754098e-01             4.754098e-01 
##          SharpChris Shaw         SharpJimmy Adams      SharpZach Braziller 
##             4.761905e-01             4.761905e-01             4.761905e-01 
##     SharpFiveThirtyEight      SharpChip Chirimbes           SharpOddsShark 
##             4.814815e-01             4.814815e-01             4.827586e-01 
##          SharpDamon Marx         SharpDave Blezow           SharpMatt Lane 
##             4.833333e-01             4.838710e-01             4.888889e-01 
##     SharpJonathan Delong        SharpJeff Feyerer         SharpDrew Loftis 
##             4.888889e-01             5.000000e-01             5.000000e-01 
##           SharpJim Feist         SharpJim McBride     SharpGregg Rosenthal 
##             5.000000e-01             5.000000e-01             5.000000e-01 
##         SharpJustin Hier         SharpJohn Breech       SharpDaniel Belton 
##             5.000000e-01             5.000000e-01             5.000000e-01 
##                 SharpDoc         SharpKyle Hunter     SharpLester Wiltfong 
##             5.000000e-01             5.000000e-01             5.000000e-01 
##       SharpGary Gramling         SharpDavid Moore            SharpTom Rock 
##             5.000000e-01             5.000000e-01             5.081967e-01 
##        SharpCraig Miller         SharpPete Fiutak   SharpKevin Sherrington 
##             5.084746e-01             5.116279e-01             5.119048e-01 
##      SharpNeil Greenberg         SharpBob Glauber SharpMichael David Smith 
##             5.161290e-01             5.161290e-01             5.161290e-01 
##      SharpCalvin Watkins      SharpKeagan Stiefel    SharpBernd Buchmasser 
##             5.166667e-01             5.172414e-01             5.172414e-01 
##              SharpMarima     SharpSam Householder          SharpBrett Vito 
##             5.172414e-01             5.172414e-01             5.217391e-01 
##       SharpChuck Carlton            SharpSam Blum         SharpPaul Noonan 
##             5.217391e-01             5.217391e-01             5.217391e-01 
##        SharpTadd Haislop         SharpJared Dubin          SharpGreg Smith 
##             5.227273e-01             5.238095e-01             5.238095e-01 
##        SharpNewy Scruggs       SharpShannon White         SharpMike Florio 
##             5.243902e-01             5.245902e-01             5.245902e-01 
##          SharpSam Farmer     SharpDaniel Jeremiah       SharpStephen Nover 
##             5.245902e-01             5.250000e-01             5.294118e-01 
##           SharpTom Schad       SharpBrian Edwards       SharpTara Sullivan 
##             5.322581e-01             5.333333e-01             5.333333e-01 
##        SharpScott Steehn       SharpBen Kercheval        SharpBill Marzano 
##             5.375000e-01             5.384615e-01             5.384615e-01 
##       SharpPeter Daubert    SharpJason La Canfora           SharpGary Bart 
##             5.384615e-01             5.384615e-01             5.405405e-01 
##        SharpJarrett Bell         SharpJohn Owning        SharpHowie Kussoy 
##             5.409836e-01             5.409836e-01             5.432099e-01 
##            SharpStitches        SharpMarcas Grant         SharpJohn Fisher 
##             5.432099e-01             5.454545e-01             5.454545e-01 
##       SharpCFN Consensus       SharpColleen Wolfe         SharpPete Prisco 
##             5.454545e-01             5.454545e-01             5.483871e-01 
##         SharpMs.Charleen        SharpMichael Beck    SharpBenjamin Hoffman 
##             5.483871e-01             5.483871e-01             5.500000e-01 
##       SharpMatthew Terry        SharpDave Richard           SharpAdam Rank 
##             5.517241e-01             5.555556e-01             5.555556e-01 
##          SharpNick Shook          SharpTom Childs       SharpWhiskeyranger 
##             5.555556e-01             5.555556e-01             5.555556e-01 
##      SharpDave Schofield      SharpGary Davenport      SharpNeil The Greek 
##             5.573770e-01             5.573770e-01             5.625000e-01 
##       SharpKeith Stewart   SharpGeoffrey Benedict       SharpRyan Dunleavy 
##             5.625000e-01             5.645161e-01             5.645161e-01 
##      SharpJose Rodriguez       SharpBradley Smith        SharpFrank Schwab 
##             5.652174e-01             5.652174e-01             5.666667e-01 
##         SharpBrad Gagnon      SharpBrent Sobleski       SharpAl Iannazzone 
##             5.666667e-01             5.666667e-01             5.762712e-01 
##            SharpRon Kopp         SharpCase Keefer       SharpSheil Kapadia 
##             5.777778e-01             5.789474e-01             5.806452e-01 
##         SharpDana Larson       SharpJacob Infante            SharpCT Smith 
##             5.806452e-01             5.806452e-01             5.862069e-01 
##       SharpEric Thompson           SharpChad Finn        SharpPete Sweeney 
##             5.862069e-01             5.869565e-01             5.869565e-01 
##     SharpJamey Eisenberg         SharpPat Hawkins       SharpJohnny Parlay 
##             5.873016e-01             5.882353e-01             5.909091e-01 
##        SharpEvan Western         SharpBrian Hines       SharpEd Brodmarkle 
##             5.909091e-01             6.000000e-01             6.000000e-01 
##         SharpRyan Wilson         SharpRyan Fowler         SharpCraig Stout 
##             6.060606e-01             6.065574e-01             6.086957e-01 
##        SharpKent Swanson          SharpRic Renner      SharpRobert Schmitz 
##             6.086957e-01             6.086957e-01             6.136364e-01 
##        SharpKalyn Kahler        SharpKevin Rogers         SharpBrian Lewis 
##             6.136364e-01             6.153846e-01             6.190476e-01 
##        SharpFrank Platko         SharpMatt Miller      SharpBill Zimmerman 
##             6.206897e-01             6.222222e-01             6.222222e-01 
##        SharpJeremy Mauss          SharpMike Jones       SharpMicah Roberts 
##             6.265060e-01             6.271186e-01             6.363636e-01 
##         SharpBryan Davis        SharpJon Meerdink      SharpScott Thurston 
##             6.451613e-01             6.451613e-01             6.500000e-01 
##     SharpPatrick Schmidt         SharpJon Barnett         SharpVinnie Iyer 
##             6.538462e-01             6.551724e-01             6.612903e-01 
##        SharpIan St Clair         SharpKevin Nogle        SharpJoe Williams 
##             6.875000e-01             7.000000e-01             7.142857e-01 
##         SharpMark Franco         SharpJeremy Conn     SharpScott Pritchard 
##             8.000000e-01             9.999965e-01             9.999965e-01 
##     SharpJonathan Jorcin         SharpRay Monohan            SharpCJ Olson 
##             9.999965e-01             9.999965e-01             9.999965e-01

This list has its limitations. Zach Reeder was the "worst" sharp but only had one bet. Scott Prichard, CJ Olson and other sharps that had a 100% hit rate only had one bet as well.

Logistic Model looking at Home Team pick and Home Team covering

#Tried a different way to manipulate the data
gdf <- ats %>% mutate(Matchup = paste(GameWeek,Home,Away)) %>% group_by(Matchup) %>% filter(Result != "Push") %>% 
  summarize(HTC = any((Result == "Win") & (Pick == Home)) || any((Result == "Loss") & (Pick == "Away")),
            HTP = mean(Pick == Home) - 0.5,
            ABL = mean(absline)
            )
gdf
## # A tibble: 102 x 4
##    Matchup                     HTC       HTP   ABL
##    <chr>                       <lgl>   <dbl> <dbl>
##  1 1 Atlanta Seattle           FALSE -0.0357  1.95
##  2 1 Baltimore Cleveland       TRUE  -0.0402  7.70
##  3 1 Buffalo New York          TRUE   0.26    6.48
##  4 1 Carolina Las Vegas        FALSE -0.0116  3.01
##  5 1 Cincinnati Los Angeles    TRUE   0.0455  3.03
##  6 1 Denver Tennessee          TRUE  -0.158   2.20
##  7 1 Detroit Chicago           FALSE  0.0517  2.91
##  8 1 Jacksonville Indianapolis TRUE  -0.125   7.89
##  9 1 Kansas City Houston       TRUE   0.0128  9.38
## 10 1 Los Angeles Dallas        TRUE  -0.0904  2.91
## # ... with 92 more rows
lrfit <- glm(HTC ~ HTP*ABL, family = binomial, data = gdf)
summary(lrfit)
## 
## Call:
## glm(formula = HTC ~ HTP * ABL, family = binomial, data = gdf)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7099  -1.1240   0.1649   1.1775   1.4460  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.12998    0.31679  -0.410    0.682
## HTP          0.61699    1.76856   0.349    0.727
## ABL          0.02483    0.03208   0.774    0.439
## HTP:ABL      0.16277    0.17779   0.916    0.360
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 141.36  on 101  degrees of freedom
## Residual deviance: 135.55  on  98  degrees of freedom
## AIC: 143.55
## 
## Number of Fisher Scoring iterations: 5
lrfit <- glm(HTC ~ HTP, family = binomial, data = gdf)
summary(lrfit)
## 
## Call:
## glm(formula = HTC ~ HTP, family = binomial, data = gdf)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6723  -1.1231   0.7533   1.1542   1.4368  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  0.06875    0.20281   0.339    0.735  
## HTP          2.09166    1.09467   1.911    0.056 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 141.36  on 101  degrees of freedom
## Residual deviance: 137.45  on 100  degrees of freedom
## AIC: 141.45
## 
## Number of Fisher Scoring iterations: 4

Tried logistic regression- one with an interaction with absoluteline and one without. Not significant looking at pvalues. Hypothesis: When more sharps pick home teams, it is more likely for the home team to cover...

model1 <- glm(result ~ homepick + absline, data = ats, family = binomial)
model1
## 
## Call:  glm(formula = result ~ homepick + absline, family = binomial, 
##     data = ats)
## 
## Coefficients:
## (Intercept)     homepick      absline  
##    0.214818    -0.376533     0.005362  
## 
## Degrees of Freedom: 7928 Total (i.e. Null);  7926 Residual
## Null Deviance:       10980 
## Residual Deviance: 10910     AIC: 10920
#Nevermind

Future Considerations and Issues

With more time, I would try a bayesian model, starting with sharps with a 50% hit rate and update as new bets are placed. Some issues with alternate lines. Is it possible to have data on when these sharps enter their picks to see line changes and closing line value? The moneyline and overunder dataset only had 34 and 150 observations but it would be interesting to look at them and compare with spread analysis.