Data Wrangling
#Create three different datasets to seperate Moneyline, Over&Under and ATS Bets
moneyline <- filter(bet, PickType == "Moneyline")
head(moneyline)
## # A tibble: 6 x 10
## Sharp SportTypeAbbr GameDate GameWeek Away Home Pick Line
## <chr> <chr> <dttm> <dbl> <chr> <chr> <chr> <dbl>
## 1 Doc NFL 2020-09-13 17:00:00 1 Seat~ Atla~ Atla~ 115
## 2 John Fisher NFL 2020-09-13 17:00:00 1 Gree~ Minn~ Gree~ 146
## 3 Mark Franco NFL 2020-09-13 17:00:00 1 Seat~ Atla~ Seat~ -112
## 4 Matt Blunt NFL 2020-09-13 17:00:00 1 Seat~ Atla~ Seat~ -112
## 5 Matt Blunt NFL 2020-09-13 17:00:00 1 Gree~ Minn~ Minn~ -135
## 6 Michael Bl~ NFL 2020-09-13 17:00:00 1 Gree~ Minn~ Minn~ -135
## # ... with 2 more variables: PickType <chr>, Result <chr>
over <- filter(bet, PickType == "Over")
under <- filter(bet, PickType == "Under")
#bind the over and under dataframes
overunder <- rbind(over, under)
head(overunder)
## # A tibble: 6 x 10
## Sharp SportTypeAbbr GameDate GameWeek Away Home Pick Line
## <chr> <chr> <dttm> <dbl> <chr> <chr> <chr> <dbl>
## 1 Kyle Hunter NFL 2020-09-13 20:05:00 1 Los ~ Cinc~ Los ~ 49.5
## 2 MIT Simula~ NFL 2020-09-13 20:05:00 1 Los ~ Cinc~ Los ~ 49.5
## 3 Pat Hawkins NFL 2020-09-13 20:05:00 1 Los ~ Cinc~ Los ~ 49.5
## 4 Bruce Mars~ NFL 2020-09-13 20:25:00 1 Tamp~ New ~ New ~ 47.5
## 5 John Fisher NFL 2020-09-13 20:25:00 1 Tamp~ New ~ New ~ 47.5
## 6 John Fisher NFL 2020-09-13 20:25:00 1 Ariz~ San ~ San ~ 46
## # ... with 2 more variables: PickType <chr>, Result <chr>
ats <- filter(bet, PickType == "ATS")
ats
## # A tibble: 7,929 x 10
## Sharp SportTypeAbbr GameDate GameWeek Away Home Pick Line
## <chr> <chr> <dttm> <dbl> <chr> <chr> <chr> <dbl>
## 1 Al Iannaz~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Kans~ -9
## 2 Ben Volin NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Hous~ 9
## 3 Benjamin ~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Hous~ 9
## 4 Bob Glaub~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Kans~ -9
## 5 Brad Gagn~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Kans~ -9.5
## 6 Brent Sob~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Hous~ 9.5
## 7 Brian Cos~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Kans~ -9.5
## 8 Bryan Dav~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Kans~ -9.5
## 9 Calvin Wa~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Kans~ -9.5
## 10 Case Keef~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Hous~ 9.5
## # ... with 7,919 more rows, and 2 more variables: PickType <chr>, Result <chr>
ats_raw <- ats
#create column- did the sharps pick the hometeam in their bet?
ats$homepick = ifelse(ats$Home == ats$Pick, 1, 0)
ats %>% relocate(homepick, .before = Line)
## # A tibble: 7,929 x 11
## Sharp SportTypeAbbr GameDate GameWeek Away Home Pick homepick
## <chr> <chr> <dttm> <dbl> <chr> <chr> <chr> <dbl>
## 1 Al Ian~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Kans~ 1
## 2 Ben Vo~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Hous~ 0
## 3 Benjam~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Hous~ 0
## 4 Bob Gl~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Kans~ 1
## 5 Brad G~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Kans~ 1
## 6 Brent ~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Hous~ 0
## 7 Brian ~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Kans~ 1
## 8 Bryan ~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Kans~ 1
## 9 Calvin~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Kans~ 1
## 10 Case K~ NFL 2020-09-11 00:20:00 1 Hous~ Kans~ Hous~ 0
## # ... with 7,919 more rows, and 3 more variables: Line <dbl>, PickType <chr>,
## # Result <chr>
#change result column to 1,0 depending on Win or Loss
ats$result<-ifelse(ats$Result =="Win", 1 , 0)
#absolute value of spread line
ats$absline <- abs(ats$Line)
#favorites
ats$favorite<-ifelse(ats$Line < 0,1,0)
#make new column- change time to categorical values-
#w1 <- as.POSIXct("2020-09-11 00:20:00")
#w1end <- as.POSIXct("2020-09-27 05:00:00")
#int <- new_interval(date1, date2)
#df[df$datetime %within% int,]
#The times seem to be off by four hours, possibly due to time zones as no NFL game starts at 12:20 AM, either way not used.
#Mutated ats_raw
df <- ats_raw
df2 <- df %>%
mutate(Favorite = case_when(Pick == Home & Line < 0 ~ Home,
Pick == Home & Line > 0 ~ Away,
Pick == Away & Line < 0 ~ Away,
Pick == Away & Line > 0 ~ Home)) %>%
select(-c(SportTypeAbbr, Line, PickType)) %>%
mutate(FavoriteWin = case_when(Pick == Favorite & Result == "Win" ~ 1,
Pick != Favorite & Result == "Loss" ~ 1,
Pick == Favorite & Result == "Loss" ~ 0,
Pick != Favorite & Result == "Win" ~ 0)) %>%
select(-c(GameWeek, Result)) %>%
mutate(Pick = case_when(Pick == Favorite ~ "Favorite", Pick != Favorite ~ "Non-Favorite")) %>%
select(Sharp, GameDate, Home, Away, Favorite, FavoriteWin, Pick) %>%
mutate(PickFav = case_when(Pick == "Favorite" ~ 1, Pick == "Non-Favorite" ~ 0)) %>%
mutate(PickNonFav = case_when(Pick == "Non-Favorite" ~ 1, Pick == "Favorite" ~ 0)) %>%
select(-Pick) %>%
group_by(GameDate, Home, Away, Favorite, FavoriteWin) %>%
summarize(PickFavSum = sum(PickFav), PickNonFavSum = sum(PickNonFav)) %>%
mutate(PickTotal = PickFavSum + PickNonFavSum) %>%
mutate(PercFav = 100 * PickFavSum/PickTotal) %>%
select(-c(PickFavSum, PickNonFavSum))
## `summarise()` has grouped output by 'GameDate', 'Home', 'Away', 'Favorite'. You
## can override using the `.groups` argument.
#Create bins of 5% to look at distribution
df3 <- df2 %>%
# filter(FavoriteWin == 1) %>%
filter(PercFav >= 50) %>%
mutate(PercFav = case_when(PercFav >= 50 & PercFav < 55 ~ 50,
PercFav >= 55 & PercFav < 60 ~ 55,
PercFav >= 60 & PercFav < 65 ~ 60,
PercFav >= 65 & PercFav < 70 ~ 65,
PercFav >= 70 & PercFav < 75 ~ 70,
PercFav >= 75 & PercFav < 80 ~ 75,
PercFav >= 80 & PercFav < 85 ~ 80,
PercFav >= 85 & PercFav < 90 ~ 85,
PercFav >= 90 & PercFav < 95 ~ 90,
PercFav >= 95 & PercFav < 100 ~ 95)) %>%
filter(PercFav >= 50)
df3
## # A tibble: 65 x 7
## # Groups: GameDate, Home, Away, Favorite [65]
## GameDate Home Away Favorite FavoriteWin PickTotal PercFav
## <dttm> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2020-09-11 00:20:00 Kansas City Houst~ Kansas ~ 1 78 50
## 2 2020-09-13 17:00:00 Atlanta Seatt~ Seattle 1 84 50
## 3 2020-09-13 17:00:00 Buffalo New Y~ Buffalo 1 75 75
## 4 2020-09-13 17:00:00 Carolina Las V~ Las Veg~ 1 86 50
## 5 2020-09-13 17:00:00 New England Miami New Eng~ 1 94 50
## 6 2020-09-13 20:05:00 Cincinnati Los A~ Los Ang~ 0 33 75
## 7 2020-09-13 20:25:00 New Orleans Tampa~ New Orl~ 1 91 55
## 8 2020-09-14 00:20:00 Los Angeles Dallas Dallas 0 83 55
## 9 2020-09-14 23:10:00 New York Pitts~ Pittsbu~ 1 82 70
## 10 2020-09-15 02:20:00 Denver Tenne~ Tenness~ 0 55 70
## # ... with 55 more rows
df4 <- df3 %>% group_by(PercFav) %>% summarize(avgwin = mean(FavoriteWin)*100, n = length(FavoriteWin))
df4
## # A tibble: 8 x 3
## PercFav avgwin n
## <dbl> <dbl> <int>
## 1 50 55.6 9
## 2 55 50 10
## 3 60 56.2 16
## 4 65 33.3 12
## 5 70 70 10
## 6 75 60 5
## 7 80 0 1
## 8 90 100 2
barplot(height = df4$avgwin, names.arg = df4$PercFav, beside=TRUE,
col="#69b3a2", xlab="5% bins of Sharps Taking the Favorite", ylab="% of Bet Hitting" , main = "As more sharps take the favorite, do teams cover more?")

There was only 1 game in the bin 80-85% of sharps taking the favorite, hence the misleading 0% in the graph. Positive relationship between more sharps taking the favorite and teams covering!
#Who were the best sharps?
gf <- glm(result ~ Sharp - 1, family = binomial, data = ats)
1/(1 + exp(-sort(gf$coefficients)))
## SharpZach Reeder SharpMIT Simulator SharpJustin Terranova
## 3.488403e-06 2.500000e-01 2.857143e-01
## SharpGold Sheet SharpAlex Smart SharpSpencer Schultz
## 2.857143e-01 2.941176e-01 3.000000e-01
## SharpScott Bell SharpZack Cimini SharpTom Fornelli
## 3.043478e-01 3.333333e-01 3.461538e-01
## SharpBarrett Sallee SharpMatt Blunt SharpMarc Lawrence
## 3.461538e-01 3.500000e-01 3.500000e-01
## SharpMichael Black SharpBruce Marshall SharpBill Bender
## 3.500000e-01 3.529412e-01 3.750000e-01
## SharpTed Holmlund SharpMark Hale SharpBrian Bitler
## 3.809524e-01 3.809524e-01 3.846154e-01
## SharpCorby Davidson SharpAndy Iskoe SharpDan Harralson
## 3.913043e-01 4.000000e-01 4.090909e-01
## SharpNick Shepkowski SharpBen Volin SharpJeff Hartman
## 4.090909e-01 4.098361e-01 4.098361e-01
## SharpErik Sommers SharpAidan Curran SharpMatt Stagner
## 4.130435e-01 4.137931e-01 4.222222e-01
## SharpDennis Dodd SharpJerry Palm SharpJoe Deleone
## 4.230769e-01 4.230769e-01 4.230769e-01
## SharpBen Burns SharpClay Travis SharpJohn Halpin
## 4.285714e-01 4.285714e-01 4.310345e-01
## SharpPat Lane SharpMike Clay SharpKegan Reneau
## 4.333333e-01 4.354839e-01 4.375000e-01
## SharpPhil Harrison SharpCynthia Frelund SharpJoe Manniello
## 4.390244e-01 4.406780e-01 4.426230e-01
## SharpBrian Costello SharpNate Davis SharpJohn Dixon
## 4.426230e-01 4.426230e-01 4.444444e-01
## SharpJori Epstein SharpChristopher Gates SharpPaul Schwartz
## 4.482759e-01 4.482759e-01 4.500000e-01
## SharpASA Wins SharpGill Alexander SharpMatt Fargo
## 4.500000e-01 4.512195e-01 4.545455e-01
## SharpPFF Analytics SharpTim Cowlishaw SharpChip Patterson
## 4.571429e-01 4.590164e-01 4.615385e-01
## SharpWarren Ludford SharpSteve Serby SharpJoe Nelson
## 4.642857e-01 4.655172e-01 4.666667e-01
## SharpPaul Bovi SharpMaster Tesfatsion SharpWill Brinson
## 4.666667e-01 4.666667e-01 4.677419e-01
## SharpVic Tafur SharpTim Lynch SharpHank Goldberg
## 4.677419e-01 4.680851e-01 4.736842e-01
## SharpMark Cannizzaro SharpLorenzo Reyes SharpMichael Gehlken
## 4.754098e-01 4.754098e-01 4.754098e-01
## SharpChris Shaw SharpJimmy Adams SharpZach Braziller
## 4.761905e-01 4.761905e-01 4.761905e-01
## SharpFiveThirtyEight SharpChip Chirimbes SharpOddsShark
## 4.814815e-01 4.814815e-01 4.827586e-01
## SharpDamon Marx SharpDave Blezow SharpMatt Lane
## 4.833333e-01 4.838710e-01 4.888889e-01
## SharpJonathan Delong SharpJeff Feyerer SharpDrew Loftis
## 4.888889e-01 5.000000e-01 5.000000e-01
## SharpJim Feist SharpJim McBride SharpGregg Rosenthal
## 5.000000e-01 5.000000e-01 5.000000e-01
## SharpJustin Hier SharpJohn Breech SharpDaniel Belton
## 5.000000e-01 5.000000e-01 5.000000e-01
## SharpDoc SharpKyle Hunter SharpLester Wiltfong
## 5.000000e-01 5.000000e-01 5.000000e-01
## SharpGary Gramling SharpDavid Moore SharpTom Rock
## 5.000000e-01 5.000000e-01 5.081967e-01
## SharpCraig Miller SharpPete Fiutak SharpKevin Sherrington
## 5.084746e-01 5.116279e-01 5.119048e-01
## SharpNeil Greenberg SharpBob Glauber SharpMichael David Smith
## 5.161290e-01 5.161290e-01 5.161290e-01
## SharpCalvin Watkins SharpKeagan Stiefel SharpBernd Buchmasser
## 5.166667e-01 5.172414e-01 5.172414e-01
## SharpMarima SharpSam Householder SharpBrett Vito
## 5.172414e-01 5.172414e-01 5.217391e-01
## SharpChuck Carlton SharpSam Blum SharpPaul Noonan
## 5.217391e-01 5.217391e-01 5.217391e-01
## SharpTadd Haislop SharpJared Dubin SharpGreg Smith
## 5.227273e-01 5.238095e-01 5.238095e-01
## SharpNewy Scruggs SharpShannon White SharpMike Florio
## 5.243902e-01 5.245902e-01 5.245902e-01
## SharpSam Farmer SharpDaniel Jeremiah SharpStephen Nover
## 5.245902e-01 5.250000e-01 5.294118e-01
## SharpTom Schad SharpBrian Edwards SharpTara Sullivan
## 5.322581e-01 5.333333e-01 5.333333e-01
## SharpScott Steehn SharpBen Kercheval SharpBill Marzano
## 5.375000e-01 5.384615e-01 5.384615e-01
## SharpPeter Daubert SharpJason La Canfora SharpGary Bart
## 5.384615e-01 5.384615e-01 5.405405e-01
## SharpJarrett Bell SharpJohn Owning SharpHowie Kussoy
## 5.409836e-01 5.409836e-01 5.432099e-01
## SharpStitches SharpMarcas Grant SharpJohn Fisher
## 5.432099e-01 5.454545e-01 5.454545e-01
## SharpCFN Consensus SharpColleen Wolfe SharpPete Prisco
## 5.454545e-01 5.454545e-01 5.483871e-01
## SharpMs.Charleen SharpMichael Beck SharpBenjamin Hoffman
## 5.483871e-01 5.483871e-01 5.500000e-01
## SharpMatthew Terry SharpDave Richard SharpAdam Rank
## 5.517241e-01 5.555556e-01 5.555556e-01
## SharpNick Shook SharpTom Childs SharpWhiskeyranger
## 5.555556e-01 5.555556e-01 5.555556e-01
## SharpDave Schofield SharpGary Davenport SharpNeil The Greek
## 5.573770e-01 5.573770e-01 5.625000e-01
## SharpKeith Stewart SharpGeoffrey Benedict SharpRyan Dunleavy
## 5.625000e-01 5.645161e-01 5.645161e-01
## SharpJose Rodriguez SharpBradley Smith SharpFrank Schwab
## 5.652174e-01 5.652174e-01 5.666667e-01
## SharpBrad Gagnon SharpBrent Sobleski SharpAl Iannazzone
## 5.666667e-01 5.666667e-01 5.762712e-01
## SharpRon Kopp SharpCase Keefer SharpSheil Kapadia
## 5.777778e-01 5.789474e-01 5.806452e-01
## SharpDana Larson SharpJacob Infante SharpCT Smith
## 5.806452e-01 5.806452e-01 5.862069e-01
## SharpEric Thompson SharpChad Finn SharpPete Sweeney
## 5.862069e-01 5.869565e-01 5.869565e-01
## SharpJamey Eisenberg SharpPat Hawkins SharpJohnny Parlay
## 5.873016e-01 5.882353e-01 5.909091e-01
## SharpEvan Western SharpBrian Hines SharpEd Brodmarkle
## 5.909091e-01 6.000000e-01 6.000000e-01
## SharpRyan Wilson SharpRyan Fowler SharpCraig Stout
## 6.060606e-01 6.065574e-01 6.086957e-01
## SharpKent Swanson SharpRic Renner SharpRobert Schmitz
## 6.086957e-01 6.086957e-01 6.136364e-01
## SharpKalyn Kahler SharpKevin Rogers SharpBrian Lewis
## 6.136364e-01 6.153846e-01 6.190476e-01
## SharpFrank Platko SharpMatt Miller SharpBill Zimmerman
## 6.206897e-01 6.222222e-01 6.222222e-01
## SharpJeremy Mauss SharpMike Jones SharpMicah Roberts
## 6.265060e-01 6.271186e-01 6.363636e-01
## SharpBryan Davis SharpJon Meerdink SharpScott Thurston
## 6.451613e-01 6.451613e-01 6.500000e-01
## SharpPatrick Schmidt SharpJon Barnett SharpVinnie Iyer
## 6.538462e-01 6.551724e-01 6.612903e-01
## SharpIan St Clair SharpKevin Nogle SharpJoe Williams
## 6.875000e-01 7.000000e-01 7.142857e-01
## SharpMark Franco SharpJeremy Conn SharpScott Pritchard
## 8.000000e-01 9.999965e-01 9.999965e-01
## SharpJonathan Jorcin SharpRay Monohan SharpCJ Olson
## 9.999965e-01 9.999965e-01 9.999965e-01
This list has its limitations. Zach Reeder was the "worst" sharp but only had one bet. Scott Prichard, CJ Olson and other sharps that had a 100% hit rate only had one bet as well.
Logistic Model looking at Home Team pick and Home Team covering
#Tried a different way to manipulate the data
gdf <- ats %>% mutate(Matchup = paste(GameWeek,Home,Away)) %>% group_by(Matchup) %>% filter(Result != "Push") %>%
summarize(HTC = any((Result == "Win") & (Pick == Home)) || any((Result == "Loss") & (Pick == "Away")),
HTP = mean(Pick == Home) - 0.5,
ABL = mean(absline)
)
gdf
## # A tibble: 102 x 4
## Matchup HTC HTP ABL
## <chr> <lgl> <dbl> <dbl>
## 1 1 Atlanta Seattle FALSE -0.0357 1.95
## 2 1 Baltimore Cleveland TRUE -0.0402 7.70
## 3 1 Buffalo New York TRUE 0.26 6.48
## 4 1 Carolina Las Vegas FALSE -0.0116 3.01
## 5 1 Cincinnati Los Angeles TRUE 0.0455 3.03
## 6 1 Denver Tennessee TRUE -0.158 2.20
## 7 1 Detroit Chicago FALSE 0.0517 2.91
## 8 1 Jacksonville Indianapolis TRUE -0.125 7.89
## 9 1 Kansas City Houston TRUE 0.0128 9.38
## 10 1 Los Angeles Dallas TRUE -0.0904 2.91
## # ... with 92 more rows
lrfit <- glm(HTC ~ HTP*ABL, family = binomial, data = gdf)
summary(lrfit)
##
## Call:
## glm(formula = HTC ~ HTP * ABL, family = binomial, data = gdf)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7099 -1.1240 0.1649 1.1775 1.4460
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.12998 0.31679 -0.410 0.682
## HTP 0.61699 1.76856 0.349 0.727
## ABL 0.02483 0.03208 0.774 0.439
## HTP:ABL 0.16277 0.17779 0.916 0.360
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 141.36 on 101 degrees of freedom
## Residual deviance: 135.55 on 98 degrees of freedom
## AIC: 143.55
##
## Number of Fisher Scoring iterations: 5
lrfit <- glm(HTC ~ HTP, family = binomial, data = gdf)
summary(lrfit)
##
## Call:
## glm(formula = HTC ~ HTP, family = binomial, data = gdf)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6723 -1.1231 0.7533 1.1542 1.4368
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.06875 0.20281 0.339 0.735
## HTP 2.09166 1.09467 1.911 0.056 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 141.36 on 101 degrees of freedom
## Residual deviance: 137.45 on 100 degrees of freedom
## AIC: 141.45
##
## Number of Fisher Scoring iterations: 4
Tried logistic regression- one with an interaction with absoluteline and one without. Not significant looking at pvalues. Hypothesis: When more sharps pick home teams, it is more likely for the home team to cover...
model1 <- glm(result ~ homepick + absline, data = ats, family = binomial)
model1
##
## Call: glm(formula = result ~ homepick + absline, family = binomial,
## data = ats)
##
## Coefficients:
## (Intercept) homepick absline
## 0.214818 -0.376533 0.005362
##
## Degrees of Freedom: 7928 Total (i.e. Null); 7926 Residual
## Null Deviance: 10980
## Residual Deviance: 10910 AIC: 10920
#Nevermind