In response to a column from Football Perspective regarding whether sportsbooks offer an efficient market, I looked at performance of underdogs getting 10.5 points or more over the same time period in the column. Note I used all weeks for each season, the column uses weeks 7 through 16.
For historical data I used the files accessible from this site. To begin:
nfllines.In the code below we'll grab the 2013 file separately (it's not in the .zip file).
## import files make sure you've saved the 1978 - 2012 files in folder called
## nfllines under your current working directory
# add 2013 file to nfllines folder (not included in .zip file) for inclusion
# in for loop
write.table(read.csv("http://www.repole.com/sun4cast/stats/nfl2013lines.csv"),
file = paste0(getwd(), "/nfllines/nfl2013lines.csv"), row.names = FALSE,
quote = FALSE, sep = ",")
# create vector of csv file names and years for each file
scores_list <- grep(".csv", list.files((paste0(getwd(), "/nfllines"))), value = TRUE)
# set years to use, 1990 - 2013
years <- c(1990:2013)
# load libraries
library(dplyr)
# loop through each file name and read csv file
i <- 1
for (i in i:length(years)) {
df <- read.csv(paste0(getwd(), "/nfllines/", grep(years[i], scores_list,
value = TRUE)), stringsAsFactors = FALSE)
# each file has one row for each game, split into home/visitor data frames
# then combine into one for easier manipulation
df1 <- select(df, Visitor, Home.Team, Visitor.Score, Home.Score, Line)
df2 <- select(df, Home.Team, Visitor, Home.Score, Visitor.Score, Line)
# change column names for rbind
names(df1) <- c("Team", "Opp", "PF", "PA", "Line")
names(df2) <- c("Team", "Opp", "PF", "PA", "Line")
# fix home line
df2$Line <- df2$Line * -1
# combine data frames
df <- rbind_list(df1, df2)
# set year
df$Year <- years[i]
# create or append to data frames
if (i == 1) {
df_f <- df
} else {
df_f <- rbind_list(df_f, df)
}
}
# rename and clean up
df <- df_f
rm(df_f, df1, df2)
## calculate w_line and l_line columns
i <- 1
for (i in i:nrow(df)) {
if ((df$PF[i] - df$PA[i]) + df$Line[i] > 0) {
df$W_Line[i] <- 1
df$L_Line[i] <- 0
} else if ((df$PF[i] - df$PA[i]) + df$Line[i] < 0) {
df$W_Line[i] <- 0
df$L_Line[i] <- 1
} else {
df$W_Line[i] <- 0
df$L_Line[i] <- 0
}
}
# look at just underdogs getting 10.5+ points
dft <- subset(df, Line >= 10.5)
# summarize the table
dft <- dft %>% group_by(Year) %>% summarise(W = sum(W_Line), L = sum(L_Line))
# calculate ATS percentage by year
dft$Pct <- round(dft$W/(dft$W + dft$L), 2)
# calculate units up/down by year based on -110 juice (+1 unit for win, -1.1
# unit for loss)
dft$units <- dft$W - dft$L * 1.1
# sort by year ascending
dft <- dft[order(dft$Year), ]
row.names(dft) <- NULL
ATS percentage 1990 - 2013:
# show 1990 - 2013 winning percentage
round(sum(dft$W)/(sum(dft$W) + sum(dft$L)), 4)
## [1] 0.5552
Cumulative units won for that time period, assuming you risk 1.1 units to make 1 (-110 vig/juice):
# show 1990 - 2013 total units up/down
sum(dft$units)
## [1] 39.4
Now a waterfall chart showing cumulative units won by year:
## create plot create cumulative sum of units by year
dft$cumunits <- cumsum(dft$units)
# set W/L year values for plot colors
i <- 1
for (i in i:nrow(dft)) {
if (dft$units[i] > 0) {
dft$wlyear[i] <- "W"
} else if (dft$units[i] < 0) {
dft$wlyear[i] <- "L"
} else {
dft$wlyear[i] <- "P"
}
}
# set values for waterfall plot
dft$start <- c(0, head(dft$cumunits, -1))
dft$id <- 1:nrow(dft)
# create plot
library(ggplot2)
ggplot(dft, aes(fill = factor(wlyear))) + geom_rect(aes(xmin = id - 0.45, xmax = id +
0.45, ymin = start, ymax = cumunits)) + ylab("cumulative units won (lost)") +
scale_x_continuous(breaks = 1:length(dft$id), labels = dft$Year) + theme(axis.text.x = element_text(angle = 90),
legend.position = "none") + ggtitle("NFL: Units Won on 10.5+ Underdogs, 1990 - 2013, -110 juice")
Not a bad investment, would be interesting to see this compared to gains in DJIA, S&P 500, etc.