Ray Fair and Bootstrapped Election Predictions

Data Science

Predicting Presidential Elections (and other things)

The Data

When on the Server:

# Load the election data from the shared directory on the RStudio server.
presdata <- read.csv(
  file = "/home/rstudioshared/shared_files/data/rayfair.csv",
  header = TRUE
)

From the shared Google Drive folder:

# Load the same election data from a local copy of the shared folder.
# The path is relative, so it is resolved against the working directory.
presdata <- read.csv(
  file = "Data_Science_Data/election_forecasting/rayfair.csv",
  header = TRUE
)

library(dplyr); library(ggplot2)
# Split by election year: fit on YEAR <= 1996, hold out YEAR >= 2000.
presdata.train <- dplyr::filter(presdata, YEAR <= 1996)
presdata.test <- dplyr::filter(presdata, YEAR >= 2000)

The Model and Test Set Predictions

# Fit the linear model for VP on the training rows only.
# Terms: DPER and DUR main effects plus the I:G, I:P and I:Z interactions
# (lm adds an intercept by default).
m <- lm(
  formula = VP ~ I:G + DPER + DUR + I:P + I:Z,
  data = presdata.train
)

# Point predictions of VP for each held-out row of the test set.
predict(m, newdata = presdata.test)

##        1        2        3        4        5 
## 49.08382 43.36632 54.06060 52.06577 46.62942

The Model and 1000 Bootstrapped Test Set Predictions

# Bootstrap the test-set predictions: refit the model on 1000 resamples of
# the training rows (same size as the training set, drawn with replacement)
# and predict the held-out rows each time. replicate() collects the results
# into a matrix with one row per test-set row and one column per replicate.
#
# Fix the RNG seed first so the resamples -- and every summary or plot built
# from them -- can be reproduced; the seed value itself is arbitrary.
set.seed(2016)
bootstrapped_predictions <- replicate(1e3, {
  boot_sample <- sample_frac(presdata.train, 1, replace = TRUE)
  # Named boot_fit (not m) so it is not mistaken for the full-sample fit.
  boot_fit <- lm(VP ~ I:G + DPER + DUR + I:P + I:Z, data = boot_sample)
  predict(boot_fit, newdata = presdata.test)
})

# Show the years in the test set, in the same order as the prediction rows.
presdata.test[["YEAR"]]

## [1] 2000 2004 2008 2012 2016

# Mean bootstrapped prediction for each test-set row (MARGIN = 1 -> by row).
apply(bootstrapped_predictions, MARGIN = 1, FUN = mean)

##        1        2        3        4        5 
## 49.16461 43.58039 54.15532 52.00042 46.68897

# Standard deviation of the bootstrapped predictions for each test-set row.
apply(bootstrapped_predictions, MARGIN = 1, FUN = sd)

##         1         2         3         4         5 
## 1.2547021 1.8628002 1.2776056 1.1944066 0.8877778

Boxplots

# Refit the model on the full training set; its predictions are the blue
# markers drawn over the boxplots below.
m <- lm(VP ~ I:G + DPER + DUR + I:P + I:Z, data = presdata.train)

# One box per test-set row: the rows of the prediction matrix are the groups,
# hence use.cols = FALSE. Axis labels and x positions are taken from the
# test set itself rather than hard-coded, so they stay aligned with the
# boxes if the test set changes.
test_positions <- seq_len(nrow(presdata.test))
boxplot(bootstrapped_predictions, use.cols = FALSE,
        names = presdata.test$YEAR,
        main = "Bootstrapped Predictions and Actual Results (Red)")

# Overlay the observed VP (red) and the full-sample point prediction (blue).
points(test_positions, presdata.test$VP, col = "red", pch = 8, cex = 3)
points(test_positions, predict(m, newdata = presdata.test),
       col = "blue", pch = 8, cex = 3)

# The title only explains the red markers, so label both marker colours.
legend("topleft",
       legend = c("Actual result", "Full-sample prediction"),
       col = c("red", "blue"), pch = 8, bty = "n")