library(ggplot2)
library(maps)
## Warning: package 'maps' was built under R version 3.1.3
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.1.3
# Pull the state outline data into a data frame and print its structure.
statesMap <- map_data(map = "state")
str(object = statesMap)
## 'data.frame':    15537 obs. of  6 variables:
##  $ long     : num  -87.5 -87.5 -87.5 -87.5 -87.6 ...
##  $ lat      : num  30.4 30.4 30.4 30.3 30.3 ...
##  $ group    : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ order    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ region   : chr  "alabama" "alabama" "alabama" "alabama" ...
##  $ subregion: chr  NA NA NA NA ...
# Tally the number of rows that fall under each polygon group id.
table(statesMap[["group"]])
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
##  202  149  312  516   79   91   94   10  872  381  233  329  257  256  113 
##   16   17   18   19   20   21   22   23   24   25   26   27   28   29   30 
##  397  650  399  566   36  220   30  460  370  373  382  315  238  208   70 
##   31   32   33   34   35   36   37   38   39   40   41   42   43   44   45 
##  125  205   78   16  290   21  168   37  733   12  105  238  284  236  172 
##   46   47   48   49   50   51   52   53   54   55   56   57   58   59   60 
##   66  304  166  289 1088   59  129   96   15  623   17   17   19   44  448 
##   61   62   63 
##  373  388   68

Draw a map of the USA

# Plot every polygon group with a white interior and a black border.
ggplot(
  data = statesMap,
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(colour = "black", fill = "white")

Load the election polling data and split it into training and test sets (the states are colored by prediction further below)

# Read the polling data, then split it on Year: rows up to and including
# 2008 form the training set, rows from 2012 form the test set.
polling <- read.csv(file = "PollingImputed.csv")
Train <- subset(polling, subset = Year <= 2008)
Test <- subset(polling, subset = Year == 2012)

Create a logistic regression model

# Logistic regression of Republican on SurveyUSA and DiffCount, fitted on
# the training rows only.
mod2 <- glm(
  Republican ~ SurveyUSA + DiffCount,
  family = "binomial",
  data = Train
)

# Predicted probabilities for the test rows, plus a 0/1 version obtained by
# thresholding those probabilities at 0.5.
TestPrediction <- predict(mod2, type = "response", newdata = Test)
TestPredictionBinary <- as.numeric(TestPrediction > 0.5)

# The state column is named Test.State, which later statements refer to.
predictionDataFrame <- data.frame(
  TestPrediction,
  TestPredictionBinary,
  Test.State = Test$State
)

For how many states is our binary prediction 1 (corresponding to Republican)?

# Number of test rows whose thresholded prediction equals 1.
sum(predictionDataFrame[["TestPredictionBinary"]] == 1)
## [1] 22

What is the average predicted probability of our model (on the Test set, for 2012)?

# Mean of the predicted probabilities over the test rows.
mean(predictionDataFrame[["TestPrediction"]])
## [1] 0.4852626

Coloring the states by prediction.

# statesMap stores region names in lower case, so build a matching key.
predictionDataFrame$region <- tolower(predictionDataFrame$Test.State)

# Attach the predictions to the polygon rows. merge() does not keep the
# original row sequence, so sort by `order` again before plotting.
predictionMap <- merge(x = statesMap, y = predictionDataFrame, by = "region")
predictionMap <- predictionMap[order(predictionMap$order), ]

# Fill each polygon according to its 0/1 prediction.
ggplot(
  data = predictionMap,
  mapping = aes(x = long, y = lat, group = group, fill = TestPredictionBinary)
) +
  geom_polygon(colour = "black")

Use binary colors - red for GOP and blue for Democrats

# Same map as before, but with a two-entry legend: 0 is drawn blue and
# labelled "Democrat", 1 is drawn red and labelled "Republican".
ggplot(
  data = predictionMap,
  mapping = aes(x = long, y = lat, group = group, fill = TestPredictionBinary)
) +
  geom_polygon(colour = "black") +
  scale_fill_gradient(
    name = "Prediction 2012",
    low = "blue",
    high = "red",
    breaks = c(0, 1),
    labels = c("Democrat", "Republican"),
    guide = "legend"
  )

What was our predicted probability for the state of Florida?

# Show the prediction row(s) whose Test.State is "Florida".
subset(predictionDataFrame, Test.State == "Florida")
##    TestPrediction TestPredictionBinary Test.State  region
## 24      0.9640395                    1    Florida florida