library(ggplot2)
library(maps)
## Warning: package 'maps' was built under R version 3.1.3
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.1.3
# Load the outlines of the US states as a data frame and inspect its structure.
statesMap <- map_data(map = "state")
str(object = statesMap)
## 'data.frame': 15537 obs. of 6 variables:
## $ long : num -87.5 -87.5 -87.5 -87.5 -87.6 ...
## $ lat : num 30.4 30.4 30.4 30.3 30.3 ...
## $ group : num 1 1 1 1 1 1 1 1 1 1 ...
## $ order : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "alabama" "alabama" "alabama" "alabama" ...
## $ subregion: chr NA NA NA NA ...
# Count how many rows of the map data belong to each polygon group.
table(statesMap[["group"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 202 149 312 516 79 91 94 10 872 381 233 329 257 256 113
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
## 397 650 399 566 36 220 30 460 370 373 382 315 238 208 70
## 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
## 125 205 78 16 290 21 168 37 733 12 105 238 284 236 172
## 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 66 304 166 289 1088 59 129 96 15 623 17 17 19 44 448
## 61 62 63
## 373 388 68
Draw a map of the USA
# Outline every state polygon in black with a white fill.
ggplot(
  data = statesMap,
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(fill = "white", colour = "black")
Load election data and color by state
# Read the imputed polling data, then split it by election year:
# years up to and including 2008 for training, 2012 for testing.
polling <- read.csv(file = "PollingImputed.csv")
Train <- subset(polling, subset = Year <= 2008)
Test <- subset(polling, subset = Year == 2012)
Create a logistic regression model
# Fit a logistic regression of Republican on SurveyUSA and DiffCount using
# the training set, then score the 2012 test set.
mod2 <- glm(
  formula = Republican ~ SurveyUSA + DiffCount,
  family = "binomial",
  data = Train
)
TestPrediction <- predict(object = mod2, newdata = Test, type = "response")
# Threshold the predicted probabilities at 0.5 to get a 0/1 class label.
TestPredictionBinary <- as.numeric(TestPrediction > 0.5)
# Collect both predictions alongside the state names; data.frame() names the
# third column Test.State automatically.
predictionDataFrame <- data.frame(
  TestPrediction,
  TestPredictionBinary,
  Test$State
)
For how many states is our binary prediction 1 (corresponding to Republican)?
# Count the test-set rows whose binary prediction equals 1.
with(predictionDataFrame, sum(TestPredictionBinary == 1))
## [1] 22
What is the average predicted probability of our model (on the Test set, for 2012)?
# Average the predicted probabilities over the test set.
with(predictionDataFrame, mean(TestPrediction))
## [1] 0.4852626
Coloring the states by prediction.
# Build a lowercase key so the state names line up with the `region` column
# of the map data, which is lowercase (e.g. "alabama").
predictionDataFrame$region <- tolower(predictionDataFrame$Test.State)
# Attach each state's prediction to its map polygons by joining on `region`.
predictionMap <- merge(statesMap, predictionDataFrame, by = "region")
# merge() does not keep the original row order; sort by `order` again so the
# polygon vertices are connected in the intended sequence when drawn.
predictionMap <- predictionMap[order(predictionMap$order), ]
# Fill each state by its 0/1 prediction using ggplot2's default fill scale.
ggplot(
  data = predictionMap,
  mapping = aes(x = long, y = lat, group = group, fill = TestPredictionBinary)
) +
  geom_polygon(colour = "black")
Use binary colors - red for GOP and blue for Democrats
# Same map, but with an explicit two-entry legend: 0 is drawn blue and
# labelled "Democrat", 1 is drawn red and labelled "Republican".
ggplot(
  data = predictionMap,
  mapping = aes(x = long, y = lat, group = group, fill = TestPredictionBinary)
) +
  geom_polygon(colour = "black") +
  scale_fill_gradient(
    name = "Prediction 2012",
    low = "blue",
    high = "red",
    breaks = c(0, 1),
    labels = c("Democrat", "Republican"),
    guide = "legend"
  )
What was our predicted probability for the state of Florida?
# Show the prediction row for Florida.
subset(predictionDataFrame, Test.State == "Florida")
## TestPrediction TestPredictionBinary Test.State region
## 24 0.9640395 1 Florida florida