A look at 538's accuracy.

I'm interested in how well Nate Silver's 538 model performed compared with the cruder approach of simply averaging the polls in each state.

library(ggplot2)
##### Read in the data ####

silver <- read.delim("silver.txt")
npr <- read.delim("npr_results.txt")

##### Adjust for formatting differences ####
silver$State <- tolower(silver$State)
npr$State <- tolower(npr$State)

##### Merge ####
comp <- merge(silver, npr)

Only states that have a poll average can be used for this comparison, so rows with a missing Obama.avg are dropped.

comp <- subset(comp, !is.na(Obama.avg))

Obama Comparison

Apologies to Hadley Wickham for using with()

obama_rmse_avg <- with(comp, sqrt(mean((Obama.avg - Obama)^2)))
obama_rmse_538 <- with(comp, sqrt(mean((Obama.538 - Obama)^2)))

c(obama_rmse_avg, obama_rmse_538)
## [1] 3.338 1.813
obama_rmse_avg/obama_rmse_538
## [1] 1.841

Romney Comparison

romney_rmse_avg <- with(comp, sqrt(mean((Romney.avg - Romney)^2)))
romney_rmse_538 <- with(comp, sqrt(mean((Romney.538 - Romney)^2)))

c(romney_rmse_avg, romney_rmse_538)
## [1] 4.128 1.714
romney_rmse_avg/romney_rmse_538
## [1] 2.408

Weighted by Electoral Votes

obama_weighted_rmse_avg <- with(comp, sqrt(sum((((Obama - Obama.avg)^2) * 
    EV))/sum(EV)))
obama_weighted_rmse_538 <- with(comp, sqrt(sum((((Obama - Obama.538)^2) * 
    EV))/sum(EV)))

c(obama_weighted_rmse_avg, obama_weighted_rmse_538)
## [1] 3.221 1.462
obama_weighted_rmse_avg/obama_weighted_rmse_538
## [1] 2.203


romney_weighted_rmse_avg <- with(comp, sqrt(sum((((Romney - Romney.avg)^2) * 
    EV))/sum(EV)))
romney_weighted_rmse_538 <- with(comp, sqrt(sum((((Romney - Romney.538)^2) * 
    EV))/sum(EV)))

c(romney_weighted_rmse_avg, romney_weighted_rmse_538)
## [1] 3.079 1.506
romney_weighted_rmse_avg/romney_weighted_rmse_538
## [1] 2.045

Some nice plots

ggplot(comp, aes(Obama.avg-Obama, Obama.538-Obama, color = Obama > Romney))+
  stat_smooth(method = loess, aes(group = 1), color = "darkgrey")+
  geom_point(aes(size = EV))+
  geom_abline()+
  scale_area()+
  coord_fixed()+
  scale_color_brewer(palette = "Set1")+
  xlim(-9.5, 5.8)+
  ylim(-9.5, 5.8)+
  theme_bw()
## Warning: minimal value for n is 3, returning requested palette with 3
## different levels
## Warning: minimal value for n is 3, returning requested palette with 3
## different levels

plot of chunk unnamed-chunk-6


ggplot(comp, aes(Romney.avg-Romney, Romney.538-Romney,color = Obama > Romney))+
  stat_smooth(method = loess, aes(group = 1), color = "darkgrey")+
  geom_point(aes(size = EV))+
  geom_abline()+
  scale_area()+
  scale_color_brewer(palette = "Set1")+  
  coord_fixed()+
  xlim(-10.3, 5.8)+
  ylim(-10.3, 5.8)+
  theme_bw()
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: minimal value for n is 3, returning requested palette with 3
## different levels
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: minimal value for n is 3, returning requested palette with 3
## different levels

plot of chunk unnamed-chunk-7