library(dplyr)
library(readr)
library(stringr)
library(tidyr)
library(readxl)

# download area and population info from Census
download.file("http://www2.census.gov/prod2/statcomp/usac/excel/LND01.xls", "LND01.xls")
download.file("http://www2.census.gov/prod2/statcomp/usac/excel/POP01.xls", "POP01.xls")

# according to metadata, this is Land Area in 2010 and resident population in 2010
us_county_area <- read_excel("LND01.xls") %>%
  transmute(CountyCode = as.character(as.integer(STCOU)),
            Area = LND110210D)

us_county_population <- read_excel("POP01.xls") %>%
  transmute(CountyCode = as.character(as.integer(STCOU)),
            Population = POP010210D)
election_url <- "https://raw.githubusercontent.com/Prooffreader/election_2016_data/master/data/presidential_general_election_2016_by_county.csv"
county_data <- read_csv(election_url) %>%
  group_by(CountyCode = as.character(fips)) %>%
  mutate(TotalVotes = sum(votes)) %>%
  ungroup() %>%
  mutate(name = str_replace(name, ".\\. ", "")) %>%
  filter(name %in% c("Trump", "Clinton", "Johnson", "Stein")) %>%
  transmute(County = str_replace(geo_name, " County", ""),
            State = state,
            CountyCode = as.character(fips),
            Candidate = name,
            Percent = vote_pct / 100,
            TotalVotes) %>%
  spread(Candidate, Percent, fill = 0) %>%
  inner_join(us_county_population, by = "CountyCode") %>%
  inner_join(us_county_area, by = "CountyCode")

county_data
## # A tibble: 3,111 × 10
##      County   State CountyCode TotalVotes Clinton Johnson Stein Trump
##       <chr>   <chr>      <chr>      <int>   <dbl>   <dbl> <dbl> <dbl>
## 1   Autauga Alabama       1001      24661   0.240   0.022 0.004 0.734
## 2   Baldwin Alabama       1003      94090   0.196   0.026 0.005 0.774
## 3   Barbour Alabama       1005      10390   0.467   0.009 0.002 0.523
## 4      Bibb Alabama       1007       8748   0.214   0.014 0.002 0.770
## 5    Blount Alabama       1009      25384   0.085   0.013 0.004 0.899
## 6   Bullock Alabama       1011       4701   0.751   0.005 0.002 0.242
## 7    Butler Alabama       1013       8685   0.428   0.007 0.001 0.563
## 8   Calhoun Alabama       1015      47376   0.279   0.024 0.006 0.692
## 9  Chambers Alabama       1017      13778   0.418   0.012 0.003 0.566
## 10 Cherokee Alabama       1019      10503   0.145   0.014 0.002 0.839
## # ... with 3,101 more rows, and 2 more variables: Population <dbl>,
## #   Area <dbl>
write_csv(county_data, "county_election_2016.csv")
library(ggplot2)
library(scales)
theme_set(theme_bw())

ggplot(county_data, aes(Population / Area, Trump)) +
  geom_point() +
  scale_x_log10() +
  scale_y_continuous(labels = percent_format()) +
  xlab("Population density (ppl / square mile)") +
  ylab("% of votes going to Trump") +
  geom_text(aes(label = County), vjust = 1, hjust = 1, check_overlap = TRUE) +
  geom_smooth(method = "lm") +
  ggtitle("Population density vs Trump voters by county")