Voting data available from 2016 King County Election results

Load necessary libraries

library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(forcats)

Fetch Election Results Data

# Precinct-level abstract of the November 2016 general election (King County).
url <- "http://aqua.kingcounty.gov/elections/2016/nov-general/results/election-night-abstract-by-precinct.csv"

# Local cache of the download; the file is only fetched when it is missing.
results_path <- "./data/vote_results.csv"
if (!file.exists(results_path)) {
  # download.file() does not create missing directories, so make sure the
  # target directory exists before writing into it.
  dir.create(dirname(results_path), showWarnings = FALSE, recursive = TRUE)
  download.file(url, results_path)
}
dat <- read_csv(results_path)
## Parsed with column specification:
## cols(
##   Precinct = col_character(),
##   Race = col_character(),
##   LEG = col_integer(),
##   CC = col_integer(),
##   CG = col_integer(),
##   CounterGroup = col_character(),
##   Party = col_character(),
##   CounterType = col_character(),
##   SumOfCount = col_integer()
## )

Glance at the data. I could use glimpse here, but the RNotebook data viewer is so nice that I just display it instead.

# Auto-print the raw results tibble (one row per precinct / race / counter type).
dat
## # A tibble: 643,163 × 9
##    Precinct             Race   LEG    CC    CG CounterGroup Party
##       <chr>            <chr> <int> <int> <int>        <chr> <chr>
## 1     ADAIR Advisory Vote 14    45     3     1        Total    NP
## 2     ADAIR Advisory Vote 14    45     3     1        Total    NP
## 3     ADAIR Advisory Vote 14    45     3     1        Total    NP
## 4     ADAIR Advisory Vote 14    45     3     1        Total    NP
## 5     ADAIR Advisory Vote 14    45     3     1        Total    NP
## 6     ADAIR Advisory Vote 14    45     3     1        Total    NP
## 7     ADAIR Advisory Vote 15    45     3     1        Total    NP
## 8     ADAIR Advisory Vote 15    45     3     1        Total    NP
## 9     ADAIR Advisory Vote 15    45     3     1        Total    NP
## 10    ADAIR Advisory Vote 15    45     3     1        Total    NP
## # ... with 643,153 more rows, and 2 more variables: CounterType <chr>,
## #   SumOfCount <int>

Filter down to our legislative district.

# Total every counter type per precinct for Sound Transit Proposition 1 in
# legislative district 43. summarise() drops only the innermost grouping
# level, so the result stays grouped by LEG and Precinct.
our_district <- dat %>%
  filter(Race == "Sound Transit Proposition 1", LEG == 43) %>%
  group_by(LEG, Precinct, CounterType) %>%
  summarise(count = sum(SumOfCount)) %>%
  arrange(Precinct, CounterType)
our_district
## Source: local data frame [1,272 x 4]
## Groups: LEG, Precinct [212]
## 
##      LEG    Precinct       CounterType count
##    <int>       <chr>             <chr> <int>
## 1     43 SEA 43-1262          Approved   208
## 2     43 SEA 43-1262 Registered Voters   483
## 3     43 SEA 43-1262          Rejected    72
## 4     43 SEA 43-1262 Times Blank Voted     6
## 5     43 SEA 43-1262     Times Counted   286
## 6     43 SEA 43-1262  Times Over Voted     0
## 7     43 SEA 43-1263          Approved   142
## 8     43 SEA 43-1263 Registered Voters   342
## 9     43 SEA 43-1263          Rejected    55
## 10    43 SEA 43-1263 Times Blank Voted    12
## # ... with 1,262 more rows

I would like to plot this out with the individual precincts, but I couldn’t quickly find shapefiles for our voting precincts. Instead, look at the two precincts that are closest to me.

# Show the two home precincts in wide form: one row per precinct and one
# column per counter type.
our_district %>%
  filter(Precinct %in% c("SEA 43-1365", "SEA 43-1375")) %>%
  spread(key = CounterType, value = count)
## Source: local data frame [2 x 8]
## Groups: LEG, Precinct [2]
## 
##     LEG    Precinct Approved `Registered Voters` Rejected
## * <int>       <chr>    <int>               <int>    <int>
## 1    43 SEA 43-1365      227                 604       60
## 2    43 SEA 43-1375      142                 351       35
## # ... with 3 more variables: `Times Blank Voted` <int>, `Times
## #   Counted` <int>, `Times Over Voted` <int>

Massage the data; this is mostly exploratory at this stage.

# Build the long-format plotting data for the two home precincts.
#
# In this data set `Times Counted` is the sum of Approved, Rejected, blank and
# over votes (e.g. SEA 43-1262: 208 + 72 + 6 + 0 = 286), so "Rejected" is a
# "no" vote on the proposition rather than an invalid ballot. Therefore:
#   * bad_votes  = ballots that were counted but carry no usable vote
#                  (blank + over votes); Rejected is NOT included because it
#                  is plotted as its own category,
#   * non_voting = registered voters whose ballot was not counted at all.
# With these definitions Approved + Rejected + bad_votes + non_voting equals
# `Registered Voters`, so the percentages shown in the plot add up to 100%.
graph_dat <- our_district %>%
  filter(Precinct %in% c("SEA 43-1365", "SEA 43-1375")) %>%
  spread(CounterType, count) %>%
  mutate(
    pct_voted = `Times Counted` / `Registered Voters` * 100,
    bad_votes = `Times Blank Voted` + `Times Over Voted`,
    non_voting = `Registered Voters` - `Times Counted`
  ) %>%
  # Back to long form: one row per precinct and plotted category.
  gather(category, value, Approved, Rejected, non_voting, bad_votes) %>%
  ungroup()
arrange(graph_dat, Precinct)
## # A tibble: 8 × 9
##     LEG    Precinct `Registered Voters` `Times Blank Voted`
##   <int>       <chr>               <int>               <int>
## 1    43 SEA 43-1365                 604                  11
## 2    43 SEA 43-1365                 604                  11
## 3    43 SEA 43-1365                 604                  11
## 4    43 SEA 43-1365                 604                  11
## 5    43 SEA 43-1375                 351                   6
## 6    43 SEA 43-1375                 351                   6
## 7    43 SEA 43-1375                 351                   6
## 8    43 SEA 43-1375                 351                   6
## # ... with 5 more variables: `Times Counted` <int>, `Times Over
## #   Voted` <int>, pct_voted <dbl>, category <chr>, value <int>

Display our precincts with percentages of total registered voters. The y-axis limits need a touch of extra headroom to keep the percentage labels from getting clipped.

# Fix the bar order explicitly; otherwise the factor levels (and therefore the
# bars) would follow the default sorted order of the category names.
graph_dat$category <- fct_relevel(
  as.factor(graph_dat$category),
  "Approved", "Rejected", "non_voting", "bad_votes"
)

# One bar per category, one facet per precinct. Each bar is labelled with its
# share of the precinct's registered voters.
gg <- ggplot(graph_dat, aes(x = category, y = value, fill = category)) +
  geom_col() +
  geom_text(
    aes(label = scales::percent(value / `Registered Voters`), y = value),
    vjust = -.5
  ) +
  facet_grid(~ Precinct) +
  # guide = "none" hides the legend (the x axis already names the categories);
  # guide = FALSE is deprecated in current ggplot2.
  scale_fill_brewer(type = "qual", guide = "none") +
  # Extra multiplicative padding so the label above the tallest bar is not
  # clipped at the top of the panel.
  scale_y_continuous(expand = c(0.1, 0)) +
  theme_minimal() +
  labs(
    title = "Sound Transit 3 Votes for Home Precincts",
    subtitle = "Percentages are of total registered voters",
    x = NULL,
    y = "Votes",
    caption = "Data from King County"
  )
gg