Choropleth Issues

Harold Nelson

2022-10-31

Setup

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(socviz)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
## 
## Attaching package: 'ggmap'
## 
## The following object is masked from 'package:plotly':
## 
##     wind
library(maps)
## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map
library(mapproj)
library(tidycensus)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(cartogram)

The 2016 Election

Look at the election dataframe from socviz.

glimpse(election)
## Rows: 51
## Columns: 22
## $ state        <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "California",…
## $ st           <chr> "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FL…
## $ fips         <dbl> 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, …
## $ total_vote   <dbl> 2123372, 318608, 2604657, 1130635, 14237893, 2780247, 164…
## $ vote_margin  <dbl> 588708, 46933, 91234, 304378, 4269978, 136386, 224357, 50…
## $ winner       <chr> "Trump", "Trump", "Trump", "Trump", "Clinton", "Clinton",…
## $ party        <chr> "Republican", "Republican", "Republican", "Republican", "…
## $ pct_margin   <dbl> 0.2773, 0.1473, 0.0350, 0.2692, 0.2999, 0.0491, 0.1364, 0…
## $ r_points     <dbl> 27.72, 14.73, 3.50, 26.92, -29.99, -4.91, -13.64, -11.38,…
## $ d_points     <dbl> -27.72, -14.73, -3.50, -26.92, 29.99, 4.91, 13.64, 11.38,…
## $ pct_clinton  <dbl> 34.36, 36.55, 44.58, 33.65, 61.48, 48.16, 54.57, 53.09, 9…
## $ pct_trump    <dbl> 62.08, 51.28, 48.08, 60.57, 31.49, 43.25, 40.93, 41.71, 4…
## $ pct_johnson  <dbl> 2.09, 5.88, 4.08, 2.64, 3.36, 5.18, 2.96, 3.33, 1.58, 2.1…
## $ pct_other    <dbl> 1.46, 6.29, 3.25, 3.13, 3.66, 3.41, 1.55, 1.88, 3.47, 1.8…
## $ clinton_vote <dbl> 729547, 116454, 1161167, 380494, 8753792, 1338870, 897572…
## $ trump_vote   <dbl> 1318255, 163387, 1252401, 684872, 4483814, 1202484, 67321…
## $ johnson_vote <dbl> 44467, 18725, 106327, 29829, 478500, 144121, 48676, 14757…
## $ other_vote   <dbl> 31103, 20042, 84762, 35440, 521787, 94772, 25457, 8327, 1…
## $ ev_dem       <dbl> 0, 0, 0, 0, 55, 9, 7, 3, 3, 0, 0, 3, 0, 20, 0, 0, 0, 0, 0…
## $ ev_rep       <dbl> 9, 3, 11, 6, 0, 0, 0, 0, 0, 29, 16, 0, 4, 0, 11, 6, 6, 8,…
## $ ev_oth       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ census       <chr> "South", "West", "West", "South", "West", "West", "Northe…

The Map

us_states <- map_data("state")
head(us_states)
##        long      lat group order  region subregion
## 1 -87.46201 30.38968     1     1 alabama      <NA>
## 2 -87.48493 30.37249     1     2 alabama      <NA>
## 3 -87.52503 30.37249     1     3 alabama      <NA>
## 4 -87.53076 30.33239     1     4 alabama      <NA>
## 5 -87.57087 30.32665     1     5 alabama      <NA>
## 6 -87.58806 30.32665     1     6 alabama      <NA>

Merge

Merge the election data with the map data. Look at the merged data with head(). Note the redundancy in the election data. The election data is repeated for every point required to define a state boundary.

election$region <- tolower(election$state)
us_states_elec <- left_join(us_states, election)
## Joining, by = "region"
head(us_states_elec)
##        long      lat group order  region subregion   state st fips total_vote
## 1 -87.46201 30.38968     1     1 alabama      <NA> Alabama AL    1    2123372
## 2 -87.48493 30.37249     1     2 alabama      <NA> Alabama AL    1    2123372
## 3 -87.52503 30.37249     1     3 alabama      <NA> Alabama AL    1    2123372
## 4 -87.53076 30.33239     1     4 alabama      <NA> Alabama AL    1    2123372
## 5 -87.57087 30.32665     1     5 alabama      <NA> Alabama AL    1    2123372
## 6 -87.58806 30.32665     1     6 alabama      <NA> Alabama AL    1    2123372
##   vote_margin winner      party pct_margin r_points d_points pct_clinton
## 1      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 2      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 3      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 4      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 5      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 6      588708  Trump Republican     0.2773    27.72   -27.72       34.36
##   pct_trump pct_johnson pct_other clinton_vote trump_vote johnson_vote
## 1     62.08        2.09      1.46       729547    1318255        44467
## 2     62.08        2.09      1.46       729547    1318255        44467
## 3     62.08        2.09      1.46       729547    1318255        44467
## 4     62.08        2.09      1.46       729547    1318255        44467
## 5     62.08        2.09      1.46       729547    1318255        44467
## 6     62.08        2.09      1.46       729547    1318255        44467
##   other_vote ev_dem ev_rep ev_oth census
## 1      31103      0      9      0  South
## 2      31103      0      9      0  South
## 3      31103      0      9      0  South
## 4      31103      0      9      0  South
## 5      31103      0      9      0  South
## 6      31103      0      9      0  South
nrow(us_states_elec)
## [1] 15537

Use tidycensus

Get the state map data using get_acs.

options(tigris_use_cache = TRUE)
tc_state = get_acs(geography = "state",variable = "B01001_001",geometry = TRUE,shift_geo = TRUE)
## Getting data from the 2016-2020 5-year ACS
## Warning: The `shift_geo` argument is deprecated and will be removed in a future
## release. We recommend using `tigris::shift_geometry()` instead.
## Using feature geometry obtained from the albersusa package
## Please note: Alaska and Hawaii are being shifted and are not to scale.
## old-style crs object detected; please recreate object with a recent sf::st_crs()
head(tc_state)
## Simple feature collection with 6 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -2031905 ymin: -1470717 xmax: 2295505 ymax: 67481.2
## CRS:           +proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +a=6370997 +b=6370997 +units=m +no_defs
##   GEOID                 NAME   variable estimate moe
## 1    04              Arizona B01001_001  7174064  NA
## 2    05             Arkansas B01001_001  3011873  NA
## 3    06           California B01001_001 39346023  NA
## 4    08             Colorado B01001_001  5684926  NA
## 5    09          Connecticut B01001_001  3570549  NA
## 6    11 District of Columbia B01001_001   701974  NA
##                         geometry
## 1 MULTIPOLYGON (((-1111066 -8...
## 2 MULTIPOLYGON (((557903.1 -1...
## 3 MULTIPOLYGON (((-1853480 -9...
## 4 MULTIPOLYGON (((-613452.9 -...
## 5 MULTIPOLYGON (((2226838 519...
## 6 MULTIPOLYGON (((1960720 -41...

Merge

Merge the election data with the new map data.

tc_state = tc_state %>% 
  mutate(GEOID = as.numeric(GEOID))

election_sf = election %>% 
  rename(GEOID = fips) %>% 
  left_join(tc_state) %>%
  st_as_sf()
## Joining, by = "GEOID"
head(election_sf)
## Simple feature collection with 6 features and 27 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -2100000 ymin: -2500000 xmax: 1431374 ymax: -39583.33
## CRS:           +proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +a=6370997 +b=6370997 +units=m +no_defs
## # A tibble: 6 × 28
##   state st    GEOID total…¹ vote_…² winner party pct_m…³ r_poi…⁴ d_poi…⁵ pct_c…⁶
##   <chr> <chr> <dbl>   <dbl>   <dbl> <chr>  <chr>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 Alab… AL        1  2.12e6  588708 Trump  Repu…  0.277    27.7   -27.7     34.4
## 2 Alas… AK        2  3.19e5   46933 Trump  Repu…  0.147    14.7   -14.7     36.6
## 3 Ariz… AZ        4  2.60e6   91234 Trump  Repu…  0.035     3.5    -3.5     44.6
## 4 Arka… AR        5  1.13e6  304378 Trump  Repu…  0.269    26.9   -26.9     33.6
## 5 Cali… CA        6  1.42e7 4269978 Clint… Demo…  0.300   -30.0    30.0     61.5
## 6 Colo… CO        8  2.78e6  136386 Clint… Demo…  0.0491   -4.91    4.91    48.2
## # … with 17 more variables: pct_trump <dbl>, pct_johnson <dbl>,
## #   pct_other <dbl>, clinton_vote <dbl>, trump_vote <dbl>, johnson_vote <dbl>,
## #   other_vote <dbl>, ev_dem <dbl>, ev_rep <dbl>, ev_oth <dbl>, census <chr>,
## #   region <chr>, NAME <chr>, variable <chr>, estimate <dbl>, moe <dbl>,
## #   geometry <MULTIPOLYGON [m]>, and abbreviated variable names ¹​total_vote,
## #   ²​vote_margin, ³​pct_margin, ⁴​r_points, ⁵​d_points, ⁶​pct_clinton
nrow(election_sf)
## [1] 51

First Choropleth

Do a simple choropleth on the variable winner.

election_sf %>% 
  ggplot(aes(fill = winner)) +
  scale_fill_manual(values =c("blue","red")) +
  geom_sf() +
  ggtitle("Standard Choropleth of 2016 Election")

Coninuous Cartogram

carto_cont = cartogram_cont(election_sf,weight = "total_vote")
## Mean size error for iteration 1: 3.79157076556864
## Mean size error for iteration 2: 2.71483119894093
## Mean size error for iteration 3: 2.14881277886967
## Mean size error for iteration 4: 1.7759433070484
## Mean size error for iteration 5: 1.5175740021827
## Mean size error for iteration 6: 1.34221078292575
## Mean size error for iteration 7: 1.22627763271174
## Mean size error for iteration 8: 1.15152128429377
## Mean size error for iteration 9: 1.10386870453725
## Mean size error for iteration 10: 1.07380666095244
## Mean size error for iteration 11: 1.05495673823613
## Mean size error for iteration 12: 1.04257442291999
## Mean size error for iteration 13: 1.03438479769453
## Mean size error for iteration 14: 1.02862160664123
## Mean size error for iteration 15: 1.02434998496373
ggplot(carto_cont,aes(fill = winner)) + 
scale_fill_manual(values =c("blue","red")) + 
geom_sf() +
ggtitle("Continuous Cartogram of 2016 Election")

Dorling Cartogram

carto_dorling = cartogram_dorling(election_sf,weight = "total_vote")
ggplot(carto_dorling,aes(fill = winner)) + 
scale_fill_manual(values =c("blue","red")) + 
geom_sf() +
ggtitle("Dorling Cartogram of 2016 Election")

Non-Contiguous Cartogram

carto_ncont = cartogram_ncont(election_sf,weight = "total_vote")
ggplot(carto_ncont,aes(fill = winner)) + 
scale_fill_manual(values =c("blue","red")) + 
geom_sf() +
ggtitle("Non-Contigous Cartogram of 2016 Election")