Notes Mar 24

Harold Nelson

3/24/2021

Insttall the package urbnmapr.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.4
## ✓ tibble  3.0.5     ✓ dplyr   1.0.3
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(socviz)
library(tidycensus)
# devtools::install_github("UrbanInstitute/urbnmapr")
library(urbnmapr)

Install theme_map() from the appendix of Healy’s text.

Answer

theme_map <- function(base_size=9, base_family="") {
    require(grid)
    theme_bw(base_size=base_size, base_family=base_family) %+replace%
        theme(axis.line=element_blank(),
              axis.text=element_blank(),
              axis.ticks=element_blank(),
              axis.title=element_blank(),
              panel.background=element_blank(),
              panel.border=element_blank(),
              panel.grid=element_blank(),
              panel.spacing=unit(0, "lines"),
              plot.background=element_blank(),
              legend.justification = c(0,0),
              legend.position = c(0,0)
              )
}

Get the US States map from the maps package.

Answer

library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
us_states <- map_data("state")
head(us_states)
##        long      lat group order  region subregion
## 1 -87.46201 30.38968     1     1 alabama      <NA>
## 2 -87.48493 30.37249     1     2 alabama      <NA>
## 3 -87.52503 30.37249     1     3 alabama      <NA>
## 4 -87.53076 30.33239     1     4 alabama      <NA>
## 5 -87.57087 30.32665     1     5 alabama      <NA>
## 6 -87.58806 30.32665     1     6 alabama      <NA>

We can see that states are called regions and are in lower case. The following code appears early in Chapter 7.

p <- ggplot(data = us_states,
            aes(x = long, y = lat,
                group = group, fill = region))

p + geom_polygon(color = "gray90", size = 0.1) + guides(fill = FALSE)

What’s going on?

Note that Alaska and Hawaii are not included.

The interiors of the polygons are supposed to be filled according to region. However, the variable region really contains the names of the states.

What is the meaning of “group”.

Get tables of region and group to unravel this.

Answer

table(us_states$region)
## 
##              alabama              arizona             arkansas 
##                  202                  149                  312 
##           california             colorado          connecticut 
##                  516                   79                   91 
##             delaware district of columbia              florida 
##                   94                   10                  872 
##              georgia                idaho             illinois 
##                  381                  233                  329 
##              indiana                 iowa               kansas 
##                  257                  256                  113 
##             kentucky            louisiana                maine 
##                  397                  650                  399 
##             maryland        massachusetts             michigan 
##                  566                  286                  830 
##            minnesota          mississippi             missouri 
##                  373                  382                  315 
##              montana             nebraska               nevada 
##                  238                  208                   70 
##        new hampshire           new jersey           new mexico 
##                  125                  205                   78 
##             new york       north carolina         north dakota 
##                  495                  782                  105 
##                 ohio             oklahoma               oregon 
##                  238                  284                  236 
##         pennsylvania         rhode island       south carolina 
##                  172                   66                  304 
##         south dakota            tennessee                texas 
##                  166                  289                 1088 
##                 utah              vermont             virginia 
##                   59                  129                  734 
##           washington        west virginia            wisconsin 
##                  545                  373                  388 
##              wyoming 
##                   68
table(us_states$group)
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
##  202  149  312  516   79   91   94   10  872  381  233  329  257  256  113  397 
##   17   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32 
##  650  399  566   36  220   30  460  370  373  382  315  238  208   70  125  205 
##   33   34   35   36   37   38   39   40   41   42   43   44   45   46   47   48 
##   78   16  290   21  168   37  733   12  105  238  284  236  172   66  304  166 
##   49   50   51   52   53   54   55   56   57   58   59   60   61   62   63 
##  289 1088   59  129   96   15  623   17   17   19   44  448  373  388   68

There are 49 states including DC, but there are 63 groups. Each group is a polygon and some states require more than one polygon. The geom geom_polygon requires the group aesthetic when it has to draw more than one polygon.

The albers projection makes the map more realistic.

Create the map with the proper values for the US.

Answer

p <- ggplot(data = us_states,
            mapping = aes(x = long, y = lat,
                          group = group, fill = region))

p + geom_polygon(color = "gray90", size = 0.1) +
    coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
    guides(fill = FALSE)

Election Maps

First join the election results with the map data.

Note how the variable name state in the election dataframe is used to create a version of itself which matches the name in the state map dataframe.

election$region <- tolower(election$state)
us_states_elec <- left_join(us_states, election)
## Joining, by = "region"
head(us_states_elec)
##        long      lat group order  region subregion   state st fips total_vote
## 1 -87.46201 30.38968     1     1 alabama      <NA> Alabama AL    1    2123372
## 2 -87.48493 30.37249     1     2 alabama      <NA> Alabama AL    1    2123372
## 3 -87.52503 30.37249     1     3 alabama      <NA> Alabama AL    1    2123372
## 4 -87.53076 30.33239     1     4 alabama      <NA> Alabama AL    1    2123372
## 5 -87.57087 30.32665     1     5 alabama      <NA> Alabama AL    1    2123372
## 6 -87.58806 30.32665     1     6 alabama      <NA> Alabama AL    1    2123372
##   vote_margin winner      party pct_margin r_points d_points pct_clinton
## 1      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 2      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 3      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 4      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 5      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 6      588708  Trump Republican     0.2773    27.72   -27.72       34.36
##   pct_trump pct_johnson pct_other clinton_vote trump_vote johnson_vote
## 1     62.08        2.09      1.46       729547    1318255        44467
## 2     62.08        2.09      1.46       729547    1318255        44467
## 3     62.08        2.09      1.46       729547    1318255        44467
## 4     62.08        2.09      1.46       729547    1318255        44467
## 5     62.08        2.09      1.46       729547    1318255        44467
## 6     62.08        2.09      1.46       729547    1318255        44467
##   other_vote ev_dem ev_rep ev_oth census
## 1      31103      0      9      0  South
## 2      31103      0      9      0  South
## 3      31103      0      9      0  South
## 4      31103      0      9      0  South
## 5      31103      0      9      0  South
## 6      31103      0      9      0  South

Note that we have a valid state name, postal code and fips code in the result.

Now let’s look at the various versions of the choropleth of election results.

First p0.

Answer

party_colors <- c("#2E74C0", "#CB454A") 
p0 <- ggplot(data = us_states_elec,
             mapping = aes(x = long, y = lat,
                           group = group, fill = party))
p0

Now p1

Answer

p1 <- p0 + geom_polygon(color = "gray90", size = 0.1) +
    coord_map(projection = "albers", lat0 = 39, lat1 = 45) 
p1

Now p2

p2 <- p1 + scale_fill_manual(values = party_colors) +
    labs(title = "Election Results 2016", fill = NULL)
p2 

Now p2 with theme_map()

Answer

p2 + theme_map() 
## Loading required package: grid

The problem with choropleths is that they emphsize area. A sparsely populated state like Montana looks as important as California. Cartograms are used to reshape maps so that the visual impression of importance can be assigned to a relevant variable like poplation. To construct cartograms we need a better version of our map of the states. We can get this from urbanmapr and get the population data at the same time.

The map is a simple features object “sf”.

Answer

states <- get_urbn_map("states", sf = TRUE)
str(states)
## Classes 'sf' and 'data.frame':   51 obs. of  4 variables:
##  $ state_fips: chr  "01" "04" "08" "09" ...
##  $ state_abbv: chr  "AL" "AZ" "CO" "CT" ...
##  $ state_name: chr  "Alabama" "Arizona" "Colorado" "Connecticut" ...
##  $ geometry  :sfc_MULTIPOLYGON of length 51; first list element: List of 4
##   ..$ :List of 1
##   .. ..$ : num [1:7, 1:2] 1150023 1150663 1151851 1152027 1152075 ...
##   ..$ :List of 1
##   .. ..$ : num [1:8, 1:2] 1130796 1131126 1132215 1132707 1132972 ...
##   ..$ :List of 1
##   .. ..$ : num [1:24, 1:2] 1126242 1128740 1130238 1132739 1136115 ...
##   ..$ :List of 1
##   .. ..$ : num [1:756, 1:2] 1091350 1091285 1091268 1091159 1091155 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA
##   ..- attr(*, "names")= chr [1:3] "state_fips" "state_abbv" "state_name"

Note that there are only 51 rows in this dataframe, while there are more than 15,000 in the us_states dataframe. The map information is stored in a single column called geometry. The items in this column are complex lists.

Now lets join the election data with the map.

Answer

library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
elec_map = st_as_sf(left_join(election,states, by = c("state" = "state_name")))
elec_map %>% ggplot(aes(fill = party)) +
    geom_sf()

Now we want to start using tmap and Cartogram.

library(tmap)
library(cartogram)
tmap_mode("plot")
## tmap mode set to plotting

Read the bottom of the instructions at https://tlorusso.github.io/geodata_workshop/tmap_package#the-cartogram-function

Answer

# First make the cartogram

elec_carto = cartogram_cont(elec_map,"total_vote",itermax = 5)
## Mean size error for iteration 1: 3.78335021082783
## Mean size error for iteration 2: 2.70840752621813
## Mean size error for iteration 3: 2.14400800561415
## Mean size error for iteration 4: 1.7717806024278
## Mean size error for iteration 5: 1.51381249022066
# Then display a choropleth version using tmap()

tm_shape(elec_carto) + 
    tm_fill(col = "pct_trump")

Read the documentation on cartogram and use cartogram_dorling(). Look at tm_text() to label things.

Answer

# First make the cartogram

elec_carto_d = cartogram_dorling(elec_map,"total_vote",itermax = 5)

# Then display a choropleth version using tmap()

tm_shape(elec_carto_d) + 
    tm_fill(col = "pct_trump") +
    tm_text("state",size = .4)

Now get wa_wide from ACS. get population B01001_001 and median household income B19013_001 for the counties of Washington State. Ask for wide output.

Answer

wa_wide <- get_acs(geography ="county", 
                     state = "WA",
                     variables = c(hhincome = "B19013_001", 
                            pop = "B01001_001"), 
                     output = "wide",
                   geometry = TRUE)
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
wa_wide$NAME = gsub(" County, Washington","",wa_wide$NAME)
head(wa_wide)
## Simple feature collection with 6 features and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.3549 ymin: 45.54354 xmax: -118.1983 ymax: 47.94077
## geographic CRS: NAD83
##   GEOID         NAME hhincomeE hhincomeM   popE popM
## 1 53035       Kitsap     75411      1403 265882   NA
## 2 53021     Franklin     63584      2180  92009   NA
## 3 53011        Clark     75253      1323 473252   NA
## 4 53027 Grays Harbor     51240      2489  72779   NA
## 5 53005       Benton     69023      1606 197518   NA
## 6 53015      Cowlitz     54506      1908 106778   NA
##                         geometry
## 1 MULTIPOLYGON (((-122.5049 4...
## 2 MULTIPOLYGON (((-119.457 46...
## 3 MULTIPOLYGON (((-122.796 45...
## 4 MULTIPOLYGON (((-123.8845 4...
## 5 MULTIPOLYGON (((-119.8751 4...
## 6 MULTIPOLYGON (((-123.2183 4...

Make the map projected using crs 3857. Then create a cartogram using population. Display the income distribution by county using the cartogram.

Answer

library(cartogram)
wa_proj = st_transform(wa_wide,crs = 3857)
wa_carto = cartogram_cont(wa_proj,"popE",itermax = 5)
## Mean size error for iteration 1: 6.21406019838974
## Mean size error for iteration 2: 5.3781858573901
## Mean size error for iteration 3: 4.62749035059395
## Mean size error for iteration 4: 3.94553903510898
## Mean size error for iteration 5: 3.33909287160703
tm_shape(wa_carto) + 
    tm_polygons(col = "hhincomeE", n = 6)