# Import libraries
packages <- c("tidyverse", "tidycensus", "sf", "plotly", "mapview", "janitor")
invisible(lapply(packages, library, character.only=TRUE)) # Dont display library import
This lab will use the tidycensus package to assess data from the American Community Survey (ACS) and create both spatial and non-spatial visuals.
This section will work with the % of population with a graduate degree (DP02_0066P). This variable will be explored within the State of Minnesota’s counties.
Retrieve the 2017–2021 ACS 5-year estimates for Minnesota counties using the % with graduate degree variable, then clean the resulting data.
# The stored Census API key was causing problems, so clear it and query without one
Sys.unsetenv("CENSUS_API_KEY")

# Request the county-level ACS 5-year estimate of DP02_0066P for Minnesota
# (no geometry), then tidy the result in the same pipeline
mn_grad <- get_acs(
  geography = "county",
  variables = "DP02_0066P",
  state = "MN",
  year = 2021,
  survey = "acs5",
  geometry = FALSE
) %>%
  clean_names() %>% # standardise column names (lower case, underscores)
  rename(
    pct_grad = estimate, # % with graduate degree
    moe_grad = moe # margin of error of that estimate
  )

# Preview the first rows of the cleaned table
head(mn_grad)
## # A tibble: 6 × 5
## geoid name variable pct_grad moe_grad
## <chr> <chr> <chr> <dbl> <dbl>
## 1 27001 Aitkin County, Minnesota DP02_0066P 5.3 0.7
## 2 27003 Anoka County, Minnesota DP02_0066P 9 0.4
## 3 27005 Becker County, Minnesota DP02_0066P 8 0.8
## 4 27007 Beltrami County, Minnesota DP02_0066P 12 1.1
## 5 27009 Benton County, Minnesota DP02_0066P 6.5 1
## 6 27011 Big Stone County, Minnesota DP02_0066P 7.4 1.5
Output Figure 1: Table with one row for each county, showing the % of residents in that county with a graduate degree (pct_grad) and its margin of error (moe_grad).
The table will be sorted by % to list the five counties with the highest values and the five counties with the lowest values.
# Highest %: order the county table from highest to lowest pct_grad,
# then keep the first five rows
top_grad <- slice_head(
  arrange(mn_grad, desc(pct_grad)),
  n = 5
)
top_grad # show the five highest counties
## # A tibble: 5 × 5
## geoid name variable pct_grad moe_grad
## <chr> <chr> <chr> <dbl> <dbl>
## 1 27109 Olmsted County, Minnesota DP02_0066P 20.9 0.8
## 2 27053 Hennepin County, Minnesota DP02_0066P 19.3 0.3
## 3 27123 Ramsey County, Minnesota DP02_0066P 17.4 0.5
## 4 27163 Washington County, Minnesota DP02_0066P 16.6 0.7
## 5 27031 Cook County, Minnesota DP02_0066P 16.5 2.8
# Lowest %: order the county table from lowest to highest pct_grad,
# then keep the first five rows
bottom_grad <- slice_head(
  arrange(mn_grad, pct_grad),
  n = 5
)
bottom_grad # show the five lowest counties
## # A tibble: 5 × 5
## geoid name variable pct_grad moe_grad
## <chr> <chr> <chr> <dbl> <dbl>
## 1 27125 Red Lake County, Minnesota DP02_0066P 3.1 0.9
## 2 27129 Renville County, Minnesota DP02_0066P 3.3 0.5
## 3 27143 Sibley County, Minnesota DP02_0066P 3.5 0.6
## 4 27155 Traverse County, Minnesota DP02_0066P 3.7 1.3
## 5 27159 Wadena County, Minnesota DP02_0066P 3.7 0.8
These tables show that more populous, urban counties (e.g., Hennepin County) have a higher % of graduate degree holders than less populous, rural counties (e.g., Red Lake County). Olmsted County came out on top, which was surprising because I hadn’t heard of it. It turns out Rochester is in Olmsted County, which is where the Mayo Clinic is, so that makes sense.
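A margin of error (moe) plot will be created to show the point estimates and their 90% moe. To keep the plot legible, only the ten counties with the lowest % and the ten counties with the highest % are shown. The counties will appear in order of % on the plot.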
# Rank the counties by % with a graduate degree and keep the two ends of the ranking
mn_grad_plot <- mn_grad %>%
  arrange(pct_grad) %>% # ascending, so rank 1 is the lowest %
  mutate(rank = row_number()) %>% # position of each county in the ranking
  # drop the middle of the ranking; the 10 lowest and 10 highest remain
  filter(!(rank > 10 & rank <= n() - 10)) %>%
  # order the county factor by estimate so the y axis follows the ranking
  mutate(name = forcats::fct_reorder(name, pct_grad))

# Margin-of-error plot: one horizontal error bar and one point per county
gg_moe <- ggplot(
  data = mn_grad_plot,
  mapping = aes(x = pct_grad, y = name) # estimate on x, ordered county on y
) +
  geom_errorbarh(
    mapping = aes(
      xmin = pct_grad - moe_grad, # estimate minus margin of error
      xmax = pct_grad + moe_grad # estimate plus margin of error
    ),
    color = "grey50", # grey bars so the points stand out
    height = 0.2 # height of the end caps
  ) +
  geom_point(size = 2) + # point estimate drawn over the bar
  labs(
    x = "Percent with graduate or professional degree",
    y = "", # county names label themselves
    title = "Graduate Degree Attainment by County",
    subtitle = "Minnesota, ACS 2017–2021 5-year estimates\nPoints show estimates; bars show 90% margins of error",
    caption = "Source: U.S. Census Bureau, ACS 2017–2021 (DP02_0066P)."
  )

gg_moe # render the static plot
This plot shows that the estimates are not equally precise across counties. Larger counties often have a narrow margin of error, while smaller ones have larger margins.
An interactive version of this plot will be created to make it easier to inspect the data and to make the margins of error more understandable.
# Convert the static margin-of-error plot into an interactive plotly version
ggplotly(p = gg_moe)
Part B will use an ACS table to map median household income (B19013_001) for each census tract in Hennepin County, MN. Mapview will be used to create an interactive map and ggplot2 will be used to create a static choropleth map.
# Tract-level ACS 5-year estimate of B19013_001 for Hennepin County, MN,
# requested together with the tract geometries
hennepin_income <- mutate(
  get_acs(
    geography = "tract",
    variables = "B19013_001",
    state = "MN",
    county = "Hennepin",
    year = 2021,
    survey = "acs5",
    geometry = TRUE
  ),
  med_income = estimate # copy of the estimate under a clearer name for plotting
)
## | | | 0% | |= | 2% | |== | 3% | |=== | 4% | |==== | 6% | |===== | 7% | |====== | 8% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 17% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 23% | |================= | 25% | |=================== | 27% | |==================== | 28% | |===================== | 30% | |======================= | 33% | |========================= | 36% | |========================== | 38% | |============================ | 39% | |============================= | 41% | |============================== | 43% | |=============================== | 44% | |================================ | 46% | |================================= | 47% | |================================== | 49% | |=================================== | 50% | |==================================== | 52% | |====================================== | 54% | |======================================= | 55% | |======================================== | 57% | |========================================= | 58% | |=========================================== | 62% | |============================================= | 65% | |================================================ | 68% | |================================================= | 70% | |=================================================== | 73% | |====================================================== | 78% | |======================================================= | 79% | |========================================================= | 81% | |========================================================== | 82% | |=========================================================== | 84% | |============================================================= | 87% | |============================================================== | 89% | 
|=============================================================== | 90% | |================================================================= | 94% | |=================================================================== | 95% | |==================================================================== | 97% | |======================================================================| 100%
hennepin_income # print the tract-level result, including estimate, moe, geometry and med_income
## Simple feature collection with 329 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -93.76838 ymin: 44.78538 xmax: -93.17722 ymax: 45.24662
## Geodetic CRS: NAD83
## First 10 features:
## GEOID NAME variable
## 1 27053024300 Census Tract 243, Hennepin County, Minnesota B19013_001
## 2 27053110500 Census Tract 1105, Hennepin County, Minnesota B19013_001
## 3 27053024006 Census Tract 240.06, Hennepin County, Minnesota B19013_001
## 4 27053022801 Census Tract 228.01, Hennepin County, Minnesota B19013_001
## 5 27053026908 Census Tract 269.08, Hennepin County, Minnesota B19013_001
## 6 27053025401 Census Tract 254.01, Hennepin County, Minnesota B19013_001
## 7 27053108600 Census Tract 1086, Hennepin County, Minnesota B19013_001
## 8 27053026824 Census Tract 268.24, Hennepin County, Minnesota B19013_001
## 9 27053106000 Census Tract 1060, Hennepin County, Minnesota B19013_001
## 10 27053000102 Census Tract 1.02, Hennepin County, Minnesota B19013_001
## estimate moe geometry med_income
## 1 72240 5745 MULTIPOLYGON (((-93.31881 4... 72240
## 2 80157 5307 MULTIPOLYGON (((-93.22237 4... 80157
## 3 143125 22624 MULTIPOLYGON (((-93.35044 4... 143125
## 4 133958 34619 MULTIPOLYGON (((-93.34793 4... 133958
## 5 110246 3614 MULTIPOLYGON (((-93.39145 4... 110246
## 6 68711 11097 MULTIPOLYGON (((-93.28347 4... 68711
## 7 57470 15799 MULTIPOLYGON (((-93.24995 4... 57470
## 8 127819 26964 MULTIPOLYGON (((-93.36073 4... 127819
## 9 23492 5316 MULTIPOLYGON (((-93.25966 4... 23492
## 10 59750 11634 MULTIPOLYGON (((-93.29919 4... 59750
An interactive map of median household income by census tract is created first. Interactive maps are easier for the user to explore, and they make it easier to identify areas based on median income.
# Interactive mapview of the Hennepin County tracts
mapview(
  x = hennepin_income, # ACS result with tract geometries
  layer.name = "Median household income (USD)", # label for the map layer
  zcol = "med_income", # column that drives the colouring
  legend = TRUE # show the colour legend
)
A static choropleth map is created next to better display median income across the tracts.
# Static choropleth: census tracts filled by median household income
ggplot(data = hennepin_income) +
  # one polygon per tract, filled by income, with no boundary lines drawn
  geom_sf(mapping = aes(fill = med_income), color = NA) +
  scale_fill_viridis_c(
    name = "Median income (USD)", # legend title
    option = "plasma", # viridis palette variant
    na.value = "grey80" # tracts with a missing estimate are drawn grey
  ) +
  theme_minimal() + # minimal ggplot theme
  # titles and source note
  labs(
    title = "Median Household Income by Census Tract",
    subtitle = "Hennepin County, Minnesota (ACS 2017–2021 5-year estimates)",
    caption = "Source: U.S. Census Bureau, ACS 2017–2021 (B19013_001)."
  )
This map shows that some census tracts have much higher median incomes than others. These differences align with socioeconomic differences within the county.