Disclaimer: The content of this RMarkdown note came from a course called Analyzing US Census Data in R in datacamp.
Analysts across industries rely on data from the United States Census Bureau in their work. In this course, students will learn how to work with Census tabular and spatial data in the R environment. The course focuses on the tidycensus
package for acquiring data from the decennial US Census and American Community survey in a tidyverse-friendly format, and the tigris
package for accessing Census geographic data within R. By the end of this course, students will be able to rapidly visualize and explore demographic data from the Census Bureau using ggplot2
and other tidyverse
tools, and make maps of Census demographic data with only a few lines of R code.
library(tidycensus)
library(tidyverse)
library(sf)
# Get dataset with geometry set to TRUE
orange_value <- get_acs(geography = "tract", state = "NH",
county = "Grafton",
variable = "B25077_001",
geometry = TRUE)
# Plot the estimate to view a map of the data
plot(orange_value["estimate"])
Geometry is currently supported in tidycensus
for geographies in the core Census hierarchy - state, county, Census tract, block group, and block - as well as zip code tabulation areas.
If you are interested in mapping data for other geographies, however, you can download the equivalent boundary file from the Census Bureau using the tigris
package and join your demographic data to it for mapping.
# Load package
library(tigris) # needed for the school_districts function
# Get an income dataset for Idaho by school district from tyidycensus
idaho_income <- get_acs(geography = "school district (unified)",
variables = "B19013_001",
state = "NH")
# Get a school district dataset for Idaho from tigris
idaho_school <- school_districts(state = "NH", type = "unified", class = "sf")
# Join the income dataset to the boundaries dataset
id_school_joined <- left_join(idaho_school, idaho_income, by = "GEOID")
plot(id_school_joined["estimate"])
# Get a dataset of median home values from the 1-year ACS
state_value <- get_acs(geography = "state",
variable = "B25077_001",
survey = "acs1",
geometry = TRUE,
shift_geo = TRUE)
# Plot the dataset to view the shifted geometry
plot(state_value["estimate"])
Choropleth maps, which visualize statistical variation through the shading of areas, are among the most popular ways to map demographic data. Census or ACS data acquired with tidycensus can be mapped in this way in ggplot2
with geom_sf
using the estimate column as a fill
aesthetic.
# Get a dataset of median home values by Census tract for Marin County, California
marin_value <- get_acs(geography = "tract",
variable = "B25077_001",
state = "NH",
county = "Grafton",
geometry = TRUE)
# Create a choropleth map with ggplot
ggplot(marin_value, aes(fill = estimate)) +
geom_sf()
Interpretation
ggplot2
version 3.0 integrated the viridis
color palettes, which are perceptually uniform and legible to colorblind individuals and in black and white. For these reasons, the viridis palettes have become very popular for data visualization, including for choropleth mapping.
# Set continuous viridis palettes for your map
ggplot(marin_value, aes(fill = estimate, col = estimate)) +
geom_sf() +
# to request a continuous viridis palette for the tract fill
scale_fill_viridis_c() +
# to request a continuous viridis palette for the tract borders
scale_color_viridis_c()
Clean up some map elements and add some descriptive information to provide context to your map.
# Set the color guide to FALSE and add a subtitle and caption to your map
ggplot(marin_value, aes(fill = estimate, color = estimate)) +
geom_sf() +
scale_fill_viridis_c(labels = scales::dollar) +
# to avoid double-plotting of the legend
scale_color_viridis_c(guide = FALSE) +
# for a box to place the map
theme_minimal() +
# crs for tilting, datum for gridlines
coord_sf(crs = 26911, datum = NA) +
labs(title = "Median owner-occupied housing value by Census tract",
subtitle = "Marin County, California",
caption = "Data source: 2012-2016 ACS.\nData acquired with the R tidycensus package.",
# fill for title of the legend
fill = "ACS estimate")
library(sf)
library(sf)
# Generate point centers
centers <- st_centroid(state_value)
# Set size parameter and the size range
ggplot() +
geom_sf(data = state_value, fill = "white") +
geom_sf(data = centers, aes(size = estimate), shape = 21,
fill = "lightblue", alpha = 0.7, show.legend = "point") +
scale_size_continuous(range = c(1, 20))
# v15 <- load_variables(2016, "acs5", cache = TRUE)
# View(v15)
nh_age <- get_acs(geography = "county",
variable = c(Male_70to74 = "B01001_022",
Male_75to79 = "B01001_023",
Male_80to84 = "B01001_024",
Male_85over = "B01001_025",
Female_70to74 = "B01001_046",
Female_75to79 = "B01001_047",
Female_80to84 = "B01001_048",
Female_85over = "B01001_049"),
state = "NH",
summary_var = "B01001_001",
geometry = TRUE) %>%
mutate(percent = estimate / summary_est)
nh_age_both_sex <-
nh_age %>%
select(-c(moe, summary_moe, percent)) %>%
spread(variable, estimate) %>%
mutate(All_70to74 = Female_70to74 + Male_70to74,
All_75to79 = Female_75to79 + Male_75to79,
All_80to84 = Female_80to84 + Male_80to84,
All_85over = Female_85over + Male_85over) %>%
select(-c(Female_70to74, Female_75to79, Female_80to84, Female_85over,
Male_70to74, Male_75to79, Male_80to84, Male_85over)) %>%
gather(variable, estimate, All_70to74:All_85over) %>%
mutate(percent = estimate / summary_est)
nh_age_both_sex
nh_age_both_sex %>%
ggplot(aes(fill = percent, color = percent)) +
geom_sf() +
coord_sf(datum = NA) +
facet_wrap(~variable)
library(mapview)
# Map the orange_value dataset interactively
mapview(orange_value)
# Map your data by the estimate column
mapView(orange_value,
zcol = "estimate")