knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(here)
library(janitor)
library(sf)
library(tidycensus)
library(plotly)
library(mapview)
# census_api_key("YOUR KEY GOES HERE", install = TRUE)
This R Markdown report contains the results from two unrelated analyses. Both utilize data from the U.S. Census Bureau (obtained using the “tidycensus” package).
The code chunk below uses get_acs() to obtain the estimated percentage of the age-25+ population that has a graduate degree within each county in Michigan, according to the 2017-2021 5-year American Community Survey. Then it creates a static point plot that also shows the margine of error for each estimate. The plot will be displayed in the Analysis section that follows.
adv_deg <- get_acs(
geography = "county",
variables = c(percent_grad = "DP02_0066P"),
year = 2021,
state = "Michigan"
)
# Create point plot
adv_deg_cnty_plot <- adv_deg %>%
ggplot(aes(x = estimate, y = reorder(NAME, estimate))) + #sorts y-axis descending
geom_errorbar(aes(xmin = estimate - moe, xmax = estimate + moe),
width = 0.5, linewidth = 0.5) +
geom_point(color = 'darkblue', size = 2) +
scale_y_discrete(labels = function(x) str_remove(x, "County, Michigan")) + #clean up y-axis labels
labs(title = "Percentage of Population Age 25+ with Graduate Degree",
caption = "Data acquired with R and tidycensus",
x = "ACS estimate (percent)",
y = "Counties in Michigan") +
theme_minimal()
The seven counties in Michigan in which more than 15 percent of the adult population has a graduate degree are Washtenaw, Oakland, Leelanau, Ingham, Keweenaw (though with a wide margin of error), Kalamazoo, and Emmet. These counties are the home of major universities or the (second?) home of affluent residents.
The eleven counties in which less than 5 percent of the adult population has a graduate degree are Clare, Arenac, Oscoda, Ogemaw, Branch, Tuscola, Ontonagon, Missaukee, Ionia, Montcalm, and Lake. All of these counties (except Branch and Ontonagon) are in the north/central rural areas of the state.
# Display point plot with error bars created above
adv_deg_cnty_plot
# Generate interactive plot using static plot
ggplotly(adv_deg_cnty_plot, tooltip = 'x')
The raw data file contains all public libraries in the U.S. in 2022. It is a preliminary version of Public Libraries Survey, and older vintages of these data are available from the Institute of Museum and Library Services.
The code chunk that follows accomplishes the following tasks:
# Load the raw data
publiclibraries22 <- read_csv(here("data","publiclibraries22.csv"))
# Load the processed data from last cycle that contains point locations
publiclibraries21 <- read_csv(here("data","publiclibraries21.csv"))
publiclibraries21_geo <- publiclibraries21 %>%
select(FSCSKEY, LONGITUD, LATITUDE, LOCALE_ADD)
# 1. Merge the point location and locale variables from 21 to 22 data
all_public_libraries <- publiclibraries22 %>%
left_join(publiclibraries21_geo,
by="FSCSKEY")
# 2./3. Clean the data
illinois_libraries <- all_public_libraries %>%
clean_names() %>%
filter(stabr == 'IL') %>% # select only Illinois libraries
filter(fscskey != "-3") %>% # remove closed libraries
filter(fscskey != "IL8034") %>% #drop record that doesn't exist in 2021 data
rename(library_id = fscskey, # rename variables to be easier to remember
library_name = libname,
legal_basis = c_legbas,
geography_code = geocode,
service_area_pop = popu_lsa,
cardholders = regbor,
long = longitud,
lat = latitude,
locale12 = locale_add
) %>%
select(library_id, library_name, legal_basis, geography_code,
service_area_pop, cardholders, long, lat, locale12) %>%
# 4. Separate the geographic_code variable into parts
separate(geography_code,
c("geography_type", "geography_precision"), # name the new variables
2, # split after 2nd character
remove = FALSE) %>% #keep the original variable
# 5. Recode geography_type values to be human readable
mutate(geography_type = case_match(geography_type,
'CO' ~ 'county',
'CD' ~ 'county subdivision',
'MD' ~ 'multi-county subdivision',
'PL' ~ 'place',
'MP' ~ 'multi-place',
'SU' ~ 'school district (unified)',
'OT' ~ 'other'
)) %>%
# 6. Calculate proportion of cardholders to service area population
mutate(users_pop_proportion = cardholders / service_area_pop) %>%
# 7. Collapse 12 locale levels to 4
mutate(locale4 = case_match(locale12,
11:13 ~ 1,
21:23 ~ 2,
31:33 ~ 3,
41:43 ~ 4
)) %>%
# Convert locale4 to a factor with labels
mutate(locale4 = factor(locale4, #use factor() NOT as.factor()!
labels = c("city", "suburb", "town", "rural"),
ordered = TRUE))
# 8. Convert dataframe to sf object with point geometry
illinois_libraries_geo <- illinois_libraries %>%
st_as_sf(coords = c("long", "lat"),
crs = 4269) # important to specify here to enable spatial join later
# 9. Filter only libraries with reported census geo type of 'place'
illinois_libraries_geo_place <- illinois_libraries_geo %>%
filter(geography_type == "place")
# 10. Get 2020 population counts for places in Illinois
illinois_2020pop_places <- get_decennial(
geography = 'place',
variables = 'P1_001N',
year = 2020,
state = 'illinois',
geometry = TRUE,
progress_bar = FALSE
)
# 10b. Get 2020 Illinois state total and state polygon
illinois_2020pop_total <- get_decennial(
geography = 'state',
variables = 'P1_001N',
year = 2020,
state = 'illinois',
geometry = TRUE,
progress_bar = FALSE
)
# 11. Spatial join the place polygons with the library point locations
illinois_2020pop_places_libraries <- illinois_2020pop_places %>%
st_join(illinois_libraries_geo_place,
left = FALSE) # default to inner join
# 12. Calculate the ratio of 2020 Census count to reported library service area pop
illinois_2020pop_places_libraries <- illinois_2020pop_places_libraries %>%
mutate(pop_align = value/service_area_pop) # 'value' is 2020 count
# this direction had more manageable scale than service_area_pop/value
##Alternative join not used but saved for later
# Spatial join the library point locations with the place polygons
#illinois_libraries_geo_place_join <- illinois_libraries_geo_place %>%
# st_join(illinois_2020pop_places,
# left = TRUE)
The static and interactive maps below show that most records have a comparison ratio around 1 (colored aquamarine). The interactive map is necessary to zoom in on different areas of the state to identify places were the 2020 Census count is smaller than the reported LSA population (pop_align < 1; blues) and places where the 2020 Census count is larger than the reported LSA population (pop_align > 1; yellows).
# Create static map plot of population comparison ratio
illinois_2020pop_places_libraries %>%
ggplot(aes(fill = pop_align)) +
geom_sf(data = illinois_2020pop_total, fill = NA) + # add Illinois outline
geom_sf(data = illinois_2020pop_places_libraries) +
scale_fill_viridis_c() +
theme_void() +
labs(title = str_wrap("Ratio of 2020 Decennial Census Count to Reported Library Service Area Population", width = 40),
subtitle = "For Illinois public libraries with municipal service areas",
fill = str_wrap("2020 Census Count / Reported LSA Population", width = 20),
caption = str_wrap("2020 Decennial Census Counts obtained with tidycensus R package; Public library locations from IMLS's PLS, FY 2021", width = 70)
)
# Static plot above is really hard to see, so this provides interactive version
mapview(illinois_2020pop_places_libraries, zcol = 'pop_align')