This script uses R packages including TidyCensus to calculate and map Census-district-level population changes as estimated by the 2016 and 2021 five-year American Community Survey datasets. Both datasets are available at https://data.census.gov/. A U.S. Census Bureau API key is required to run the script. To get a key, visit https://api.census.gov/data/key_signup.html.
See Output for a look at the output the script produces.
# Installing required packages
if (!require("dplyr")) install.packages("dplyr")
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("leaflet")) install.packages("leaflet")
if (!require("mapview")) install.packages("mapview")
if (!require("leaflet.extras2")) install.packages("leaflet.extras2")
if (!require("tidycensus")) install.packages("tidycensus")
if (!require("sf")) install.packages("sf")
library(dplyr)
library(tidyverse)
library(ggplot2) #From the tidyverse package
library(readr) #From the tidyverse package
library(leaflet)
library(tidycensus)
library(sf)
library(leaflet.extras2)
library(leafpop)
options(tigris_use_cache = TRUE)
options(scipen = 999)
# NOTE: Obtain a Census API key from
# https://api.census.gov/data/key_signup.html
# and replace the census_api_key code line's
# PastYourAPIKeyBetwenTheseQuoteMarks text with your
# API key.
#census_api_key("PasteYourAPIKeyBetwenTheseQuoteMarks")
# Getting ACS variable info
# Change "2021" to your preferred year.
# Change "acs1" to "acs5" if you want five-year data.
DetailedTables <- load_variables(2016, "acs5", cache = TRUE)
SubjectTables <- load_variables(2016, "acs5/subject", cache = TRUE)
ProfileTables <- load_variables(2016, "acs5/profile", cache = TRUE)
AllVariables <- bind_rows(DetailedTables,SubjectTables,ProfileTables)
rm(DetailedTables,SubjectTables,ProfileTables)
# Some variables to try:
# B25064_001, Median gross rent, in dollars.
# DP04_0142P, Pct. renter HH with GRAPI of 35 percent or more.
# B25071_001, Median GRAPI, all renter-occupied HH.
# B25088_002, Median monthly owner costs, HH with a mortgage.
# B25092_002, Median monthly owner costs as % of HH income, HH with mortgage.
# B01001_001, Head count.
# DP03_0099P, percentage of people with no health insurance coverage.
# DP02_0154P, percentage of households with broadband. For < 2017, use DP02_0152P
# DP03_0025, median commute time to work, in minutes, workers age 16 or older.
# DP02_0114P, percentage age 5 and older who speak a language other than English at home.
# B19013_001, median household income.
# DP02_0068P, percentage of the population age 25 and older who hold a four-year college degree or a higher degree.
# S2201_C04_001, percentage of households receiving SNAP benefits in the past 12 months.
# DP02_0004P, percentage of households occupied by a "cohabiting" couple.
# Getting the Year 1 data
Data_Y1 <- get_acs(geography = "county subdivision",
state = "TN",
variables = c(Var_Y1 = "B01001_001"),#Edit variable name
year = 2016,
survey = "acs5", #Note: Must use 5-year data
output = "wide",
geometry = TRUE)
# Getting the Year 2 data
Data_Y2 <- get_acs(geography = "county subdivision",
state = "TN",
variables = c(Var_Y2 = "B01001_001"),#Edit variable name
year = 2021,
survey = "acs5", #Note: Must use 5-year data
output = "wide",
geometry = FALSE)
counties <- "Shelby County|Tipton County|Fayette County"
Data_Y2$SelectedCounty <- ifelse(grepl(counties,
Data_Y2$NAME,
ignore.case = TRUE),1,0)
Data_Y2 <- Data_Y2[Data_Y2$SelectedCounty == '1',]
# Merging the Year 1 and Year 2 data, using the GEOID variable
# in each dataset as a key
Mergeddata <- merge(Data_Y1, Data_Y2, by = "GEOID")
rm(Data_Y1,Data_Y2)
# Making pretty column names for the map file
Mergeddata$Subdivision <- Mergeddata$NAME.x
Mergeddata$Change <- Mergeddata$Var_Y2E - Mergeddata$Var_Y1E
# Testing the significance of each subdivision's change
Mergeddata$Sig <- significance(Mergeddata$Var_Y1E,
Mergeddata$Var_Y2E,
Mergeddata$Var_Y1M,
Mergeddata$Var_Y2M,
clevel = 0.90)
Mergeddata$Significance <- ifelse(Mergeddata$"Sig" == "TRUE",
"Significant",
"Nonsignificant")
# Sorting the data
Mergeddata <- Mergeddata[order(Mergeddata$Change, decreasing = TRUE),]
#Cutting unneeded columns and rearranging and renaming the data
ChangeData <- Mergeddata
ChangeData$From <- ChangeData$Var_Y1E
ChangeData$To <- ChangeData$Var_Y2E
keepvars <- c("GEOID","Subdivision",
"From", "To",
"Change","Significance",
"geometry")
ChangeData <- ChangeData[keepvars]
rm(Mergeddata,
keepvars)
# Displaying the data
View(ChangeData)
# Graphing
ggplot(ChangeData, aes(x = Change, y = reorder(Subdivision, Change))) +
geom_point(size = 3, color = "royalblue") +
theme_minimal(base_size = 12.5) +
labs(title = "Change in My Variable",
x = "Change",
y = "") +
scale_x_continuous()
# Making a map
mapviewOptions(basemaps.color.shuffle = FALSE)
PopMap <- mapview(ChangeData, zcol = "Change",
col.regions = RColorBrewer::brewer.pal(9, "Blues"), alpha.regions = .5,
layer.name = "Pop. change, 2016-2021",
popup = popupTable(ChangeData,
feature.id = FALSE,
row.numbers = FALSE,
zcol = c("Subdivision",
"From",
"To",
"Change",
"Significance")))
PopMap
# Making a .kml Map File
ChangeData <- ChangeData[colSums(!is.na(ChangeData)) > 0]
st_write(ChangeData,"ChangeMapData.kml", append = FALSE)
# Making a CSV data file
ChangeDataNG <- st_drop_geometry(ChangeData, drop = TRUE)
ChangeDataNG <- ChangeDataNG %>%
separate(Subdivision,
into = c("District","County","State"), sep = ",")
write_csv(ChangeDataNG,"ChangeCSVData.csv")
Below is the output the script produces
# Installing required packages
if (!require("dplyr")) install.packages("dplyr")
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
if (!require("tidyverse")) install.packages("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.1 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.1.8
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
if (!require("leaflet")) install.packages("leaflet")
## Loading required package: leaflet
if (!require("mapview")) install.packages("mapview")
## Loading required package: mapview
if (!require("leaflet.extras2")) install.packages("leaflet.extras2")
## Loading required package: leaflet.extras2
if (!require("tidycensus")) install.packages("tidycensus")
## Loading required package: tidycensus
if (!require("sf")) install.packages("sf")
## Loading required package: sf
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr)
library(tidyverse)
library(ggplot2) #From the tidyverse package
library(readr) #From the tidyverse package
library(leaflet)
library(tidycensus)
library(sf)
library(leaflet.extras2)
library(leafpop)
options(tigris_use_cache = TRUE)
options(scipen = 999)
# NOTE: Obtain a Census API key from
# https://api.census.gov/data/key_signup.html
# and replace the census_api_key code line's
# PastYourAPIKeyBetwenTheseQuoteMarks text with your
# API key.
#census_api_key("PasteYourAPIKeyBetwenTheseQuoteMarks")
# Getting ACS variable info
# Change "2021" to your preferred year.
# Change "acs1" to "acs5" if you want five-year data.
DetailedTables <- load_variables(2016, "acs5", cache = TRUE)
SubjectTables <- load_variables(2016, "acs5/subject", cache = TRUE)
ProfileTables <- load_variables(2016, "acs5/profile", cache = TRUE)
AllVariables <- bind_rows(DetailedTables,SubjectTables,ProfileTables)
rm(DetailedTables,SubjectTables,ProfileTables)
# Some variables to try:
# B25064_001, Median gross rent, in dollars.
# DP04_0142P, Pct. renter HH with GRAPI of 35 percent or more.
# B25071_001, Median GRAPI, all renter-occupied HH.
# B25088_002, Median monthly owner costs, HH with a mortgage.
# B25092_002, Median monthly owner costs as % of HH income, HH with mortgage.
# B01001_001, Head count.
# DP03_0099P, percentage of people with no health insurance coverage.
# DP02_0154P, percentage of households with broadband. For < 2017, use DP02_0152P
# DP03_0025, median commute time to work, in minutes, workers age 16 or older.
# DP02_0114P, percentage age 5 and older who speak a language other than English at home.
# B19013_001, median household income.
# DP02_0068P, percentage of the population age 25 and older who hold a four-year college degree or a higher degree.
# S2201_C04_001, percentage of households receiving SNAP benefits in the past 12 months.
# DP02_0004P, percentage of households occupied by a "cohabiting" couple.
# Getting the Year 1 data
Data_Y1 <- get_acs(geography = "county subdivision",
state = "TN",
variables = c(Var_Y1 = "B01001_001"),#Edit variable name
year = 2016,
survey = "acs5", #Note: Must use 5-year data
output = "wide",
geometry = TRUE)
## Getting data from the 2012-2016 5-year ACS
# Getting the Year 2 data
Data_Y2 <- get_acs(geography = "county subdivision",
state = "TN",
variables = c(Var_Y2 = "B01001_001"),#Edit variable name
year = 2021,
survey = "acs5", #Note: Must use 5-year data
output = "wide",
geometry = FALSE)
## Getting data from the 2017-2021 5-year ACS
counties <- "Shelby County|Tipton County|Fayette County"
Data_Y2$SelectedCounty <- ifelse(grepl(counties,
Data_Y2$NAME,
ignore.case = TRUE),1,0)
Data_Y2 <- Data_Y2[Data_Y2$SelectedCounty == '1',]
# Merging the Year 1 and Year 2 data, using the GEOID variable
# in each dataset as a key
Mergeddata <- merge(Data_Y1, Data_Y2, by = "GEOID")
rm(Data_Y1,Data_Y2)
# Making pretty column names for the map file
Mergeddata$Subdivision <- Mergeddata$NAME.x
Mergeddata$Change <- Mergeddata$Var_Y2E - Mergeddata$Var_Y1E
# Testing the significance of each subdivision's change
Mergeddata$Sig <- significance(Mergeddata$Var_Y1E,
Mergeddata$Var_Y2E,
Mergeddata$Var_Y1M,
Mergeddata$Var_Y2M,
clevel = 0.90)
Mergeddata$Significance <- ifelse(Mergeddata$"Sig" == "TRUE",
"Significant",
"Nonsignificant")
# Sorting the data
Mergeddata <- Mergeddata[order(Mergeddata$Change, decreasing = TRUE),]
#Cutting unneeded columns and rearranging and renaming the data
ChangeData <- Mergeddata
ChangeData$From <- ChangeData$Var_Y1E
ChangeData$To <- ChangeData$Var_Y2E
keepvars <- c("GEOID","Subdivision",
"From", "To",
"Change","Significance",
"geometry")
ChangeData <- ChangeData[keepvars]
rm(Mergeddata,
keepvars)
# Displaying the data
View(ChangeData)
# Graphing
ggplot(ChangeData, aes(x = Change, y = reorder(Subdivision, Change))) +
geom_point(size = 3, color = "royalblue") +
theme_minimal(base_size = 12.5) +
labs(title = "Change in My Variable",
x = "Change",
y = "") +
scale_x_continuous()
# Making a map
mapviewOptions(basemaps.color.shuffle = FALSE)
PopMap <- mapview(ChangeData, zcol = "Change",
col.regions = RColorBrewer::brewer.pal(9, "Blues"), alpha.regions = .5,
layer.name = "Pop. change, 2016-2021",
popup = popupTable(ChangeData,
feature.id = FALSE,
row.numbers = FALSE,
zcol = c("Subdivision",
"From",
"To",
"Change",
"Significance")))
## Warning: Found less unique colors (9) than unique zcol values (30)!
## Interpolating color vector to match number of zcol values.
PopMap
# Making a .kml Map File
ChangeData <- ChangeData[colSums(!is.na(ChangeData)) > 0]
st_write(ChangeData,"ChangeMapData.kml", append = FALSE)
## Writing layer `ChangeMapData' to data source `ChangeMapData.kml' using driver `KML'
## Writing 30 features with 6 fields and geometry type Multi Polygon.
# Making a CSV data file
ChangeDataNG <- st_drop_geometry(ChangeData, drop = TRUE)
ChangeDataNG <- ChangeDataNG %>%
separate(Subdivision,
into = c("District","County","State"), sep = ",")
write_csv(ChangeDataNG,"ChangeCSVData.csv")