Description

This script uses R packages including TidyCensus to calculate and map Census-district-level population changes as estimated by the 2016 and 2021 five-year American Community Survey datasets. Both datasets are available at https://data.census.gov/. A U.S. Census Bureau API key is required to run the script. To get a key, visit https://api.census.gov/data/key_signup.html.

See Output for a look at the output the script produces.

# Installing required packages

if (!require("dplyr")) install.packages("dplyr")
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("leaflet")) install.packages("leaflet")
if (!require("mapview")) install.packages("mapview")
if (!require("leaflet.extras2")) install.packages("leaflet.extras2")
if (!require("tidycensus")) install.packages("tidycensus")
if (!require("sf")) install.packages("sf")

library(dplyr)
library(tidyverse)
library(ggplot2) #From the tidyverse package
library(readr) #From the tidyverse package
library(leaflet)
library(tidycensus)
library(sf)
library(leaflet.extras2)
library(leafpop)

options(tigris_use_cache = TRUE)
options(scipen = 999)

# NOTE: Obtain a Census API key from 
# https://api.census.gov/data/key_signup.html
# and replace the census_api_key code line's 
# PastYourAPIKeyBetwenTheseQuoteMarks text with your
# API key.

#census_api_key("PasteYourAPIKeyBetwenTheseQuoteMarks")

# Getting ACS variable info
# Change "2021" to your preferred year.
# Change "acs1" to "acs5" if you want five-year data.

DetailedTables <- load_variables(2016, "acs5", cache = TRUE)
SubjectTables <- load_variables(2016, "acs5/subject", cache = TRUE)
ProfileTables <- load_variables(2016, "acs5/profile", cache = TRUE)
AllVariables <- bind_rows(DetailedTables,SubjectTables,ProfileTables)
rm(DetailedTables,SubjectTables,ProfileTables)

# Some variables to try:
# B25064_001, Median gross rent, in dollars.
# DP04_0142P, Pct. renter HH with GRAPI of 35 percent or more.
# B25071_001, Median GRAPI, all renter-occupied HH.
# B25088_002, Median monthly owner costs, HH with a mortgage.
# B25092_002, Median monthly owner costs as % of HH income, HH with mortgage.
# B01001_001, Head count.
# DP03_0099P, percentage of people with no health insurance coverage.
# DP02_0154P, percentage of households with broadband. For < 2017, use DP02_0152P
# DP03_0025, median commute time to work, in minutes, workers age 16 or older.
# DP02_0114P, percentage age 5 and older who speak a language other than English at home.
# B19013_001, median household income.
# DP02_0068P, percentage of the population age 25 and older who hold a four-year college degree or a higher degree.
# S2201_C04_001, percentage of households receiving SNAP benefits in the past 12 months.
# DP02_0004P, percentage of households occupied by a "cohabiting" couple.

# Getting the Year 1 data
Data_Y1 <- get_acs(geography = "county subdivision",
                   state = "TN",
                   variables = c(Var_Y1 = "B01001_001"),#Edit variable name
                   year = 2016,
                   survey = "acs5", #Note: Must use 5-year data
                   output = "wide",
                   geometry = TRUE)

# Getting the Year 2 data
Data_Y2 <- get_acs(geography = "county subdivision",
                   state = "TN",
                   variables = c(Var_Y2 = "B01001_001"),#Edit variable name
                   year = 2021,
                   survey = "acs5", #Note: Must use 5-year data
                   output = "wide",
                   geometry = FALSE)

counties <- "Shelby County|Tipton County|Fayette County"

Data_Y2$SelectedCounty <- ifelse(grepl(counties,
                                      Data_Y2$NAME,
                                      ignore.case = TRUE),1,0)
Data_Y2 <- Data_Y2[Data_Y2$SelectedCounty == '1',]

# Merging the Year 1 and Year 2 data, using the GEOID variable
# in each dataset as a key
Mergeddata <- merge(Data_Y1, Data_Y2, by = "GEOID")
rm(Data_Y1,Data_Y2)

# Making pretty column names for the map file
Mergeddata$Subdivision <- Mergeddata$NAME.x
Mergeddata$Change <- Mergeddata$Var_Y2E - Mergeddata$Var_Y1E

# Testing the significance of each subdivision's change

Mergeddata$Sig <- significance(Mergeddata$Var_Y1E,
                               Mergeddata$Var_Y2E,
                               Mergeddata$Var_Y1M,
                               Mergeddata$Var_Y2M,
                               clevel = 0.90)
Mergeddata$Significance <- ifelse(Mergeddata$"Sig" == "TRUE",
                                  "Significant",
                                  "Nonsignificant")

# Sorting the data
Mergeddata <- Mergeddata[order(Mergeddata$Change, decreasing = TRUE),]

#Cutting unneeded columns and rearranging and renaming the data
ChangeData <- Mergeddata
ChangeData$From <- ChangeData$Var_Y1E
ChangeData$To <- ChangeData$Var_Y2E
keepvars <- c("GEOID","Subdivision",
              "From", "To",
              "Change","Significance",
              "geometry")
ChangeData <- ChangeData[keepvars]
rm(Mergeddata,
   keepvars)

# Displaying the data
View(ChangeData)

# Graphing
ggplot(ChangeData, aes(x = Change, y = reorder(Subdivision, Change))) + 
  geom_point(size = 3, color = "royalblue") + 
  theme_minimal(base_size = 12.5) + 
  labs(title = "Change in My Variable", 
       x = "Change", 
       y = "") + 
  scale_x_continuous()

# Making a map

mapviewOptions(basemaps.color.shuffle = FALSE)

PopMap <- mapview(ChangeData, zcol = "Change",
                  col.regions = RColorBrewer::brewer.pal(9, "Blues"), alpha.regions = .5,
                  layer.name = "Pop. change, 2016-2021",
                  popup = popupTable(ChangeData,
                                     feature.id = FALSE,
                                     row.numbers = FALSE,
                                     zcol = c("Subdivision",
                                              "From",
                                              "To",
                                              "Change",
                                              "Significance")))

PopMap 

# Making a .kml Map File
ChangeData <- ChangeData[colSums(!is.na(ChangeData)) > 0]
st_write(ChangeData,"ChangeMapData.kml", append = FALSE)

# Making a CSV data file
ChangeDataNG <- st_drop_geometry(ChangeData, drop = TRUE)

ChangeDataNG <- ChangeDataNG %>% 
  separate(Subdivision,
           into = c("District","County","State"), sep = ",")
write_csv(ChangeDataNG,"ChangeCSVData.csv")

Output

Below is the output the script produces

# Installing required packages

if (!require("dplyr")) install.packages("dplyr")

## Loading required package: dplyr

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

if (!require("tidyverse")) install.packages("tidyverse")

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.1     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.1.8
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors

if (!require("leaflet")) install.packages("leaflet")

## Loading required package: leaflet

if (!require("mapview")) install.packages("mapview")

## Loading required package: mapview

if (!require("leaflet.extras2")) install.packages("leaflet.extras2")

## Loading required package: leaflet.extras2

if (!require("tidycensus")) install.packages("tidycensus")

## Loading required package: tidycensus

if (!require("sf")) install.packages("sf")

## Loading required package: sf
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE

library(dplyr)
library(tidyverse)
library(ggplot2) #From the tidyverse package
library(readr) #From the tidyverse package
library(leaflet)
library(tidycensus)
library(sf)
library(leaflet.extras2)
library(leafpop)

options(tigris_use_cache = TRUE)
options(scipen = 999)

# NOTE: Obtain a Census API key from 
# https://api.census.gov/data/key_signup.html
# and replace the census_api_key code line's 
# PastYourAPIKeyBetwenTheseQuoteMarks text with your
# API key.

#census_api_key("PasteYourAPIKeyBetwenTheseQuoteMarks")

# Getting ACS variable info
# Change "2021" to your preferred year.
# Change "acs1" to "acs5" if you want five-year data.

DetailedTables <- load_variables(2016, "acs5", cache = TRUE)
SubjectTables <- load_variables(2016, "acs5/subject", cache = TRUE)
ProfileTables <- load_variables(2016, "acs5/profile", cache = TRUE)
AllVariables <- bind_rows(DetailedTables,SubjectTables,ProfileTables)
rm(DetailedTables,SubjectTables,ProfileTables)

# Some variables to try:
# B25064_001, Median gross rent, in dollars.
# DP04_0142P, Pct. renter HH with GRAPI of 35 percent or more.
# B25071_001, Median GRAPI, all renter-occupied HH.
# B25088_002, Median monthly owner costs, HH with a mortgage.
# B25092_002, Median monthly owner costs as % of HH income, HH with mortgage.
# B01001_001, Head count.
# DP03_0099P, percentage of people with no health insurance coverage.
# DP02_0154P, percentage of households with broadband. For < 2017, use DP02_0152P
# DP03_0025, median commute time to work, in minutes, workers age 16 or older.
# DP02_0114P, percentage age 5 and older who speak a language other than English at home.
# B19013_001, median household income.
# DP02_0068P, percentage of the population age 25 and older who hold a four-year college degree or a higher degree.
# S2201_C04_001, percentage of households receiving SNAP benefits in the past 12 months.
# DP02_0004P, percentage of households occupied by a "cohabiting" couple.

# Getting the Year 1 data
Data_Y1 <- get_acs(geography = "county subdivision",
                   state = "TN",
                   variables = c(Var_Y1 = "B01001_001"),#Edit variable name
                   year = 2016,
                   survey = "acs5", #Note: Must use 5-year data
                   output = "wide",
                   geometry = TRUE)

## Getting data from the 2012-2016 5-year ACS

# Getting the Year 2 data
Data_Y2 <- get_acs(geography = "county subdivision",
                   state = "TN",
                   variables = c(Var_Y2 = "B01001_001"),#Edit variable name
                   year = 2021,
                   survey = "acs5", #Note: Must use 5-year data
                   output = "wide",
                   geometry = FALSE)

## Getting data from the 2017-2021 5-year ACS

counties <- "Shelby County|Tipton County|Fayette County"

Data_Y2$SelectedCounty <- ifelse(grepl(counties,
                                      Data_Y2$NAME,
                                      ignore.case = TRUE),1,0)
Data_Y2 <- Data_Y2[Data_Y2$SelectedCounty == '1',]

# Merging the Year 1 and Year 2 data, using the GEOID variable
# in each dataset as a key
Mergeddata <- merge(Data_Y1, Data_Y2, by = "GEOID")
rm(Data_Y1,Data_Y2)

# Making pretty column names for the map file
Mergeddata$Subdivision <- Mergeddata$NAME.x
Mergeddata$Change <- Mergeddata$Var_Y2E - Mergeddata$Var_Y1E

# Testing the significance of each subdivision's change

Mergeddata$Sig <- significance(Mergeddata$Var_Y1E,
                               Mergeddata$Var_Y2E,
                               Mergeddata$Var_Y1M,
                               Mergeddata$Var_Y2M,
                               clevel = 0.90)
Mergeddata$Significance <- ifelse(Mergeddata$"Sig" == "TRUE",
                                  "Significant",
                                  "Nonsignificant")

# Sorting the data
Mergeddata <- Mergeddata[order(Mergeddata$Change, decreasing = TRUE),]

#Cutting unneeded columns and rearranging and renaming the data
ChangeData <- Mergeddata
ChangeData$From <- ChangeData$Var_Y1E
ChangeData$To <- ChangeData$Var_Y2E
keepvars <- c("GEOID","Subdivision",
              "From", "To",
              "Change","Significance",
              "geometry")
ChangeData <- ChangeData[keepvars]
rm(Mergeddata,
   keepvars)

# Displaying the data
View(ChangeData)

# Graphing
ggplot(ChangeData, aes(x = Change, y = reorder(Subdivision, Change))) + 
  geom_point(size = 3, color = "royalblue") + 
  theme_minimal(base_size = 12.5) + 
  labs(title = "Change in My Variable", 
       x = "Change", 
       y = "") + 
  scale_x_continuous()

# Making a map

mapviewOptions(basemaps.color.shuffle = FALSE)

PopMap <- mapview(ChangeData, zcol = "Change",
                  col.regions = RColorBrewer::brewer.pal(9, "Blues"), alpha.regions = .5,
                  layer.name = "Pop. change, 2016-2021",
                  popup = popupTable(ChangeData,
                                     feature.id = FALSE,
                                     row.numbers = FALSE,
                                     zcol = c("Subdivision",
                                              "From",
                                              "To",
                                              "Change",
                                              "Significance")))

## Warning: Found less unique colors (9) than unique zcol values (30)! 
## Interpolating color vector to match number of zcol values.

PopMap

# Making a .kml Map File
ChangeData <- ChangeData[colSums(!is.na(ChangeData)) > 0]
st_write(ChangeData,"ChangeMapData.kml", append = FALSE)

## Writing layer `ChangeMapData' to data source `ChangeMapData.kml' using driver `KML'
## Writing 30 features with 6 fields and geometry type Multi Polygon.

# Making a CSV data file
ChangeDataNG <- st_drop_geometry(ChangeData, drop = TRUE)

ChangeDataNG <- ChangeDataNG %>% 
  separate(Subdivision,
           into = c("District","County","State"), sep = ",")
write_csv(ChangeDataNG,"ChangeCSVData.csv")

Mapping division-level population change for the 901 area code

Dr. Ken Blake

2023-08-15

Description

Output