## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Linking to GEOS 3.14.1, GDAL 3.12.1, PROJ 9.7.1; sf_use_s2() is TRUE
##
## Your original .Renviron will be backed up and stored in your R HOME directory if needed.
##
## Your API key has been stored in your .Renviron and can be accessed by Sys.getenv("CENSUS_API_KEY").
## To use now, restart R or run `readRenviron("~/.Renviron")`
## [1] "a8578c37366fe6218e255d7178b4a5bac5f67566"
# ============================================================
# Texas ACS 5-Year ZCTA Data
# Population, Income, Demographics, Employment, Commute, Housing
# ============================================================
# Install once if needed
# install.packages(c("tidycensus", "tidyverse", "sf", "openxlsx"))
library(tidycensus)
library(tidyverse)
library(sf)
library(openxlsx)
# ------------------------------------------------------------
# 1. Census API key
# ------------------------------------------------------------
# Get a free key from: https://api.census.gov/data/key_signup.html
# Run this once, then restart R:
# census_api_key("a8578c37366fe6218e255d7178b4a5bac5f67566", install = TRUE, overwrite = TRUE)
# ------------------------------------------------------------
# 2. Settings
# ------------------------------------------------------------
# ============================================================
# Fixed Texas ACS 5-Year ZCTA Download
# Avoids: unknown/unsupported geography hierarchy
# ============================================================
# install.packages(c("tidycensus", "tidyverse", "sf", "tigris", "openxlsx"))
library(tidycensus)
library(tidyverse)
library(sf)
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(openxlsx)
options(tigris_use_cache = TRUE)
# census_api_key("YOUR_API_KEY", install = TRUE, overwrite = TRUE)
acs_year <- 2023
acs_vars <- c(
total_pop = "B01003_001",
median_hh_income = "B19013_001",
per_capita_income = "B19301_001",
poverty_count = "B17001_002",
poverty_universe = "B17001_001",
white_alone = "B02001_002",
black_alone = "B02001_003",
asian_alone = "B02001_005",
hispanic = "B03003_003",
edu_total_25plus = "B15003_001",
less_than_hs = "B15003_002",
hs_grad = "B15003_017",
some_college = "B15003_019",
bachelors = "B15003_022",
graduate_degree = "B15003_023",
labor_force = "B23025_003",
employed = "B23025_004",
unemployed = "B23025_005",
workers_16plus = "B08301_001",
drive_alone = "B08301_003",
carpool = "B08301_004",
public_transit = "B08301_010",
bicycle = "B08301_018",
walked = "B08301_019",
worked_home = "B08301_021",
housing_units = "B25001_001",
occupied_units = "B25003_001",
owner_occupied = "B25003_002",
renter_occupied = "B25003_003",
median_home_value = "B25077_001",
median_gross_rent = "B25064_001"
)
# ------------------------------------------------------------
# 1. Download all U.S. ZCTA ACS data
# Do NOT use state = "TX" here
# ------------------------------------------------------------
zcta_acs_raw <- get_acs(
geography = "zcta",
variables = acs_vars,
year = acs_year,
survey = "acs5",
output = "wide",
geometry = FALSE
)
## Getting data from the 2019-2023 5-year ACS
# ------------------------------------------------------------
# 2. Get ZCTA geometries and Texas boundary
# ------------------------------------------------------------
zcta_geo <- zctas(year = acs_year) %>%
st_transform(5070)
tx_boundary <- states(year = acs_year) %>%
filter(STUSPS == "TX") %>%
st_transform(5070)
# Keep ZCTAs that intersect Texas
tx_zcta_ids <- zcta_geo %>%
st_filter(tx_boundary) %>%
st_drop_geometry() %>%
transmute(zcta = ZCTA5CE20)
# ------------------------------------------------------------
# 3. Filter ACS data to Texas ZCTAs
# ------------------------------------------------------------
tx_zcta <- zcta_acs_raw %>%
rename(
zcta = GEOID,
zcta_name = NAME
) %>%
semi_join(tx_zcta_ids, by = "zcta") %>%
select(zcta, zcta_name, ends_with("E")) %>%
rename_with(~ stringr::str_remove(.x, "E$"), ends_with("E"))
# ------------------------------------------------------------
# 4. Create useful percentage variables
# ------------------------------------------------------------
tx_zcta_master <- tx_zcta %>%
mutate(
pct_poverty = 100 * poverty_count / poverty_universe,
pct_white = 100 * white_alone / total_pop,
pct_black = 100 * black_alone / total_pop,
pct_asian = 100 * asian_alone / total_pop,
pct_hispanic = 100 * hispanic / total_pop,
college_plus = bachelors + graduate_degree,
pct_less_than_hs = 100 * less_than_hs / edu_total_25plus,
pct_college_plus = 100 * college_plus / edu_total_25plus,
employment_rate = 100 * employed / labor_force,
unemployment_rate = 100 * unemployed / labor_force,
pct_drive_alone = 100 * drive_alone / workers_16plus,
pct_carpool = 100 * carpool / workers_16plus,
pct_public_transit = 100 * public_transit / workers_16plus,
pct_bicycle = 100 * bicycle / workers_16plus,
pct_walked = 100 * walked / workers_16plus,
pct_worked_home = 100 * worked_home / workers_16plus,
pct_owner_occupied = 100 * owner_occupied / occupied_units,
pct_renter_occupied = 100 * renter_occupied / occupied_units
) %>%
select(
zcta, zcta_name,
total_pop,
median_hh_income,
per_capita_income,
pct_poverty,
pct_white,
pct_black,
pct_asian,
pct_hispanic,
pct_less_than_hs,
pct_college_plus,
labor_force,
employed,
unemployed,
employment_rate,
unemployment_rate,
workers_16plus,
pct_drive_alone,
pct_carpool,
pct_public_transit,
pct_bicycle,
pct_walked,
pct_worked_home,
housing_units,
occupied_units,
owner_occupied,
renter_occupied,
pct_owner_occupied,
pct_renter_occupied,
median_home_value,
median_gross_rent
)
# ------------------------------------------------------------
# 5. Save output
# ------------------------------------------------------------
##write_csv(tx_zcta_master, "Texas_ACS5_ZCTA_Master.csv")
##write.xlsx(tx_zcta_master, "Texas_ACS5_ZCTA_Master.xlsx", overwrite = TRUE)
glimpse(tx_zcta_master)
## Rows: 2,093
## Columns: 32
## $ zcta <chr> "70631", "70639", "70653", "70661", "70668", "7100…
## $ zcta_name <chr> "ZCTA5 70631", "ZCTA5 70639", "ZCTA5 70653", "ZCTA…
## $ total_pop <dbl> 326, 603, 3297, 1712, 6347, 1403, 3366, 791, 10874…
## $ median_hh_income <dbl> 52059, 77656, 60625, 32763, 48492, 44981, 51997, 5…
## $ per_capita_income <dbl> 32233, 33971, 31680, 25354, 27422, 23110, 46091, 2…
## $ pct_poverty <dbl> 3.300330, 9.452736, 14.404432, 40.946262, 25.58581…
## $ pct_white <dbl> 86.50307, 98.67330, 89.89991, 84.28738, 90.90909, …
## $ pct_black <dbl> 1.8404908, 0.0000000, 6.1267819, 0.0000000, 6.2234…
## $ pct_asian <dbl> 0.00000000, 0.00000000, 0.09099181, 0.81775701, 0.…
## $ pct_hispanic <dbl> 0.3067485, 0.0000000, 0.8795875, 6.0747664, 12.635…
## $ pct_less_than_hs <dbl> 2.4154589, 0.0000000, 1.2505583, 4.5068027, 1.3285…
## $ pct_college_plus <dbl> 24.637681, 5.603448, 8.485931, 9.693878, 9.251208,…
## $ labor_force <dbl> 162, 251, 1401, 593, 2366, 659, 1493, 275, 4114, 1…
## $ employed <dbl> 156, 240, 1352, 561, 2290, 639, 1493, 253, 3879, 1…
## $ unemployed <dbl> 6, 11, 49, 32, 76, 20, 0, 22, 235, 62, 385, 58, 27…
## $ employment_rate <dbl> 96.29630, 95.61753, 96.50250, 94.60371, 96.78783, …
## $ unemployment_rate <dbl> 3.703704, 4.382470, 3.497502, 5.396290, 3.212172, …
## $ workers_16plus <dbl> 156, 222, 1347, 561, 2249, 639, 1397, 242, 3833, 1…
## $ pct_drive_alone <dbl> 91.66667, 90.99099, 83.96437, 81.28342, 84.12628, …
## $ pct_carpool <dbl> 3.846154, 0.000000, 14.031180, 11.586453, 8.181414…
## $ pct_public_transit <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.…
## $ pct_bicycle <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.…
## $ pct_walked <dbl> 0.0000000, 0.0000000, 1.1135857, 4.9910873, 0.1778…
## $ pct_worked_home <dbl> 4.4871795, 9.0090090, 0.8908686, 0.0000000, 0.0000…
## $ housing_units <dbl> 411, 244, 1666, 855, 2650, 795, 1678, 354, 4578, 1…
## $ occupied_units <dbl> 154, 205, 1359, 628, 2101, 641, 1650, 294, 4054, 1…
## $ owner_occupied <dbl> 154, 197, 1132, 518, 1492, 582, 1399, 260, 3600, 9…
## $ renter_occupied <dbl> 0, 8, 227, 110, 609, 59, 251, 34, 454, 348, 1343, …
## $ pct_owner_occupied <dbl> 100.00000, 96.09756, 83.29654, 82.48408, 71.01380,…
## $ pct_renter_occupied <dbl> 0.000000, 3.902439, 16.703458, 17.515924, 28.98619…
## $ median_home_value <dbl> 134400, 102800, 112100, 84400, 141800, 182500, 212…
## $ median_gross_rent <dbl> NA, NA, 562, NA, 957, NA, 576, 780, 1019, 498, 694…