Life Expectancy Areal-weighted split tract averaging (2010 to 2020 census tracts)

Author

Kaitlan Wong

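This script estimates life expectancy for 2020 census tracts, including tracts that did not exist in 2010, by taking an area-weighted average of the 2010 tracts that each 2020 tract overlaps.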

library(readr)
library(readxl)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(stringr)

# Input files, given as paths relative to the working directory.
# life_path: Excel workbook with tract-level life expectancy (read with
#            read_excel() below).
# rel_path:  CSV relating 2010 tracts to 2020 tracts (read with read_csv()
#            below).
life_path <- "TX_A.XLSX"
rel_path  <- "texas_2010_2020_tracts_kw.csv"

# Format tract identifiers as 11-character GEOID strings, left-padded with
# zeros.
#
# x: character or numeric vector of tract identifiers.
# Returns a character vector the same length as x; missing inputs stay NA and
# values already 11 or more characters long are returned unchanged.
#
# sprintf("%011s", ...) is deliberately not used: the "0" flag on a %s
# conversion is platform-dependent (some systems pad with spaces instead of
# zeros), which would make the join keys differ between machines.
to_geoid11 <- function(x) {
  is_missing <- is.na(x)
  # as.character() may render large doubles in scientific notation
  # (e.g. "1e+10"), so numeric IDs are written in fixed notation instead.
  id <- if (is.numeric(x)) {
    format(x, scientific = FALSE, trim = TRUE)
  } else {
    as.character(x)
  }
  pad_len <- pmax(0L, 11L - nchar(id))
  out <- paste0(strrep("0", pad_len), id)
  out[is_missing] <- NA_character_
  out
}

# Life expectancy table keyed by 2010 tract.
# Only two columns are kept: the tract ID normalised to an 11-character
# GEOID, and the `e(0)` column coerced to numeric and stored as le_2015.
life <- read_excel(life_path) |>
  transmute(
    GEOID_2010 = to_geoid11(`Tract ID`),
    le_2015 = as.numeric(`e(0)`)
  )

# 2010 <-> 2020 tract relationship file: one row per overlap between a 2010
# tract and a 2020 tract, restricted to Texas 2020 tracts (state FIPS "48").
#
# Weighting: life expectancy is an average (a spatially intensive measure),
# so each 2010 tract must count in proportion to how much of the 2020 tract
# it covers, i.e. in proportion to the land area of the overlap itself
# (AREALAND_PART; presumably the overlap's land area as in the Census
# relationship-file layout -- confirm for this extract).
# Dividing by AREALAND_TRACT_10 instead gives the share of the *source*
# tract, which is the right weight for apportioning counts but over-weights
# small 2010 tracts when averaging, and is undefined for tracts with zero
# land area.
# `w` is left unnormalised here; it is rescaled to sum to 1 within each 2020
# tract when the averages are computed.
rel <- read_csv(rel_path, show_col_types = FALSE) |>
  mutate(
    GEOID_2010 = to_geoid11(GEOID_TRACT_10),
    GEOID_2020 = to_geoid11(GEOID_TRACT_20),
    w = as.numeric(AREALAND_PART)
  ) |>
  filter(substr(GEOID_2020, 1, 2) == "48") |>
  select(GEOID_2010, GEOID_2020, w)

# Interpolate life expectancy onto 2020 tracts.
# Result: one row per 2020 tract with at least one usable contributing 2010
# tract; columns are GEOID_2020 and life_expectancy_2020.
#
# Steps:
#   1. attach the 2010 life expectancy to every 2010/2020 overlap row;
#   2. drop rows with no life expectancy, and rows whose weight is missing,
#      non-finite or not positive -- a single such weight would otherwise
#      turn sum(w), and with it the whole 2020 tract's estimate, into NA/NaN;
#   3. rescale the remaining weights to sum to 1 within each 2020 tract and
#      take the weighted sum.
life_2020 <- rel |>
  inner_join(life, by = "GEOID_2010") |>
  filter(!is.na(le_2015), is.finite(w), w > 0) |>
  group_by(GEOID_2020) |>
  summarise(
    # sum(w) is evaluated per 2020 tract because the data are grouped.
    life_expectancy_2020 = sum(le_2015 * (w / sum(w))),
    .groups = "drop"
  )

# Save the interpolated table to CSV in the working directory.
life_2020 |>
  write_csv("life_expectancy_2010_interpolated_to_2020_tracts.csv")

# Sanity check: print the smallest and largest interpolated values,
# ignoring any missing estimates.
range(life_2020[["life_expectancy_2020"]], na.rm = TRUE)
[1] 60.70912 89.70000