Senior Homestead Exemption Data Pull

ACS

Data are pulled with the tidycensus `get_pums()` function.

PUMS Variables

# install.packages("tidycensus")
library(tidycensus)
# View(pums_variables)

# One-time setup: register a Census API key with tidycensus.
# SECURITY: never paste the key itself into this file. The key that used to be
# written on this line was exposed in source and should be rotated. Read the
# key from the environment instead:
# census_api_key(Sys.getenv("CENSUS_API_KEY"), overwrite = FALSE, install = TRUE)

# Pull the 2023 1-year ACS PUMS records for Texas, including the housing
# replicate weights that the survey design built later in this script relies on.
pums_pull <- get_pums(
  variables = c(
    "HHLDRRAC1P", "HHLDRHISP", "SEX", "HHLDRAGEP", "TEN", "OCPIP", "GRPIP"
  ),
  state = "Texas",
  year = 2023, # numeric, as documented for get_pums()
  rep_weights = "housing",
  survey = "acs1"
)
Getting data from the 2023 1-year ACS Public Use Microdata Sample
         #recode=TRUE)
  • TEN = Tenure

    • 1 = owned with mortgage

    • 2 = owned free & clear

    • 3 = Rented

    • 4 = Occupied w/o payment of rent

    • 0 = NA

  • HHLDRRAC1P = Recoded race of the householder

  • HHLDRHISP = Hispanic origin (ethnicity) of the householder

  • OCPIP = Selected Monthly Owner Costs as a Percentage of Household Income

    • 0 = NA

    • 1 min to 100 max

    • 101 = 101% or more

  • GRPIP = Gross Rent as a Percentage of Household Income

    • 0 = NA

    • 1 to 100

    • 101 = 101% or more

  • SEX - sex

  • HHLDRAGEP = Age of the householder

Cleaning & Coding Variables

library(car)
Loading required package: carData
# Sex recode: convert the raw code to numeric, then label 1/2 as Male/Female;
# anything else becomes NA.
pums_pull$SEX <- as.numeric(pums_pull$SEX)
pums_pull$sex_c <- Recode(
  pums_pull$SEX,
  recodes = "1='Male'; 2='Female'; else=NA",
  as.factor = TRUE
)

# Tenure recode: codes 1-2 (owned with a mortgage / free and clear) become
# "Owned"; codes 3-4 (rented / occupied without payment of rent) become
# "Rented"; 0 becomes NA. TEN values that are not numeric turn into NA inside
# as.numeric(), which is what raises the "NAs introduced by coercion" warning.
pums_pull$tenure_c <- Recode(
  as.numeric(pums_pull$TEN),
  recodes = "1:2 = 'Owned';3:4='Rented';0=NA",
  as.factor = TRUE
)
Warning in is.factor(var): NAs introduced by coercion
# Housing cost burden (across tenure).
# OCPIP (owner costs) and GRPIP (gross rent) are each a percentage of household
# income, and 0 marks "not applicable". A household with 0 on both measures has
# no usable cost information and is set to NA.
#
# Both flags are stored as factors with a fixed set of levels, like the other
# recoded variables in this script, so that every subgroup passed to svyby()
# reports the same categories even when one of them is empty in that subgroup.

# Two-level flag: more than 30% of income on either measure.
pums_pull$CostBurd <- with(pums_pull, factor(
  ifelse(
    OCPIP == 0 & GRPIP == 0, NA,
    ifelse(
      OCPIP > 30 | GRPIP > 30,
      "1. Cost-Burdened (>30%)",
      # Reaching this branch already implies both shares are <= 30, so no
      # further test is needed.
      "2. Not Cost-Burdened"
    )
  ),
  levels = c("1. Cost-Burdened (>30%)", "2. Not Cost-Burdened")
))

# Three-level flag: over 30% up to 50% is cost-burdened, over 50% is severely
# cost-burdened, everything else with cost information is not cost-burdened.
pums_pull$CostBurd_d <- with(pums_pull, factor(
  ifelse(
    OCPIP == 0 & GRPIP == 0, NA,
    ifelse(
      (OCPIP <= 50 & OCPIP > 30) | (GRPIP <= 50 & GRPIP > 30),
      "1. Cost-Burdened (>30%)",
      ifelse(
        OCPIP > 50 | GRPIP > 50,
        "2. Severely Cost-Burdened (>50%)",
        # Reaching this branch already implies both shares are <= 30.
        "3. Not Cost-Burdened"
      )
    )
  ),
  levels = c(
    "1. Cost-Burdened (>30%)",
    "2. Severely Cost-Burdened (>50%)",
    "3. Not Cost-Burdened"
  )
))



# Householder age
# hhder_age_d keeps the age as a number; hhder_age_c groups it into bands.
pums_pull$hhder_age_d <- as.numeric(pums_pull$HHLDRAGEP)

# The bands must not overlap: car::Recode applies the first matching
# specification, so the earlier "35:45" range silently placed 45-year-olds in
# the "35 to 44" band. The recode now also runs on the numeric age column so
# the numeric ranges always apply.
pums_pull$hhder_age_c <- Recode(
  pums_pull$hhder_age_d,
  recodes = paste(
    "1:19 = '1. Under 20'",
    "20:34='2. 20 to 34'",
    "35:44='3. 35 to 44'",
    "45:54 = '4. 45 to 54'",
    "55:64 = '5. 55 to 64'",
    "65:109='6. 65 and older'",
    "0=NA",
    sep = "; "
  ),
  as.factor = TRUE
)



# Hispanic/Latino ethnicity of the householder: turn the raw code into a number
# ahead of the recode below.
pums_pull[["HHLDRHISP"]] <- as.numeric(pums_pull[["HHLDRHISP"]])
Warning: NAs introduced by coercion
# Code 1 is labelled "Not Hispanic", codes 2-24 "Hispanic/Latino"; anything
# else (including values that failed numeric conversion) becomes NA.
pums_pull$hisp_HH <- Recode(
  pums_pull$HHLDRHISP,
  recodes = "1='Not Hispanic'; 2:24='Hispanic/Latino'; else = NA",
  as.factor = TRUE
)

# Recoding Race: convert the raw householder race code to a number first.
pums_pull$HHLDRRAC1P <- as.numeric(pums_pull$HHLDRRAC1P)
Warning: NAs introduced by coercion
# Collapse the householder race codes into four groups: 1 = White, 2 = Black,
# 6-7 = Asian/PI, and 3-5 plus 8-9 = Other or 2+. Anything else becomes NA.
pums_pull$race_c <- Recode(
  pums_pull$HHLDRRAC1P,
  recodes = "1='White'; 2='Black'; 3:5='Other or 2+'; 6:7='Asian/PI'; 8:9='Other or 2+'; else=NA",
  as.factor = TRUE
)

# Combined race/ethnicity of the householder.
# Step 1: cross ethnicity with race, giving labels such as
# "Not Hispanic_White" or "Hispanic/Latino_Black".
pums_pull$race_eth <- interaction(pums_pull$hisp_HH, pums_pull$race_c, sep = "_")

# Step 2: every label that starts with "Hispanic" collapses into a single
# "Hispanic, Any Race" category; the non-Hispanic labels are kept as they are.
pums_pull$race_eth <- as.factor(ifelse(
  startsWith(as.character(pums_pull$race_eth), "Hispanic"),
  "Hispanic, Any Race",
  as.character(pums_pull$race_eth)
))

# Step 3: give the non-Hispanic categories short names. The former
# 'Not Hispanic_Native American' mapping was removed because the race recode
# never produces a "Native American" label, so it could not match anything.
pums_pull$race_eth <- Recode(
  pums_pull$race_eth,
  recodes = paste(
    "'Not Hispanic_Other or 2+'='Other NH'",
    "'Not Hispanic_White'='White NH'",
    "'Not Hispanic_Black'='Black NH'",
    "'Not Hispanic_Asian/PI'='Asian/PI NH'",
    sep = "; "
  ),
  as.factor = TRUE
)


#not weighted yet - just testing
#table(pums_pull$CostBurd,pums_pull$tenure_c, pums_pull$race_eth)

Housing cost burden by homeownership status and age of householder

library(dplyr)

Attaching package: 'dplyr'
The following object is masked from 'package:car':

    recode
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
#HOUSEHOLD WEIGHTS DESIGN
library(srvyr)

Attaching package: 'srvyr'
The following object is masked from 'package:stats':

    filter
library(survey)
Loading required package: grid
Loading required package: Matrix
Loading required package: survival

Attaching package: 'survey'
The following object is masked from 'package:graphics':

    dotchart
# Keep only householder records (SPORDER == 1).
# which() drops rows where SPORDER is missing; plain logical indexing would
# instead insert an all-NA row for every missing SPORDER value.
pums_pull_headHH <- pums_pull[which(pums_pull$SPORDER == 1), ]


# Household-level survey design built from the householder records, using the
# housing replicate weights.
pums_design <- to_survey(
  pums_pull_headHH,
  type = "housing",
  design = "rep_weights"
)
# Optional, currently disabled: drop records with no cost-burden information.
  #filter(!is.na(pums_pull$CostBurd)) #gets rid of any values where we have no info



# Housing cost burden by tenure and householder age band.

# Weighted household counts in each cost-burden category.
burden_tot <- svyby(
  formula = ~CostBurd_d,
  by = ~ hhder_age_c + tenure_c,
  design = pums_design,
  FUN = svytotal,
  na.rm = TRUE
)

# Weighted shares of households in each cost-burden category.
burden_pct <- svyby(
  formula = ~CostBurd_d,
  by = ~ hhder_age_c + tenure_c,
  design = pums_design,
  FUN = svymean,
  na.rm = TRUE
)

# Export both tables to one workbook, one sheet per table.
# install.packages("writexl")
library(writexl)
sheets <- list(burden_tot = burden_tot, burden_pct = burden_pct)
write_xlsx(
  x = sheets,
  path = "../2025_Homeownership/Housing_CostBurden_Tenure_Age.xlsx"
)


# Housing cost burden by tenure, householder age band, and race/ethnicity.
# Note: this reuses (and so overwrites) burden_tot and burden_pct.

# Weighted household counts in each cost-burden category.
burden_tot <- svyby(
  formula = ~CostBurd_d,
  by = ~ hhder_age_c + tenure_c + race_eth,
  design = pums_design,
  FUN = svytotal,
  na.rm = TRUE
)

# Weighted shares of households in each cost-burden category.
burden_pct <- svyby(
  formula = ~CostBurd_d,
  by = ~ hhder_age_c + tenure_c + race_eth,
  design = pums_design,
  FUN = svymean,
  na.rm = TRUE
)

# Export both tables to one workbook, one sheet per table.
# install.packages("writexl")
library(writexl)
sheets2 <- list(burden_tot = burden_tot, burden_pct = burden_pct)
write_xlsx(
  x = sheets2,
  path = "../2025_Homeownership/Housing_CostBurden_Tenure_Age_Race.xlsx"
)