Tucson Housing Credit Evaluation 2000 - 2010

Author

Justin Betts

Published

December 28, 2024

Introduction

The purpose of this report is to demonstrate proficiency with various evaluation and data analytics techniques. I’m leaving in more code than found in typical reports for review. Using data available from the US Census, we’ll evaluate the effectiveness of the New Markets Tax Credit Program (NMTC) and the Low Income Housing Tax Credit (LIHTC).

library(dplyr)
library(DT)
library(ggplot2)
library(here)
library(import)
library(quarto)
library(stargazer)
library(tidycensus)

crosswalk <- read.csv("https://raw.githubusercontent.com/DS4PS/cpp-529-master/master/data/cbsatocountycrosswalk.csv",  stringsAsFactors=F, colClasses="character")

import::here("S_TYPE",
             "d",
             "df",
             "cbsa_stats_df",
             "nmtc.treat",
             "lihtc.treat",
             "PLOTS",
             "%>%",
             .from = here::here("analysis/Betts-Utilities.R"),
             .character_only = TRUE)

Create the Data Set

# confirm crosswalk data pulled for Tucson
tucson_msp <- grep("^TUC", crosswalk$msaname, value=TRUE)

# find Tucson in crosswalk data
tucson.msp <- crosswalk$msaname == tucson_msp

# load FIPS data and omit non-values
tucson.fips <- crosswalk$fipscounty[tucson.msp]
tucson.fips <- na.omit(tucson.fips)

# pull in state FIPS
state.fips <- substr(tucson.fips, 1, 2)

# pull in county FIPS
county.fips <- substr(tucson.fips, 3, 5)

# combine CBSA with MSP
tucson.cbsa <- crosswalk$cbsa[tucson.msp] 
# omit empty sets
tucson.cbsa <- na.omit(tucson.cbsa)
# create data set with Tucson data and display as table
d2 <- d |>
  filter(cbsa %in% tucson.cbsa)
d2 <- d2 |>
  mutate(OnlyNMTC = ifelse(num.nmtc == 1 & num.lihtc == 0, 1, 0)) |>
  mutate(OnlyLIHTC = ifelse(num.lihtc == 1 & num.nmtc == 0, 1, 0)) |>
  mutate(BothCredits = ifelse(num.nmtc == 1 & num.lihtc == 1, 1, 0))

datatable(d2)

Descriptive Statistics for Tucson Households

Median Value of a Tucson Home in 2000 (all values adjusted for inflation to 2010)

median(d2$mhv.00)
[1] 129950.3

Visualizing the Data Using the Native Plot Function

hist(d2$mhv.00, breaks=100, xlim=c(0,500000), 
      col="gray40", border="white",
      axes=F, 
      xlab="MHV(median = $130k)",
      ylab="",
      main="Median Home Value in 2000")
axis(side=1, at=seq(0,500000,100000), 
      labels=c("$0","$100k","$200k","$300k","$400k","$500k"))
abline(v=median(d2$mhv.00, na.rm=T), col="coral", lwd=3)

Calculating Growth and Visualizing Data Using ggplot2

# calculate mean and median to graph
mean.x <- mean(d2$mhv.growth)
median.x <- median(d2$mhv.growth)
hg <- hist(d2$mhv.growth, breaks=1000, plot=FALSE) #needed for scaling graph with growth
ymax <- max(hg$count)

# build the histogram and set reasonable limits
ggplot(d2, aes(x = mhv.growth)) +
  geom_histogram(binwidth = 1, fill = "gray40", color = "ivory") + 
  scale_x_continuous(limits = c(0, 100),  # keeping scale at 100 despite outliers
                     breaks = seq(0, 100, by = 10),
                     labels = paste0(seq(0, 100, by = 10), "%")) + 
  scale_y_continuous(breaks = NULL) +
  labs(title = "Tucson Home Value Growth 2000 to 2010",
       x = "",
       y = "") +
  geom_vline(xintercept=mean.x, col="cornflowerblue", linetype="dashed", linewidth=1) +
  geom_vline(xintercept=median.x, col="coral", linetype="dashed", linewidth=1) +
  annotate("text", x = 65, y = 0.5*ymax, 
           label = paste0("Mean = ", round(mean.x, 0), "%"),
           color = "cornflowerblue", size = 7, hjust = 0) +
  annotate("text", x = 65, y = 0.6*ymax, 
           label = paste0("Median = ", round(median.x, 0), "%"),
           color = "coral", size = 7, hjust = 0) +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        plot.title = element_text(size = 15))

Tucson Household Income in 2000

Average Income for All Tucson Households

mean(d2$hinc00, na.rm=T)
[1] 52824.14

Average Income for Households Receiving LIHTC

mean(d2$hinc00[d2$num.lihtc > 0], na.rm=T)
[1] 29969.89

Average Income for Households Not Receiving LIHTC

mean(d2$hinc00[d2$num.lihtc == 0], na.rm=T)
[1] 54517.04

Average Income for Households Receiving NMTC

mean(d2$hinc00[d2$num.nmtc > 0], na.rm=T)
[1] 29129.83

Average Income for Households Not Receiving NMTC

mean(d2$hinc00[d2$num.nmtc == 0], na.rm=T)
[1] 53453.19

Tucson Home Values in 2000

Average Home Value of Tucson Homes

mean(d2$mhv.00, na.rm=T)
[1] 147868.7

Average Home Value for Households Receiving LIHTC

mean(d2$mhv.00[d2$num.lihtc > 0], na.rm=T)
[1] 125691

Average Home Value for Households Not Receiving LIHTC

mean(d2$mhv.00[d2$num.lihtc == 0], na.rm=T)
[1] 149511.5

Average Home Value for Households Receiving NMTC

mean(d2$mhv.00[d2$num.nmtc > 0], na.rm=T)
[1] 210914.1

Average Home Value for Households Not Receiving NMTC

mean(d2$mhv.00[d2$num.nmtc == 0], na.rm=T)
[1] 146194.9

Home Value Growth Rates

Growth of Households Receiving LIHTC

summary(d2$growth[d2$num.lihtc > 0], na.rm=T)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-72.036   6.127  34.607  29.586  43.983 181.324 

Growth of Households Not Receiving LIHTC

summary(d2$growth[d2$num.lihtc == 0], na.rm=T)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 -63.10   16.09   25.44   28.79   40.05  184.45       3 

Growth of Households Receiving NMTC

summary(d2$growth[d2$num.nmtc > 0], na.rm=T)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 -72.04   12.51   25.54   18.93   43.62   76.74 

Growth of Households Not Receiving NMTC

summary(d2$growth[d2$num.nmtc == 0], na.rm=T)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 -63.10   15.88   26.45   29.11   40.56  184.45       3 

Visualization of Growth

gridExtra::grid.arrange(PLOTS$mhv_growth$lihtc, 
                         PLOTS$mhv_growth$nmtc, 
                         nrow = 1)

Comparison between LIHTC & NMTC

# average home value
mean(d2$mhv.00[d2$num.nmtc > 0], na.rm = T) - mean(d2$mhv.00[d2$num.lihtc > 0], na.rm = T)
[1] 85223.1
# average growth
mean(d2$growth[d2$num.nmtc > 0], na.rm = T) - mean(d2$growth[d2$num.lihtc > 0] ,a.rm = T)
[1] -10.65397

Observations

While those receiving LIHTC and NMTC have a similar average incomes, the average household value for those receiving NMTC is $85,223.1 more than those receiving LIHTC. Furthermore, the home value growth for those receiving NMTC is more than 10 points less than those receiving LIHTC. While the objects of the programs may be similar, boosting opportunities for those outside the housing market, the populations are quite distinct.

Preparing Data for Modeling

saveRDS(d2, here::here("data/rodeo/betts-rodeodata.rds"))
invisible(readRDS(file = here::here("data/rodeo/betts-rodeodata.rds")))

Data Manifest

# 2000
ltdb.raw.2000s <- read.csv(here::here("data/raw/ltdb_std_2000_sample.csv"))
ltdb.raw.2000f <- read.csv(here::here("data/raw/LTDB_Std_2000_fullcount.csv"))
ltdb.rodeo.2000 <- readRDS(here::here("data/rodeo/LTDB-2000.rds"))

# 2010
ltdb.raw.2010s <- read.csv(here::here("data/raw/ltdb_std_2010_sample.csv"))
ltdb.raw.2010f <- read.csv(here::here("data/raw/LTDB_Std_2010_fullcount.csv"))
ltdb.rodeo.2010 <- readRDS(here::here("data/rodeo/LTDB-2010.rds"))

complete.rodeo <- readRDS(file = here::here("data/rodeo/betts-rodeodata.rds"))

Evaluating Program Impact

As observed above, we’re going to want to dig a bit to see which households are receiving which benefits, as some qualify for both NMTC and LIHTC, as program impact for each benefit might get skewed.

sum(d2$num.nmtc)
[1] 6
sum(d2$num.lihtc)
[1] 17
sum(d2$num.nmtc & d2$num.lihtc)
[1] 3
General Regresssion Models

This model relies on the difference-in-differences regression to help control for other factors outside of the treatment.

Here’s the general model, where we’re going to evaluate the relationship between median home values from 2000 with median home values for 2010 for homes that did and did not receive assistance from federal programs.

m1 <- lm(y ~ treat + post + treat*post)

And here we’ll run the model for all households receiving the NMTC credit:

GenNMTC <- lm(mhv.00 ~ num.nmtc + mhv.10 + num.nmtc*mhv.10, data = d2)

stargazer(GenNMTC,
           type = S_TYPE,
           dep.var.labels = c("2000 Median Home Value(MHV)"),
           covariate.labels = c("Received NMTC", "2010 MHV", "Received NMTC * 2010 MHV"),
           digits = 2)
Dependent variable:
2000 Median Home Value(MHV)
Received NMTC -329,280.60***
(48,790.09)
2010 MHV 0.64***
(0.03)
Received NMTC * 2010 MHV 2.94***
(0.32)
Constant 23,481.01***
(5,590.17)
Observations 232
R2 0.77
Adjusted R2 0.77
Residual Std. Error 40,915.99 (df = 228)
F Statistic 258.40*** (df = 3; 228)
Note: p<0.1; p<0.05; p<0.01

Here we see that the 2000 median home value for those receiving the NMTC is over 300,000 less than those not receiving the credit, which, intuitively, makes sense, given the purpose of the program is to revitalize low-income communities. We see a statistically significant relationship in median home value growth over time, which again makes sense, but for evaluation purposes for the NMTC, every dollar invested through the the NMTC program increased the value by 2.94 dollars - not a bad investment. An \(R^2\) of 0.77 isn’t bad either, for explaining the policy impact.


Here’s the model for tracts with only NMTC credit:

OnlyNMTC <- lm(mhv.00 ~ OnlyNMTC + mhv.10 + OnlyNMTC*mhv.10, data = d2)

stargazer(OnlyNMTC, 
          type = S_TYPE,
          dep.var.labels = c("2000 Median Home Value(MHV)"),
          covariate.labels = c("Received Only NMTC", "2010 MHV", "Only Received NMTC * 2010 MHV"),
          digits = 2)
Dependent variable:
2000 Median Home Value(MHV)
Received Only NMTC 18,442.23
(92,170.61)
2010 MHV 0.64***
(0.03)
Only Received NMTC * 2010 MHV -0.29
(0.60)
Constant 24,688.96***
(6,836.98)
Observations 232
R2 0.66
Adjusted R2 0.65
Residual Std. Error 50,371.25 (df = 228)
F Statistic 144.64*** (df = 3; 228)
Note: p<0.1; p<0.05; p<0.01

For households only using NMTC, we see a different trend. For every dollar invested, we see a decrease of 0.29 in home value in 2010.


For LIHTC, we see a similar trend: for households only using LIHTC, we see a decrease in investment.

GenLIHTC <- lm(mhv.00 ~ num.lihtc + mhv.10 + num.lihtc*mhv.10, data=d2)

stargazer(GenLIHTC,
          type = S_TYPE,
          dep.var.labels = c("2000 Median Home Value(MHV)"),
          covariate.labels = c("Received LIHTC", "2010 MHV", "Received LIHTC * 2010 MHV"),
           digits = 2)
Dependent variable:
2000 Median Home Value(MHV)
Received LIHTC -93,471.52***
(21,436.72)
2010 MHV 0.64***
(0.03)
Received LIHTC * 2010 MHV 1.16***
(0.18)
Constant 22,845.65***
(6,513.87)
Observations 232
R2 0.71
Adjusted R2 0.71
Residual Std. Error 45,968.74 (df = 228)
F Statistic 188.92*** (df = 3; 228)
Note: p<0.1; p<0.05; p<0.01
OnlyLIHTC <- lm(mhv.00 ~ OnlyLIHTC + mhv.10 + OnlyLIHTC*mhv.10, data = d2)

stargazer(OnlyLIHTC,
          type = S_TYPE,
          dep.var.labels = c("2000 Median Home Value(MHV)"),
          covariate.labels = c("Received Only LIHTC", "2010 MHV", "Only Received LIHTC * 2010 MHV"),
          digits = 2)
Dependent variable:
2000 Median Home Value(MHV)
Received Only LIHTC -9,024.50
(39,385.16)
2010 MHV 0.64***
(0.03)
Only Received LIHTC * 2010 MHV -0.06
(0.31)
Constant 26,124.71***
(7,050.17)
Observations 232
R2 0.66
Adjusted R2 0.65
Residual Std. Error 50,329.24 (df = 228)
F Statistic 145.01*** (df = 3; 228)
Note: p<0.1; p<0.05; p<0.01


Unsurprisingly, for households using both credits, we see a much larger increase:

BothCredits <- lm(mhv.00 ~ BothCredits + mhv.10 + BothCredits*mhv.10, data =d2)

stargazer(BothCredits,
          type = S_TYPE,
          dep.var.labels = c("2000 Median Home Value(MHV)"),
          covariate.labels = c("Received Both", "2010 MHV", "Received Both * 2010 MHV"),
          digits = 2)
Dependent variable:
2000 Median Home Value(MHV)
Received Both -546,354.10***
(43,307.39)
2010 MHV 0.64***
(0.02)
Received Both * 2010 MHV 5.26***
(0.28)
Constant 23,139.30***
(3,755.04)
Observations 232
R2 0.90
Adjusted R2 0.89
Residual Std. Error 27,650.95 (df = 228)
F Statistic 656.19*** (df = 3; 228)
Note: p<0.1; p<0.05; p<0.01


Regresssions with Variables

We can also run the models to see how different populations within the community are affected by the programs.

p.hisp00 <- log(as.numeric(d2$p.hisp.00))
p.hisp10 <- log(as.numeric(d2$p.hisp.10))
p.pov00 <- log(as.numeric(d2$pov.rate.00) + 0.0000001)
p.pov10 <- log(as.numeric(d2$pov.rate.10))
p.prof00 <- log(as.numeric(d2$prof00))
p.prof10 <- log(as.numeric(d2$prof12))
m5 <- lm(mhv.00 ~ num.nmtc + p.hisp00 + p.pov00 + p.prof00 + mhv.10 + num.nmtc*mhv.10, data = d2)


Dependent variable:
2000 Median Home Value(MHV)
Received NMTC -321,025.90***
(48,446.03)
Percent Hispanic -7,878.24**
(3,800.61)
Percent in Poverty -2,073.08
(1,497.72)
Percent Professional 4,105.64
(3,337.53)
2010 MHV 0.58***
(0.03)
Received NMTC * 2010 MHV 2.96***
(0.31)
Constant 37,433.33
(23,650.93)
Observations 232
R2 0.78
Adjusted R2 0.78
Residual Std. Error 40,386.58 (df = 225)
F Statistic 134.11*** (df = 6; 225)
Note: p<0.1; p<0.05; p<0.01


m0 <- lm(mhv.00 ~ num.lihtc + p.hisp00 + p.pov00 + p.prof00 + mhv.10 + num.lihtc*mhv.10, data = d2)
Dependent variable:
2000 Median Home Value(MHV)
Received LIHTC -91,546.20***
(21,444.26)
Percent Hispanic -5,802.22
(4,258.49)
Percent in Poverty -1,847.41
(1,706.07)
Percent Professional 392.65
(3,774.94)
2010 MHV 0.60***
(0.04)
LIHTC * 2010 MHV 1.18***
(0.18)
Constant 48,400.24*
(26,880.79)
Observations 232
R2 0.72
Adjusted R2 0.71
Residual Std. Error 45,894.02 (df = 225)
F Statistic 95.39*** (df = 6; 225)
Note: p<0.1; p<0.05; p<0.01