library(dplyr)
library(DT)
library(ggplot2)
library(here)
library(import)
library(quarto)
library(stargazer)
library(tidycensus)
crosswalk <- read.csv("https://raw.githubusercontent.com/DS4PS/cpp-529-master/master/data/cbsatocountycrosswalk.csv", stringsAsFactors=F, colClasses="character")
import::here("S_TYPE",
"d",
"df",
"cbsa_stats_df",
"nmtc.treat",
"lihtc.treat",
"PLOTS",
"%>%",
.from = here::here("analysis/Betts-Utilities.R"),
.character_only = TRUE)Tucson Housing Credit Evaluation 2000 - 2010
Introduction
The purpose of this report is to demonstrate proficiency with various evaluation and data analytics techniques. I’m leaving in more code than found in typical reports for review. Using data available from the US Census, we’ll evaluate the effectiveness of the New Markets Tax Credit Program (NMTC) and the Low Income Housing Tax Credit (LIHTC).
Create the Data Set
# confirm crosswalk data pulled for Tucson
tucson_msp <- grep("^TUC", crosswalk$msaname, value=TRUE)
# find Tucson in crosswalk data
tucson.msp <- crosswalk$msaname == tucson_msp
# load FIPS data and omit non-values
tucson.fips <- crosswalk$fipscounty[tucson.msp]
tucson.fips <- na.omit(tucson.fips)
# pull in state FIPS
state.fips <- substr(tucson.fips, 1, 2)
# pull in county FIPS
county.fips <- substr(tucson.fips, 3, 5)
# combine CBSA with MSP
tucson.cbsa <- crosswalk$cbsa[tucson.msp]
# omit empty sets
tucson.cbsa <- na.omit(tucson.cbsa)# create data set with Tucson data and display as table
d2 <- d |>
filter(cbsa %in% tucson.cbsa)
d2 <- d2 |>
mutate(OnlyNMTC = ifelse(num.nmtc == 1 & num.lihtc == 0, 1, 0)) |>
mutate(OnlyLIHTC = ifelse(num.lihtc == 1 & num.nmtc == 0, 1, 0)) |>
mutate(BothCredits = ifelse(num.nmtc == 1 & num.lihtc == 1, 1, 0))
datatable(d2)Descriptive Statistics for Tucson Households
Median Value of a Tucson Home in 2000 (all values adjusted for inflation to 2010)
median(d2$mhv.00)[1] 129950.3
Visualizing the Data Using the Native Plot Function
hist(d2$mhv.00, breaks=100, xlim=c(0,500000),
col="gray40", border="white",
axes=F,
xlab="MHV(median = $130k)",
ylab="",
main="Median Home Value in 2000")
axis(side=1, at=seq(0,500000,100000),
labels=c("$0","$100k","$200k","$300k","$400k","$500k"))
abline(v=median(d2$mhv.00, na.rm=T), col="coral", lwd=3)Calculating Growth and Visualizing Data Using ggplot2
# calculate mean and median to graph
mean.x <- mean(d2$mhv.growth)
median.x <- median(d2$mhv.growth)
hg <- hist(d2$mhv.growth, breaks=1000, plot=FALSE) #needed for scaling graph with growth
ymax <- max(hg$count)
# build the histogram and set reasonable limits
ggplot(d2, aes(x = mhv.growth)) +
geom_histogram(binwidth = 1, fill = "gray40", color = "ivory") +
scale_x_continuous(limits = c(0, 100), # keeping scale at 100 despite outliers
breaks = seq(0, 100, by = 10),
labels = paste0(seq(0, 100, by = 10), "%")) +
scale_y_continuous(breaks = NULL) +
labs(title = "Tucson Home Value Growth 2000 to 2010",
x = "",
y = "") +
geom_vline(xintercept=mean.x, col="cornflowerblue", linetype="dashed", linewidth=1) +
geom_vline(xintercept=median.x, col="coral", linetype="dashed", linewidth=1) +
annotate("text", x = 65, y = 0.5*ymax,
label = paste0("Mean = ", round(mean.x, 0), "%"),
color = "cornflowerblue", size = 7, hjust = 0) +
annotate("text", x = 65, y = 0.6*ymax,
label = paste0("Median = ", round(median.x, 0), "%"),
color = "coral", size = 7, hjust = 0) +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
plot.title = element_text(size = 15))Tucson Household Income in 2000
Average Income for All Tucson Households
mean(d2$hinc00, na.rm=T)[1] 52824.14
Average Income for Households Receiving LIHTC
mean(d2$hinc00[d2$num.lihtc > 0], na.rm=T)[1] 29969.89
Average Income for Households Not Receiving LIHTC
mean(d2$hinc00[d2$num.lihtc == 0], na.rm=T)[1] 54517.04
Average Income for Households Receiving NMTC
mean(d2$hinc00[d2$num.nmtc > 0], na.rm=T)[1] 29129.83
Average Income for Households Not Receiving NMTC
mean(d2$hinc00[d2$num.nmtc == 0], na.rm=T)[1] 53453.19
Tucson Home Values in 2000
Average Home Value of Tucson Homes
mean(d2$mhv.00, na.rm=T)[1] 147868.7
Average Home Value for Households Receiving LIHTC
mean(d2$mhv.00[d2$num.lihtc > 0], na.rm=T)[1] 125691
Average Home Value for Households Not Receiving LIHTC
mean(d2$mhv.00[d2$num.lihtc == 0], na.rm=T)[1] 149511.5
Average Home Value for Households Receiving NMTC
mean(d2$mhv.00[d2$num.nmtc > 0], na.rm=T)[1] 210914.1
Average Home Value for Households Not Receiving NMTC
mean(d2$mhv.00[d2$num.nmtc == 0], na.rm=T)[1] 146194.9
Home Value Growth Rates
Growth of Households Receiving LIHTC
summary(d2$growth[d2$num.lihtc > 0], na.rm=T) Min. 1st Qu. Median Mean 3rd Qu. Max.
-72.036 6.127 34.607 29.586 43.983 181.324
Growth of Households Not Receiving LIHTC
summary(d2$growth[d2$num.lihtc == 0], na.rm=T) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-63.10 16.09 25.44 28.79 40.05 184.45 3
Growth of Households Receiving NMTC
summary(d2$growth[d2$num.nmtc > 0], na.rm=T) Min. 1st Qu. Median Mean 3rd Qu. Max.
-72.04 12.51 25.54 18.93 43.62 76.74
Growth of Households Not Receiving NMTC
summary(d2$growth[d2$num.nmtc == 0], na.rm=T) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-63.10 15.88 26.45 29.11 40.56 184.45 3
Visualization of Growth
gridExtra::grid.arrange(PLOTS$mhv_growth$lihtc,
PLOTS$mhv_growth$nmtc,
nrow = 1)Comparison between LIHTC & NMTC
# average home value
mean(d2$mhv.00[d2$num.nmtc > 0], na.rm = T) - mean(d2$mhv.00[d2$num.lihtc > 0], na.rm = T)[1] 85223.1
# average growth
mean(d2$growth[d2$num.nmtc > 0], na.rm = T) - mean(d2$growth[d2$num.lihtc > 0] ,a.rm = T)[1] -10.65397
Observations
While those receiving LIHTC and NMTC have a similar average incomes, the average household value for those receiving NMTC is $85,223.1 more than those receiving LIHTC. Furthermore, the home value growth for those receiving NMTC is more than 10 points less than those receiving LIHTC. While the objects of the programs may be similar, boosting opportunities for those outside the housing market, the populations are quite distinct.
Preparing Data for Modeling
saveRDS(d2, here::here("data/rodeo/betts-rodeodata.rds"))
invisible(readRDS(file = here::here("data/rodeo/betts-rodeodata.rds")))Data Manifest
# 2000
ltdb.raw.2000s <- read.csv(here::here("data/raw/ltdb_std_2000_sample.csv"))
ltdb.raw.2000f <- read.csv(here::here("data/raw/LTDB_Std_2000_fullcount.csv"))
ltdb.rodeo.2000 <- readRDS(here::here("data/rodeo/LTDB-2000.rds"))
# 2010
ltdb.raw.2010s <- read.csv(here::here("data/raw/ltdb_std_2010_sample.csv"))
ltdb.raw.2010f <- read.csv(here::here("data/raw/LTDB_Std_2010_fullcount.csv"))
ltdb.rodeo.2010 <- readRDS(here::here("data/rodeo/LTDB-2010.rds"))
complete.rodeo <- readRDS(file = here::here("data/rodeo/betts-rodeodata.rds"))Evaluating Program Impact
As observed above, we’re going to want to dig a bit to see which households are receiving which benefits, as some qualify for both NMTC and LIHTC, as program impact for each benefit might get skewed.
sum(d2$num.nmtc)[1] 6
sum(d2$num.lihtc)[1] 17
sum(d2$num.nmtc & d2$num.lihtc)[1] 3
General Regresssion Models
This model relies on the difference-in-differences regression to help control for other factors outside of the treatment.
Here’s the general model, where we’re going to evaluate the relationship between median home values from 2000 with median home values for 2010 for homes that did and did not receive assistance from federal programs.
m1 <- lm(y ~ treat + post + treat*post)And here we’ll run the model for all households receiving the NMTC credit:
GenNMTC <- lm(mhv.00 ~ num.nmtc + mhv.10 + num.nmtc*mhv.10, data = d2)
stargazer(GenNMTC,
type = S_TYPE,
dep.var.labels = c("2000 Median Home Value(MHV)"),
covariate.labels = c("Received NMTC", "2010 MHV", "Received NMTC * 2010 MHV"),
digits = 2)| Dependent variable: | |
| 2000 Median Home Value(MHV) | |
| Received NMTC | -329,280.60*** |
| (48,790.09) | |
| 2010 MHV | 0.64*** |
| (0.03) | |
| Received NMTC * 2010 MHV | 2.94*** |
| (0.32) | |
| Constant | 23,481.01*** |
| (5,590.17) | |
| Observations | 232 |
| R2 | 0.77 |
| Adjusted R2 | 0.77 |
| Residual Std. Error | 40,915.99 (df = 228) |
| F Statistic | 258.40*** (df = 3; 228) |
| Note: | p<0.1; p<0.05; p<0.01 |
Here we see that the 2000 median home value for those receiving the NMTC is over 300,000 less than those not receiving the credit, which, intuitively, makes sense, given the purpose of the program is to revitalize low-income communities. We see a statistically significant relationship in median home value growth over time, which again makes sense, but for evaluation purposes for the NMTC, every dollar invested through the the NMTC program increased the value by 2.94 dollars - not a bad investment. An \(R^2\) of 0.77 isn’t bad either, for explaining the policy impact.
Here’s the model for tracts with only NMTC credit:
OnlyNMTC <- lm(mhv.00 ~ OnlyNMTC + mhv.10 + OnlyNMTC*mhv.10, data = d2)
stargazer(OnlyNMTC,
type = S_TYPE,
dep.var.labels = c("2000 Median Home Value(MHV)"),
covariate.labels = c("Received Only NMTC", "2010 MHV", "Only Received NMTC * 2010 MHV"),
digits = 2)| Dependent variable: | |
| 2000 Median Home Value(MHV) | |
| Received Only NMTC | 18,442.23 |
| (92,170.61) | |
| 2010 MHV | 0.64*** |
| (0.03) | |
| Only Received NMTC * 2010 MHV | -0.29 |
| (0.60) | |
| Constant | 24,688.96*** |
| (6,836.98) | |
| Observations | 232 |
| R2 | 0.66 |
| Adjusted R2 | 0.65 |
| Residual Std. Error | 50,371.25 (df = 228) |
| F Statistic | 144.64*** (df = 3; 228) |
| Note: | p<0.1; p<0.05; p<0.01 |
For households only using NMTC, we see a different trend. For every dollar invested, we see a decrease of 0.29 in home value in 2010.
For LIHTC, we see a similar trend: for households only using LIHTC, we see a decrease in investment.
GenLIHTC <- lm(mhv.00 ~ num.lihtc + mhv.10 + num.lihtc*mhv.10, data=d2)
stargazer(GenLIHTC,
type = S_TYPE,
dep.var.labels = c("2000 Median Home Value(MHV)"),
covariate.labels = c("Received LIHTC", "2010 MHV", "Received LIHTC * 2010 MHV"),
digits = 2)| Dependent variable: | |
| 2000 Median Home Value(MHV) | |
| Received LIHTC | -93,471.52*** |
| (21,436.72) | |
| 2010 MHV | 0.64*** |
| (0.03) | |
| Received LIHTC * 2010 MHV | 1.16*** |
| (0.18) | |
| Constant | 22,845.65*** |
| (6,513.87) | |
| Observations | 232 |
| R2 | 0.71 |
| Adjusted R2 | 0.71 |
| Residual Std. Error | 45,968.74 (df = 228) |
| F Statistic | 188.92*** (df = 3; 228) |
| Note: | p<0.1; p<0.05; p<0.01 |
OnlyLIHTC <- lm(mhv.00 ~ OnlyLIHTC + mhv.10 + OnlyLIHTC*mhv.10, data = d2)
stargazer(OnlyLIHTC,
type = S_TYPE,
dep.var.labels = c("2000 Median Home Value(MHV)"),
covariate.labels = c("Received Only LIHTC", "2010 MHV", "Only Received LIHTC * 2010 MHV"),
digits = 2)| Dependent variable: | |
| 2000 Median Home Value(MHV) | |
| Received Only LIHTC | -9,024.50 |
| (39,385.16) | |
| 2010 MHV | 0.64*** |
| (0.03) | |
| Only Received LIHTC * 2010 MHV | -0.06 |
| (0.31) | |
| Constant | 26,124.71*** |
| (7,050.17) | |
| Observations | 232 |
| R2 | 0.66 |
| Adjusted R2 | 0.65 |
| Residual Std. Error | 50,329.24 (df = 228) |
| F Statistic | 145.01*** (df = 3; 228) |
| Note: | p<0.1; p<0.05; p<0.01 |
Unsurprisingly, for households using both credits, we see a much larger increase:
BothCredits <- lm(mhv.00 ~ BothCredits + mhv.10 + BothCredits*mhv.10, data =d2)
stargazer(BothCredits,
type = S_TYPE,
dep.var.labels = c("2000 Median Home Value(MHV)"),
covariate.labels = c("Received Both", "2010 MHV", "Received Both * 2010 MHV"),
digits = 2)| Dependent variable: | |
| 2000 Median Home Value(MHV) | |
| Received Both | -546,354.10*** |
| (43,307.39) | |
| 2010 MHV | 0.64*** |
| (0.02) | |
| Received Both * 2010 MHV | 5.26*** |
| (0.28) | |
| Constant | 23,139.30*** |
| (3,755.04) | |
| Observations | 232 |
| R2 | 0.90 |
| Adjusted R2 | 0.89 |
| Residual Std. Error | 27,650.95 (df = 228) |
| F Statistic | 656.19*** (df = 3; 228) |
| Note: | p<0.1; p<0.05; p<0.01 |
Regresssions with Variables
We can also run the models to see how different populations within the community are affected by the programs.
p.hisp00 <- log(as.numeric(d2$p.hisp.00))
p.hisp10 <- log(as.numeric(d2$p.hisp.10))
p.pov00 <- log(as.numeric(d2$pov.rate.00) + 0.0000001)
p.pov10 <- log(as.numeric(d2$pov.rate.10))
p.prof00 <- log(as.numeric(d2$prof00))
p.prof10 <- log(as.numeric(d2$prof12))m5 <- lm(mhv.00 ~ num.nmtc + p.hisp00 + p.pov00 + p.prof00 + mhv.10 + num.nmtc*mhv.10, data = d2)| Dependent variable: | |
| 2000 Median Home Value(MHV) | |
| Received NMTC | -321,025.90*** |
| (48,446.03) | |
| Percent Hispanic | -7,878.24** |
| (3,800.61) | |
| Percent in Poverty | -2,073.08 |
| (1,497.72) | |
| Percent Professional | 4,105.64 |
| (3,337.53) | |
| 2010 MHV | 0.58*** |
| (0.03) | |
| Received NMTC * 2010 MHV | 2.96*** |
| (0.31) | |
| Constant | 37,433.33 |
| (23,650.93) | |
| Observations | 232 |
| R2 | 0.78 |
| Adjusted R2 | 0.78 |
| Residual Std. Error | 40,386.58 (df = 225) |
| F Statistic | 134.11*** (df = 6; 225) |
| Note: | p<0.1; p<0.05; p<0.01 |
m0 <- lm(mhv.00 ~ num.lihtc + p.hisp00 + p.pov00 + p.prof00 + mhv.10 + num.lihtc*mhv.10, data = d2)| Dependent variable: | |
| 2000 Median Home Value(MHV) | |
| Received LIHTC | -91,546.20*** |
| (21,444.26) | |
| Percent Hispanic | -5,802.22 |
| (4,258.49) | |
| Percent in Poverty | -1,847.41 |
| (1,706.07) | |
| Percent Professional | 392.65 |
| (3,774.94) | |
| 2010 MHV | 0.60*** |
| (0.04) | |
| LIHTC * 2010 MHV | 1.18*** |
| (0.18) | |
| Constant | 48,400.24* |
| (26,880.79) | |
| Observations | 232 |
| R2 | 0.72 |
| Adjusted R2 | 0.71 |
| Residual Std. Error | 45,894.02 (df = 225) |
| F Statistic | 95.39*** (df = 6; 225) |
| Note: | p<0.1; p<0.05; p<0.01 |