Research Paper Draft

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readr)
library(readxl)
library(car)

## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library(stargazer)

## Warning: package 'stargazer' was built under R version 4.5.2

## 
## Please cite as: 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

library(lmtest)

## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

t8 <- read_csv("Table8.csv")

## Rows: 3221 Columns: 272
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (6): source, sumlevel, geoid, name, st, cnty
## dbl (266): T8_est1, T8_est2, T8_est3, T8_est4, T8_est5, T8_est6, T8_est7, T8...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

t8_tx <- t8 %>%
filter(st == 48)

chas_tx <- t8_tx %>% 
  mutate(
    # only counties in Texas
    total_renters = T8_est68,
    
    # severe rent burden: all renter households with cost burden > 50% 
severe_rent_burden_n = T8_est76 + T8_est89 + T8_est102 + T8_est115 + T8_est128,
   
severe_burden_pct = 100 * severe_rent_burden_n / total_renters,
    
    # housing quality problems: unit lacks complete plumbing OR kitchen
hq_problem_n = T8_est71 + T8_est74 + T8_est77 + T8_est80 + T8_est84 + T8_est87 + T8_est90 + T8_est93 +
T8_est97 + T8_est100 + T8_est103 + T8_est106 +
T8_est110 + T8_est113 + T8_est116 + T8_est119 +
T8_est123 + T8_est126 + T8_est129 + T8_est132,
    
hq_problem_pct = 100 * hq_problem_n / total_renters,

#household income (low income less than 30% HAMFI plus between 30% and 50%.)
low_income_n = T8_est69 + T8_est82,
  
low_income_pct = 100 * low_income_n / total_renters
  )

model_percents <- lm(severe_burden_pct ~ hq_problem_pct + low_income_pct,data = chas_tx)

summary(model_percents)

## 
## Call:
## lm(formula = severe_burden_pct ~ hq_problem_pct + low_income_pct, 
##     data = chas_tx)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.207  -3.832   0.716   4.272  46.190 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     5.23734    1.57581   3.324  0.00102 ** 
## hq_problem_pct -0.18958    0.18024  -1.052  0.29389    
## low_income_pct  0.27035    0.03719   7.270 4.56e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.069 on 251 degrees of freedom
## Multiple R-squared:  0.1757, Adjusted R-squared:  0.1692 
## F-statistic: 26.76 on 2 and 251 DF,  p-value: 2.92e-11

model_counts <- lm(severe_rent_burden_n ~ hq_problem_n + low_income_n,data = chas_tx)

summary(model_counts)

## 
## Call:
## lm(formula = severe_rent_burden_n ~ hq_problem_n + low_income_n, 
##     data = chas_tx)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7075.6  -104.8   106.7   190.1  3354.6 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -215.92253   55.11760  -3.917 0.000115 ***
## hq_problem_n    2.41424    0.40640   5.941 9.43e-09 ***
## low_income_n    0.50200    0.01452  34.577  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 819.4 on 251 degrees of freedom
## Multiple R-squared:  0.9967, Adjusted R-squared:  0.9967 
## F-statistic: 3.769e+04 on 2 and 251 DF,  p-value: < 2.2e-16

raintest(model_percents)

## 
##  Rainbow test
## 
## data:  model_percents
## Rain = 0.68361, df1 = 127, df2 = 124, p-value = 0.9831

plot(model_percents,which=1)

#pass

durbinWatsonTest(model_percents)

##  lag Autocorrelation D-W Statistic p-value
##    1     -0.00575984      2.009762    0.95
##  Alternative hypothesis: rho != 0

#Pass

plot(model_percents,which=3)

bptest(model_percents)

## 
##  studentized Breusch-Pagan test
## 
## data:  model_percents
## BP = 14.039, df = 2, p-value = 0.0008943

#Fail homoscedasticity...likely due to percentages

shapiro.test(model_percents$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  model_percents$residuals
## W = 0.93343, p-value = 2.726e-09

plot(model_percents,which=2)

#Fail

vif(model_percents)

## hq_problem_pct low_income_pct 
##       1.001043       1.001043

#Pass

model_glm <- glm(severe_burden_pct/100 ~ hq_problem_pct + low_income_pct,data = chas_tx,family = quasibinomial())

summary(model_glm)

## 
## Call:
## glm(formula = severe_burden_pct/100 ~ hq_problem_pct + low_income_pct, 
##     family = quasibinomial(), data = chas_tx)
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -2.48698    0.12230 -20.336  < 2e-16 ***
## hq_problem_pct -0.01639    0.01447  -1.133    0.258    
## low_income_pct  0.02042    0.00277   7.370 2.46e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.03693774)
## 
##     Null deviance: 12.973  on 253  degrees of freedom
## Residual deviance: 10.945  on 251  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5

descriptive_stats <- data.frame(
  severe_burden_pct = chas_tx$severe_burden_pct,
  hq_problem_pct = chas_tx$hq_problem_pct,
  low_income_pct = chas_tx$low_income_pct
)

descriptive_stats <- descriptive_stats %>% 
  rename(
    `Severe Rent Burden (%)     `   = severe_burden_pct,
    `Housing Quality Problems (%)     ` = hq_problem_pct,
    `Low Income Households (%)     `    = low_income_pct
  )

stargazer(
  descriptive_stats,
  type = "html",   
  title = "Descriptive Statistics for County-Level Variables",
  summary = TRUE,
  summary.stat = c("n", "mean", "sd", "min", "max"),
  digits = 2,
  single.row = TRUE,
  column.sep.width = "30pt"
)

**Descriptive Statistics for County-Level Variables**

Statistic	N	Mean	St. Dev.	Min	Max

Severe Rent Burden (%)	254	15.49	7.76	0.00	69.00
Housing Quality Problems (%)	254	2.26	2.47	0.00	20.00
Low Income Households (%)	254	39.51	11.96	7.62	77.73

hist(chas_tx$severe_burden_pct)

hist(chas_tx$hq_problem_pct)

hist(chas_tx$low_income_pct)

#Descriptive Statistics

```

Research Paper Draft

2025-11-16