library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(readxl)
library(car)    
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(stargazer)
## Warning: package 'stargazer' was built under R version 4.5.2
## 
## Please cite as: 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
t8 <- read_csv("Table8.csv")
## Rows: 3221 Columns: 272
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (6): source, sumlevel, geoid, name, st, cnty
## dbl (266): T8_est1, T8_est2, T8_est3, T8_est4, T8_est5, T8_est6, T8_est7, T8...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
t8_tx <- t8 %>%
filter(st == 48)
chas_tx <- t8_tx %>% 
  mutate(
    # only counties in Texas
    total_renters = T8_est68,
    
    # severe rent burden: all renter households with cost burden > 50% 
severe_rent_burden_n = T8_est76 + T8_est89 + T8_est102 + T8_est115 + T8_est128,
   
severe_burden_pct = 100 * severe_rent_burden_n / total_renters,
    
    # housing quality problems: unit lacks complete plumbing OR kitchen
hq_problem_n = T8_est71 + T8_est74 + T8_est77 + T8_est80 + T8_est84 + T8_est87 + T8_est90 + T8_est93 +
T8_est97 + T8_est100 + T8_est103 + T8_est106 +
T8_est110 + T8_est113 + T8_est116 + T8_est119 +
T8_est123 + T8_est126 + T8_est129 + T8_est132,
    
hq_problem_pct = 100 * hq_problem_n / total_renters,

#household income (low income less than 30% HAMFI plus between 30% and 50%.)
low_income_n = T8_est69 + T8_est82,
  
low_income_pct = 100 * low_income_n / total_renters
  )
model_percents <- lm(severe_burden_pct ~ hq_problem_pct + low_income_pct,data = chas_tx)

summary(model_percents)
## 
## Call:
## lm(formula = severe_burden_pct ~ hq_problem_pct + low_income_pct, 
##     data = chas_tx)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.207  -3.832   0.716   4.272  46.190 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     5.23734    1.57581   3.324  0.00102 ** 
## hq_problem_pct -0.18958    0.18024  -1.052  0.29389    
## low_income_pct  0.27035    0.03719   7.270 4.56e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.069 on 251 degrees of freedom
## Multiple R-squared:  0.1757, Adjusted R-squared:  0.1692 
## F-statistic: 26.76 on 2 and 251 DF,  p-value: 2.92e-11
model_counts <- lm(severe_rent_burden_n ~ hq_problem_n + low_income_n,data = chas_tx)

summary(model_counts)
## 
## Call:
## lm(formula = severe_rent_burden_n ~ hq_problem_n + low_income_n, 
##     data = chas_tx)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7075.6  -104.8   106.7   190.1  3354.6 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -215.92253   55.11760  -3.917 0.000115 ***
## hq_problem_n    2.41424    0.40640   5.941 9.43e-09 ***
## low_income_n    0.50200    0.01452  34.577  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 819.4 on 251 degrees of freedom
## Multiple R-squared:  0.9967, Adjusted R-squared:  0.9967 
## F-statistic: 3.769e+04 on 2 and 251 DF,  p-value: < 2.2e-16
raintest(model_percents)
## 
##  Rainbow test
## 
## data:  model_percents
## Rain = 0.68361, df1 = 127, df2 = 124, p-value = 0.9831
plot(model_percents,which=1)

#pass
durbinWatsonTest(model_percents)
##  lag Autocorrelation D-W Statistic p-value
##    1     -0.00575984      2.009762    0.95
##  Alternative hypothesis: rho != 0
#Pass
plot(model_percents,which=3)

bptest(model_percents)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_percents
## BP = 14.039, df = 2, p-value = 0.0008943
#Fail homoscedasticity...likely due to percentages
shapiro.test(model_percents$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  model_percents$residuals
## W = 0.93343, p-value = 2.726e-09
plot(model_percents,which=2)

#Fail
vif(model_percents)
## hq_problem_pct low_income_pct 
##       1.001043       1.001043
#Pass
model_glm <- glm(severe_burden_pct/100 ~ hq_problem_pct + low_income_pct,data = chas_tx,family = quasibinomial())

summary(model_glm)
## 
## Call:
## glm(formula = severe_burden_pct/100 ~ hq_problem_pct + low_income_pct, 
##     family = quasibinomial(), data = chas_tx)
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -2.48698    0.12230 -20.336  < 2e-16 ***
## hq_problem_pct -0.01639    0.01447  -1.133    0.258    
## low_income_pct  0.02042    0.00277   7.370 2.46e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.03693774)
## 
##     Null deviance: 12.973  on 253  degrees of freedom
## Residual deviance: 10.945  on 251  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5
descriptive_stats <- data.frame(
  severe_burden_pct = chas_tx$severe_burden_pct,
  hq_problem_pct = chas_tx$hq_problem_pct,
  low_income_pct = chas_tx$low_income_pct
)
descriptive_stats <- descriptive_stats %>% 
  rename(
    `Severe Rent Burden (%)     `   = severe_burden_pct,
    `Housing Quality Problems (%)     ` = hq_problem_pct,
    `Low Income Households (%)     `    = low_income_pct
  )
stargazer(
  descriptive_stats,
  type = "html",   
  title = "Descriptive Statistics for County-Level Variables",
  summary = TRUE,
  summary.stat = c("n", "mean", "sd", "min", "max"),
  digits = 2,
  single.row = TRUE,
  column.sep.width = "30pt"
)
Descriptive Statistics for County-Level Variables
Statistic N Mean St. Dev. Min Max
Severe Rent Burden (%) 254 15.49 7.76 0.00 69.00
Housing Quality Problems (%) 254 2.26 2.47 0.00 20.00
Low Income Households (%) 254 39.51 11.96 7.62 77.73
hist(chas_tx$severe_burden_pct)

hist(chas_tx$hq_problem_pct)

hist(chas_tx$low_income_pct)

#Descriptive Statistics

```