library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(readxl)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(stargazer)
## Warning: package 'stargazer' was built under R version 4.5.2
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
t8 <- read_csv("Table8.csv")
## Rows: 3221 Columns: 272
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): source, sumlevel, geoid, name, st, cnty
## dbl (266): T8_est1, T8_est2, T8_est3, T8_est4, T8_est5, T8_est6, T8_est7, T8...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
t8_tx <- t8 %>%
filter(st == 48)
chas_tx <- t8_tx %>%
mutate(
# only counties in Texas
total_renters = T8_est68,
# severe rent burden: all renter households with cost burden > 50%
severe_rent_burden_n = T8_est76 + T8_est89 + T8_est102 + T8_est115 + T8_est128,
severe_burden_pct = 100 * severe_rent_burden_n / total_renters,
# housing quality problems: unit lacks complete plumbing OR kitchen
hq_problem_n = T8_est71 + T8_est74 + T8_est77 + T8_est80 + T8_est84 + T8_est87 + T8_est90 + T8_est93 +
T8_est97 + T8_est100 + T8_est103 + T8_est106 +
T8_est110 + T8_est113 + T8_est116 + T8_est119 +
T8_est123 + T8_est126 + T8_est129 + T8_est132,
hq_problem_pct = 100 * hq_problem_n / total_renters,
#household income (low income less than 30% HAMFI plus between 30% and 50%.)
low_income_n = T8_est69 + T8_est82,
low_income_pct = 100 * low_income_n / total_renters
)
model_percents <- lm(severe_burden_pct ~ hq_problem_pct + low_income_pct,data = chas_tx)
summary(model_percents)
##
## Call:
## lm(formula = severe_burden_pct ~ hq_problem_pct + low_income_pct,
## data = chas_tx)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.207 -3.832 0.716 4.272 46.190
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.23734 1.57581 3.324 0.00102 **
## hq_problem_pct -0.18958 0.18024 -1.052 0.29389
## low_income_pct 0.27035 0.03719 7.270 4.56e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.069 on 251 degrees of freedom
## Multiple R-squared: 0.1757, Adjusted R-squared: 0.1692
## F-statistic: 26.76 on 2 and 251 DF, p-value: 2.92e-11
model_counts <- lm(severe_rent_burden_n ~ hq_problem_n + low_income_n,data = chas_tx)
summary(model_counts)
##
## Call:
## lm(formula = severe_rent_burden_n ~ hq_problem_n + low_income_n,
## data = chas_tx)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7075.6 -104.8 106.7 190.1 3354.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -215.92253 55.11760 -3.917 0.000115 ***
## hq_problem_n 2.41424 0.40640 5.941 9.43e-09 ***
## low_income_n 0.50200 0.01452 34.577 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 819.4 on 251 degrees of freedom
## Multiple R-squared: 0.9967, Adjusted R-squared: 0.9967
## F-statistic: 3.769e+04 on 2 and 251 DF, p-value: < 2.2e-16
raintest(model_percents)
##
## Rainbow test
##
## data: model_percents
## Rain = 0.68361, df1 = 127, df2 = 124, p-value = 0.9831
plot(model_percents,which=1)
#pass
durbinWatsonTest(model_percents)
## lag Autocorrelation D-W Statistic p-value
## 1 -0.00575984 2.009762 0.95
## Alternative hypothesis: rho != 0
#Pass
plot(model_percents,which=3)
bptest(model_percents)
##
## studentized Breusch-Pagan test
##
## data: model_percents
## BP = 14.039, df = 2, p-value = 0.0008943
#Fail homoscedasticity...likely due to percentages
shapiro.test(model_percents$residuals)
##
## Shapiro-Wilk normality test
##
## data: model_percents$residuals
## W = 0.93343, p-value = 2.726e-09
plot(model_percents,which=2)
#Fail
vif(model_percents)
## hq_problem_pct low_income_pct
## 1.001043 1.001043
#Pass
model_glm <- glm(severe_burden_pct/100 ~ hq_problem_pct + low_income_pct,data = chas_tx,family = quasibinomial())
summary(model_glm)
##
## Call:
## glm(formula = severe_burden_pct/100 ~ hq_problem_pct + low_income_pct,
## family = quasibinomial(), data = chas_tx)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.48698 0.12230 -20.336 < 2e-16 ***
## hq_problem_pct -0.01639 0.01447 -1.133 0.258
## low_income_pct 0.02042 0.00277 7.370 2.46e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 0.03693774)
##
## Null deviance: 12.973 on 253 degrees of freedom
## Residual deviance: 10.945 on 251 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
descriptive_stats <- data.frame(
severe_burden_pct = chas_tx$severe_burden_pct,
hq_problem_pct = chas_tx$hq_problem_pct,
low_income_pct = chas_tx$low_income_pct
)
descriptive_stats <- descriptive_stats %>%
rename(
`Severe Rent Burden (%) ` = severe_burden_pct,
`Housing Quality Problems (%) ` = hq_problem_pct,
`Low Income Households (%) ` = low_income_pct
)
stargazer(
descriptive_stats,
type = "html",
title = "Descriptive Statistics for County-Level Variables",
summary = TRUE,
summary.stat = c("n", "mean", "sd", "min", "max"),
digits = 2,
single.row = TRUE,
column.sep.width = "30pt"
)
| Statistic | N | Mean | St. Dev. | Min | Max |
| Severe Rent Burden (%) | 254 | 15.49 | 7.76 | 0.00 | 69.00 |
| Housing Quality Problems (%) | 254 | 2.26 | 2.47 | 0.00 | 20.00 |
| Low Income Households (%) | 254 | 39.51 | 11.96 | 7.62 | 77.73 |
hist(chas_tx$severe_burden_pct)
hist(chas_tx$hq_problem_pct)
hist(chas_tx$low_income_pct)
#Descriptive Statistics
```