This analysis examines the relationship between educational attainment and median household income across Texas counties using a linear regression model.
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
data <- read_csv("https://corgis-edu.github.io/corgis/datasets/csv/county_demographics/county_demographics.csv")
## Rows: 3139 Columns: 43
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): County, State
## dbl (41): Age.Percent 65 and Older, Age.Percent Under 18 Years, Age.Percent ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
texas <- data %>%
filter(State == "TX" | State == "Texas")
names(texas) <- make.names(names(texas))
model_data <- texas %>%
dplyr::select(
Income.Median.Houseold.Income,
Education.Bachelor.s.Degree.or.Higher,
Age.Percent.65.and.Older,
Population.2020.Population
) %>%
na.omit()
summary(model_data)
## Income.Median.Houseold.Income Education.Bachelor.s.Degree.or.Higher
## Min. : 25098 Min. : 0.00
## 1st Qu.: 44590 1st Qu.:14.22
## Median : 51371 Median :17.30
## Mean : 52848 Mean :19.06
## 3rd Qu.: 58747 3rd Qu.:21.80
## Max. :100920 Max. :52.30
## Age.Percent.65.and.Older Population.2020.Population
## Min. : 8.90 Min. : 64
## 1st Qu.:14.62 1st Qu.: 6264
## Median :17.65 Median : 18484
## Mean :18.55 Mean : 114746
## 3rd Qu.:22.40 3rd Qu.: 51848
## Max. :37.30 Max. :4731145
model <- lm(
Income.Median.Houseold.Income ~
Education.Bachelor.s.Degree.or.Higher +
Age.Percent.65.and.Older +
Population.2020.Population,
data = model_data
)
summary(model)
##
## Call:
## lm(formula = Income.Median.Houseold.Income ~ Education.Bachelor.s.Degree.or.Higher +
## Age.Percent.65.and.Older + Population.2020.Population, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33216 -4796 18 5564 45175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.998e+04 2.608e+03 19.167 < 2e-16
## Education.Bachelor.s.Degree.or.Higher 9.872e+02 8.474e+01 11.650 < 2e-16
## Age.Percent.65.and.Older -8.386e+02 1.146e+02 -7.317 3.45e-12
## Population.2020.Population -3.355e-03 1.681e-03 -1.996 0.0471
##
## (Intercept) ***
## Education.Bachelor.s.Degree.or.Higher ***
## Age.Percent.65.and.Older ***
## Population.2020.Population *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9610 on 250 degrees of freedom
## Multiple R-squared: 0.4417, Adjusted R-squared: 0.4349
## F-statistic: 65.92 on 3 and 250 DF, p-value: < 2.2e-16
# Linearity
plot(model, which = 1)
raintest(model)
##
## Rainbow test
##
## data: model
## Rain = 1.2327, df1 = 127, df2 = 123, p-value = 0.1222
# Independence of errors
dwtest(model)
##
## Durbin-Watson test
##
## data: model
## DW = 1.7801, p-value = 0.03978
## alternative hypothesis: true autocorrelation is greater than 0
# Homoscedasticity
bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 14.263, df = 3, p-value = 0.002568
# Normality of residuals
plot(model, which = 2)
shapiro.test(residuals(model))
##
## Shapiro-Wilk normality test
##
## data: residuals(model)
## W = 0.97838, p-value = 0.0006533
# Multicollinearity
vif(model)
## Education.Bachelor.s.Degree.or.Higher Age.Percent.65.and.Older
## 1.197454 1.103365
## Population.2020.Population
## 1.305788
cor(
model_data[, c(
"Education.Bachelor.s.Degree.or.Higher",
"Age.Percent.65.and.Older",
"Population.2020.Population"
)]
)
## Education.Bachelor.s.Degree.or.Higher
## Education.Bachelor.s.Degree.or.Higher 1.0000000
## Age.Percent.65.and.Older -0.0203556
## Population.2020.Population 0.3941701
## Age.Percent.65.and.Older
## Education.Bachelor.s.Degree.or.Higher -0.0203556
## Age.Percent.65.and.Older 1.0000000
## Population.2020.Population -0.2886943
## Population.2020.Population
## Education.Bachelor.s.Degree.or.Higher 0.3941701
## Age.Percent.65.and.Older -0.2886943
## Population.2020.Population 1.0000000