library(ggplot2)
library(car)
## Loading required package: carData
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(readxl)
# Read the district-level data from the Excel workbook in the working
# directory.
district <- read_excel("district.xls")

# Restrict to rows where both the outcome (DA0CC21R) and the predictor
# (DPETECOP) are observed.
district_clean <- district[
  complete.cases(district[c("DA0CC21R", "DPETECOP")]),
]

# Simple linear regression of DA0CC21R on DPETECOP; print the coefficient
# table and fit statistics.
district_lm <- lm(DA0CC21R ~ DPETECOP, data = district_clean)
summary(district_lm)
##
## Call:
## lm(formula = DA0CC21R ~ DPETECOP, data = district_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.876 -9.154 -0.484 8.928 72.796
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.73187 1.38030 41.10 <2e-16 ***
## DPETECOP -0.51317 0.02187 -23.46 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.59 on 1058 degrees of freedom
## Multiple R-squared: 0.3423, Adjusted R-squared: 0.3417
## F-statistic: 550.6 on 1 and 1058 DF, p-value: < 2.2e-16
# Scatter plot of DA0CC21R (college readiness rate) against DPETECOP
# (percentage of economically disadvantaged students), overlaid with the
# fitted least-squares line and its confidence band (se = TRUE).
ggplot(district_clean, aes(x = DPETECOP, y = DA0CC21R)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "Green") +
  labs(
    title = "College Readiness vs Socioeconomically Disadvantaged Students",
    x = "Percentage of Economically Disadvantaged Students",
    y = "College Readiness Rate (%)",
    caption = "Source: Texas School Districts"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Model object used by the diagnostic checks. Despite its name this is a
# single-predictor regression with the same formula as district_lm.
# It is fit on district_clean (complete cases only) so the sample is
# explicitly the one used for district_lm, rather than depending on lm()
# dropping incomplete rows from the raw data on its own.
data_multiple <- lm(DA0CC21R ~ DPETECOP, data = district_clean)

# Linearity check: residuals vs fitted values.
plot(data_multiple, which = 1)

# Linearity check: rainbow test (lmtest).
raintest(data_multiple)
##
## Rainbow test
##
## data: data_multiple
## Rain = 1.0859, df1 = 530, df2 = 528, p-value = 0.1718
# Independence check: Durbin-Watson test for lag-1 autocorrelation in the
# residuals (car). By default its p-value is estimated by simulation, so the
# RNG seed is fixed to make the reported p-value reproducible across runs.
# NOTE(review): the statistic depends on row order; confirm the rows of the
# data have a meaningful ordering before interpreting it.
set.seed(123)
durbinWatsonTest(data_multiple)
## lag Autocorrelation D-W Statistic p-value
## 1 0.1033869 1.791504 0
## Alternative hypothesis: rho != 0
# Constant-variance check, graphical: scale-location plot.
plot(data_multiple, which = 3)

# Constant-variance check, formal: studentized Breusch-Pagan test (lmtest).
bptest(data_multiple)
##
## studentized Breusch-Pagan test
##
## data: data_multiple
## BP = 20.807, df = 1, p-value = 5.079e-06
# Residual normality check, graphical: normal Q-Q plot.
plot(data_multiple, which = 2)

# Residual normality check, formal: Shapiro-Wilk test on the model residuals.
shapiro.test(data_multiple$residuals)
##
## Shapiro-Wilk normality test
##
## data: data_multiple$residuals
## W = 0.99245, p-value = 3.112e-05