library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
# Read the 2022-23 public school characteristics CSV into a tibble. The path is
# relative, so it is resolved against the current working directory.
Public_School_Characteristics_2022_23 <- readr::read_csv(
  file = "Public_School_Characteristics_2022-23.csv"
)
## Rows: 101390 Columns: 77
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (23): NCESSCH, SURVYEAR, STABR, LEAID, ST_LEAID, LEA_NAME, SCH_NAME, LST...
## dbl (54): X, Y, OBJECTID, STATUS, TOTFRL, FRELCH, REDLCH, DIRECTCERT, PK, KG...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build the modelling data set: keep the three columns used by the models and
# retain only rows where every column passes a `> 0` test (filter() also drops
# rows where a comparison evaluates to NA).
# dplyr::select() is namespaced because MASS, attached above, masks select().
# NOTE(review): the locale labels in the model output (e.g. "12-City: Mid-size")
# suggest ULOCALE is a character column, in which case `ULOCALE > 0` is a
# string comparison rather than a numeric one -- confirm this is the intended
# missing-value screen.
final_PSC_data <- Public_School_Characteristics_2022_23 |>
  dplyr::select(TOTFRL, STUTERATIO, ULOCALE) |>
  filter(TOTFRL > 0, STUTERATIO > 0, ULOCALE > 0)

# Turn the locale column into a factor so the models treat it as categorical,
# and confirm the conversion before writing it back into the data.
ULOCALE.factor <- factor(final_PSC_data[["ULOCALE"]])
is.factor(ULOCALE.factor)
## [1] TRUE
final_PSC_data <- mutate(final_PSC_data, ULOCALE = ULOCALE.factor)

# Baseline ordinary least squares fit of student-teacher ratio on locale and
# TOTFRL, followed by the default plot.lm() diagnostic panels.
PSC_lm <- lm(STUTERATIO ~ ULOCALE + TOTFRL, data = final_PSC_data)
plot(PSC_lm)




# Durbin-Watson test for lag-1 autocorrelation in the OLS residuals.
# NOTE(review): the statistic depends on row order; rows here are in whatever
# order the CSV supplied them -- confirm that ordering is meaningful.
PSC_lm |> car::durbinWatsonTest()
## lag Autocorrelation D-W Statistic p-value
## 1 0.09346752 1.813062 0
## Alternative hypothesis: rho != 0
# Studentized Breusch-Pagan test on the OLS fit (checks for non-constant
# residual variance).
PSC_lm |> lmtest::bptest()
##
## studentized Breusch-Pagan test
##
## data: PSC_lm
## BP = 21.246, df = 12, p-value = 0.04689
# Redraw only the second plot.lm() panel (the normal Q-Q plot of residuals).
plot(PSC_lm, which = 2)

# Generalised variance inflation factors for the OLS predictors.
PSC_lm |> car::vif()
## GVIF Df GVIF^(1/(2*Df))
## ULOCALE 1.080477 11 1.003525
## TOTFRL 1.080477 1 1.039460
# Robust linear model (MASS::rlm with its default settings) on the same
# response and predictors as the OLS fit, then print its coefficient summary.
psc_model_rlm <- rlm(
  STUTERATIO ~ TOTFRL + ULOCALE,
  data = final_PSC_data
)
psc_model_rlm |> summary()
##
## Call: rlm(formula = STUTERATIO ~ TOTFRL + ULOCALE, data = final_PSC_data)
## Residuals:
## Min 1Q Median 3Q Max
## -30.762 -2.659 -0.296 2.674 1845.349
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 14.0086 0.0429 326.5148
## TOTFRL 0.0037 0.0001 71.6560
## ULOCALE12-City: Mid-size 0.4766 0.0704 6.7668
## ULOCALE13-City: Small 0.2014 0.0688 2.9264
## ULOCALE21-Suburb: Large 0.4783 0.0471 10.1502
## ULOCALE22-Suburb: Mid-size 0.6554 0.0892 7.3447
## ULOCALE23-Suburb: Small 0.7408 0.1081 6.8502
## ULOCALE31-Town: Fringe 0.6280 0.0926 6.7839
## ULOCALE32-Town: Distant 0.0134 0.0707 0.1891
## ULOCALE33-Town: Remote -0.2772 0.0829 -3.3430
## ULOCALE41-Rural: Fringe 0.2804 0.0569 4.9278
## ULOCALE42-Rural: Distant -1.2123 0.0601 -20.1608
## ULOCALE43-Rural: Remote -2.5890 0.0709 -36.5317
##
## Residual standard error: 3.951 on 85050 degrees of freedom
# Log of the student-teacher ratio, stored as a standalone vector.
# NOTE(review): nothing in the visible code reads STUTERATIO_log; the GLM below
# uses the untransformed STUTERATIO with a log link instead -- confirm whether
# this vector is still needed.
STUTERATIO_log <- log(final_PSC_data[["STUTERATIO"]])

# Gaussian GLM with a log link: the log of the expected student-teacher ratio
# is modelled as linear in locale and TOTFRL.
PSC_glm <- glm(
  STUTERATIO ~ ULOCALE + TOTFRL,
  data = final_PSC_data,
  family = gaussian(link = log)
)
PSC_glm |> summary()
##
## Call:
## glm(formula = STUTERATIO ~ ULOCALE + TOTFRL, family = gaussian(link = log),
## data = final_PSC_data)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.765e+00 9.531e-03 290.077 <2e-16 ***
## ULOCALE12-City: Mid-size 5.922e-03 1.587e-02 0.373 0.7090
## ULOCALE13-City: Small 4.621e-04 1.563e-02 0.030 0.9764
## ULOCALE21-Suburb: Large 9.767e-04 1.067e-02 0.092 0.9270
## ULOCALE22-Suburb: Mid-size -7.633e-03 2.046e-02 -0.373 0.7091
## ULOCALE23-Suburb: Small -3.048e-03 2.478e-02 -0.123 0.9021
## ULOCALE31-Town: Fringe 6.964e-03 2.110e-02 0.330 0.7414
## ULOCALE32-Town: Distant -2.356e-02 1.642e-02 -1.435 0.1514
## ULOCALE33-Town: Remote 2.740e-02 1.858e-02 1.474 0.1404
## ULOCALE41-Rural: Fringe -3.199e-02 1.319e-02 -2.425 0.0153 *
## ULOCALE42-Rural: Distant -1.474e-01 1.511e-02 -9.758 <2e-16 ***
## ULOCALE43-Rural: Remote -2.205e-01 1.924e-02 -11.459 <2e-16 ***
## TOTFRL 1.201e-04 9.820e-06 12.229 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 252.8254)
##
## Null deviance: 21641315 on 85062 degrees of freedom
## Residual deviance: 21502480 on 85050 degrees of freedom
## AIC: 712040
##
## Number of Fisher Scoring iterations: 8