Research Paper Final - Show Your Work

Loading libraries, setting working directory, and removing NAs.

library(readxl)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(lmtest)

## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

setwd("~/Desktop/UTSA/Quantitative Methods/RStudio")

district <-read_excel("district.xls")

Clean_District<-district |> select(DISTNAME, DPSTURNR, DAGC4X21R, DPETECOP) |> drop_na()

Renaming variables for ease.

Clean_District <- Clean_District |> rename(Grad_Rate_4 = DAGC4X21R,Turnover = DPSTURNR,Dist_Name = DISTNAME, Econ_Status = DPETECOP)

Building my regression model using my dependent and independent variables. And summary.

Gradrate_model <- lm(Grad_Rate_4 ~ Turnover + Econ_Status, data = Clean_District)

summary(Gradrate_model)

## 
## Call:
## lm(formula = Grad_Rate_4 ~ Turnover + Econ_Status, data = Clean_District)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -95.229  -1.329   2.218   4.891  12.638 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 103.77027    1.34060  77.406  < 2e-16 ***
## Turnover     -0.06330    0.04271  -1.482    0.139    
## Econ_Status  -0.14187    0.01921  -7.386 3.04e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.44 on 1068 degrees of freedom
## Multiple R-squared:  0.05751,    Adjusted R-squared:  0.05575 
## F-statistic: 32.59 on 2 and 1068 DF,  p-value: 1.834e-14

Descriptive Statistics.

summary(Clean_District$Grad_Rate_4)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.00   93.20   96.90   93.89  100.00  100.00

summary(Clean_District$Turnover)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   14.80   19.00   20.55   25.20   80.00

summary(Clean_District$Econ_Status)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.20   48.00   61.50   60.47   75.20  100.00

mean(Clean_District$Grad_Rate_4)

## [1] 93.89094

sd(Clean_District$Grad_Rate_4)

## [1] 12.79782

mean(Clean_District$Turnover)

## [1] 20.54734

sd(Clean_District$Turnover)

## [1] 9.144077

mean(Clean_District$Econ_Status)

## [1] 60.46695

sd(Clean_District$Econ_Status)

## [1] 20.33193

Assumptions: Linearity of variables

plot(Gradrate_model, which = 1)

Assumptions: Normality of Residuals

plot(Gradrate_model, which = 2)

shapiro.test(residuals(Gradrate_model))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(Gradrate_model)
## W = 0.46804, p-value < 2.2e-16

Assumptions: Independence of Errors

dwtest(Gradrate_model)

## 
##  Durbin-Watson test
## 
## data:  Gradrate_model
## DW = 1.7391, p-value = 9.189e-06
## alternative hypothesis: true autocorrelation is greater than 0

Assumptions: Homoscedasticity

plot(Gradrate_model, which = 3)

bptest(Gradrate_model)

## 
##  studentized Breusch-Pagan test
## 
## data:  Gradrate_model
## BP = 17.526, df = 2, p-value = 0.0001564

Research Paper Final - Show Your Work

2025-12-10