library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(readxl)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
# Work from the class folder, then load the district spreadsheet from it.
# NOTE(review): setwd() ties the script to one machine's folder layout;
# consider a project-relative path instead.
setwd(dir = "~/Desktop/Monday Class")
district <- read_excel(path = "district.xls")
# Descriptive statistics for the dependent variable (grades 9-12 dropout rate)
summary(district[["DA0912DR21R"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1.000 0.000 0.400 1.243 1.400 50.500 112
# NOTE(review): the minimum of -1 is not a plausible rate; it may be a
# masking/suppression code -- confirm against the data dictionary.
# Descriptive statistics for the independent variable (teacher turnover rate)
summary(district[["DPSTURNR"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 14.80 19.50 21.51 25.90 100.00 7
# Drop every row that is missing either model variable: the dependent
# variable (DA0912DR21R) or the independent variable (DPSTURNR).
district <- district %>%
  filter(!is.na(DA0912DR21R), !is.na(DPSTURNR))
# Fit the first (simple linear regression) model.
# Dependent variable: grades 9-12 dropout rate (DA0912DR21R).
# Independent variable: teacher turnover rate (DPSTURNR).
model_simple <- lm(formula = DA0912DR21R ~ DPSTURNR, data = district)
# Assumption 1: Linearity
# Scatterplot of dropout rate against teacher turnover as a visual check
ggplot(data = district, mapping = aes(x = DPSTURNR, y = DA0912DR21R)) +
  geom_point()
# Rainbow test as a formal check of the linear specification
raintest(model_simple)
##
## Rainbow test
##
## data: model_simple
## Rain = 1.5888, df1 = 546, df2 = 544, p-value = 3.789e-08
# Assumption 2: Independence of errors
# Durbin-Watson test for autocorrelation in the model residuals
# NOTE(review): car appears to simulate this p-value by default, so it may
# differ slightly between runs unless a seed is set -- confirm.
dwtdistrict <- durbinWatsonTest(model = model_simple)
print(dwtdistrict)
## lag Autocorrelation D-W Statistic p-value
## 1 0.02109722 1.957647 0.346
## Alternative hypothesis: rho != 0
# Assumption 3: Homoscedasticity
# Scale-Location plot (which = 3): sqrt(|standardized residuals|) against
# fitted values; a roughly flat trend suggests constant error variance.
# (which = 2 is the Normal Q-Q plot, so it is used under Assumption 4.)
plot(model_simple, which = 3)
# Breusch-Pagan test as a formal check of constant error variance
bptestdistrict <- bptest(model_simple)
bptestdistrict
##
## studentized Breusch-Pagan test
##
## data: model_simple
## BP = 0.79742, df = 1, p-value = 0.3719
# Assumption 4: Normality
# Normal Q-Q plot (which = 2) of the residuals as a visual check
plot(model_simple, which = 2)
# Shapiro-Wilk test on the model residuals
shapiro.test(model_simple$residuals)
##
## Shapiro-Wilk normality test
##
## data: model_simple$residuals
## W = 0.35773, p-value < 2.2e-16
# Regression results: coefficient table, residual summary, and overall fit
print(summary(model_simple))
##
## Call:
## lm(formula = DA0912DR21R ~ DPSTURNR, data = district)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.727 -1.143 -0.797 0.183 49.326
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.72294 0.24291 2.976 0.00298 **
## DPSTURNR 0.02521 0.01065 2.366 0.01815 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.318 on 1090 degrees of freedom
## Multiple R-squared: 0.00511, Adjusted R-squared: 0.004197
## F-statistic: 5.598 on 1 and 1090 DF, p-value: 0.01815