library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(stargazer)
## 
## Please cite as: 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(ggplot2)
library(ggrepel)

# Load the district-level spreadsheet used throughout this analysis.
District_data_Current <- read_excel("22-23 district.xls")

# Add readable aliases for the coded spreadsheet columns *inside* the data
# frame. The models and plots below are all called with
# `data = District_data_Current`; without these columns that argument supplies
# nothing and R silently falls back to same-named vectors in the global
# environment, so any later filtering of the data frame would be ignored.
# Using mutate() also fails loudly if an expected source column is missing.
District_data_Current <- dplyr::mutate(
  District_data_Current,
  Dist_Name = DISTNAME,
  Percent_Meets_STAAR = DDA00A001223R,
  Ratio = DPSTKIDR,
  Turnover = DPSTURNR,
  Experience_Yrs = DPSTEXPA,
  Avg_Salary = DPSTTOSA,
  Athletics = DPFPAATHP
)

# Stand-alone copies, kept so that any code referring to the bare vectors
# continues to work.
Dist_Name <- District_data_Current$Dist_Name
Percent_Meets_STAAR <- District_data_Current$Percent_Meets_STAAR
Ratio <- District_data_Current$Ratio
Turnover <- District_data_Current$Turnover
Experience_Yrs <- District_data_Current$Experience_Yrs
Avg_Salary <- District_data_Current$Avg_Salary
Athletics <- District_data_Current$Athletics

Evaluating Texas school district data to determine the extent of the effect that Salary, Experience, Turnover, and Student-Teacher ratios have on STAAR test performance, as measured by the percentage of students who meet expectations on the STAAR exam.

The following analysis and models utilize data from the TEA Snapshot for all Texas Districts during the 2022-2023 school year.

# Baseline OLS model: STAAR performance regressed on the five district-level
# predictors, followed by the coefficient table and fit statistics.
lm_Current_Dist <- lm(
  formula = Percent_Meets_STAAR ~
    Avg_Salary + Ratio + Turnover + Experience_Yrs + Athletics,
  data = District_data_Current
)
summary(lm_Current_Dist)
## 
## Call:
## lm(formula = Percent_Meets_STAAR ~ Avg_Salary + Ratio + Turnover + 
##     Experience_Yrs + Athletics, data = District_data_Current)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -58.071  -8.215  -0.903   6.947  54.838 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.695e+01  4.584e+00  10.242  < 2e-16 ***
## Avg_Salary      8.819e-05  7.034e-05   1.254 0.210148    
## Ratio          -8.668e-02  1.246e-01  -0.696 0.486786    
## Turnover       -4.223e-01  3.563e-02 -11.853  < 2e-16 ***
## Experience_Yrs  4.858e-01  1.289e-01   3.767 0.000173 ***
## Athletics       5.043e-01  2.285e-01   2.207 0.027531 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.27 on 1194 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.2177, Adjusted R-squared:  0.2144 
## F-statistic: 66.46 on 5 and 1194 DF,  p-value: < 2.2e-16

R-squared: 0.2177 → the model explains ~21.8% of the variance in STAAR scores.

F-statistic is significant meaning the Model as a whole is useful.

#Teacher turnover and years of experience have strong, significant associations with STAAR performance with Turnover being the strongest negative predictor in the model

#Salary shows a weak, non-significant relationship — explore further? For every $1 increase in average salary, the STAAR percentage increases by about 0.000088 percentage points, or roughly 0.088 percentage points for every $1,000. The p-value of 0.2101 is not statistically significant (above 0.05), meaning salary does not have a strong independent effect in this model. Although the estimated relationship is positive, we cannot confidently say that salary alone has a meaningful impact after controlling for the other variables.

#Athletics funding appears to play a modestly positive role.

#Class size (Ratio) does not show a strong effect here

# Standard lm diagnostic plots: residuals vs fitted (1), normal Q-Q (2),
# scale-location (3) and residuals vs leverage (5). These are the defaults,
# spelled out here for clarity.
plot(lm_Current_Dist, which = c(1, 2, 3, 5))

# Durbin-Watson test for first-order autocorrelation in the residuals.
# The p-value is estimated by bootstrap simulation, so it can differ slightly
# from run to run.
car::durbinWatsonTest(lm_Current_Dist)
##  lag Autocorrelation D-W Statistic p-value
##    1      0.05959918      1.880143   0.056
##  Alternative hypothesis: rho != 0

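The p-value for the Durbin-Watson test (0.056) is slightly above 0.05, so we fail to reject the null hypothesis that the errors are free of first-order autocorrelation. The D-W statistic (1.88) is close to 2, which likewise indicates little to no autocorrelation in the model. Because the p-value is borderline, the data may still be suited to a mixed-effects model, which is explored below.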

# Variance inflation factors for each predictor in the baseline model
# (multicollinearity check).
car::vif(lm_Current_Dist)
##     Avg_Salary          Ratio       Turnover Experience_Yrs      Athletics 
##       1.216539       1.267405       1.400427       1.589651       1.281819

#All VIF values are below 2, so none of the predictors is strongly correlated with the others. For research purposes this means the assumption of “no multicollinearity” is not violated.

# Same predictors as the baseline model, plus an Avg_Salary x Turnover
# interaction (`*` adds both main effects and their product term).
Salary_Turnover_interaction <- lm(
  formula = Percent_Meets_STAAR ~
    Avg_Salary * Turnover + Ratio + Experience_Yrs + Athletics,
  data = District_data_Current
)
summary(Salary_Turnover_interaction)
## 
## Call:
## lm(formula = Percent_Meets_STAAR ~ Avg_Salary * Turnover + Ratio + 
##     Experience_Yrs + Athletics, data = District_data_Current)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -60.851  -8.106  -0.729   7.138  53.129 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.271e+01  7.613e+00   2.982 0.002919 ** 
## Avg_Salary           5.500e-04  1.356e-04   4.056 5.31e-05 ***
## Turnover             4.998e-01  2.347e-01   2.130 0.033395 *  
## Ratio               -1.275e-01  1.243e-01  -1.026 0.305041    
## Experience_Yrs       4.403e-01  1.287e-01   3.422 0.000643 ***
## Athletics            5.827e-01  2.280e-01   2.556 0.010714 *  
## Avg_Salary:Turnover -1.719e-05  4.324e-06  -3.975 7.47e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.2 on 1193 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.2279, Adjusted R-squared:  0.224 
## F-statistic:  58.7 on 6 and 1193 DF,  p-value: < 2.2e-16

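Avg_Salary is positive and highly significant (p < 0.001), showing that higher salaries are associated with higher STAAR performance. Because the model includes an Avg_Salary × Turnover interaction, this coefficient is the estimated salary effect when Turnover is zero.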

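Turnover is also significant (p ≈ 0.03), with a positive coefficient. Read on its own this suggests that higher turnover predicts higher scores, which is misleading given the strong negative association in the previous model; with the interaction term present, this coefficient describes the effect of turnover only when Avg_Salary is zero, so it must be interpreted together with the interaction.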

The Avg_Salary × Turnover interaction is negative and highly significant (p < 0.001), suggesting that the positive effect of salary on STAAR scores diminishes as turnover increases or, conversely, that high turnover reduces the benefits of higher salary.

# Simple regression of STAAR performance on average salary alone.
# lm() already drops rows with missing values via its na.action (na.omit
# unless options were changed), as the "observations deleted due to
# missingness" line of the summary shows. Piping the fitted model into
# na.omit() returned the model object unchanged, so that no-op step is removed.
model_salary_only <- lm(
  Percent_Meets_STAAR ~ Avg_Salary,
  data = District_data_Current
)
summary(model_salary_only)
## 
## Call:
## lm(formula = Percent_Meets_STAAR ~ Avg_Salary, data = District_data_Current)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -48.793  -9.611   0.150   8.774  41.002 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.501e+01  4.003e+00   8.747  < 2e-16 ***
## Avg_Salary  2.191e-04  7.170e-05   3.056  0.00229 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.85 on 1202 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.00771,    Adjusted R-squared:  0.006885 
## F-statistic:  9.34 on 1 and 1202 DF,  p-value: 0.002292

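#Statistically significant (p ≈ 0.002) positive relationship between salary and STAAR test scores when evaluated independently, but a weak one: salary alone explains less than 1% of the variance in scores (R-squared ≈ 0.008).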

# Simple regression of STAAR performance on teacher turnover alone.
# lm() already drops rows with missing values via its na.action (na.omit
# unless options were changed). Piping the fitted model into na.omit()
# returned the model object unchanged, so that no-op step is removed.
model_turnover_only <- lm(
  Percent_Meets_STAAR ~ Turnover,
  data = District_data_Current
)
summary(model_turnover_only)
## 
## Call:
## lm(formula = Percent_Meets_STAAR ~ Turnover, data = District_data_Current)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.247  -8.057  -0.720   7.378  55.138 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 60.32512    0.84269   71.59   <2e-16 ***
## Turnover    -0.52177    0.03046  -17.13   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.41 on 1198 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.1968, Adjusted R-squared:  0.1961 
## F-statistic: 293.5 on 1 and 1198 DF,  p-value: < 2.2e-16

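#Significant p-value (< 2e-16), reinforcing the strong negative association between turnover and STAAR scores when turnover is evaluated on its own; turnover alone explains about 20% of the variance (R-squared ≈ 0.197).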

library(ggplot2)
# Scatter plot of STAAR performance against average salary, overlaid with a
# fitted linear trend and its confidence band.
ggplot(
  data = District_data_Current,
  mapping = aes(x = Avg_Salary, y = Percent_Meets_STAAR)
) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Average Teacher Salary",
    y = "Percentage of Students Meet or Above STAAR",
    title = "Impact of Teacher Salary on STAAR Score"
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Scatter plot of STAAR performance against teacher turnover, overlaid with a
# fitted linear trend and its confidence band.
ggplot(
  data = District_data_Current,
  mapping = aes(x = Turnover, y = Percent_Meets_STAAR)
) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    x = "Teacher Turnover Rate",
    y = "Percentage of Students Meet or Above STAAR",
    title = "Impact of Turnover on STAAR Score"
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 9 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
# Mixed-effects counterpart of the baseline model: same fixed effects plus a
# random intercept for each Dist_Name value.
# NOTE(review): the summary reports 1189 Dist_Name groups for 1200
# observations, so almost every group holds a single row. Confirm that a
# district-level random intercept is meaningful for this data.
hlm_model_Current <- lmer(
  formula = Percent_Meets_STAAR ~
    Avg_Salary + Ratio + Turnover + Experience_Yrs + Athletics +
    (1 | Dist_Name),
  data = District_data_Current
)
## Warning: Some predictor variables are on very different scales: consider
## rescaling
summary(hlm_model_Current)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## Percent_Meets_STAAR ~ Avg_Salary + Ratio + Turnover + Experience_Yrs +  
##     Athletics + (1 | Dist_Name)
##    Data: District_data_Current
## 
## REML criterion at convergence: 9443.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7335 -0.5284 -0.0559  0.4476  3.5385 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Dist_Name (Intercept) 56.95    7.546   
##  Residual              93.61    9.675   
## Number of obs: 1200, groups:  Dist_Name, 1189
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)     4.686e+01  4.575e+00  10.241
## Avg_Salary      8.723e-05  7.025e-05   1.242
## Ratio          -8.079e-02  1.245e-01  -0.649
## Turnover       -4.232e-01  3.562e-02 -11.882
## Experience_Yrs  4.941e-01  1.288e-01   3.835
## Athletics       4.998e-01  2.279e-01   2.193
## 
## Correlation of Fixed Effects:
##             (Intr) Avg_Sl Ratio  Turnvr Expr_Y
## Avg_Salary  -0.786                            
## Ratio       -0.187 -0.295                     
## Turnover    -0.516  0.207 -0.018              
## Expernc_Yrs -0.364 -0.100  0.233  0.427       
## Athletics   -0.275  0.190  0.138  0.077 -0.287
## fit warnings:
## Some predictor variables are on very different scales: consider rescaling

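#Random Effects: Variance (Intercept for Dist_Name): 56.95 = there’s noticeable variation in STAAR scores between districts, which would support the use of a mixed model. Note, however, that the model has 1189 groups for 1200 observations, so almost every district contributes a single row and the between-district variance is hard to separate from the residual variance; interpret this estimate with caution.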

Residual variance: 93.61 = more variation remains within districts than between them

#Fixed Effects: Turnover (-0.4232): Strong negative effect on STAAR scores, highly significant. Experience_Yrs (0.4941): Positive, significant effect. Athletics (0.4998): Also positive and significant. Avg_Salary (0.00008723) and Ratio (-0.0808): Not statistically significant

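#Correlation of Fixed Effects: these are correlations between the coefficient estimates, not between the variables themselves. The Avg_Salary estimate is positively correlated with the Turnover estimate (0.207) and weakly negatively correlated with the Experience_Yrs estimate (-0.100).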

# Diagnostic plot for the fitted mixed model (by default, residuals against
# fitted values).
plot(hlm_model_Current)

# HTML regression table for the salary x turnover interaction model.
stargazer(Salary_Turnover_interaction, type = "html")
Dependent variable:
Percent_Meets_STAAR
Avg_Salary 0.001***
(0.0001)
Turnover 0.500**
(0.235)
Ratio -0.128
(0.124)
Experience_Yrs 0.440***
(0.129)
Athletics 0.583**
(0.228)
Avg_Salary:Turnover -0.00002***
(0.00000)
Constant 22.705***
(7.613)
Observations 1,200
R2 0.228
Adjusted R2 0.224
Residual Std. Error 12.197 (df = 1193)
F Statistic 58.699*** (df = 6; 1193)
Note: p<0.1; p<0.05; p<0.01
# HTML regression table for the baseline OLS model.
stargazer(lm_Current_Dist, type = "html")
Dependent variable:
Percent_Meets_STAAR
Avg_Salary 0.0001
(0.0001)
Ratio -0.087
(0.125)
Turnover -0.422***
(0.036)
Experience_Yrs 0.486***
(0.129)
Athletics 0.504**
(0.229)
Constant 46.950***
(4.584)
Observations 1,200
R2 0.218
Adjusted R2 0.214
Residual Std. Error 12.272 (df = 1194)
F Statistic 66.456*** (df = 5; 1194)
Note: p<0.1; p<0.05; p<0.01
# Side-by-side HTML table comparing the baseline OLS model (1), the
# mixed-effects model (2) and the interaction model (3).
stargazer(
  lm_Current_Dist,
  hlm_model_Current,
  Salary_Turnover_interaction,
  type = "html"
)
Dependent variable:
Percent_Meets_STAAR
OLS linear OLS
mixed-effects
(1) (2) (3)
Avg_Salary 0.0001 0.0001 0.001***
(0.0001) (0.0001) (0.0001)
Ratio -0.087 -0.081 -0.128
(0.125) (0.124) (0.124)
Turnover -0.422*** -0.423*** 0.500**
(0.036) (0.036) (0.235)
Experience_Yrs 0.486*** 0.494*** 0.440***
(0.129) (0.129) (0.129)
Athletics 0.504** 0.500** 0.583**
(0.229) (0.228) (0.228)
Avg_Salary:Turnover -0.00002***
(0.00000)
Constant 46.950*** 46.855*** 22.705***
(4.584) (4.575) (7.613)
Observations 1,200 1,200 1,200
R2 0.218 0.228
Adjusted R2 0.214 0.224
Log Likelihood -4,721.874
Akaike Inf. Crit. 9,459.748
Bayesian Inf. Crit. 9,500.469
Residual Std. Error 12.272 (df = 1194) 12.197 (df = 1193)
F Statistic 66.456*** (df = 5; 1194) 58.699*** (df = 6; 1193)
Note: p<0.1; p<0.05; p<0.01
# Auxiliary model: how much of teacher turnover is accounted for by experience
# and average salary.
lm_Personnel_Variables <- lm(
  formula = Turnover ~ Experience_Yrs + Avg_Salary,
  data = District_data_Current
)
summary(lm_Personnel_Variables)
## 
## Call:
## lm(formula = Turnover ~ Experience_Yrs + Avg_Salary, data = District_data_Current)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.230  -6.002  -1.804   4.832  63.454 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     6.332e+01  3.047e+00  20.777  < 2e-16 ***
## Experience_Yrs -1.658e+00  8.473e-02 -19.565  < 2e-16 ***
## Avg_Salary     -3.424e-04  5.269e-05  -6.498 1.19e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.15 on 1198 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.2691, Adjusted R-squared:  0.2679 
## F-statistic: 220.5 on 2 and 1198 DF,  p-value: < 2.2e-16