library(car)
## Loading required package: carData
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
# Read the district-level CSV; readr guesses the column types.
district_data <- readr::read_csv(file = "r4data.csv")
## Rows: 450 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Year, distname, geotype_new
## dbl (20): total teachers, total_new_hires, new_hires_per, std_all, std_all_p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Split the outcome and the nine candidate predictors into separate tables.
dep_var <- district_data[, "turnover_rate"]
indie_vars <- district_data[, c(
  "std_all_per", "intern_per", "other_temp_per", "oos_std_per",
  "lag_starter_per", "no_cert_per", "reenterer_per", "emer_per",
  "new_hires_per"
)]
summary(dep_var)
## turnover_rate
## Min. : 6.337
## 1st Qu.:13.100
## Median :15.800
## Mean :17.705
## 3rd Qu.:20.375
## Max. :51.800
# Quartiles, mean and range for each of the nine predictor columns.
summary(indie_vars)
## std_all_per intern_per other_temp_per oos_std_per
## Min. : 0.00 Min. : 0.00 Min. : 0.0000 Min. : 0.000
## 1st Qu.:13.04 1st Qu.: 22.92 1st Qu.: 0.6475 1st Qu.: 0.000
## Median :17.55 Median : 30.59 Median : 4.0250 Median : 1.800
## Mean :17.47 Mean : 31.46 Mean : 4.3983 Mean : 2.293
## 3rd Qu.:22.53 3rd Qu.: 39.13 3rd Qu.: 6.5125 3rd Qu.: 3.225
## Max. :51.16 Max. :100.00 Max. :25.0000 Max. :25.000
## lag_starter_per no_cert_per reenterer_per emer_per
## Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.0000
## 1st Qu.: 2.365 1st Qu.: 1.417 1st Qu.:25.07 1st Qu.: 0.0000
## Median : 4.225 Median : 3.585 Median :31.79 Median : 0.0000
## Mean : 5.516 Mean : 5.351 Mean :32.72 Mean : 0.8016
## 3rd Qu.: 7.447 3rd Qu.: 7.122 3rd Qu.:38.54 3rd Qu.: 0.3900
## Max. :50.000 Max. :66.670 Max. :80.00 Max. :42.8600
## new_hires_per
## Min. : 2.480
## 1st Qu.: 8.662
## Median :10.910
## Mean :12.232
## 3rd Qu.:14.168
## Max. :47.240
# Outcome and predictors together in one table for joint descriptives.
all_vars <- district_data[, c(
  "turnover_rate",
  "std_all_per", "intern_per", "other_temp_per", "oos_std_per",
  "lag_starter_per", "no_cert_per", "reenterer_per", "emer_per",
  "new_hires_per"
)]
summary(all_vars)
## turnover_rate std_all_per intern_per other_temp_per
## Min. : 6.337 Min. : 0.00 Min. : 0.00 Min. : 0.0000
## 1st Qu.:13.100 1st Qu.:13.04 1st Qu.: 22.92 1st Qu.: 0.6475
## Median :15.800 Median :17.55 Median : 30.59 Median : 4.0250
## Mean :17.705 Mean :17.47 Mean : 31.46 Mean : 4.3983
## 3rd Qu.:20.375 3rd Qu.:22.53 3rd Qu.: 39.13 3rd Qu.: 6.5125
## Max. :51.800 Max. :51.16 Max. :100.00 Max. :25.0000
## oos_std_per lag_starter_per no_cert_per reenterer_per
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 0.000 1st Qu.: 2.365 1st Qu.: 1.417 1st Qu.:25.07
## Median : 1.800 Median : 4.225 Median : 3.585 Median :31.79
## Mean : 2.293 Mean : 5.516 Mean : 5.351 Mean :32.72
## 3rd Qu.: 3.225 3rd Qu.: 7.447 3rd Qu.: 7.122 3rd Qu.:38.54
## Max. :25.000 Max. :50.000 Max. :66.670 Max. :80.00
## emer_per new_hires_per
## Min. : 0.0000 Min. : 2.480
## 1st Qu.: 0.0000 1st Qu.: 8.662
## Median : 0.0000 Median :10.910
## Mean : 0.8016 Mean :12.232
## 3rd Qu.: 0.3900 3rd Qu.:14.168
## Max. :42.8600 Max. :47.240
# Extended descriptives (counts of zero/NA values, SE, CI, variance, CV) from
# pastecs; called with :: because pastecs is not attached by the library()
# calls at the top of this script.
pastecs::stat.desc(all_vars)
## turnover_rate std_all_per intern_per other_temp_per
## nbr.val 450.0000000 450.0000000 4.500000e+02 450.0000000
## nbr.null 0.0000000 26.0000000 9.000000e+00 112.0000000
## nbr.na 0.0000000 0.0000000 0.000000e+00 0.0000000
## min 6.3370000 0.0000000 0.000000e+00 0.0000000
## max 51.8000000 51.1600000 1.000000e+02 25.0000000
## range 45.4630000 51.1600000 1.000000e+02 25.0000000
## sum 7967.2280000 7860.7300000 1.415512e+04 1979.2200000
## median 15.8000000 17.5450000 3.059000e+01 4.0250000
## mean 17.7049511 17.4682889 3.145582e+01 4.3982667
## SE.mean 0.3262882 0.3967071 5.966893e-01 0.1846020
## CI.mean.0.95 0.6412415 0.7796332 1.172651e+00 0.3627911
## var 47.9087826 70.8194414 1.602172e+02 15.3350460
## std.dev 6.9216171 8.4154288 1.265769e+01 3.9159987
## coef.var 0.3909425 0.4817546 4.023959e-01 0.8903505
## oos_std_per lag_starter_per no_cert_per reenterer_per
## nbr.val 450.0000000 450.0000000 450.0000000 4.500000e+02
## nbr.null 142.0000000 65.0000000 89.0000000 3.000000e+00
## nbr.na 0.0000000 0.0000000 0.0000000 0.000000e+00
## min 0.0000000 0.0000000 0.0000000 0.000000e+00
## max 25.0000000 50.0000000 66.6700000 8.000000e+01
## range 25.0000000 50.0000000 66.6700000 8.000000e+01
## sum 1031.7900000 2482.0400000 2407.8300000 1.472253e+04
## median 1.8000000 4.2250000 3.5850000 3.179000e+01
## mean 2.2928667 5.5156444 5.3507333 3.271673e+01
## SE.mean 0.1282076 0.2575389 0.3079684 5.411812e-01
## CI.mean.0.95 0.2519614 0.5061312 0.6052384 1.063563e+00
## var 7.3967314 29.8468237 42.6800429 1.317947e+02
## std.dev 2.7196933 5.4632247 6.5329965 1.148019e+01
## coef.var 1.1861541 0.9904962 1.2209535 3.508965e-01
## emer_per new_hires_per
## nbr.val 450.0000000 450.0000000
## nbr.null 299.0000000 0.0000000
## nbr.na 0.0000000 0.0000000
## min 0.0000000 2.4800000
## max 42.8600000 47.2400000
## range 42.8600000 44.7600000
## sum 360.7300000 5504.2700000
## median 0.0000000 10.9100000
## mean 0.8016222 12.2317111
## SE.mean 0.1467364 0.2758708
## CI.mean.0.95 0.2883754 0.5421583
## var 9.6892052 34.2471162
## std.dev 3.1127488 5.8521036
## coef.var 3.8830620 0.4784370
# Baseline OLS fit: turnover rate on all nine predictors, main effects only.
lm_model <- lm(
  turnover_rate ~
    std_all_per + intern_per + other_temp_per + oos_std_per +
    lag_starter_per + no_cert_per + reenterer_per + emer_per +
    new_hires_per,
  data = district_data
)
summary(lm_model)
##
## Call:
## lm(formula = turnover_rate ~ std_all_per + intern_per + other_temp_per +
## oos_std_per + lag_starter_per + no_cert_per + reenterer_per +
## emer_per + new_hires_per, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.973 -2.128 -0.242 1.811 12.227
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -274.36765 1918.33604 -0.143 0.886
## std_all_per 2.71881 19.18370 0.142 0.887
## intern_per 2.83087 19.18392 0.148 0.883
## other_temp_per 2.69553 19.18251 0.141 0.888
## oos_std_per 2.75144 19.18213 0.143 0.886
## lag_starter_per 2.89853 19.18442 0.151 0.880
## no_cert_per 2.83716 19.18388 0.148 0.882
## reenterer_per 2.81565 19.18345 0.147 0.883
## emer_per 2.99491 19.18349 0.156 0.876
## new_hires_per 0.95491 0.02972 32.129 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.31 on 440 degrees of freedom
## Multiple R-squared: 0.7758, Adjusted R-squared: 0.7712
## F-statistic: 169.2 on 9 and 440 DF, p-value: < 2.2e-16
# Variance inflation factors for the main-effects model (vif() comes from car).
vif(lm_model)
## std_all_per intern_per other_temp_per oos_std_per lag_starter_per
## 1.067775e+06 2.415721e+06 2.311846e+05 1.115055e+05 4.500476e+05
## no_cert_per reenterer_per emer_per new_hires_per
## 6.435180e+05 1.987075e+06 1.460853e+05 1.239437e+00
# OLS fit in which new_hires_per is interacted with every other predictor.
lm_inter_model <- lm(
  turnover_rate ~
    new_hires_per * std_all_per +
    new_hires_per * intern_per +
    new_hires_per * other_temp_per +
    new_hires_per * oos_std_per +
    new_hires_per * lag_starter_per +
    new_hires_per * no_cert_per +
    new_hires_per * reenterer_per +
    new_hires_per * emer_per,
  data = district_data
)
summary(lm_inter_model)
##
## Call:
## lm(formula = turnover_rate ~ new_hires_per * std_all_per + new_hires_per *
## intern_per + new_hires_per * other_temp_per + new_hires_per *
## oos_std_per + new_hires_per * lag_starter_per + new_hires_per *
## no_cert_per + new_hires_per * reenterer_per + new_hires_per *
## emer_per, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.9131 -2.1332 -0.3303 1.6913 13.4939
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3136.235 4868.284 -0.644 0.520
## new_hires_per 200.248 382.419 0.524 0.601
## std_all_per 31.481 48.683 0.647 0.518
## intern_per 31.401 48.684 0.645 0.519
## other_temp_per 31.408 48.676 0.645 0.519
## oos_std_per 31.301 48.687 0.643 0.521
## lag_starter_per 31.451 48.688 0.646 0.519
## no_cert_per 31.539 48.680 0.648 0.517
## reenterer_per 31.411 48.682 0.645 0.519
## emer_per 31.532 48.672 0.648 0.517
## new_hires_per:std_all_per -2.006 3.824 -0.525 0.600
## new_hires_per:intern_per -1.988 3.824 -0.520 0.603
## new_hires_per:other_temp_per -2.000 3.824 -0.523 0.601
## new_hires_per:oos_std_per -1.989 3.824 -0.520 0.603
## new_hires_per:lag_starter_per -1.987 3.825 -0.520 0.604
## new_hires_per:no_cert_per -1.998 3.824 -0.522 0.602
## new_hires_per:reenterer_per -1.991 3.824 -0.521 0.603
## new_hires_per:emer_per -1.988 3.823 -0.520 0.603
##
## Residual standard error: 3.233 on 432 degrees of freedom
## Multiple R-squared: 0.79, Adjusted R-squared: 0.7818
## F-statistic: 95.61 on 17 and 432 DF, p-value: < 2.2e-16
# VIFs for the interaction model. car reports that the model contains
# higher-order terms and suggests type = 'predictor' (see ?vif) in that case.
vif(lm_inter_model)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
## new_hires_per std_all_per
## 215082818.9 7207783.5
## intern_per other_temp_per
## 16307331.6 1560344.5
## oos_std_per lag_starter_per
## 752947.3 3038364.6
## no_cert_per reenterer_per
## 4343428.7 13413159.1
## emer_per new_hires_per:std_all_per
## 985709.0 7737093.5
## new_hires_per:intern_per new_hires_per:other_temp_per
## 41631515.0 3592898.9
## new_hires_per:oos_std_per new_hires_per:lag_starter_per
## 2263915.2 4988271.2
## new_hires_per:no_cert_per new_hires_per:reenterer_per
## 17256936.3 38100844.8
## new_hires_per:emer_per
## 5784502.3
# --- Assumption Testing ---
# Linearity: residuals-vs-fitted plot (which = 1) for each model, then the
# rainbow test from lmtest on each model.
plot(lm_model, which = 1)

plot(lm_inter_model, which = 1)

raintest(lm_model)
##
## Rainbow test
##
## data: lm_model
## Rain = 1.2308, df1 = 225, df2 = 215, p-value = 0.06251
raintest(lm_inter_model)
##
## Rainbow test
##
## data: lm_inter_model
## Rain = 1.2256, df1 = 225, df2 = 207, p-value = 0.06853
# Independence of Errors: Durbin-Watson Test
# NOTE(review): the statistic depends on the row order of district_data;
# confirm the rows are in a meaningful sequence (e.g. by district and Year).
dwtest(lm_model)
##
## Durbin-Watson test
##
## data: lm_model
## DW = 1.6862, p-value = 0.0002815
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(lm_inter_model)
##
## Durbin-Watson test
##
## data: lm_inter_model
## DW = 1.7088, p-value = 0.0005942
## alternative hypothesis: true autocorrelation is greater than 0
# Homoscedasticity: scale-location plot (which = 3) for each model, plus a
# studentized Breusch-Pagan test on each untransformed fit. The transformed
# models further down are judged with bptest(), so the same test is run here
# to give those results a baseline to be compared against.
plot(lm_model, which = 3)

plot(lm_inter_model, which = 3)

bptest(lm_model)
bptest(lm_inter_model)

# Normality of Residuals: Q-Q Plot (which = 2) for each model
plot(lm_model, which = 2)

plot(lm_inter_model, which = 2)

# Shapiro-Wilk test for normality of each model's residuals
shapiro.test(lm_model$residuals)
##
## Shapiro-Wilk normality test
##
## data: lm_model$residuals
## W = 0.97656, p-value = 1.22e-06
shapiro.test(lm_inter_model$residuals)
##
## Shapiro-Wilk normality test
##
## data: lm_inter_model$residuals
## W = 0.96611, p-value = 1.098e-08
# Multicollinearity: Variance Inflation Factors
# Keep the main-effects VIFs in an object, then display them.
vif_results1 <- car::vif(lm_model)
print(x = vif_results1)
## std_all_per intern_per other_temp_per oos_std_per lag_starter_per
## 1.067775e+06 2.415721e+06 2.311846e+05 1.115055e+05 4.500476e+05
## no_cert_per reenterer_per emer_per new_hires_per
## 6.435180e+05 1.987075e+06 1.460853e+05 1.239437e+00
# Keep the interaction-model VIFs in an object, then display them.
vif_results2 <- car::vif(lm_inter_model)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
print(x = vif_results2)
## new_hires_per std_all_per
## 215082818.9 7207783.5
## intern_per other_temp_per
## 16307331.6 1560344.5
## oos_std_per lag_starter_per
## 752947.3 3038364.6
## no_cert_per reenterer_per
## 4343428.7 13413159.1
## emer_per new_hires_per:std_all_per
## 985709.0 7737093.5
## new_hires_per:intern_per new_hires_per:other_temp_per
## 41631515.0 3592898.9
## new_hires_per:oos_std_per new_hires_per:lag_starter_per
## 2263915.2 4988271.2
## new_hires_per:no_cert_per new_hires_per:reenterer_per
## 17256936.3 38100844.8
## new_hires_per:emer_per
## 5784502.3
#MITIGATION
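#mitigating violation of independence of errors (robust regression; NOTE(review): rlm down-weights large residuals but does not model autocorrelation -- confirm this is the intended remedy)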
# Robust-regression refits of both specifications using rlm() from MASS with
# its default settings.
rlm_model <- rlm(
  turnover_rate ~
    std_all_per + intern_per + other_temp_per + oos_std_per +
    lag_starter_per + no_cert_per + reenterer_per + emer_per +
    new_hires_per,
  data = district_data
)
rlm_inter_model <- rlm(
  turnover_rate ~
    new_hires_per * std_all_per +
    new_hires_per * intern_per +
    new_hires_per * other_temp_per +
    new_hires_per * oos_std_per +
    new_hires_per * lag_starter_per +
    new_hires_per * no_cert_per +
    new_hires_per * reenterer_per +
    new_hires_per * emer_per,
  data = district_data
)
summary(rlm_model)
##
## Call: rlm(formula = turnover_rate ~ std_all_per + intern_per + other_temp_per +
## oos_std_per + lag_starter_per + no_cert_per + reenterer_per +
## emer_per + new_hires_per, data = district_data)
## Residuals:
## Min 1Q Median 3Q Max
## -11.8208 -1.9127 -0.1411 1.8909 12.9665
##
## Coefficients:
## Value Std. Error t value
## (Intercept) -538.3054 1752.4176 -0.3072
## std_all_per 5.3751 17.5245 0.3067
## intern_per 5.4642 17.5247 0.3118
## other_temp_per 5.3080 17.5234 0.3029
## oos_std_per 5.4189 17.5231 0.3092
## lag_starter_per 5.5115 17.5251 0.3145
## no_cert_per 5.4782 17.5247 0.3126
## reenterer_per 5.4439 17.5243 0.3107
## emer_per 5.6185 17.5243 0.3206
## new_hires_per 0.9768 0.0272 35.9781
##
## Residual standard error: 2.81 on 440 degrees of freedom
# Coefficient summary for the robust interaction fit (the rlm summary lists
# t values but no p-values).
summary(rlm_inter_model)
##
## Call: rlm(formula = turnover_rate ~ new_hires_per * std_all_per + new_hires_per *
## intern_per + new_hires_per * other_temp_per + new_hires_per *
## oos_std_per + new_hires_per * lag_starter_per + new_hires_per *
## no_cert_per + new_hires_per * reenterer_per + new_hires_per *
## emer_per, data = district_data)
## Residuals:
## Min 1Q Median 3Q Max
## -13.1016 -1.8265 -0.1292 1.8063 14.4610
##
## Coefficients:
## Value Std. Error t value
## (Intercept) -5972.3160 4288.6750 -1.3926
## new_hires_per 424.8084 336.8893 1.2610
## std_all_per 59.8704 42.8865 1.3960
## intern_per 59.7461 42.8877 1.3931
## other_temp_per 59.7472 42.8809 1.3933
## oos_std_per 59.7188 42.8903 1.3924
## lag_starter_per 59.8198 42.8911 1.3947
## no_cert_per 59.9069 42.8845 1.3969
## reenterer_per 59.7565 42.8857 1.3934
## emer_per 59.8428 42.8772 1.3957
## new_hires_per:std_all_per -4.2537 3.3688 -1.2627
## new_hires_per:intern_per -4.2328 3.3690 -1.2564
## new_hires_per:other_temp_per -4.2456 3.3684 -1.2604
## new_hires_per:oos_std_per -4.2351 3.3689 -1.2571
## new_hires_per:lag_starter_per -4.2348 3.3692 -1.2569
## new_hires_per:no_cert_per -4.2431 3.3685 -1.2597
## new_hires_per:reenterer_per -4.2366 3.3689 -1.2576
## new_hires_per:emer_per -4.2325 3.3681 -1.2566
##
## Residual standard error: 2.699 on 432 degrees of freedom
#mitigating violation of homoscedasticity (i.e. heteroscedastic residuals)
# Log transformation of the outcome, then both specifications refit on it.
district_data <- dplyr::mutate(
  district_data,
  log_turnover_rate = log(turnover_rate)
)
lm_model_log <- lm(
  log_turnover_rate ~
    std_all_per + intern_per + other_temp_per + oos_std_per +
    lag_starter_per + no_cert_per + reenterer_per + emer_per +
    new_hires_per,
  data = district_data
)
lm_inter_model_log <- lm(
  log_turnover_rate ~
    new_hires_per * std_all_per +
    new_hires_per * intern_per +
    new_hires_per * other_temp_per +
    new_hires_per * oos_std_per +
    new_hires_per * lag_starter_per +
    new_hires_per * no_cert_per +
    new_hires_per * reenterer_per +
    new_hires_per * emer_per,
  data = district_data
)
summary(lm_model_log)
##
## Call:
## lm(formula = log_turnover_rate ~ std_all_per + intern_per + other_temp_per +
## oos_std_per + lag_starter_per + no_cert_per + reenterer_per +
## emer_per + new_hires_per, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.67161 -0.11884 -0.00912 0.11775 0.64258
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.393e-01 1.090e+02 0.008 0.994
## std_all_per 9.846e-03 1.090e+00 0.009 0.993
## intern_per 1.677e-02 1.090e+00 0.015 0.988
## other_temp_per 1.122e-02 1.090e+00 0.010 0.992
## oos_std_per 8.430e-03 1.090e+00 0.008 0.994
## lag_starter_per 1.916e-02 1.090e+00 0.018 0.986
## no_cert_per 1.681e-02 1.090e+00 0.015 0.988
## reenterer_per 1.355e-02 1.090e+00 0.012 0.990
## emer_per 1.949e-02 1.090e+00 0.018 0.986
## new_hires_per 4.483e-02 1.689e-03 26.545 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1881 on 440 degrees of freedom
## Multiple R-squared: 0.7098, Adjusted R-squared: 0.7038
## F-statistic: 119.6 on 9 and 440 DF, p-value: < 2.2e-16
# Coefficient summary for the log-outcome interaction model.
summary(lm_inter_model_log)
##
## Call:
## lm(formula = log_turnover_rate ~ new_hires_per * std_all_per +
## new_hires_per * intern_per + new_hires_per * other_temp_per +
## new_hires_per * oos_std_per + new_hires_per * lag_starter_per +
## new_hires_per * no_cert_per + new_hires_per * reenterer_per +
## new_hires_per * emer_per, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53666 -0.12728 -0.01082 0.11647 0.73175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -192.9978 275.3325 -0.701 0.484
## new_hires_per 12.3286 21.6283 0.570 0.569
## std_all_per 1.9503 2.7533 0.708 0.479
## intern_per 1.9534 2.7534 0.709 0.478
## other_temp_per 1.9535 2.7530 0.710 0.478
## oos_std_per 1.9497 2.7536 0.708 0.479
## lag_starter_per 1.9597 2.7536 0.712 0.477
## no_cert_per 1.9625 2.7532 0.713 0.476
## reenterer_per 1.9494 2.7533 0.708 0.479
## emer_per 1.9652 2.7527 0.714 0.476
## new_hires_per:std_all_per -0.1230 0.2163 -0.569 0.570
## new_hires_per:intern_per -0.1227 0.2163 -0.567 0.571
## new_hires_per:other_temp_per -0.1232 0.2163 -0.570 0.569
## new_hires_per:oos_std_per -0.1229 0.2163 -0.568 0.570
## new_hires_per:lag_starter_per -0.1231 0.2163 -0.569 0.570
## new_hires_per:no_cert_per -0.1233 0.2163 -0.570 0.569
## new_hires_per:reenterer_per -0.1226 0.2163 -0.567 0.571
## new_hires_per:emer_per -0.1232 0.2162 -0.570 0.569
##
## Residual standard error: 0.1829 on 432 degrees of freedom
## Multiple R-squared: 0.7308, Adjusted R-squared: 0.7202
## F-statistic: 68.97 on 17 and 432 DF, p-value: < 2.2e-16
# Re-check homoscedasticity after the log transformation: scale-location plots
# (which = 3) and studentized Breusch-Pagan tests for both log-outcome fits.
plot(lm_model_log, which = 3)

plot(lm_inter_model_log, which = 3)

bptest(lm_model_log)
##
## studentized Breusch-Pagan test
##
## data: lm_model_log
## BP = 45.985, df = 9, p-value = 6.062e-07
bptest(lm_inter_model_log)
##
## studentized Breusch-Pagan test
##
## data: lm_inter_model_log
## BP = 37.544, df = 17, p-value = 0.002838
# Square-root transformation of the outcome, then both specifications refit.
district_data <- dplyr::mutate(
  district_data,
  sqrt_turnover_rate = sqrt(turnover_rate)
)
lm_model_sqrt <- lm(
  sqrt_turnover_rate ~
    std_all_per + intern_per + other_temp_per + oos_std_per +
    lag_starter_per + no_cert_per + reenterer_per + emer_per +
    new_hires_per,
  data = district_data
)
lm_inter_model_sqrt <- lm(
  sqrt_turnover_rate ~
    new_hires_per * std_all_per +
    new_hires_per * intern_per +
    new_hires_per * other_temp_per +
    new_hires_per * oos_std_per +
    new_hires_per * lag_starter_per +
    new_hires_per * no_cert_per +
    new_hires_per * reenterer_per +
    new_hires_per * emer_per,
  data = district_data
)
summary(lm_inter_model_sqrt)
##
## Call:
## lm(formula = sqrt_turnover_rate ~ new_hires_per * std_all_per +
## new_hires_per * intern_per + new_hires_per * other_temp_per +
## new_hires_per * oos_std_per + new_hires_per * lag_starter_per +
## new_hires_per * no_cert_per + new_hires_per * reenterer_per +
## new_hires_per * emer_per, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.29888 -0.25033 -0.02642 0.22000 1.55202
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -376.6364 562.8777 -0.669 0.504
## new_hires_per 23.7597 44.2159 0.537 0.591
## std_all_per 3.7960 5.6287 0.674 0.500
## intern_per 3.7947 5.6289 0.674 0.501
## other_temp_per 3.7948 5.6280 0.674 0.500
## oos_std_per 3.7857 5.6292 0.673 0.502
## lag_starter_per 3.8046 5.6293 0.676 0.500
## no_cert_per 3.8128 5.6285 0.677 0.499
## reenterer_per 3.7915 5.6286 0.674 0.501
## emer_per 3.8155 5.6275 0.678 0.498
## new_hires_per:std_all_per -0.2375 0.4421 -0.537 0.591
## new_hires_per:intern_per -0.2362 0.4422 -0.534 0.594
## new_hires_per:other_temp_per -0.2373 0.4421 -0.537 0.592
## new_hires_per:oos_std_per -0.2365 0.4422 -0.535 0.593
## new_hires_per:lag_starter_per -0.2365 0.4422 -0.535 0.593
## new_hires_per:no_cert_per -0.2373 0.4421 -0.537 0.592
## new_hires_per:reenterer_per -0.2363 0.4422 -0.534 0.593
## new_hires_per:emer_per -0.2367 0.4421 -0.535 0.593
##
## Residual standard error: 0.3739 on 432 degrees of freedom
## Multiple R-squared: 0.764, Adjusted R-squared: 0.7547
## F-statistic: 82.26 on 17 and 432 DF, p-value: < 2.2e-16
# Coefficient summary for the square-root-outcome main-effects model.
summary(lm_model_sqrt)
##
## Call:
## lm(formula = sqrt_turnover_rate ~ std_all_per + intern_per +
## other_temp_per + oos_std_per + lag_starter_per + no_cert_per +
## reenterer_per + emer_per + new_hires_per, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.19977 -0.24619 -0.02803 0.22799 1.38152
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -14.302653 220.990824 -0.065 0.948
## std_all_per 0.162349 2.209947 0.073 0.941
## intern_per 0.176311 2.209973 0.080 0.936
## other_temp_per 0.162553 2.209811 0.074 0.941
## oos_std_per 0.162382 2.209766 0.073 0.941
## lag_starter_per 0.182520 2.210031 0.083 0.934
## no_cert_per 0.176534 2.209968 0.080 0.936
## reenterer_per 0.171990 2.209918 0.078 0.938
## emer_per 0.187533 2.209923 0.085 0.932
## new_hires_per 0.101740 0.003424 29.715 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3814 on 440 degrees of freedom
## Multiple R-squared: 0.7499, Adjusted R-squared: 0.7448
## F-statistic: 146.6 on 9 and 440 DF, p-value: < 2.2e-16
# Re-check homoscedasticity after the square-root transformation:
# scale-location plots (which = 3) and studentized Breusch-Pagan tests for
# both square-root-outcome fits.
plot(lm_model_sqrt, which = 3)

plot(lm_inter_model_sqrt, which = 3)

bptest(lm_model_sqrt)
##
## studentized Breusch-Pagan test
##
## data: lm_model_sqrt
## BP = 40.273, df = 9, p-value = 6.78e-06
bptest(lm_inter_model_sqrt)
##
## studentized Breusch-Pagan test
##
## data: lm_inter_model_sqrt
## BP = 34.445, df = 17, p-value = 0.007352
#mitigating violation of residual normality
#log transformation
# log_turnover_rate, lm_model_log and lm_inter_model_log were already created
# by the log-transformation step earlier in this script. The identical
# mutate() and lm() calls are not repeated here; the existing fits are reused.
summary(lm_model_log)
##
## Call:
## lm(formula = log_turnover_rate ~ std_all_per + intern_per + other_temp_per +
## oos_std_per + lag_starter_per + no_cert_per + reenterer_per +
## emer_per + new_hires_per, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.67161 -0.11884 -0.00912 0.11775 0.64258
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.393e-01 1.090e+02 0.008 0.994
## std_all_per 9.846e-03 1.090e+00 0.009 0.993
## intern_per 1.677e-02 1.090e+00 0.015 0.988
## other_temp_per 1.122e-02 1.090e+00 0.010 0.992
## oos_std_per 8.430e-03 1.090e+00 0.008 0.994
## lag_starter_per 1.916e-02 1.090e+00 0.018 0.986
## no_cert_per 1.681e-02 1.090e+00 0.015 0.988
## reenterer_per 1.355e-02 1.090e+00 0.012 0.990
## emer_per 1.949e-02 1.090e+00 0.018 0.986
## new_hires_per 4.483e-02 1.689e-03 26.545 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1881 on 440 degrees of freedom
## Multiple R-squared: 0.7098, Adjusted R-squared: 0.7038
## F-statistic: 119.6 on 9 and 440 DF, p-value: < 2.2e-16
# Coefficient summary for the log-outcome interaction model.
summary(lm_inter_model_log)
##
## Call:
## lm(formula = log_turnover_rate ~ new_hires_per * std_all_per +
## new_hires_per * intern_per + new_hires_per * other_temp_per +
## new_hires_per * oos_std_per + new_hires_per * lag_starter_per +
## new_hires_per * no_cert_per + new_hires_per * reenterer_per +
## new_hires_per * emer_per, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53666 -0.12728 -0.01082 0.11647 0.73175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -192.9978 275.3325 -0.701 0.484
## new_hires_per 12.3286 21.6283 0.570 0.569
## std_all_per 1.9503 2.7533 0.708 0.479
## intern_per 1.9534 2.7534 0.709 0.478
## other_temp_per 1.9535 2.7530 0.710 0.478
## oos_std_per 1.9497 2.7536 0.708 0.479
## lag_starter_per 1.9597 2.7536 0.712 0.477
## no_cert_per 1.9625 2.7532 0.713 0.476
## reenterer_per 1.9494 2.7533 0.708 0.479
## emer_per 1.9652 2.7527 0.714 0.476
## new_hires_per:std_all_per -0.1230 0.2163 -0.569 0.570
## new_hires_per:intern_per -0.1227 0.2163 -0.567 0.571
## new_hires_per:other_temp_per -0.1232 0.2163 -0.570 0.569
## new_hires_per:oos_std_per -0.1229 0.2163 -0.568 0.570
## new_hires_per:lag_starter_per -0.1231 0.2163 -0.569 0.570
## new_hires_per:no_cert_per -0.1233 0.2163 -0.570 0.569
## new_hires_per:reenterer_per -0.1226 0.2163 -0.567 0.571
## new_hires_per:emer_per -0.1232 0.2162 -0.570 0.569
##
## Residual standard error: 0.1829 on 432 degrees of freedom
## Multiple R-squared: 0.7308, Adjusted R-squared: 0.7202
## F-statistic: 68.97 on 17 and 432 DF, p-value: < 2.2e-16
#square root transformation
# sqrt_turnover_rate, lm_model_sqrt and lm_inter_model_sqrt were already
# created by the square-root-transformation step earlier in this script. The
# identical mutate() and lm() calls are not repeated here; the existing fits
# are reused.
summary(lm_model_sqrt)
##
## Call:
## lm(formula = sqrt_turnover_rate ~ std_all_per + intern_per +
## other_temp_per + oos_std_per + lag_starter_per + no_cert_per +
## reenterer_per + emer_per + new_hires_per, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.19977 -0.24619 -0.02803 0.22799 1.38152
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -14.302653 220.990824 -0.065 0.948
## std_all_per 0.162349 2.209947 0.073 0.941
## intern_per 0.176311 2.209973 0.080 0.936
## other_temp_per 0.162553 2.209811 0.074 0.941
## oos_std_per 0.162382 2.209766 0.073 0.941
## lag_starter_per 0.182520 2.210031 0.083 0.934
## no_cert_per 0.176534 2.209968 0.080 0.936
## reenterer_per 0.171990 2.209918 0.078 0.938
## emer_per 0.187533 2.209923 0.085 0.932
## new_hires_per 0.101740 0.003424 29.715 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3814 on 440 degrees of freedom
## Multiple R-squared: 0.7499, Adjusted R-squared: 0.7448
## F-statistic: 146.6 on 9 and 440 DF, p-value: < 2.2e-16
# Coefficient summary for the square-root-outcome interaction model.
summary(lm_inter_model_sqrt)
##
## Call:
## lm(formula = sqrt_turnover_rate ~ new_hires_per * std_all_per +
## new_hires_per * intern_per + new_hires_per * other_temp_per +
## new_hires_per * oos_std_per + new_hires_per * lag_starter_per +
## new_hires_per * no_cert_per + new_hires_per * reenterer_per +
## new_hires_per * emer_per, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.29888 -0.25033 -0.02642 0.22000 1.55202
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -376.6364 562.8777 -0.669 0.504
## new_hires_per 23.7597 44.2159 0.537 0.591
## std_all_per 3.7960 5.6287 0.674 0.500
## intern_per 3.7947 5.6289 0.674 0.501
## other_temp_per 3.7948 5.6280 0.674 0.500
## oos_std_per 3.7857 5.6292 0.673 0.502
## lag_starter_per 3.8046 5.6293 0.676 0.500
## no_cert_per 3.8128 5.6285 0.677 0.499
## reenterer_per 3.7915 5.6286 0.674 0.501
## emer_per 3.8155 5.6275 0.678 0.498
## new_hires_per:std_all_per -0.2375 0.4421 -0.537 0.591
## new_hires_per:intern_per -0.2362 0.4422 -0.534 0.594
## new_hires_per:other_temp_per -0.2373 0.4421 -0.537 0.592
## new_hires_per:oos_std_per -0.2365 0.4422 -0.535 0.593
## new_hires_per:lag_starter_per -0.2365 0.4422 -0.535 0.593
## new_hires_per:no_cert_per -0.2373 0.4421 -0.537 0.592
## new_hires_per:reenterer_per -0.2363 0.4422 -0.534 0.593
## new_hires_per:emer_per -0.2367 0.4421 -0.535 0.593
##
## Residual standard error: 0.3739 on 432 degrees of freedom
## Multiple R-squared: 0.764, Adjusted R-squared: 0.7547
## F-statistic: 82.26 on 17 and 432 DF, p-value: < 2.2e-16
# Q-Q plots (which = 2) for all four transformed fits, so that every model
# given a Shapiro-Wilk test below also has its plot. The interaction fits
# were previously tested but not plotted.
plot(lm_model_log, which = 2)

plot(lm_model_sqrt, which = 2)

plot(lm_inter_model_log, which = 2)

plot(lm_inter_model_sqrt, which = 2)

# Shapiro-Wilk normality tests on the residuals of each transformed model:
# main-effects log, main-effects sqrt, interaction log, interaction sqrt.
shapiro.test(lm_model_log$residuals)
##
## Shapiro-Wilk normality test
##
## data: lm_model_log$residuals
## W = 0.99413, p-value = 0.08043
shapiro.test(lm_model_sqrt$residuals)
##
## Shapiro-Wilk normality test
##
## data: lm_model_sqrt$residuals
## W = 0.987, p-value = 0.0004792
shapiro.test(lm_inter_model_log$residuals)
##
## Shapiro-Wilk normality test
##
## data: lm_inter_model_log$residuals
## W = 0.99116, p-value = 0.008649
shapiro.test(lm_inter_model_sqrt$residuals)
##
## Shapiro-Wilk normality test
##
## data: lm_inter_model_sqrt$residuals
## W = 0.98011, p-value = 7.771e-06
#mitigating multicollinearity among the predictors
# Pairwise correlations among the nine predictors. select() is namespaced
# because MASS, attached after dplyr, masks it.
kitchen_sink_vars <- dplyr::select(
  district_data,
  std_all_per, intern_per, other_temp_per, oos_std_per,
  lag_starter_per, no_cert_per, reenterer_per, emer_per,
  new_hires_per
)
cor_matrix <- cor(kitchen_sink_vars, use = "pairwise.complete.obs")
print(cor_matrix)
## std_all_per intern_per other_temp_per oos_std_per
## std_all_per 1.000000000 -0.22554188 -0.008392639 0.11767473
## intern_per -0.225541884 1.00000000 -0.169005675 -0.18674578
## other_temp_per -0.008392639 -0.16900567 1.000000000 0.15897515
## oos_std_per 0.117674730 -0.18674578 0.158975152 1.00000000
## lag_starter_per -0.038265217 -0.37848198 0.018435987 -0.07900271
## no_cert_per -0.279407667 -0.04999227 -0.125190444 -0.15649815
## reenterer_per -0.299354683 -0.59903772 -0.118744720 -0.07960925
## emer_per -0.121070194 -0.10239384 -0.018563343 0.12835520
## new_hires_per -0.299816472 0.10006796 0.104916438 0.03966198
## lag_starter_per no_cert_per reenterer_per emer_per
## std_all_per -0.03826522 -0.27940767 -0.29935468 -0.12107019
## intern_per -0.37848198 -0.04999227 -0.59903772 -0.10239384
## other_temp_per 0.01843599 -0.12519044 -0.11874472 -0.01856334
## oos_std_per -0.07900271 -0.15649815 -0.07960925 0.12835520
## lag_starter_per 1.00000000 -0.14677829 0.09547726 -0.11088175
## no_cert_per -0.14677829 1.00000000 -0.16329654 0.01399734
## reenterer_per 0.09547726 -0.16329654 1.00000000 -0.04874726
## emer_per -0.11088175 0.01399734 -0.04874726 1.00000000
## new_hires_per 0.02683779 0.18791220 -0.11894367 0.23440918
## new_hires_per
## std_all_per -0.29981647
## intern_per 0.10006796
## other_temp_per 0.10491644
## oos_std_per 0.03966198
## lag_starter_per 0.02683779
## no_cert_per 0.18791220
## reenterer_per -0.11894367
## emer_per 0.23440918
## new_hires_per 1.00000000
# Collapse the certification-route percentages into three composite groups:
#   cert   = standard certs            (std_all_per + oos_std_per + lag_starter_per)
#   alt    = alternative certs         (intern_per + other_temp_per)
#   uncert = uncertified or temporary  (no_cert_per + emer_per)
# reenterer_per is deliberately dropped: conceptually, reenterers are not "new"
# teachers -- they are experienced but are coming back to the field.
# NOTE(review): other_temp_per is grouped under alt rather than uncert, even
# though uncert is described as covering temporary certificates -- confirm.
district_data <- dplyr::mutate(
  district_data,
  cert = std_all_per + oos_std_per + lag_starter_per,
  alt = intern_per + other_temp_per,
  uncert = no_cert_per + emer_per
)
lm_model_combined <- lm(
  turnover_rate ~ cert + alt + uncert + new_hires_per,
  data = district_data
)
lm_inter_model_combined <- lm(
  turnover_rate ~
    new_hires_per * cert + new_hires_per * alt + new_hires_per * uncert,
  data = district_data
)
summary(lm_model_combined)
##
## Call:
## lm(formula = turnover_rate ~ cert + alt + uncert + new_hires_per,
## data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.6573 -2.1480 -0.4508 1.7160 13.9036
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.55897 1.02665 7.363 8.79e-13 ***
## cert -0.06438 0.01947 -3.306 0.00102 **
## alt -0.01459 0.01516 -0.963 0.33622
## uncert 0.03833 0.02611 1.468 0.14293
## new_hires_per 0.98601 0.02932 33.634 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.442 on 445 degrees of freedom
## Multiple R-squared: 0.7549, Adjusted R-squared: 0.7527
## F-statistic: 342.7 on 4 and 445 DF, p-value: < 2.2e-16
# Coefficients and fit statistics for the combined-group interaction model.
lm_inter_model_combined %>% summary()
##
## Call:
## lm(formula = turnover_rate ~ new_hires_per * cert + new_hires_per *
## alt + new_hires_per * uncert, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.9249 -2.1440 -0.4746 1.7226 13.7419
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.342547 1.876000 2.315 0.02108 *
## new_hires_per 1.245757 0.131485 9.475 < 2e-16 ***
## cert -0.012084 0.040658 -0.297 0.76644
## alt 0.021852 0.030317 0.721 0.47141
## uncert 0.125220 0.041745 3.000 0.00286 **
## new_hires_per:cert -0.004195 0.003089 -1.358 0.17510
## new_hires_per:alt -0.003095 0.002056 -1.505 0.13297
## new_hires_per:uncert -0.005705 0.002108 -2.707 0.00705 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.425 on 442 degrees of freedom
## Multiple R-squared: 0.759, Adjusted R-squared: 0.7552
## F-statistic: 198.8 on 7 and 442 DF, p-value: < 2.2e-16
# Variance inflation factors for the additive combined-group model.
vif_lm_model_combined <- car::vif(lm_model_combined)
print(vif_lm_model_combined)
## cert alt uncert new_hires_per
## 1.546264 1.382419 1.368248 1.115453
# Variance inflation factors for the interaction model. car::vif() emits the
# note recorded below because the model contains interaction terms.
vif_lm_inter_model_combined <- car::vif(lm_inter_model_combined)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
print(vif_lm_inter_model_combined)
## new_hires_per cert alt
## 22.663134 6.807575 5.586699
## uncert new_hires_per:cert new_hires_per:alt
## 3.531244 10.133858 12.508046
## new_hires_per:uncert
## 6.180599
# Log transformation of the combined variables ----
# log(x + 1) is used so that a value of 0 maps to 0 instead of -Inf.
district_data <- dplyr::mutate(
  district_data,
  cert_log = log(cert + 1),
  alt_log = log(alt + 1),
  uncert_log = log(uncert + 1),
  new_hires_per_log = log(new_hires_per + 1),
  # Hand-built interaction products. The two models fitted below create
  # their interactions through the formula and do not read these columns.
  log_new_hires_cert = new_hires_per_log * cert_log,
  log_new_hires_alt = new_hires_per_log * alt_log,
  log_new_hires_uncert = new_hires_per_log * uncert_log
)

# Additive and interaction models on the log-transformed predictors.
lm_model_log2 <- lm(
  turnover_rate ~ cert_log + alt_log + uncert_log + new_hires_per_log,
  data = district_data
)
lm_inter_model_log2 <- lm(
  turnover_rate ~ new_hires_per_log * cert_log +
    new_hires_per_log * alt_log +
    new_hires_per_log * uncert_log,
  data = district_data
)
summary(lm_model_log2)
##
## Call:
## lm(formula = turnover_rate ~ cert_log + alt_log + uncert_log +
## new_hires_per_log, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.4326 -2.5171 -0.6843 2.2400 14.6057
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.5960 1.8990 -1.894 0.0589 .
## cert_log -1.9886 0.2751 -7.229 2.14e-12 ***
## alt_log -2.4568 0.3230 -7.607 1.69e-13 ***
## uncert_log -0.2873 0.2032 -1.414 0.1581
## new_hires_per_log 14.6286 0.4684 31.230 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.694 on 445 degrees of freedom
## Multiple R-squared: 0.7177, Adjusted R-squared: 0.7152
## F-statistic: 282.9 on 4 and 445 DF, p-value: < 2.2e-16
# Coefficients and fit statistics for the log-transformed interaction model.
lm_inter_model_log2 %>% summary()
##
## Call:
## lm(formula = turnover_rate ~ new_hires_per_log * cert_log + new_hires_per_log *
## alt_log + new_hires_per_log * uncert_log, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8370 -2.5064 -0.6714 2.1811 14.8261
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.2670 7.1950 -1.844 0.0659 .
## new_hires_per_log 18.3214 2.8114 6.517 1.96e-10 ***
## cert_log 0.2880 1.4788 0.195 0.8457
## alt_log -1.2852 1.1926 -1.078 0.2818
## uncert_log -0.8889 1.0027 -0.887 0.3758
## new_hires_per_log:cert_log -0.9189 0.5940 -1.547 0.1226
## new_hires_per_log:alt_log -0.4192 0.4425 -0.947 0.3440
## new_hires_per_log:uncert_log 0.2485 0.3986 0.623 0.5334
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.686 on 442 degrees of freedom
## Multiple R-squared: 0.7208, Adjusted R-squared: 0.7164
## F-statistic: 163 on 7 and 442 DF, p-value: < 2.2e-16
# Variance inflation factors for the additive log-transformed model.
car::vif(lm_model_log2)
## cert_log alt_log uncert_log new_hires_per_log
## 1.104417 1.066457 1.144414 1.106576
# Variance inflation factors for the log-transformed interaction model.
car::vif(lm_inter_model_log2)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
## new_hires_per_log cert_log
## 40.03352 32.05083
## alt_log uncert_log
## 14.60348 27.99157
## new_hires_per_log:cert_log new_hires_per_log:alt_log
## 42.96771 27.71856
## new_hires_per_log:uncert_log
## 33.89596
# Square-root transformation of the combined variables ----
district_data <- dplyr::mutate(
  district_data,
  cert_sqrt = sqrt(cert),
  alt_sqrt = sqrt(alt),
  uncert_sqrt = sqrt(uncert),
  new_hires_per_sqrt = sqrt(new_hires_per),
  # Hand-built interaction products. The two models fitted below create
  # their interactions through the formula and do not read these columns.
  sqrt_new_hires_cert = new_hires_per_sqrt * cert_sqrt,
  sqrt_new_hires_alt = new_hires_per_sqrt * alt_sqrt,
  sqrt_new_hires_uncert = new_hires_per_sqrt * uncert_sqrt
)

# Additive and interaction models on the square-root-transformed predictors.
lm_model_sqrt2 <- lm(
  turnover_rate ~ cert_sqrt + alt_sqrt + uncert_sqrt + new_hires_per_sqrt,
  data = district_data
)
lm_inter_model_sqrt2 <- lm(
  turnover_rate ~ new_hires_per_sqrt * cert_sqrt +
    new_hires_per_sqrt * alt_sqrt +
    new_hires_per_sqrt * uncert_sqrt,
  data = district_data
)
summary(lm_model_sqrt2)
##
## Call:
## lm(formula = turnover_rate ~ cert_sqrt + alt_sqrt + uncert_sqrt +
## new_hires_per_sqrt, data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.8802 -2.3148 -0.5312 1.9763 13.7126
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6283 1.5398 0.408 0.683
## cert_sqrt -0.9269 0.1468 -6.313 6.64e-10 ***
## alt_sqrt -0.7611 0.1448 -5.258 2.27e-07 ***
## uncert_sqrt -0.1046 0.1358 -0.771 0.441
## new_hires_per_sqrt 7.6916 0.2290 33.592 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.493 on 445 degrees of freedom
## Multiple R-squared: 0.7476, Adjusted R-squared: 0.7453
## F-statistic: 329.5 on 4 and 445 DF, p-value: < 2.2e-16
# Coefficients and fit statistics for the square-root interaction model.
lm_inter_model_sqrt2 %>% summary()
##
## Call:
## lm(formula = turnover_rate ~ new_hires_per_sqrt * cert_sqrt +
## new_hires_per_sqrt * alt_sqrt + new_hires_per_sqrt * uncert_sqrt,
## data = district_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.5240 -2.3719 -0.6452 2.0435 14.0328
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.91650 4.74118 -1.670 0.0957 .
## new_hires_per_sqrt 10.19253 1.33748 7.621 1.55e-13 ***
## cert_sqrt 0.01635 0.58621 0.028 0.9778
## alt_sqrt -0.17429 0.45012 -0.387 0.6988
## uncert_sqrt 0.40050 0.45128 0.887 0.3753
## new_hires_per_sqrt:cert_sqrt -0.28286 0.17118 -1.652 0.0992 .
## new_hires_per_sqrt:alt_sqrt -0.16421 0.11856 -1.385 0.1667
## new_hires_per_sqrt:uncert_sqrt -0.15132 0.12628 -1.198 0.2314
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.491 on 442 degrees of freedom
## Multiple R-squared: 0.7496, Adjusted R-squared: 0.7457
## F-statistic: 189.1 on 7 and 442 DF, p-value: < 2.2e-16
# Variance inflation factors for the additive square-root model.
car::vif(lm_model_sqrt2)
## cert_sqrt alt_sqrt uncert_sqrt new_hires_per_sqrt
## 1.276592 1.175454 1.221371 1.105554
# Variance inflation factors for the square-root interaction model.
car::vif(lm_inter_model_sqrt2)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
## new_hires_per_sqrt cert_sqrt
## 37.77730 20.37647
## alt_sqrt uncert_sqrt
## 11.38176 13.51409
## new_hires_per_sqrt:cert_sqrt new_hires_per_sqrt:alt_sqrt
## 27.65513 22.16217
## new_hires_per_sqrt:uncert_sqrt
## 18.89142