This project explores whether teacher experience is a strong predictor of student success across Texas school districts, using SAT scores as the outcome measure. Additional variables include student demographics, funding per student, and special education populations.
# Load the district-level data once; every later step works from `data`.
data <- read.csv("district.csv")

# Preview the first rows from the object already in memory instead of
# parsing the CSV a second time just to print it.
head(data)
## DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC
## 1 CAYUGA ISD 1902 001 ANDERSON 7 A 3 574
## 2 ELKHART ISD 1903 001 ANDERSON 7 A 4 1150
## 3 FRANKSTON ISD 1904 001 ANDERSON 7 A 3 808
## 4 NECHES ISD 1906 001 ANDERSON 7 A 2 342
## 5 PALESTINE ISD 1907 001 ANDERSON 7 B 6 3360
## 6 WESTWOOD ISD 1908 001 ANDERSON 7 B 4 1332
## DPETBLAP DPETHISP DPETWHIP DPETINDP DPETASIP DPETPCIP DPETTWOP DPETECOP
## 1 4.4 11.5 79.1 0.0 0.5 0.0 4.5 40.8
## 2 4.0 11.8 80.3 0.3 0.2 0.0 3.4 45.4
## 3 8.5 11.3 75.2 0.4 1.0 0.0 3.6 54.2
## 4 8.2 13.5 75.1 0.3 0.3 0.0 2.6 54.1
## 5 25.1 42.9 27.3 0.2 0.7 0.1 3.7 81.6
## 6 19.7 26.2 48.0 0.7 0.5 0.1 4.9 74.0
## DPETLEPP DPETSPEP DPETBILP DPETVOCP DPETGIFP DA0AT21R DA0912DR21R DAGC4X21R
## 1 1.0 14.6 1.0 30.5 6.1 96.7 0.0 100.0
## 2 2.8 12.1 2.7 31.8 4.6 96.0 0.3 100.0
## 3 4.1 13.1 4.1 43.9 7.3 95.4 0.4 95.2
## 4 2.0 10.5 2.0 29.5 5.6 95.8 0.0 95.8
## 5 17.7 13.5 16.1 30.6 2.3 93.7 0.0 99.0
## 6 7.1 14.5 6.8 38.7 3.2 94.5 0.0 97.8
## DAGC5X20R DAGC6X19R DA0GR21N DA0GS21N DDA00A001S22R DDA00A001222R
## 1 100.0 96.0 36 34 84 62
## 2 98.9 98.8 91 79 85 59
## 3 100.0 33.3 41 40 83 57
## 4 97.0 100.0 23 17 90 64
## 5 99.6 98.6 201 198 74 46
## 6 97.0 97.4 95 77 69 40
## DDA00A001322R DDA00AR01S22R DDA00AR01222R DDA00AR01322R DDA00AM01S22R
## 1 33 81 67 39 88
## 2 30 85 64 34 84
## 3 25 84 63 24 85
## 4 27 87 67 30 94
## 5 20 72 48 20 75
## 6 16 70 45 19 66
## DDA00AM01222R DDA00AM01322R DDA00AC01S22R DDA00AC01222R DDA00AC01322R
## 1 65 34 85 54 22
## 2 49 23 86 63 29
## 3 57 26 81 49 21
## 4 69 27 90 54 23
## 5 44 20 78 48 22
## 6 34 14 73 41 15
## DDA00AS01S22R DDA00AS01222R DDA00AS01322R DDB00A001S22R DDB00A001222R
## 1 78 47 21 60 17
## 2 90 63 42 46 22
## 3 74 48 26 74 38
## 4 83 51 26 88 48
## 5 72 42 20 64 33
## 6 68 38 15 56 26
## DDB00A001322R DDH00A001S22R DDH00A001222R DDH00A001322R DDW00A001S22R
## 1 3 74 53 24 87
## 2 8 85 56 25 88
## 3 6 75 46 19 85
## 4 19 91 69 26 89
## 5 11 73 44 19 83
## 6 11 69 36 12 75
## DDW00A001222R DDW00A001322R DDI00A001S22R DDI00A001222R DDI00A001322R
## 1 66 35 NA NA NA
## 2 61 32 100 100 100
## 3 62 28 80 20 20
## 4 66 29 -1 -1 -1
## 5 60 29 75 50 17
## 6 48 21 NA NA NA
## DD300A001S22R DD300A001222R DD300A001322R DD400A001S22R DD400A001222R
## 1 33 33 17 NA NA
## 2 -1 -1 -1 NA NA
## 3 84 53 16 NA NA
## 4 -1 -1 -1 NA NA
## 5 85 77 44 -1 -1
## 6 100 100 88 -1 -1
## DD400A001322R DD200A001S22R DD200A001222R DD200A001322R DDE00A001S22R
## 1 NA 83 54 34 76
## 2 NA 77 46 23 77
## 3 NA 75 58 28 77
## 4 NA -1 -1 -1 86
## 5 -1 74 44 18 70
## 6 -1 62 38 13 65
## DDE00A001222R DDE00A001322R DA0CT21R DA0CC21R DA0CSA21R DA0CAA21R DPSATOFC
## 1 50 23 58.3 19.0 980 NA 99.9
## 2 42 19 51.6 27.7 979 -1.0 186.6
## 3 49 17 92.7 36.8 980 -1.0 146.7
## 4 53 17 87.0 15.0 1007 18.8 60.1
## 5 40 16 43.3 49.4 1048 21.0 553.4
## 6 34 14 40.0 28.9 990 -1.0 265.1
## DPSTTOFC DPSCTOFP DPSSTOFP DPSUTOFP DPSTTOFP DPSETOFP DPSXTOFP DPSCTOSA
## 1 46.7 1.5 5.0 5.4 46.8 14.8 26.5 93333
## 2 104.9 1.1 2.1 4.9 56.2 16.2 19.5 100313
## 3 74.5 1.4 3.5 2.0 50.8 15.0 27.4 98293
## 4 30.2 3.1 5.0 1.7 50.3 13.7 26.2 85537
## 5 260.3 2.1 3.4 8.3 47.0 19.7 19.5 99324
## 6 120.6 1.1 4.6 4.4 45.5 19.2 25.2 121228
## DPSSTOSA DPSUTOSA DPSTTOSA DPSAMIFP DPSAKIDR DPSTKIDR DPST05FP DPSTEXPA
## 1 73300 59550 55570 15.6 5.7 12.3 10.4 16.7
## 2 79305 60616 47916 13.4 6.2 11.0 23.8 13.5
## 3 71215 58022 50382 10.9 5.5 10.8 32.7 12.8
## 4 81593 77642 55346 16.3 5.7 11.3 9.7 14.8
## 5 80415 63829 48825 32.1 6.1 12.9 33.8 12.7
## 6 69527 63612 44741 29.9 5.0 11.0 44.8 10.3
## DPSTADFP DPSTURNR DPSTBLFP DPSTHIFP DPSTWHFP DPSTINFP DPSTASFP DPSTPIFP
## 1 14.8 19.1 8.3 0.0 91.7 0.0 0 0
## 2 19.0 13.9 2.9 6.7 90.5 0.0 0 0
## 3 30.7 21.6 4.0 1.3 93.3 0.0 0 0
## 4 9.6 18.3 6.5 0.0 93.5 0.0 0 0
## 5 15.4 17.9 9.6 13.8 74.6 0.0 0 0
## 6 17.4 30.6 11.6 6.6 80.9 0.8 0 0
## DPSTTWFP DPSTREFP DPSTSPFP DPSTCOFP DPSTBIFP DPSTVOFP DPSTGOFP DPFVTOTK
## 1 0.0 81.6 9.9 0.0 0.0 8.5 0.0 551481
## 2 0.0 71.5 8.4 4.9 0.7 13.0 1.5 250124
## 3 1.3 87.6 7.5 2.7 0.0 2.2 0.0 373882
## 4 0.0 70.0 5.5 12.0 0.0 10.8 1.7 339519
## 5 1.9 71.4 10.2 5.0 2.6 9.0 1.8 337763
## 6 0.0 71.4 6.4 6.1 0.0 10.8 5.3 381133
## DPFTADPR DPFRAALLT DPFRAALLK DPFRAOPRT DPFRASTAP DZRVLOCP DPFRAFEDP DPFRAORVT
## 1 1.055 10600571 19814 10525571 47.2 34.4 18.4 75000
## 2 1.244 16544197 13787 15623002 61.8 25.7 12.5 921195
## 3 1.341 10632871 13845 9815575 58.2 30.5 11.3 817296
## 4 1.370 5044735 14925 4573108 64.2 27.7 8.1 471627
## 5 1.405 59631485 17549 45806947 48.3 30.6 21.1 13824538
## 6 1.053 18304035 13538 18204035 51.5 32.5 15.9 100000
## DPFUNAB1T DPFUNA4T DPFEAALLT DPFEAOPFT DPFEAOPFK DPFEAINSP DZEXADMP DZEXADSP
## 1 3306025 0 9222524 8878441 16595 49.6 9.1 3.7
## 2 6071780 0 15181525 13694502 11412 60.3 6.9 4.8
## 3 3880100 0 10569512 9568092 12458 54.2 8.3 6.1
## 4 930315 0 5061803 4709122 13932 53.7 10.7 8.7
## 5 10006405 0 52684829 43017866 12660 54.6 8.3 6.2
## 6 8510495 0 17702691 17289992 12788 50.6 8.5 7.0
## DZEXPLAP DZEXOTHP DPFEAINST DPFEAINSK DPFPAREGP DPFPASPEP DPFPACOMP DPFPABILP
## 1 10.2 27.4 4405076 8234 32.9 28.9 5.9 0.1
## 2 10.5 17.4 8261144 6884 44.0 8.8 7.6 0.0
## 3 13.6 17.8 5184733 6751 42.5 8.4 6.1 0.0
## 4 10.3 16.6 2529704 7484 40.3 10.1 8.6 0.1
## 5 10.8 20.1 23492731 6914 43.2 6.1 7.1 1.0
## 6 10.5 23.3 8750034 6472 36.8 9.4 8.9 0.2
## DPFPAVOCP DPFPAGIFP DPFPAATHP DPFPAHSAP DPFPREKP DPFPAOTHP DISTSIZE
## 1 3.3 0.0 3.7 0.0 0.0 24.1 500 to 999
## 2 6.9 0.0 3.8 0.0 0.0 26.8 1,000 to 1,599
## 3 5.3 0.1 6.7 0.0 0.2 28.6 500 to 999
## 4 4.5 0.0 0.0 0.0 0.7 34.1 Under 500
## 5 4.2 0.1 3.5 0.7 0.9 30.7 3,000 to 4,999
## 6 4.1 0.1 4.7 0.0 0.8 32.4 1,000 to 1,599
## COMMTYPE PROPWLTH TAXRATE
## 1 Rural $539,089 to < $573,876 Under $1.0809
## 2 Non-metropolitan Stable $234,712 to < $298,152 $1.2148 to under $1.3239
## 3 Rural $359,962 to < $411,857 $1.3239 and over
## 4 Rural $298,152 to < $340,843 $1.3239 and over
## 5 Independent Town $298,152 to < $340,843 $1.3239 and over
## 6 Non-metropolitan Stable $359,962 to < $411,857 Under $1.0809
# Build the modeling sample:
#   * keep only rows where both the outcome column (DPSATOFC) and the funding
#     column (DPFUNAB1T) are present and the funding value is non-negative;
#   * add log_FUNAB1T = log(DPFUNAB1T + 1); the +1 keeps zero-valued rows
#     finite, since log(0) is -Inf.
# NOTE(review): in the head() preview DPSATOFC holds fractional values of
# roughly 60-553, which is not the SAT score scale, whereas DA0CSA21R holds
# values around 980-1048. Confirm against the data dictionary that DPSATOFC
# is the intended SAT outcome.
data_clean <- data %>%
  filter(!is.na(DPSATOFC) & !is.na(DPFUNAB1T) & DPFUNAB1T >= 0) %>%
  mutate(
    log_FUNAB1T = log(DPFUNAB1T + 1)
  )
# Model 1: Basic model
# Ordinary least squares fit of the outcome column (DPSATOFC) on average
# teacher experience (DPSTEXPA), three percentage columns (DPSTBLFP, DPSTHIFP,
# DPSTWHFP -- presumably demographic shares; verify against the data
# dictionary), the special education percentage (DPSTSPFP), and the raw
# funding column (DPFUNAB1T). summary() prints the coefficient table and fit
# statistics.
# NOTE(review): DPSATOFC values in the head() preview (about 60-553, with
# decimals) are not on the SAT scale, and the residuals of this fit span
# thousands of units; DA0CSA21R (about 980-1048) looks like the SAT average.
# Confirm the outcome variable before interpreting these coefficients.
model1 <- lm(DPSATOFC ~ DPSTEXPA + DPSTBLFP + DPSTHIFP + DPSTWHFP + DPSTSPFP + DPFUNAB1T, data = data_clean)
summary(model1)
##
## Call:
## lm(formula = DPSATOFC ~ DPSTEXPA + DPSTBLFP + DPSTHIFP + DPSTWHFP +
## DPSTSPFP + DPFUNAB1T, data = data_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5932.6 -162.3 -41.6 59.1 7923.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.584e+02 3.180e+02 1.756 0.0794 .
## DPSTEXPA -1.193e+01 6.227e+00 -1.916 0.0556 .
## DPSTBLFP -2.940e+00 3.642e+00 -0.807 0.4196
## DPSTHIFP -2.325e+00 3.376e+00 -0.689 0.4912
## DPSTWHFP -5.708e+00 3.360e+00 -1.699 0.0896 .
## DPSTSPFP 2.048e+01 4.706e+00 4.352 1.47e-05 ***
## DPFUNAB1T 3.940e-05 4.845e-07 81.333 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 619.3 on 1190 degrees of freedom
## Multiple R-squared: 0.8641, Adjusted R-squared: 0.8634
## F-statistic: 1261 on 6 and 1190 DF, p-value: < 2.2e-16
# Model 2: Log-transformed funding
# Same specification as Model 1 except that the raw funding column is replaced
# by log_FUNAB1T (log(DPFUNAB1T + 1), created when data_clean was built), so
# the funding coefficient is per one-unit change in log funding.
# NOTE(review): the outcome is still DPSATOFC, whose previewed values (about
# 60-553) do not look like SAT scores -- confirm the outcome column before
# comparing this fit with Model 1.
model2 <- lm(DPSATOFC ~ DPSTEXPA + DPSTBLFP + DPSTHIFP + DPSTWHFP + DPSTSPFP + log_FUNAB1T, data = data_clean)
summary(model2)
##
## Call:
## lm(formula = DPSATOFC ~ DPSTEXPA + DPSTBLFP + DPSTHIFP + DPSTWHFP +
## DPSTSPFP + log_FUNAB1T, data = data_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2795.4 -530.3 -220.7 103.5 21240.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2796.396 773.961 3.613 0.000315 ***
## DPSTEXPA -45.009 16.048 -2.805 0.005119 **
## DPSTBLFP -13.013 8.828 -1.474 0.140763
## DPSTHIFP -26.846 8.262 -3.249 0.001190 **
## DPSTWHFP -41.468 8.219 -5.046 5.23e-07 ***
## DPSTSPFP 58.470 11.775 4.966 7.85e-07 ***
## log_FUNAB1T 114.732 9.738 11.782 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1501 on 1190 degrees of freedom
## Multiple R-squared: 0.2019, Adjusted R-squared: 0.1979
## F-statistic: 50.17 on 6 and 1190 DF, p-value: < 2.2e-16
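The dataset includes over 1,200 Texas school districts. We cleaned the data by removing districts with a missing outcome value or a missing or negative funding value, which left 1,197 districts for modeling. We also created a log-transformed version of the funding variable, log(funding + 1), to improve statistical assumptions for regression; adding 1 before taking the log keeps districts with a funding value of zero in the sample, because log(0) is undefined.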
# Attach Model 1's prediction to every district.
# `newdata` is passed explicitly so predict() returns exactly one value per
# row of data_clean (NA where a predictor is missing). predict(model1) alone
# returns one value per row that lm() kept, so mutate() would stop with a
# length mismatch as soon as lm() drops any row for missing predictors.
data_clean <- data_clean %>%
  mutate(predicted_SAT = predict(model1, newdata = data_clean))

# Predicted vs. observed outcome. The dashed line is y = x: points on it are
# predicted exactly, points above it are under-predicted, points below it
# are over-predicted.
ggplot(data_clean, aes(x = predicted_SAT, y = DPSATOFC)) +
  geom_point(alpha = 0.5) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "blue") +
  labs(
    title = "Predicted vs. Actual SAT Scores",
    x = "Predicted SAT",
    y = "Actual SAT"
  ) +
  theme_minimal()
# Scatter of the outcome column against average teacher experience, with a
# simple linear fit (and its confidence band) drawn over the points.
# NOTE(review): the y variable is DPSATOFC; confirm it is the SAT column
# before relying on the "SAT Score" axis label.
ggplot(data = data_clean, mapping = aes(x = DPSTEXPA, y = DPSATOFC)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", color = "orange") +
  theme_minimal() +
  ggtitle("Teacher Experience vs. SAT Scores") +
  xlab("Average Years of Teacher Experience") +
  ylab("SAT Score")
## `geom_smooth()` using formula = 'y ~ x'
# Scatter of the outcome column against the special education percentage
# (DPSTSPFP), with a simple linear fit drawn over the points.
# NOTE(review): the file also contains DPETSPEP; confirm which of the two
# columns is the special education share this analysis intends to use.
ggplot(data = data_clean, mapping = aes(x = DPSTSPFP, y = DPSATOFC)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", color = "purple") +
  theme_minimal() +
  ggtitle("Special Education % vs. SAT Scores") +
  xlab("Special Education %") +
  ylab("SAT Score")
## `geom_smooth()` using formula = 'y ~ x'
# Scatter of the outcome column against log_FUNAB1T (log(DPFUNAB1T + 1)),
# with a simple linear fit drawn over the points.
# NOTE(review): the previewed DPFUNAB1T values are in the millions, which
# looks like a district total rather than a per-student amount -- confirm
# that the "per student" wording in the labels matches the column.
ggplot(data = data_clean, mapping = aes(x = log_FUNAB1T, y = DPSATOFC)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", color = "darkgreen") +
  theme_minimal() +
  ggtitle("Log of Per-Student Funding vs. SAT Scores") +
  xlab("Log(Funding per Student)") +
  ylab("SAT Score")
## `geom_smooth()` using formula = 'y ~ x'
### Conclusion
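This analysis suggests that teacher experience, while important, is not the strongest predictor of SAT scores in Texas school districts. In both models the coefficient on average teacher experience is negative, and in Model 1 it is only marginally significant (p ≈ 0.056). Instead, funding levels and special education support are more strongly associated with academic success: both are significant at p < 0.001 in both models. Because the models describe cross-sectional associations between districts, these results should not be read as causal effects. These findings align with recent literature questioning whether experience alone guarantees better outcomes in test-focused environments.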