Load the data from the provided CSV files.
# Read the two district tables. The relative paths are resolved against the
# current working directory.
t.large <- read.csv(file = "Large Districts.csv")
t.small <- read.csv(file = "Small Districts.csv")
Model the large and small district groups separately.
# Large districts: ordinary least-squares fit of Expenditures on four
# predictors taken from t.large.
# NOTE(review): locale.group enters the model as a single numeric slope (the
# summary shows one coefficient for it). If it is a categorical code, it
# probably belongs in factor() -- confirm with the data owner.
ml <- lm(
  Expenditures ~ Student.Daily.Variable +
    S.E..Student..Daily.Variable +
    Annual.Mileage +
    locale.group,
  data = t.large
)
# Coefficient table, residual quantiles and overall fit statistics.
summary(ml)
##
## Call:
## lm(formula = Expenditures ~ Student.Daily.Variable + S.E..Student..Daily.Variable +
## Annual.Mileage + locale.group, data = t.large)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2333727 -142051 61371 176012 1078985
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.017e+05 3.253e+05 0.313 0.75653
## Student.Daily.Variable 2.844e+00 4.004e-01 7.101 3.93e-08 ***
## S.E..Student..Daily.Variable 3.888e+00 5.156e+00 0.754 0.45617
## Annual.Mileage 1.054e+00 3.131e-01 3.365 0.00195 **
## locale.group -4.011e+03 1.022e+05 -0.039 0.96893
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 555000 on 33 degrees of freedom
## Multiple R-squared: 0.9766, Adjusted R-squared: 0.9738
## F-statistic: 344.9 on 4 and 33 DF, p-value: < 2.2e-16
# Print the large-group coefficients unrounded; summary() above shows them
# in abbreviated scientific notation.
coefficients(ml)
## (Intercept) Student.Daily.Variable
## 101702.964884 2.843489
## S.E..Student..Daily.Variable Annual.Mileage
## 3.888092 1.053524
## locale.group
## -4011.178926
# Small districts: ordinary least-squares fit of Expenditures on four
# predictors taken from t.small. Unlike the large-group model, this one uses
# Bus.Daily.Variable and omits S.E..Student..Daily.Variable.
# NOTE(review): locale.group enters the model as a single numeric slope (the
# summary shows one coefficient for it). If it is a categorical code, it
# probably belongs in factor() -- confirm with the data owner.
ms <- lm(
  Expenditures ~ Student.Daily.Variable +
    Bus.Daily.Variable +
    Annual.Mileage +
    locale.group,
  data = t.small
)
# Coefficient table, residual quantiles and overall fit statistics.
summary(ms)
##
## Call:
## lm(formula = Expenditures ~ Student.Daily.Variable + Bus.Daily.Variable +
## Annual.Mileage + locale.group, data = t.small)
##
## Residuals:
## Min 1Q Median 3Q Max
## -252119 -17880 -1615 24344 104841
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.278e+04 1.793e+04 -2.386 0.019868 *
## Student.Daily.Variable 1.041e+00 3.147e-01 3.307 0.001520 **
## Bus.Daily.Variable 1.482e+02 2.353e+01 6.297 2.68e-08 ***
## Annual.Mileage 1.144e+00 1.519e-01 7.532 1.68e-10 ***
## locale.group 1.974e+04 4.919e+03 4.012 0.000154 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 45890 on 67 degrees of freedom
## Multiple R-squared: 0.9089, Adjusted R-squared: 0.9034
## F-statistic: 167.1 on 4 and 67 DF, p-value: < 2.2e-16
# Print the small-group coefficients unrounded; summary() above shows them
# in abbreviated scientific notation.
coefficients(ms)
## (Intercept) Student.Daily.Variable Bus.Daily.Variable
## -42776.837934 1.040585 148.193499
## Annual.Mileage locale.group
## 1.143695 19736.172261
# Plot all main effects in each model.
# allEffects() comes from the 'effects' package, and no library(effects) call
# is visible in this document. Qualifying the call loads the package namespace
# on demand, which also registers the plot() method for the returned effect
# list, so this chunk runs whether or not the package has been attached.
plot(effects::allEffects(ml)) # large districts
plot(effects::allEffects(ms)) # small districts
Plot the residuals. First, the large districts group:
Next, the small districts group:
# Extract the residual vectors from both fitted models before first use.
# residuals_ms is needed here; residuals_ml is needed by the histogram and the
# residuals-vs-fitted plot for the large group. Neither is assigned anywhere
# else in the visible document. Re-assigning is harmless if they already exist.
residuals_ml <- residuals(ml)
residuals_ms <- residuals(ms)

# Normal Q-Q plot of the small-group residuals.
qqnorm(
  residuals_ms,
  main = "Q-Q Plot of Residuals Small Districts and Charters"
)
# Red reference line; by default qqline() draws it through the first and
# third quartiles.
qqline(residuals_ms, col = "red")
Histograms of the residuals for both groups:
# Residual histograms: large group first, then small group.
# hist() derives its default title and x-axis label from the argument
# expression, so the plots are labelled with the variable names
# residuals_ml and residuals_ms.
hist(residuals_ml)
hist(residuals_ms)
Create the residuals vs. fitted values plots. First, the large group:
# Large districts: scatter of residuals against fitted values.
fitted_values <- fitted(ml)
plot(
  x = fitted_values,
  y = residuals_ml,
  pch = 20, # small filled dots
  col = "blue",
  xlab = "Fitted Values",
  ylab = "Residuals",
  main = "Residuals vs. Fitted Values"
)
# Zero-residual reference line.
abline(h = 0, col = "red")
Next, the small group:
# Small districts: scatter of residuals against fitted values.
# fitted_values is reused here and now holds the small-group fit.
fitted_values <- fitted(ms)
plot(
  x = fitted_values,
  y = residuals_ms,
  pch = 20, # small filled dots
  col = "blue",
  xlab = "Fitted Values",
  ylab = "Residuals",
  main = "Residuals vs. Fitted Values"
)
# Zero-residual reference line.
abline(h = 0, col = "red")