Load the data from the provided CSV files.

# Import the two district tables. Both paths are relative, so the CSV files
# must be in the current working directory.
# NOTE(review): read.csv() defaults to check.names = TRUE, which is presumably
# why the model formulas below use dotted column names - confirm against the
# raw CSV headers.
t.small <- read.csv(file = "Small Districts.csv")
t.large <- read.csv(file = "Large Districts.csv")

Model the large and small groups separately.

# Large districts: ordinary least-squares fit of Expenditures on four
# predictors. The formula stays inline so the "Call:" line printed by
# summary() shows the full model.
ml <- lm(
  Expenditures ~ Student.Daily.Variable + S.E..Student..Daily.Variable +
    Annual.Mileage + locale.group,
  data = t.large
)
summary(ml)
## 
## Call:
## lm(formula = Expenditures ~ Student.Daily.Variable + S.E..Student..Daily.Variable + 
##     Annual.Mileage + locale.group, data = t.large)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2333727  -142051    61371   176012  1078985 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   1.017e+05  3.253e+05   0.313  0.75653    
## Student.Daily.Variable        2.844e+00  4.004e-01   7.101 3.93e-08 ***
## S.E..Student..Daily.Variable  3.888e+00  5.156e+00   0.754  0.45617    
## Annual.Mileage                1.054e+00  3.131e-01   3.365  0.00195 ** 
## locale.group                 -4.011e+03  1.022e+05  -0.039  0.96893    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 555000 on 33 degrees of freedom
## Multiple R-squared:  0.9766, Adjusted R-squared:  0.9738 
## F-statistic: 344.9 on 4 and 33 DF,  p-value: < 2.2e-16
coefficients(ml)  # unrounded estimates; summary() prints only 4 significant digits
##                  (Intercept)       Student.Daily.Variable 
##                101702.964884                     2.843489 
## S.E..Student..Daily.Variable               Annual.Mileage 
##                     3.888092                     1.053524 
##                 locale.group 
##                 -4011.178926
# Small districts: same response, but Bus.Daily.Variable takes the place of
# the S.E. student term used in the large-district model. The formula stays
# inline so the "Call:" line printed by summary() shows the full model.
ms <- lm(
  Expenditures ~ Student.Daily.Variable + Bus.Daily.Variable +
    Annual.Mileage + locale.group,
  data = t.small
)
summary(ms)
## 
## Call:
## lm(formula = Expenditures ~ Student.Daily.Variable + Bus.Daily.Variable + 
##     Annual.Mileage + locale.group, data = t.small)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -252119  -17880   -1615   24344  104841 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -4.278e+04  1.793e+04  -2.386 0.019868 *  
## Student.Daily.Variable  1.041e+00  3.147e-01   3.307 0.001520 ** 
## Bus.Daily.Variable      1.482e+02  2.353e+01   6.297 2.68e-08 ***
## Annual.Mileage          1.144e+00  1.519e-01   7.532 1.68e-10 ***
## locale.group            1.974e+04  4.919e+03   4.012 0.000154 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 45890 on 67 degrees of freedom
## Multiple R-squared:  0.9089, Adjusted R-squared:  0.9034 
## F-statistic: 167.1 on 4 and 67 DF,  p-value: < 2.2e-16
coefficients(ms)  # unrounded estimates; summary() prints only 4 significant digits
##            (Intercept) Student.Daily.Variable     Bus.Daily.Variable 
##          -42776.837934               1.040585             148.193499 
##         Annual.Mileage           locale.group 
##               1.143695           19736.172261
# Plot all main effects in each model.
# allEffects() comes from the 'effects' package, which is not attached
# anywhere in the visible script. The namespace-qualified call loads the
# package (registering its plot method) without needing a library() line.
plot(effects::allEffects(ml))

plot(effects::allEffects(ms))

Plot the residuals: first for the large districts group, then for the small districts group.

# Residual vectors for both fitted models. They are assigned here because
# nothing earlier in the visible script defines them.
residuals_ml <- residuals(ml)
residuals_ms <- residuals(ms)

# Large districts: normal Q-Q plot of the model residuals
qqnorm(residuals_ml, main = "Q-Q Plot of Residuals Large Districts")
qqline(residuals_ml, col = "red")  # Add a Q-Q line with a red color

# Small districts: normal Q-Q plot of the model residuals
qqnorm(residuals_ms, main = "Q-Q Plot of Residuals Small Districts and Charters")
qqline(residuals_ms, col = "red")  # Add a Q-Q line with a red color

Histograms of the residuals for both groups.

# Residual vectors for both fitted models. They are assigned here because
# nothing earlier in the visible script defines them. The variable names are
# kept because hist() builds its default title and x-axis label from them.
residuals_ml <- residuals(ml)
residuals_ms <- residuals(ms)

hist(residuals_ml)

hist(residuals_ms)

Create the residuals vs. fitted values plots. First, the large group:

# Large
# Residuals vs. fitted values for the large-district model. A patternless
# scatter around the zero line is what a well-specified linear fit produces.
fitted_values <- fitted(ml)
# Assigned here because nothing earlier in the visible script defines it.
residuals_ml <- residuals(ml)

plot(fitted_values, residuals_ml,
     main = "Residuals vs. Fitted Values",
     xlab = "Fitted Values",
     ylab = "Residuals",
     pch = 20,   # Use solid circles for points
     col = "blue")  # Color the points blue
abline(h = 0, col = "red")  # Add a horizontal line at y = 0

Next, the small group:

# Small
# Residuals vs. fitted values for the small-district model. Note that
# fitted_values is overwritten here with the small-model fits.
fitted_values <- fitted(ms)
# Assigned here because nothing earlier in the visible script defines it.
residuals_ms <- residuals(ms)

plot(fitted_values, residuals_ms,
     main = "Residuals vs. Fitted Values",
     xlab = "Fitted Values",
     ylab = "Residuals",
     pch = 20,   # Use solid circles for points
     col = "blue")  # Color the points blue
abline(h = 0, col = "red")  # Add a horizontal line at y = 0