library(readxl)
library(moments)
library(car)
## Loading required package: carData
# Load the school cost data from an Excel workbook (readxl::read_excel).
# NOTE(review): the path points at a per-user Downloads folder, so the script
# only runs where that exact file exists -- consider a project-relative path.
SchoolData <- read_excel("~/Downloads/SchoolCostData.xlsx")
# Exploratory scatterplots (car::scatterplot) of each candidate predictor
# (x-axis) against cost per student (y-axis).
# The default axis labels are taken from the argument expressions as written.
scatterplot(SchoolData$Attendance,SchoolData$CostPerStudent)

scatterplot(SchoolData$TeacherSalary,SchoolData$CostPerStudent)

# `Student:Teacher` needs backticks because the column name contains a colon.
scatterplot(SchoolData$`Student:Teacher`,SchoolData$CostPerStudent)

# D'Agostino skewness tests (moments::agostino.test) on the response and on
# two of the predictors. The console output of each call is kept beneath it.

# Response: skew = 1.48 with p < 0.001, i.e. a significant right skew.
agostino.test(SchoolData$CostPerStudent)
## 
##  D'Agostino skewness test
## 
## data:  SchoolData$CostPerStudent
## skew = 1.4771, z = 3.6306, p-value = 0.0002828
## alternative hypothesis: data have a skewness
# Teacher salary: skew = -0.92 with p = 0.012, i.e. a moderate left skew.
agostino.test(SchoolData$TeacherSalary)
## 
##  D'Agostino skewness test
## 
## data:  SchoolData$TeacherSalary
## skew = -0.91951, z = -2.52250, p-value = 0.01165
## alternative hypothesis: data have a skewness
# Student:teacher ratio: skew = -0.37 with p = 0.27, no significant skew.
agostino.test(SchoolData$`Student:Teacher`)
## 
##  D'Agostino skewness test
## 
## data:  SchoolData$`Student:Teacher`
## skew = -0.3670, z = -1.0975, p-value = 0.2724
## alternative hypothesis: data have a skewness
# Work on a copy so the raw SchoolData stays available unchanged.
Schooldata2 <- SchoolData

# Log-transform the response after subtracting a fixed offset of 39.
# NOTE(review): the offset is presumably chosen to sit just below the smallest
# CostPerStudent value -- confirm, because log() returns NaN (or -Inf) for any
# observation with CostPerStudent <= 39.
Schooldata2$CostPerStudent <- log(Schooldata2$CostPerStudent - 39)

# Standardise the predictors to mean 0 and standard deviation 1.
# scale() returns a one-column matrix carrying "scaled:center"/"scaled:scale"
# attributes; as.numeric() flattens it so every column is stored as a plain
# numeric vector instead of a matrix column inside the data frame. The values
# (and therefore the fitted model) are unchanged.
Schooldata2$Attendance <- as.numeric(scale(Schooldata2$Attendance))
Schooldata2$TeacherSalary <- as.numeric(scale(Schooldata2$TeacherSalary))
Schooldata2$`Student:Teacher` <- as.numeric(scale(Schooldata2$`Student:Teacher`))


## Multiple Regression
# Regress the transformed cost per student on the full factorial of the three
# standardised predictors: all main effects, every two-way interaction, and
# the three-way interaction (the `*` operator expands to all of these).
schooldatamodel <- lm(
  formula = CostPerStudent ~ Attendance * TeacherSalary * `Student:Teacher`,
  data = Schooldata2
)
# Print the coefficient table and overall fit statistics.
summary(schooldatamodel)
## 
## Call:
## lm(formula = CostPerStudent ~ Attendance * TeacherSalary * `Student:Teacher`, 
##     data = Schooldata2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69573 -0.13789  0.03014  0.14038  0.45058 
## 
## Coefficients:
##                                            Estimate Std. Error t value
## (Intercept)                                 3.29135    0.04039  81.492
## Attendance                                 -0.09159    0.04043  -2.265
## TeacherSalary                               0.09499    0.04715   2.014
## `Student:Teacher`                          -0.24670    0.04649  -5.307
## Attendance:TeacherSalary                   -0.10757    0.04941  -2.177
## Attendance:`Student:Teacher`               -0.10390    0.05056  -2.055
## TeacherSalary:`Student:Teacher`             0.11052    0.07881   1.402
## Attendance:TeacherSalary:`Student:Teacher`  0.16039    0.07487   2.142
##                                            Pr(>|t|)    
## (Intercept)                                 < 2e-16 ***
## Attendance                                   0.0296 *  
## TeacherSalary                                0.0515 .  
## `Student:Teacher`                          5.87e-06 ***
## Attendance:TeacherSalary                     0.0361 *  
## Attendance:`Student:Teacher`                 0.0472 *  
## TeacherSalary:`Student:Teacher`              0.1694    
## Attendance:TeacherSalary:`Student:Teacher`   0.0390 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2315 on 36 degrees of freedom
## Multiple R-squared:  0.7167, Adjusted R-squared:  0.6616 
## F-statistic: 13.01 on 7 and 36 DF,  p-value: 3.293e-08
# Variance inflation factors (car::vif) for every term of the fitted model.
# In the printed output the two largest values (about 6.6 and 6.7) belong to
# the TeacherSalary:`Student:Teacher` and the three-way interaction terms.
vif(schooldatamodel)
##                                 Attendance 
##                                   1.311481 
##                              TeacherSalary 
##                                   1.784017 
##                          `Student:Teacher` 
##                                   1.734081 
##                   Attendance:TeacherSalary 
##                                   1.971039 
##               Attendance:`Student:Teacher` 
##                                   1.760076 
##            TeacherSalary:`Student:Teacher` 
##                                   6.619948 
## Attendance:TeacherSalary:`Student:Teacher` 
##                                   6.741970
# Standard diagnostic plots produced by the plot method for the fitted lm.
plot(schooldatamodel)

# Leverage plots (car::leveragePlots) for the model terms.
leveragePlots(schooldatamodel)

# Cook's distance for each of the 44 observations, auto-printed.
# In the printed output observation 8 (about 2.44) and observation 4
# (about 0.66) are the largest values by a wide margin.
cooks.distance(schooldatamodel)
##            1            2            3            4            5 
## 1.906689e-03 4.584973e-03 1.561093e-03 6.557567e-01 3.092236e-04 
##            6            7            8            9           10 
## 1.531921e-02 1.457870e-01 2.435371e+00 1.133828e-02 5.519364e-03 
##           11           12           13           14           15 
## 8.658819e-03 7.408102e-03 3.569801e-03 3.538708e-03 3.020763e-03 
##           16           17           18           19           20 
## 1.016985e-02 1.680151e-03 2.021636e-02 1.431638e-04 4.009323e-03 
##           21           22           23           24           25 
## 2.169667e-06 1.270729e-02 9.912779e-05 7.272903e-02 2.669402e-02 
##           26           27           28           29           30 
## 9.094207e-03 1.980376e-02 5.573886e-03 1.477007e-02 4.843576e-03 
##           31           32           33           34           35 
## 6.557172e-02 8.957224e-03 5.730005e-02 4.533767e-04 2.024288e-02 
##           36           37           38           39           40 
## 1.008194e-01 4.772764e-02 3.637162e-03 1.409608e-03 5.290870e-02 
##           41           42           43           44 
## 1.006951e-02 7.143922e-04 1.362527e-04 1.101726e-01

Summary: I fit a multiple regression model to study how attendance, teacher salary, and student-teacher ratio affect the cost per student. From the results above, I conclude that the student-teacher ratio significantly impacts the cost per student.