library(readxl)
library(moments)
library(car)
## Loading required package: carData
# Load the school cost workbook into a data frame.
# NOTE(review): the path points into the current user's Downloads folder, so
# the script only runs on a machine that has the workbook at that location.
SchoolData <- read_excel("~/Downloads/SchoolCostData.xlsx")

# Exploratory scatterplots of cost per student against each candidate
# predictor. Axis labels are passed explicitly: by default car::scatterplot()
# labels the axes with the deparsed argument expressions
# (e.g. "SchoolData$Attendance"), which is hard to read on the plot.
scatterplot(
  SchoolData$Attendance, SchoolData$CostPerStudent,
  xlab = "Attendance", ylab = "Cost per student"
)
scatterplot(
  SchoolData$TeacherSalary, SchoolData$CostPerStudent,
  xlab = "Teacher salary", ylab = "Cost per student"
)
scatterplot(
  SchoolData$`Student:Teacher`, SchoolData$CostPerStudent,
  xlab = "Student:Teacher ratio", ylab = "Cost per student"
)
# Skewness checks using the D'Agostino skewness test (agostino.test) on the
# response and on two of the predictors; Attendance is not tested here.
# The `##` lines after each call are the recorded console output of that call.

# Response variable: the recorded run reports skew = 1.4771, p = 0.0002828.
agostino.test(SchoolData$CostPerStudent)
##
## D'Agostino skewness test
##
## data: SchoolData$CostPerStudent
## skew = 1.4771, z = 3.6306, p-value = 0.0002828
## alternative hypothesis: data have a skewness
# Teacher salary: the recorded run reports skew = -0.91951, p = 0.01165.
agostino.test(SchoolData$TeacherSalary)
##
## D'Agostino skewness test
##
## data: SchoolData$TeacherSalary
## skew = -0.91951, z = -2.52250, p-value = 0.01165
## alternative hypothesis: data have a skewness
# Student:Teacher ratio: the recorded run reports skew = -0.3670, p = 0.2724.
agostino.test(SchoolData$`Student:Teacher`)
##
## D'Agostino skewness test
##
## data: SchoolData$`Student:Teacher`
## skew = -0.3670, z = -1.0975, p-value = 0.2724
## alternative hypothesis: data have a skewness
# Work on a copy so the raw SchoolData stays available unchanged.
Schooldata2 <- SchoolData

# Log-transform the response after subtracting a fixed offset.
# NOTE(review): the offset 39 is not explained anywhere in this script;
# presumably it was chosen to sit just below the smallest observed cost so the
# log is defined and the right skew is reduced -- confirm against the data.
Schooldata2$CostPerStudent <- log(Schooldata2$CostPerStudent - 39)

# Standardise each predictor (centre on the mean, divide by the sd).
# scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes; as.numeric() reduces it to a plain numeric vector
# so the data frame keeps ordinary columns instead of matrix columns.
Schooldata2$Attendance <- as.numeric(scale(Schooldata2$Attendance))
Schooldata2$TeacherSalary <- as.numeric(scale(Schooldata2$TeacherSalary))
Schooldata2$`Student:Teacher` <- as.numeric(
  scale(Schooldata2$`Student:Teacher`)
)
# Multiple regression ----
# Regress CostPerStudent (as stored in Schooldata2) on Attendance,
# TeacherSalary and Student:Teacher. The `*` operator expands to all main
# effects, every two-way interaction and the three-way interaction.
schooldatamodel <- lm(
  CostPerStudent ~ Attendance * TeacherSalary * `Student:Teacher`,
  data = Schooldata2
)

# Coefficient table, residual summary and overall fit statistics.
summary(schooldatamodel)
##
## Call:
## lm(formula = CostPerStudent ~ Attendance * TeacherSalary * `Student:Teacher`,
## data = Schooldata2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.69573 -0.13789 0.03014 0.14038 0.45058
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 3.29135 0.04039 81.492
## Attendance -0.09159 0.04043 -2.265
## TeacherSalary 0.09499 0.04715 2.014
## `Student:Teacher` -0.24670 0.04649 -5.307
## Attendance:TeacherSalary -0.10757 0.04941 -2.177
## Attendance:`Student:Teacher` -0.10390 0.05056 -2.055
## TeacherSalary:`Student:Teacher` 0.11052 0.07881 1.402
## Attendance:TeacherSalary:`Student:Teacher` 0.16039 0.07487 2.142
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## Attendance 0.0296 *
## TeacherSalary 0.0515 .
## `Student:Teacher` 5.87e-06 ***
## Attendance:TeacherSalary 0.0361 *
## Attendance:`Student:Teacher` 0.0472 *
## TeacherSalary:`Student:Teacher` 0.1694
## Attendance:TeacherSalary:`Student:Teacher` 0.0390 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2315 on 36 degrees of freedom
## Multiple R-squared: 0.7167, Adjusted R-squared: 0.6616
## F-statistic: 13.01 on 7 and 36 DF, p-value: 3.293e-08
# Variance inflation factors for every term of the fitted model (car::vif).
# NOTE(review): the model contains interaction terms; recent car releases
# offer vif(type = "predictor") for such models -- confirm whether that is the
# more appropriate diagnostic here.
vif(schooldatamodel)
## Attendance
## 1.311481
## TeacherSalary
## 1.784017
## `Student:Teacher`
## 1.734081
## Attendance:TeacherSalary
## 1.971039
## Attendance:`Student:Teacher`
## 1.760076
## TeacherSalary:`Student:Teacher`
## 6.619948
## Attendance:TeacherSalary:`Student:Teacher`
## 6.741970
# Regression diagnostics for the fitted model.
# Base R diagnostic plots for the lm object.
plot(schooldatamodel)
# Leverage plots from the car package.
leveragePlots(schooldatamodel)
# Cook's distance for each observation, printed in full. In the recorded output
# below, observation 8 (about 2.44) and observation 4 (about 0.66) are by far
# the largest values.
# NOTE(review): these two rows look influential and may deserve a closer look
# before the coefficients are interpreted -- confirm.
cooks.distance(schooldatamodel)
## 1 2 3 4 5
## 1.906689e-03 4.584973e-03 1.561093e-03 6.557567e-01 3.092236e-04
## 6 7 8 9 10
## 1.531921e-02 1.457870e-01 2.435371e+00 1.133828e-02 5.519364e-03
## 11 12 13 14 15
## 8.658819e-03 7.408102e-03 3.569801e-03 3.538708e-03 3.020763e-03
## 16 17 18 19 20
## 1.016985e-02 1.680151e-03 2.021636e-02 1.431638e-04 4.009323e-03
## 21 22 23 24 25
## 2.169667e-06 1.270729e-02 9.912779e-05 7.272903e-02 2.669402e-02
## 26 27 28 29 30
## 9.094207e-03 1.980376e-02 5.573886e-03 1.477007e-02 4.843576e-03
## 31 32 33 34 35
## 6.557172e-02 8.957224e-03 5.730005e-02 4.533767e-04 2.024288e-02
## 36 37 38 39 40
## 1.008194e-01 4.772764e-02 3.637162e-03 1.409608e-03 5.290870e-02
## 41 42 43 44
## 1.006951e-02 7.143922e-04 1.362527e-04 1.101726e-01
Summary: I fitted a multiple regression model to study how attendance, teacher salary, and the student-teacher ratio affect the cost per student. From the results above, I conclude that the student-teacher ratio has a significant effect on the cost per student.