Number of Attributes: 33 (Independent: 30 / Dependent: G1, G2, G3). Number of Instances: 649. Attribute Characteristics: Numeric / Binary / Categorical. Associated Tasks: Regression / Classification.
- Import libraries
suppressMessages(library(car))
suppressMessages(library(relaimpo))
suppressMessages(library(glmnet))
suppressMessages(library(corrplot))
suppressMessages(library(rpart))
suppressMessages(library(neuralnet))
suppressMessages(library(caret))
suppressMessages(library(e1071))
suppressMessages(library(DT))
- Load Dataset
### Load the data
# The first column of the CSV is used as the row names of the data frame.
st <- read.csv(file = "./input/student.csv", row.names = 1)
- Split the data
# Randomly label every row of `st` as training ("tr") or test ("te").
# The seed is fixed so the split is reproducible across runs.
set.seed(555)
# `prob` holds sampling weights (8:2); each row is labelled independently, so
# the resulting split is only approximately 80% / 20%.
flag <- sample(c("tr", "te"), size = nrow(st), replace = TRUE, prob = c(8, 2))
train <- st[flag == "tr", ]
test <- st[flag == "te", ]
# Interactive table of the full dataset, followed by per-column summaries.
datatable(st, style="bootstrap", class="table-condensed", options = list(dom = 'tp',scrollX = TRUE))
summary(st)
## school sex age address
## Min. :0.0000 Min. :0.0000 Min. :15.00 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:16.00 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :17.00 Median :1.0000
## Mean :0.3482 Mean :0.4099 Mean :16.74 Mean :0.6965
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:18.00 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :22.00 Max. :1.0000
## famsize Pstatus Medu Fedu
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:2.000 1st Qu.:1.000
## Median :0.0000 Median :1.0000 Median :2.000 Median :2.000
## Mean :0.2958 Mean :0.8767 Mean :2.515 Mean :2.307
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :1.0000 Max. :1.0000 Max. :4.000 Max. :4.000
## traveltime studytime failures schoolsup
## Min. :1.000 Min. :1.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.000 Median :2.000 Median :0.0000 Median :0.0000
## Mean :1.569 Mean :1.931 Mean :0.2219 Mean :0.1048
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :4.000 Max. :4.000 Max. :3.0000 Max. :1.0000
## famsup paid activities nursery
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :1.0000 Median :0.00000 Median :0.0000 Median :1.0000
## Mean :0.6133 Mean :0.06009 Mean :0.4854 Mean :0.8028
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## higher internet romantic famrel
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :1.000
## 1st Qu.:1.0000 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:4.000
## Median :1.0000 Median :1.0000 Median :0.0000 Median :4.000
## Mean :0.8937 Mean :0.7673 Mean :0.3683 Mean :3.931
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:5.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :5.000
## freetime goout Dalc Walc
## Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.00
## 1st Qu.:3.00 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.00
## Median :3.00 Median :3.000 Median :1.000 Median :2.00
## Mean :3.18 Mean :3.185 Mean :1.502 Mean :2.28
## 3rd Qu.:4.00 3rd Qu.:4.000 3rd Qu.:2.000 3rd Qu.:3.00
## Max. :5.00 Max. :5.000 Max. :5.000 Max. :5.00
## health absences Mjob_health Mjob_other
## Min. :1.000 Min. : 0.000 Min. :0.00000 Min. :0.0000
## 1st Qu.:2.000 1st Qu.: 0.000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :4.000 Median : 2.000 Median :0.00000 Median :0.0000
## Mean :3.536 Mean : 3.659 Mean :0.07396 Mean :0.3975
## 3rd Qu.:5.000 3rd Qu.: 6.000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :5.000 Max. :32.000 Max. :1.00000 Max. :1.0000
## Mjob_services Mjob_teacher Fjob_health Fjob_other
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :1.0000
## Mean :0.2096 Mean :0.1109 Mean :0.03544 Mean :0.5655
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## Fjob_services Fjob_teacher reason_home reason_other
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.2789 Mean :0.05547 Mean :0.2296 Mean :0.1109
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## reason_reputation guardian_mother guardian_other score
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. : 1.333
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:10.000
## Median :0.0000 Median :1.0000 Median :0.00000 Median :11.667
## Mean :0.2203 Mean :0.7011 Mean :0.06317 Mean :11.625
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:13.333
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :18.667
# Compact overview of the data frame: one line per column.
str(object = st)
## 'data.frame': 649 obs. of 40 variables:
## $ school : int 0 0 0 0 0 0 0 0 0 0 ...
## $ sex : int 0 0 0 0 0 1 1 0 1 1 ...
## $ age : int 18 17 15 15 16 16 16 17 15 15 ...
## $ address : int 1 1 1 1 1 1 1 1 1 1 ...
## $ famsize : int 0 0 1 0 0 1 1 0 1 0 ...
## $ Pstatus : int 0 1 1 1 1 1 1 0 0 1 ...
## $ Medu : int 4 1 1 4 3 4 2 4 3 3 ...
## $ Fedu : int 4 1 1 2 3 3 2 4 2 4 ...
## $ traveltime : int 2 1 1 1 1 1 1 2 1 1 ...
## $ studytime : int 2 2 2 3 2 2 2 2 2 2 ...
## $ failures : int 0 0 0 0 0 0 0 0 0 0 ...
## $ schoolsup : int 1 0 1 0 0 0 0 1 0 0 ...
## $ famsup : int 0 1 0 1 1 1 0 1 1 1 ...
## $ paid : int 0 0 0 0 0 0 0 0 0 0 ...
## $ activities : int 0 0 0 1 0 1 0 0 0 1 ...
## $ nursery : int 1 0 1 1 1 1 1 1 1 1 ...
## $ higher : int 1 1 1 1 1 1 1 1 1 1 ...
## $ internet : int 0 1 1 1 0 1 1 0 1 1 ...
## $ romantic : int 0 0 0 1 0 0 0 0 0 0 ...
## $ famrel : int 4 5 4 3 4 5 4 4 4 5 ...
## $ freetime : int 3 3 3 2 3 4 4 1 2 5 ...
## $ goout : int 4 3 2 2 2 2 4 4 2 1 ...
## $ Dalc : int 1 1 2 1 1 1 1 1 1 1 ...
## $ Walc : int 1 1 3 1 2 2 1 1 1 1 ...
## $ health : int 3 3 3 5 5 5 3 1 1 5 ...
## $ absences : int 4 2 6 0 0 6 0 2 0 0 ...
## $ Mjob_health : int 0 0 0 1 0 0 0 0 0 0 ...
## $ Mjob_other : int 0 0 0 0 1 0 1 1 0 1 ...
## $ Mjob_services : int 0 0 0 0 0 1 0 0 1 0 ...
## $ Mjob_teacher : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Fjob_health : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Fjob_other : int 0 1 1 0 1 1 1 0 1 1 ...
## $ Fjob_services : int 0 0 0 1 0 0 0 0 0 0 ...
## $ Fjob_teacher : int 1 0 0 0 0 0 0 1 0 0 ...
## $ reason_home : int 0 0 0 1 1 0 1 1 1 1 ...
## $ reason_other : int 0 0 1 0 0 0 0 0 0 0 ...
## $ reason_reputation: int 0 0 0 0 0 1 0 0 0 0 ...
## $ guardian_mother : int 1 0 1 1 0 1 1 1 1 1 ...
## $ guardian_other : int 0 0 0 0 0 0 0 0 0 0 ...
## $ score : num 7.33 10.33 12.33 14 12.33 ...
- outlier 제거
# Remove outliers in `absences` and `score` from the training data.
boxplot(train) # distribution of every column before the clean-up
b1 <- boxplot(train$absences)
b2 <- boxplot(train$score)
# stats[5] is the upper whisker and stats[1] the lower whisker of each boxplot.
out1 <- which(train$absences > b1$stats[5]) # rows with unusually many absences
out2 <- which(train$score < b2$stats[1])    # rows with unusually low scores
# Keep rows by position instead of using negative indices: when no outlier is
# found, `train[-integer(0), ]` would select zero rows and empty the data.
train <- train[setdiff(seq_len(nrow(train)), c(out1, out2)), ]
boxplot(train) # distribution of every column after the clean-up
# Pairwise correlation matrix of all training columns.
# NOTE(review): the variable name `c` shadows base::c as a value; it is kept
# because code outside this section may refer to it.
c <- cor(x = train)
# Draw the matrix as circles, with variables ordered by hierarchical clustering.
corrplot(corr = c, method = "circle", order = "hclust")
# Simple linear regression: score explained by Dalc alone.
m <- lm(formula = score ~ Dalc, data = train)
# Scatter plot of the training data with the fitted regression line on top.
with(train, plot(Dalc, score, xlab = "Dalc", ylab = "score", pch = 19))
abline(reg = m, col = "red")
summary(m)
##
## Call:
## lm(formula = score ~ Dalc, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.4107 -1.8762 -0.0774 1.7276 6.9918
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.6120 0.2202 57.272 < 2e-16 ***
## Dalc -0.5346 0.1256 -4.257 2.48e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.572 on 492 degrees of freedom
## Multiple R-squared: 0.03552, Adjusted R-squared: 0.03356
## F-statistic: 18.12 on 1 and 492 DF, p-value: 2.483e-05
## Prediction on the test dataset
# Predict test-set scores with the simple regression model.
pred <- predict(object = m, newdata = test)
# Actual vs. predicted scores; the dashed red line marks perfect prediction.
plot(
  x = test$score, y = pred,
  xlab = "Actual score", ylab = "Predicted score",
  pch = 19, xlim = c(5, 18), ylim = c(5, 18)
)
lines(x = test$score, y = test$score, col = "red", lty = 2)
# Result table: actual value, predicted value and residual (actual - predicted).
result <- data.frame(
  actual = test$score,
  pred = pred,
  resid = test$score - pred
)
result[seq_len(10), ]
## actual pred resid
## 2 10.33333 12.07741 -1.74407183
## 7 12.66667 12.07741 0.58926150
## 10 12.33333 12.07741 0.25592817
## 11 14.00000 12.07741 1.92259483
## 15 14.33333 12.07741 2.25592817
## 20 12.00000 12.07741 -0.07740517
## 28 11.00000 11.54282 -0.54281743
## 47 12.66667 12.07741 0.58926150
## 59 13.66667 12.07741 1.58926150
## 66 15.66667 12.07741 3.58926150
## Measure predictive performance
# Test MSE: mean of the squared residuals stored in the result table.
MSE <- with(result, mean(resid^2))
MSE
## [1] 7.26915
# lm_result: compute training and test MSE for a fitted model and plot the
# actual vs. predicted scores for both datasets side by side.
#
# Arguments:
#   m     - fitted model; its `fitted.values` are compared with `train$score`
#           and `predict(m, newdata = test)` is compared with `test$score`.
#   train - data frame with a `score` column, the data `m` was fitted on.
#   test  - data frame with a `score` column and the predictors `m` needs.
#
# Side effects: draws two scatter plots and prints the MSEs rounded to two
# decimals. The previous graphics layout is restored when the function exits.
#
# Returns: named numeric vector c(MSE_tr, MSE_te) with the unrounded values.
lm_result <- function(m, train, test){
  # Switch to a 1 x 2 layout. par() returns the previous settings, and
  # on.exit() restores them even if an error occurs below. (Previously the
  # reset was placed after return() and therefore never executed.)
  old_par <- par(mfrow = c(1, 2))
  on.exit(par(old_par), add = TRUE)

  # Training set: actual vs. fitted values.
  MSE_tr <- mean((train$score - m$fitted.values)^2)
  plot(train$score, m$fitted.values, pch = 19,
       xlab = "Actual score", ylab = "Predicted score",
       xlim = c(5, 18), ylim = c(5, 18),
       main = paste0("Training MSE = ", round(MSE_tr, 2)))
  lines(train$score, train$score, col = "red", lty = 2) # perfect-prediction line

  # Test set: actual vs. predicted values.
  pred <- predict(m, newdata = test)
  MSE_te <- mean((test$score - pred)^2)
  plot(test$score, pred, pch = 19,
       xlab = "Actual score", ylab = "Predicted score",
       xlim = c(5, 18), ylim = c(5, 18),
       main = paste0("Test MSE = ", round(MSE_te, 2)))
  lines(test$score, test$score, col = "red", lty = 2) # perfect-prediction line

  MSE <- c(MSE_tr = MSE_tr, MSE_te = MSE_te)
  print(round(MSE, 2))
  MSE
}
### Fit a multiple linear regression model
# Regress score on every other column of the training data.
m <- lm(formula = score ~ ., data = train)
summary(m)
##
## Call:
## lm(formula = score ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.5217 -1.3320 -0.0949 1.2366 6.6196
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.393389 1.815848 4.072 5.51e-05 ***
## school -0.710581 0.235919 -3.012 0.002740 **
## sex -0.643999 0.226488 -2.843 0.004665 **
## age 0.161976 0.091202 1.776 0.076399 .
## address 0.284466 0.233532 1.218 0.223818
## famsize 0.078951 0.217709 0.363 0.717039
## Pstatus 0.277959 0.314945 0.883 0.377939
## Medu 0.218263 0.134044 1.628 0.104156
## Fedu 0.001694 0.124744 0.014 0.989174
## traveltime 0.038551 0.141203 0.273 0.784963
## studytime 0.274661 0.120463 2.280 0.023068 *
## failures -1.149888 0.174476 -6.591 1.22e-10 ***
## schoolsup -1.153984 0.324786 -3.553 0.000421 ***
## famsup -0.210148 0.204113 -1.030 0.303762
## paid -0.463470 0.424199 -1.093 0.275160
## activities 0.162197 0.201561 0.805 0.421413
## nursery -0.172622 0.239559 -0.721 0.471536
## higher 1.479837 0.344927 4.290 2.18e-05 ***
## internet 0.083312 0.244341 0.341 0.733288
## romantic -0.406032 0.205598 -1.975 0.048886 *
## famrel 0.234623 0.104326 2.249 0.024995 *
## freetime 0.061513 0.102871 0.598 0.550163
## goout -0.154058 0.094984 -1.622 0.105511
## Dalc -0.001842 0.140606 -0.013 0.989553
## Walc -0.123880 0.106589 -1.162 0.245755
## health -0.114383 0.069683 -1.641 0.101391
## absences -0.081225 0.028573 -2.843 0.004674 **
## Mjob_health 0.733595 0.460703 1.592 0.112005
## Mjob_other 0.101480 0.275725 0.368 0.713010
## Mjob_services 0.054489 0.331363 0.164 0.869458
## Mjob_teacher 0.605537 0.463051 1.308 0.191633
## Fjob_health -0.166462 0.639204 -0.260 0.794658
## Fjob_other 0.251658 0.395911 0.636 0.525330
## Fjob_services 0.021904 0.414190 0.053 0.957847
## Fjob_teacher 1.017871 0.590807 1.723 0.085597 .
## reason_home 0.424029 0.266360 1.592 0.112094
## reason_other 0.166663 0.329066 0.506 0.612771
## reason_reputation 0.622347 0.255886 2.432 0.015396 *
## guardian_mother -0.435901 0.236828 -1.841 0.066336 .
## guardian_other -0.704618 0.484698 -1.454 0.146713
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.067 on 454 degrees of freedom
## Multiple R-squared: 0.4253, Adjusted R-squared: 0.3759
## F-statistic: 8.614 on 39 and 454 DF, p-value: < 2.2e-16
## Pass the model and the training/test datasets to lm_result() to compute the
## MSEs and draw the plots.
# The MSEs appear twice below: lm_result() prints the rounded values and the
# unassigned return value is auto-printed as well.
lm_result(m = m, train = train, test = test)
## MSE_tr MSE_te
## 3.93 6.91
## MSE_tr MSE_te
## 3.926032 6.907939
- 다중공선성
### Check for multicollinearity
# Variance inflation factor (VIF) of every predictor in the multiple regression.
vif(mod = m)
## school sex age address
## 1.460686 1.417245 1.377845 1.343418
## famsize Pstatus Medu Fedu
## 1.127313 1.188589 2.639189 2.222232
## traveltime studytime failures schoolsup
## 1.337530 1.196624 1.307672 1.168310
## famsup paid activities nursery
## 1.118441 1.149843 1.172178 1.095182
## higher internet romantic famrel
## 1.273725 1.297974 1.142523 1.134523
## freetime goout Dalc Walc
## 1.274660 1.391473 1.941243 2.195803
## health absences Mjob_health Mjob_other
## 1.155724 1.190499 1.909317 2.100425
## Mjob_services Mjob_teacher Fjob_health Fjob_other
## 2.226067 2.093833 1.835422 4.485031
## Fjob_services Fjob_teacher reason_home reason_other
## 4.028827 2.158158 1.324591 1.219163
## reason_reputation guardian_mother guardian_other
## 1.415861 1.350352 1.501213
###### Regression with variable selection
### forward, backward, stepwise regression
# Full model: every available predictor.
full_m <- lm(formula = score ~ ., data = train)
# Null model: intercept only, no predictors.
null_m <- lm(formula = score ~ 1, data = train)
# Baseline performance of the full model.
r1 <- lm_result(m = full_m, train = train, test = test)
## MSE_tr MSE_te
## 3.93 6.91
# Forward selection: start from the null model and add one predictor at a
# time, searching between the null and the full model; trace = 1 prints each step.
forw_m <- step(
  object = null_m,
  scope = list(lower = null_m, upper = full_m),
  direction = "forward",
  trace = 1
)
## Start: AIC=951.23
## score ~ 1
##
## Df Sum of Sq RSS AIC
## + failures 1 618.60 2756.1 853.20
## + higher 1 419.64 2955.0 887.63
## + Medu 1 290.20 3084.5 908.81
## + studytime 1 196.07 3178.6 923.66
## + school 1 192.96 3181.7 924.14
## + reason_reputation 1 152.85 3221.8 930.33
## + Fedu 1 144.90 3229.8 931.55
## + absences 1 140.50 3234.2 932.22
## + Walc 1 132.00 3242.7 933.52
## + Dalc 1 119.88 3254.8 935.36
## + Mjob_teacher 1 98.81 3275.8 938.55
## + Fjob_teacher 1 92.88 3281.8 939.44
## + sex 1 70.39 3304.3 942.81
## + address 1 66.20 3308.5 943.44
## + traveltime 1 65.29 3309.4 943.58
## + internet 1 61.09 3313.6 944.20
## + goout 1 60.46 3314.2 944.30
## + Mjob_health 1 60.27 3314.4 944.33
## + guardian_other 1 53.70 3321.0 945.30
## + famrel 1 49.16 3325.5 945.98
## + health 1 40.92 3333.7 947.20
## + age 1 39.50 3335.2 947.41
## + romantic 1 36.06 3338.6 947.92
## + freetime 1 32.75 3341.9 948.41
## + schoolsup 1 24.67 3350.0 949.60
## + reason_other 1 22.16 3352.5 949.97
## + activities 1 20.66 3354.0 950.19
## + reason_home 1 18.32 3356.3 950.54
## + paid 1 14.58 3360.1 951.09
## <none> 3374.7 951.23
## + Fjob_health 1 5.98 3368.7 952.35
## + Fjob_services 1 5.44 3369.2 952.43
## + Mjob_other 1 3.22 3371.4 952.76
## + Fjob_other 1 1.73 3372.9 952.97
## + guardian_mother 1 1.67 3373.0 952.98
## + nursery 1 0.70 3374.0 953.13
## + famsize 1 0.51 3374.1 953.15
## + Pstatus 1 0.41 3374.2 953.17
## + Mjob_services 1 0.40 3374.3 953.17
## + famsup 1 0.07 3374.6 953.22
##
## Step: AIC=853.2
## score ~ failures
##
## Df Sum of Sq RSS AIC
## + higher 1 198.424 2557.6 818.29
## + Medu 1 179.678 2576.4 821.89
## + school 1 133.826 2622.2 830.61
## + studytime 1 120.574 2635.5 833.10
## + reason_reputation 1 97.175 2658.9 837.46
## + Walc 1 94.387 2661.7 837.98
## + Dalc 1 73.592 2682.5 841.83
## + absences 1 69.834 2686.2 842.52
## + Fedu 1 68.224 2687.8 842.81
## + Mjob_health 1 58.664 2697.4 844.57
## + Fjob_teacher 1 55.349 2700.7 845.17
## + address 1 54.700 2701.4 845.29
## + Mjob_teacher 1 54.081 2702.0 845.41
## + goout 1 44.527 2711.5 847.15
## + internet 1 43.649 2712.4 847.31
## + traveltime 1 41.081 2715.0 847.78
## + sex 1 31.629 2724.4 849.49
## + famrel 1 25.661 2730.4 850.58
## + health 1 24.789 2731.3 850.73
## + reason_other 1 22.220 2733.8 851.20
## + romantic 1 19.987 2736.1 851.60
## + schoolsup 1 19.197 2736.9 851.74
## + activities 1 15.673 2740.4 852.38
## <none> 2756.1 853.20
## + freetime 1 7.040 2749.0 853.93
## + guardian_mother 1 6.094 2750.0 854.10
## + guardian_other 1 5.133 2750.9 854.28
## + reason_home 1 4.739 2751.3 854.35
## + paid 1 3.523 2752.5 854.57
## + Fjob_services 1 3.291 2752.8 854.61
## + Mjob_other 1 2.875 2753.2 854.68
## + Pstatus 1 1.278 2754.8 854.97
## + Fjob_health 1 0.999 2755.1 855.02
## + nursery 1 0.361 2755.7 855.13
## + age 1 0.311 2755.8 855.14
## + famsize 1 0.284 2755.8 855.15
## + Mjob_services 1 0.179 2755.9 855.16
## + Fjob_other 1 0.060 2756.0 855.19
## + famsup 1 0.049 2756.0 855.19
##
## Step: AIC=818.29
## score ~ failures + higher
##
## Df Sum of Sq RSS AIC
## + Medu 1 119.318 2438.3 796.69
## + school 1 94.848 2462.8 801.62
## + studytime 1 79.563 2478.1 804.67
## + reason_reputation 1 77.243 2480.4 805.14
## + Walc 1 72.573 2485.1 806.07
## + absences 1 59.755 2497.9 808.61
## + Dalc 1 55.022 2502.6 809.54
## + address 1 46.575 2511.1 811.21
## + Fjob_teacher 1 43.506 2514.1 811.81
## + Mjob_teacher 1 38.532 2519.1 812.79
## + Mjob_health 1 38.367 2519.3 812.82
## + Fedu 1 36.261 2521.4 813.23
## + schoolsup 1 31.290 2526.3 814.20
## + traveltime 1 29.927 2527.7 814.47
## + internet 1 29.490 2528.2 814.56
## + goout 1 29.142 2528.5 814.62
## + health 1 24.064 2533.6 815.62
## + famrel 1 22.774 2534.9 815.87
## + sex 1 22.270 2535.4 815.97
## + activities 1 14.360 2543.3 817.50
## + reason_other 1 13.336 2544.3 817.70
## + romantic 1 11.024 2546.6 818.15
## <none> 2557.6 818.29
## + guardian_mother 1 9.727 2547.9 818.40
## + paid 1 9.243 2548.4 818.50
## + age 1 8.864 2548.8 818.57
## + Fjob_services 1 3.992 2553.6 819.51
## + reason_home 1 3.843 2553.8 819.54
## + Mjob_other 1 3.755 2553.9 819.56
## + famsup 1 2.294 2555.3 819.84
## + Pstatus 1 1.820 2555.8 819.93
## + guardian_other 1 1.016 2556.6 820.09
## + nursery 1 0.871 2556.8 820.12
## + freetime 1 0.761 2556.9 820.14
## + Mjob_services 1 0.676 2557.0 820.16
## + Fjob_health 1 0.505 2557.1 820.19
## + Fjob_other 1 0.023 2557.6 820.28
## + famsize 1 0.004 2557.6 820.29
##
## Step: AIC=796.69
## score ~ failures + higher + Medu
##
## Df Sum of Sq RSS AIC
## + Walc 1 69.903 2368.4 784.32
## + studytime 1 68.803 2369.5 784.55
## + reason_reputation 1 60.990 2377.3 786.17
## + Dalc 1 57.930 2380.4 786.81
## + absences 1 55.241 2383.1 787.36
## + school 1 53.477 2384.8 787.73
## + sex 1 39.612 2398.7 790.59
## + goout 1 29.704 2408.6 792.63
## + schoolsup 1 25.790 2412.5 793.43
## + health 1 25.350 2413.0 793.52
## + address 1 23.216 2415.1 793.96
## + paid 1 22.350 2416.0 794.14
## + Fjob_teacher 1 17.574 2420.7 795.11
## + famrel 1 17.428 2420.9 795.14
## + guardian_mother 1 14.334 2424.0 795.77
## + age 1 13.144 2425.2 796.01
## + Mjob_health 1 12.266 2426.1 796.19
## <none> 2438.3 796.69
## + reason_other 1 9.493 2428.8 796.76
## + romantic 1 9.313 2429.0 796.79
## + famsup 1 7.603 2430.7 797.14
## + internet 1 7.320 2431.0 797.20
## + traveltime 1 6.339 2432.0 797.40
## + activities 1 5.179 2433.1 797.63
## + Mjob_services 1 4.669 2433.7 797.74
## + Pstatus 1 4.603 2433.7 797.75
## + nursery 1 3.986 2434.3 797.88
## + Mjob_teacher 1 3.915 2434.4 797.89
## + Fjob_services 1 3.873 2434.4 797.90
## + Fjob_other 1 1.950 2436.4 798.29
## + reason_home 1 1.595 2436.7 798.36
## + Fedu 1 1.141 2437.2 798.45
## + Fjob_health 1 0.991 2437.3 798.48
## + freetime 1 0.971 2437.3 798.49
## + guardian_other 1 0.498 2437.8 798.58
## + Mjob_other 1 0.384 2437.9 798.61
## + famsize 1 0.081 2438.2 798.67
##
## Step: AIC=784.32
## score ~ failures + higher + Medu + Walc
##
## Df Sum of Sq RSS AIC
## + reason_reputation 1 58.122 2310.3 774.04
## + school 1 56.690 2311.7 774.35
## + studytime 1 48.031 2320.4 776.19
## + absences 1 38.683 2329.7 778.18
## + schoolsup 1 34.176 2334.2 779.14
## + address 1 22.587 2345.8 781.58
## + paid 1 19.424 2349.0 782.25
## + age 1 16.668 2351.8 782.83
## + health 1 16.157 2352.3 782.93
## + Mjob_health 1 14.809 2353.6 783.22
## + sex 1 14.218 2354.2 783.34
## + guardian_mother 1 12.234 2356.2 783.76
## + internet 1 12.028 2356.4 783.80
## + famrel 1 11.702 2356.7 783.87
## + Fjob_teacher 1 11.694 2356.7 783.87
## + famsup 1 10.267 2358.2 784.17
## + Pstatus 1 9.656 2358.8 784.30
## + romantic 1 9.586 2358.8 784.31
## <none> 2368.4 784.32
## + Dalc 1 9.064 2359.3 784.42
## + reason_other 1 8.025 2360.4 784.64
## + nursery 1 8.005 2360.4 784.64
## + goout 1 6.989 2361.4 784.86
## + activities 1 5.489 2362.9 785.17
## + Mjob_teacher 1 5.277 2363.1 785.21
## + traveltime 1 5.010 2363.4 785.27
## + Mjob_services 1 2.683 2365.7 785.76
## + reason_home 1 2.652 2365.8 785.76
## + Fjob_other 1 1.524 2366.9 786.00
## + Fjob_health 1 1.503 2366.9 786.00
## + Fjob_services 1 1.146 2367.3 786.08
## + guardian_other 1 1.144 2367.3 786.08
## + famsize 1 0.822 2367.6 786.14
## + freetime 1 0.162 2368.2 786.28
## + Fedu 1 0.147 2368.3 786.29
## + Mjob_other 1 0.000 2368.4 786.32
##
## Step: AIC=774.04
## score ~ failures + higher + Medu + Walc + reason_reputation
##
## Df Sum of Sq RSS AIC
## + school 1 43.400 2266.9 766.67
## + absences 1 41.718 2268.6 767.04
## + schoolsup 1 36.570 2273.7 768.16
## + studytime 1 33.380 2276.9 768.85
## + address 1 24.561 2285.7 770.76
## + reason_home 1 17.705 2292.6 772.24
## + age 1 14.330 2296.0 772.97
## + Fjob_teacher 1 13.796 2296.5 773.08
## + sex 1 12.821 2297.5 773.29
## + paid 1 12.440 2297.8 773.37
## + Pstatus 1 11.677 2298.6 773.54
## + famrel 1 10.726 2299.6 773.74
## + health 1 10.427 2299.9 773.81
## + Mjob_health 1 10.419 2299.9 773.81
## + famsup 1 10.356 2299.9 773.82
## <none> 2310.3 774.04
## + guardian_mother 1 9.188 2301.1 774.07
## + nursery 1 9.070 2301.2 774.10
## + Mjob_teacher 1 8.764 2301.5 774.16
## + internet 1 7.379 2302.9 774.46
## + romantic 1 6.913 2303.4 774.56
## + goout 1 6.606 2303.7 774.63
## + Dalc 1 5.128 2305.2 774.94
## + traveltime 1 4.223 2306.1 775.14
## + Mjob_services 1 3.654 2306.6 775.26
## + Fjob_health 1 2.607 2307.7 775.48
## + reason_other 1 1.909 2308.4 775.63
## + guardian_other 1 1.610 2308.7 775.70
## + activities 1 1.009 2309.3 775.83
## + Fjob_services 1 0.902 2309.4 775.85
## + Fjob_other 1 0.883 2309.4 775.85
## + famsize 1 0.384 2309.9 775.96
## + Fedu 1 0.224 2310.1 775.99
## + freetime 1 0.213 2310.1 776.00
## + Mjob_other 1 0.036 2310.3 776.03
##
## Step: AIC=766.67
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school
##
## Df Sum of Sq RSS AIC
## + absences 1 51.477 2215.4 757.33
## + schoolsup 1 49.468 2217.4 757.77
## + studytime 1 29.300 2237.6 762.25
## + sex 1 16.814 2250.1 765.00
## + age 1 15.971 2250.9 765.18
## + health 1 14.690 2252.2 765.46
## + paid 1 13.079 2253.8 765.81
## + Fjob_teacher 1 12.614 2254.3 765.92
## + Pstatus 1 11.309 2255.6 766.20
## + guardian_mother 1 10.942 2255.9 766.28
## + reason_home 1 10.475 2256.4 766.39
## + Mjob_health 1 10.473 2256.4 766.39
## + famsup 1 10.234 2256.7 766.44
## + Mjob_teacher 1 9.685 2257.2 766.56
## <none> 2266.9 766.67
## + address 1 8.939 2257.9 766.72
## + famrel 1 8.677 2258.2 766.78
## + nursery 1 7.421 2259.5 767.05
## + romantic 1 6.458 2260.4 767.26
## + Mjob_services 1 6.302 2260.6 767.30
## + goout 1 4.817 2262.1 767.62
## + Dalc 1 4.389 2262.5 767.72
## + Fjob_health 1 3.459 2263.4 767.92
## + internet 1 2.899 2264.0 768.04
## + guardian_other 1 2.786 2264.1 768.07
## + traveltime 1 0.615 2266.3 768.54
## + famsize 1 0.577 2266.3 768.55
## + activities 1 0.479 2266.4 768.57
## + Fedu 1 0.236 2266.7 768.62
## + Fjob_services 1 0.225 2266.7 768.62
## + reason_other 1 0.191 2266.7 768.63
## + Mjob_other 1 0.189 2266.7 768.63
## + freetime 1 0.115 2266.8 768.65
## + Fjob_other 1 0.106 2266.8 768.65
##
## Step: AIC=757.33
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences
##
## Df Sum of Sq RSS AIC
## + schoolsup 1 57.743 2157.7 746.28
## + studytime 1 26.607 2188.8 753.36
## + age 1 25.633 2189.8 753.58
## + sex 1 18.163 2197.2 755.26
## + health 1 16.186 2199.2 755.70
## + paid 1 14.313 2201.1 756.12
## + reason_home 1 13.486 2201.9 756.31
## + address 1 10.728 2204.7 756.93
## + guardian_mother 1 9.938 2205.5 757.11
## + Fjob_teacher 1 9.875 2205.5 757.12
## + Mjob_teacher 1 9.601 2205.8 757.18
## <none> 2215.4 757.33
## + famsup 1 8.044 2207.4 757.53
## + Pstatus 1 7.754 2207.7 757.59
## + nursery 1 7.468 2207.9 757.66
## + famrel 1 6.082 2209.3 757.97
## + Mjob_health 1 5.207 2210.2 758.16
## + Mjob_services 1 4.483 2210.9 758.33
## + romantic 1 3.780 2211.6 758.48
## + Fjob_health 1 3.133 2212.3 758.63
## + goout 1 3.010 2212.4 758.65
## + internet 1 2.963 2212.4 758.67
## + Dalc 1 2.266 2213.2 758.82
## + Fjob_services 1 0.761 2214.7 759.16
## + guardian_other 1 0.721 2214.7 759.17
## + traveltime 1 0.477 2214.9 759.22
## + activities 1 0.465 2214.9 759.22
## + reason_other 1 0.463 2214.9 759.22
## + Fjob_other 1 0.366 2215.1 759.24
## + freetime 1 0.298 2215.1 759.26
## + famsize 1 0.121 2215.3 759.30
## + Mjob_other 1 0.008 2215.4 759.32
## + Fedu 1 0.004 2215.4 759.33
##
## Step: AIC=746.28
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup
##
## Df Sum of Sq RSS AIC
## + studytime 1 28.8503 2128.8 741.63
## + sex 1 28.1894 2129.5 741.78
## + age 1 17.3077 2140.4 744.30
## + Fjob_teacher 1 14.3411 2143.3 744.99
## + health 1 14.1435 2143.5 745.03
## + reason_home 1 13.7433 2143.9 745.12
## + paid 1 12.8005 2144.9 745.34
## + guardian_mother 1 11.6364 2146.0 745.61
## + address 1 9.1436 2148.5 746.18
## <none> 2157.7 746.28
## + romantic 1 6.5686 2151.1 746.77
## + Mjob_teacher 1 6.4273 2151.2 746.81
## + Pstatus 1 6.3082 2151.4 746.83
## + famrel 1 5.9579 2151.7 746.91
## + famsup 1 5.7214 2151.9 746.97
## + nursery 1 5.4965 2152.2 747.02
## + Mjob_health 1 3.1144 2154.6 747.57
## + Mjob_services 1 3.0778 2154.6 747.57
## + goout 1 3.0045 2154.7 747.59
## + Fjob_services 1 1.7852 2155.9 747.87
## + internet 1 1.4121 2156.3 747.96
## + guardian_other 1 1.2597 2156.4 747.99
## + Fjob_health 1 1.0677 2156.6 748.04
## + Dalc 1 1.0227 2156.7 748.05
## + traveltime 1 0.4903 2157.2 748.17
## + freetime 1 0.2086 2157.5 748.23
## + Fedu 1 0.1643 2157.5 748.24
## + Fjob_other 1 0.1239 2157.6 748.25
## + reason_other 1 0.0613 2157.6 748.27
## + Mjob_other 1 0.0322 2157.6 748.27
## + famsize 1 0.0186 2157.7 748.28
## + activities 1 0.0087 2157.7 748.28
##
## Step: AIC=741.63
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime
##
## Df Sum of Sq RSS AIC
## + sex 1 19.5842 2109.2 739.06
## + Fjob_teacher 1 17.1497 2111.7 739.63
## + age 1 13.9279 2114.9 740.39
## + health 1 12.6332 2116.2 740.69
## + reason_home 1 12.6104 2116.2 740.69
## + paid 1 12.0612 2116.8 740.82
## + guardian_mother 1 9.9126 2118.9 741.32
## <none> 2128.8 741.63
## + romantic 1 8.5316 2120.3 741.65
## + address 1 8.4414 2120.4 741.67
## + famsup 1 8.2137 2120.6 741.72
## + famrel 1 7.0683 2121.8 741.99
## + Mjob_teacher 1 6.5884 2122.2 742.10
## + Pstatus 1 6.2860 2122.5 742.17
## + nursery 1 5.3345 2123.5 742.39
## + Mjob_health 1 4.3144 2124.5 742.63
## + Mjob_services 1 3.5252 2125.3 742.81
## + goout 1 3.1539 2125.7 742.90
## + Fjob_health 1 2.3100 2126.5 743.09
## + Fjob_services 1 2.0532 2126.8 743.15
## + guardian_other 1 1.5663 2127.3 743.27
## + internet 1 1.4943 2127.3 743.28
## + Dalc 1 1.3747 2127.4 743.31
## + traveltime 1 0.5234 2128.3 743.51
## + Fjob_other 1 0.2601 2128.6 743.57
## + Fedu 1 0.2467 2128.6 743.57
## + freetime 1 0.0767 2128.8 743.61
## + famsize 1 0.0285 2128.8 743.62
## + Mjob_other 1 0.0133 2128.8 743.63
## + activities 1 0.0002 2128.8 743.63
## + reason_other 1 0.0002 2128.8 743.63
##
## Step: AIC=739.06
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex
##
## Df Sum of Sq RSS AIC
## + Fjob_teacher 1 19.0555 2090.2 736.58
## + reason_home 1 13.9159 2095.3 737.79
## + romantic 1 13.0390 2096.2 738.00
## + famsup 1 11.0070 2098.2 738.48
## + guardian_mother 1 10.5977 2098.6 738.58
## + age 1 10.4541 2098.8 738.61
## + health 1 10.4139 2098.8 738.62
## + paid 1 10.1726 2099.1 738.68
## + famrel 1 10.1237 2099.1 738.69
## <none> 2109.2 739.06
## + address 1 7.7607 2101.5 739.24
## + Mjob_teacher 1 7.7548 2101.5 739.24
## + Pstatus 1 7.3145 2101.9 739.35
## + nursery 1 6.0498 2103.2 739.65
## + goout 1 4.6447 2104.6 739.98
## + Mjob_health 1 3.5620 2105.7 740.23
## + Mjob_services 1 3.3999 2105.8 740.27
## + Fjob_health 1 2.2907 2106.9 740.53
## + guardian_other 1 2.1035 2107.1 740.57
## + Fjob_services 1 1.8303 2107.4 740.64
## + internet 1 1.3386 2107.9 740.75
## + Dalc 1 0.4812 2108.8 740.95
## + Fedu 1 0.2477 2109.0 741.01
## + famsize 1 0.2133 2109.0 741.01
## + activities 1 0.2003 2109.0 741.02
## + traveltime 1 0.1480 2109.1 741.03
## + Fjob_other 1 0.1130 2109.1 741.04
## + freetime 1 0.0570 2109.2 741.05
## + Mjob_other 1 0.0444 2109.2 741.05
## + reason_other 1 0.0125 2109.2 741.06
##
## Step: AIC=736.58
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher
##
## Df Sum of Sq RSS AIC
## + romantic 1 14.6785 2075.5 735.10
## + reason_home 1 14.0021 2076.2 735.26
## + famrel 1 13.1083 2077.1 735.47
## + health 1 10.3436 2079.8 736.13
## + famsup 1 10.3127 2079.9 736.14
## + guardian_mother 1 10.1923 2080.0 736.17
## + age 1 9.4170 2080.8 736.35
## <none> 2090.2 736.58
## + paid 1 8.3641 2081.8 736.60
## + address 1 8.3452 2081.8 736.60
## + Pstatus 1 7.8920 2082.3 736.71
## + Mjob_health 1 5.0862 2085.1 737.38
## + nursery 1 4.7057 2085.5 737.47
## + goout 1 4.3702 2085.8 737.55
## + Mjob_teacher 1 4.2012 2086.0 737.59
## + Mjob_services 1 2.8948 2087.3 737.90
## + internet 1 2.3251 2087.9 738.03
## + Fjob_other 1 2.2171 2088.0 738.06
## + guardian_other 1 1.7967 2088.4 738.16
## + Fjob_health 1 1.1479 2089.0 738.31
## + Dalc 1 0.6265 2089.6 738.43
## + Fjob_services 1 0.5105 2089.7 738.46
## + famsize 1 0.4882 2089.7 738.47
## + Fedu 1 0.4056 2089.8 738.49
## + traveltime 1 0.2846 2089.9 738.51
## + activities 1 0.1923 2090.0 738.54
## + Mjob_other 1 0.1149 2090.1 738.55
## + freetime 1 0.0775 2090.1 738.56
## + reason_other 1 0.0088 2090.2 738.58
##
## Step: AIC=735.1
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher +
## romantic
##
## Df Sum of Sq RSS AIC
## + reason_home 1 14.2869 2061.2 733.69
## + famrel 1 13.4225 2062.1 733.89
## + age 1 12.0141 2063.5 734.23
## + guardian_mother 1 11.0379 2064.5 734.47
## + famsup 1 10.8894 2064.6 734.50
## + health 1 10.0211 2065.5 734.71
## <none> 2075.5 735.10
## + paid 1 8.3136 2067.2 735.12
## + address 1 7.6853 2067.8 735.27
## + Pstatus 1 7.0522 2068.4 735.42
## + Mjob_health 1 5.7327 2069.8 735.73
## + goout 1 4.5240 2071.0 736.02
## + nursery 1 4.2793 2071.2 736.08
## + Mjob_services 1 3.9233 2071.6 736.16
## + Mjob_teacher 1 3.8692 2071.6 736.18
## + internet 1 3.6742 2071.8 736.22
## + Fjob_other 1 2.7096 2072.8 736.45
## + Fjob_health 1 1.0717 2074.4 736.84
## + Fedu 1 1.0347 2074.5 736.85
## + guardian_other 1 0.6873 2074.8 736.94
## + Fjob_services 1 0.6496 2074.9 736.95
## + activities 1 0.4184 2075.1 737.00
## + famsize 1 0.3716 2075.1 737.01
## + Mjob_other 1 0.3138 2075.2 737.02
## + traveltime 1 0.2257 2075.3 737.05
## + Dalc 1 0.2082 2075.3 737.05
## + freetime 1 0.1939 2075.3 737.05
## + reason_other 1 0.0270 2075.5 737.09
##
## Step: AIC=733.69
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher +
## romantic + reason_home
##
## Df Sum of Sq RSS AIC
## + famrel 1 14.3539 2046.9 732.24
## + famsup 1 11.7799 2049.4 732.86
## + guardian_mother 1 10.5960 2050.6 733.14
## + age 1 9.9816 2051.2 733.29
## + paid 1 8.8730 2052.3 733.56
## + health 1 8.4483 2052.8 733.66
## <none> 2061.2 733.69
## + Pstatus 1 7.3310 2053.9 733.93
## + Mjob_health 1 5.6158 2055.6 734.34
## + address 1 5.4782 2055.7 734.37
## + goout 1 4.2813 2056.9 734.66
## + nursery 1 4.1599 2057.1 734.69
## + Mjob_teacher 1 4.0911 2057.1 734.71
## + internet 1 3.3757 2057.8 734.88
## + Mjob_services 1 3.1868 2058.0 734.92
## + Fjob_other 1 1.9419 2059.3 735.22
## + reason_other 1 1.1747 2060.0 735.41
## + activities 1 1.1295 2060.1 735.42
## + guardian_other 1 0.9968 2060.2 735.45
## + Fedu 1 0.8600 2060.4 735.48
## + Fjob_health 1 0.7946 2060.4 735.50
## + freetime 1 0.4890 2060.7 735.57
## + Dalc 1 0.4070 2060.8 735.59
## + Fjob_services 1 0.3894 2060.8 735.59
## + famsize 1 0.2225 2061.0 735.63
## + Mjob_other 1 0.0283 2061.2 735.68
## + traveltime 1 0.0005 2061.2 735.69
##
## Step: AIC=732.24
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher +
## romantic + reason_home + famrel
##
## Df Sum of Sq RSS AIC
## + famsup 1 12.3145 2034.5 731.25
## + health 1 11.7472 2035.1 731.39
## + guardian_mother 1 11.1215 2035.7 731.54
## + paid 1 10.0591 2036.8 731.80
## + age 1 9.4192 2037.5 731.96
## <none> 2046.9 732.24
## + goout 1 6.7259 2040.1 732.61
## + address 1 6.5218 2040.3 732.66
## + Pstatus 1 6.4926 2040.4 732.67
## + Mjob_health 1 6.4454 2040.4 732.68
## + Mjob_teacher 1 4.5811 2042.3 733.13
## + nursery 1 4.3304 2042.5 733.19
## + Mjob_services 1 3.9104 2043.0 733.29
## + internet 1 2.3491 2044.5 733.67
## + Fjob_other 1 1.8040 2045.1 733.80
## + Fedu 1 1.1237 2045.7 733.96
## + Fjob_health 1 0.8621 2046.0 734.03
## + reason_other 1 0.8439 2046.0 734.03
## + activities 1 0.8246 2046.0 734.04
## + guardian_other 1 0.8034 2046.1 734.04
## + Fjob_services 1 0.4423 2046.4 734.13
## + famsize 1 0.3110 2046.5 734.16
## + Dalc 1 0.1399 2046.7 734.20
## + freetime 1 0.1132 2046.8 734.21
## + Mjob_other 1 0.0442 2046.8 734.22
## + traveltime 1 0.0039 2046.9 734.23
##
## Step: AIC=731.25
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher +
## romantic + reason_home + famrel + famsup
##
## Df Sum of Sq RSS AIC
## + guardian_mother 1 11.1767 2023.4 730.53
## + health 1 10.8910 2023.7 730.60
## + paid 1 8.6312 2025.9 731.15
## <none> 2034.5 731.25
## + age 1 7.3106 2027.2 731.48
## + Mjob_health 1 6.9503 2027.6 731.56
## + Pstatus 1 6.6350 2027.9 731.64
## + goout 1 6.5660 2028.0 731.66
## + address 1 5.6111 2028.9 731.89
## + Mjob_teacher 1 4.1664 2030.4 732.24
## + nursery 1 3.8916 2030.7 732.31
## + Mjob_services 1 2.7363 2031.8 732.59
## + internet 1 2.6368 2031.9 732.61
## + Fjob_other 1 1.1411 2033.4 732.98
## + activities 1 0.7390 2033.8 733.07
## + reason_other 1 0.6419 2033.9 733.10
## + guardian_other 1 0.6183 2033.9 733.10
## + Fjob_health 1 0.5150 2034.0 733.13
## + Fedu 1 0.4401 2034.1 733.15
## + Fjob_services 1 0.2716 2034.3 733.19
## + famsize 1 0.2230 2034.3 733.20
## + freetime 1 0.1711 2034.4 733.21
## + Dalc 1 0.0722 2034.5 733.24
## + Mjob_other 1 0.0297 2034.5 733.25
## + traveltime 1 0.0135 2034.5 733.25
##
## Step: AIC=730.53
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher +
## romantic + reason_home + famrel + famsup + guardian_mother
##
## Df Sum of Sq RSS AIC
## + health 1 10.5323 2012.8 729.95
## <none> 2023.4 730.53
## + paid 1 7.6551 2015.7 730.66
## + age 1 6.9369 2016.4 730.84
## + goout 1 6.3907 2017.0 730.97
## + Mjob_health 1 5.8808 2017.5 731.10
## + Mjob_teacher 1 5.8235 2017.5 731.11
## + address 1 5.6207 2017.8 731.16
## + guardian_other 1 5.1616 2018.2 731.27
## + Pstatus 1 4.8357 2018.5 731.35
## + nursery 1 2.9034 2020.5 731.82
## + Mjob_services 1 2.3059 2021.1 731.97
## + internet 1 1.9397 2021.4 732.06
## + Fjob_other 1 1.7413 2021.6 732.11
## + Fedu 1 1.4197 2022.0 732.19
## + activities 1 1.0732 2022.3 732.27
## + Fjob_health 1 0.8116 2022.6 732.33
## + reason_other 1 0.6990 2022.7 732.36
## + Fjob_services 1 0.5334 2022.8 732.40
## + Dalc 1 0.3401 2023.0 732.45
## + famsize 1 0.3110 2023.1 732.46
## + freetime 1 0.2754 2023.1 732.47
## + Mjob_other 1 0.1894 2023.2 732.49
## + traveltime 1 0.0249 2023.3 732.53
##
## Step: AIC=729.95
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher +
## romantic + reason_home + famrel + famsup + guardian_mother +
## health
##
## Df Sum of Sq RSS AIC
## <none> 2012.8 729.95
## + goout 1 7.5555 2005.3 730.10
## + Mjob_health 1 6.8778 2006.0 730.26
## + age 1 6.8180 2006.0 730.28
## + paid 1 6.4735 2006.4 730.36
## + Mjob_teacher 1 6.3522 2006.5 730.39
## + address 1 5.4019 2007.4 730.63
## + guardian_other 1 4.9789 2007.9 730.73
## + Pstatus 1 4.6350 2008.2 730.82
## + nursery 1 2.7357 2010.1 731.28
## + Fjob_other 1 2.0797 2010.8 731.44
## + Mjob_services 1 1.9080 2010.9 731.49
## + internet 1 1.4709 2011.4 731.59
## + activities 1 1.2775 2011.6 731.64
## + Fjob_services 1 1.0361 2011.8 731.70
## + Fedu 1 1.0342 2011.8 731.70
## + reason_other 1 0.5371 2012.3 731.82
## + freetime 1 0.5098 2012.3 731.83
## + Dalc 1 0.4343 2012.4 731.85
## + Fjob_health 1 0.2683 2012.6 731.89
## + Mjob_other 1 0.2294 2012.6 731.90
## + famsize 1 0.1945 2012.7 731.91
## + traveltime 1 0.1272 2012.7 731.92
# Print the analysis-of-variance table for forw_m; in the output below each
# term is listed in the order the forward search added it to the model.
anova(forw_m)
## Analysis of Variance Table
##
## Response: score
## Df Sum Sq Mean Sq F value Pr(>F)
## failures 1 618.60 618.60 146.2868 < 2.2e-16 ***
## higher 1 198.42 198.42 46.9236 2.290e-11 ***
## Medu 1 119.32 119.32 28.2165 1.669e-07 ***
## Walc 1 69.90 69.90 16.5308 5.600e-05 ***
## reason_reputation 1 58.12 58.12 13.7447 0.0002340 ***
## school 1 43.40 43.40 10.2633 0.0014481 **
## absences 1 51.48 51.48 12.1734 0.0005298 ***
## schoolsup 1 57.74 57.74 13.6551 0.0002452 ***
## studytime 1 28.85 28.85 6.8226 0.0092857 **
## sex 1 19.58 19.58 4.6313 0.0318958 *
## Fjob_teacher 1 19.06 19.06 4.5063 0.0342860 *
## romantic 1 14.68 14.68 3.4712 0.0630620 .
## reason_home 1 14.29 14.29 3.3786 0.0666712 .
## famrel 1 14.35 14.35 3.3944 0.0660389 .
## famsup 1 12.31 12.31 2.9122 0.0885662 .
## guardian_mother 1 11.18 11.18 2.6431 0.1046636
## health 1 10.53 10.53 2.4907 0.1151855
## Residuals 476 2012.84 4.23
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Score the forward-selected model on the train and test splits with
# lm_result(); the printed output reports MSE_tr and MSE_te.
# NOTE(review): the name r2 suggests R-squared, yet the values printed are
# MSEs -- confirm what lm_result() actually returns (it is defined elsewhere).
r2 <- lm_result(forw_m, train, test)
## MSE_tr MSE_te
## 4.07 6.58
# Backward stepwise selection by AIC: begin at full_m and remove one term per
# step, with null_m and full_m as the lower/upper bounds of the search.
# trace = 1 prints the candidate table for every step.
back_m <- step(
  object = full_m,
  scope = list(upper = full_m, lower = null_m),
  direction = "backward",
  trace = 1
)
## Start: AIC=755.61
## score ~ school + sex + age + address + famsize + Pstatus + Medu +
## Fedu + traveltime + studytime + failures + schoolsup + famsup +
## paid + activities + nursery + higher + internet + romantic +
## famrel + freetime + goout + Dalc + Walc + health + absences +
## Mjob_health + Mjob_other + Mjob_services + Mjob_teacher +
## Fjob_health + Fjob_other + Fjob_services + Fjob_teacher +
## reason_home + reason_other + reason_reputation + guardian_mother +
## guardian_other
##
## Df Sum of Sq RSS AIC
## - Dalc 1 0.001 1939.5 753.61
## - Fedu 1 0.001 1939.5 753.61
## - Fjob_services 1 0.012 1939.5 753.61
## - Mjob_services 1 0.116 1939.6 753.64
## - Fjob_health 1 0.290 1939.8 753.68
## - traveltime 1 0.318 1939.8 753.69
## - internet 1 0.497 1940.0 753.74
## - famsize 1 0.562 1940.0 753.75
## - Mjob_other 1 0.579 1940.0 753.76
## - reason_other 1 1.096 1940.6 753.89
## - freetime 1 1.527 1941.0 754.00
## - Fjob_other 1 1.726 1941.2 754.05
## - nursery 1 2.218 1941.7 754.17
## - activities 1 2.766 1942.2 754.31
## - Pstatus 1 3.327 1942.8 754.46
## - famsup 1 4.528 1944.0 754.76
## - paid 1 5.100 1944.6 754.91
## - Walc 1 5.770 1945.2 755.08
## - address 1 6.339 1945.8 755.22
## - Mjob_teacher 1 7.305 1946.8 755.47
## <none> 1939.5 755.61
## - guardian_other 1 9.028 1948.5 755.90
## - reason_home 1 10.826 1950.3 756.36
## - Mjob_health 1 10.832 1950.3 756.36
## - goout 1 11.238 1950.7 756.46
## - Medu 1 11.326 1950.8 756.49
## - health 1 11.510 1951.0 756.53
## - Fjob_teacher 1 12.680 1952.1 756.83
## - age 1 13.475 1952.9 757.03
## - guardian_mother 1 14.472 1953.9 757.28
## - romantic 1 16.661 1956.1 757.83
## - famrel 1 21.606 1961.1 759.08
## - studytime 1 22.208 1961.7 759.23
## - reason_reputation 1 25.269 1964.7 760.00
## - absences 1 34.523 1974.0 762.32
## - sex 1 34.539 1974.0 762.33
## - school 1 38.755 1978.2 763.38
## - schoolsup 1 53.930 1993.4 767.16
## - higher 1 78.632 2018.1 773.24
## - failures 1 185.551 2125.0 798.74
##
## Step: AIC=753.61
## score ~ school + sex + age + address + famsize + Pstatus + Medu +
## Fedu + traveltime + studytime + failures + schoolsup + famsup +
## paid + activities + nursery + higher + internet + romantic +
## famrel + freetime + goout + Walc + health + absences + Mjob_health +
## Mjob_other + Mjob_services + Mjob_teacher + Fjob_health +
## Fjob_other + Fjob_services + Fjob_teacher + reason_home +
## reason_other + reason_reputation + guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - Fedu 1 0.001 1939.5 751.61
## - Fjob_services 1 0.012 1939.5 751.61
## - Mjob_services 1 0.115 1939.6 751.64
## - Fjob_health 1 0.294 1939.8 751.68
## - traveltime 1 0.318 1939.8 751.69
## - internet 1 0.497 1940.0 751.74
## - famsize 1 0.561 1940.0 751.75
## - Mjob_other 1 0.578 1940.0 751.76
## - reason_other 1 1.102 1940.6 751.89
## - freetime 1 1.534 1941.0 752.00
## - Fjob_other 1 1.728 1941.2 752.05
## - nursery 1 2.222 1941.7 752.17
## - activities 1 2.766 1942.2 752.31
## - Pstatus 1 3.336 1942.8 752.46
## - famsup 1 4.537 1944.0 752.76
## - paid 1 5.137 1944.6 752.92
## - address 1 6.355 1945.8 753.23
## - Mjob_teacher 1 7.308 1946.8 753.47
## <none> 1939.5 753.61
## - Walc 1 8.360 1947.8 753.73
## - guardian_other 1 9.050 1948.5 753.91
## - reason_home 1 10.885 1950.3 754.37
## - Mjob_health 1 10.952 1950.4 754.39
## - goout 1 11.255 1950.7 754.47
## - Medu 1 11.461 1950.9 754.52
## - health 1 11.511 1951.0 754.53
## - Fjob_teacher 1 12.689 1952.2 754.83
## - age 1 13.494 1953.0 755.03
## - guardian_mother 1 14.561 1954.0 755.30
## - romantic 1 16.744 1956.2 755.86
## - famrel 1 21.821 1961.3 757.14
## - studytime 1 22.210 1961.7 757.23
## - reason_reputation 1 25.326 1964.8 758.02
## - absences 1 34.678 1974.1 760.36
## - sex 1 34.905 1974.4 760.42
## - school 1 38.765 1978.2 761.39
## - schoolsup 1 54.182 1993.6 765.22
## - higher 1 78.635 2018.1 771.24
## - failures 1 185.550 2125.0 796.74
##
## Step: AIC=751.61
## score ~ school + sex + age + address + famsize + Pstatus + Medu +
## traveltime + studytime + failures + schoolsup + famsup +
## paid + activities + nursery + higher + internet + romantic +
## famrel + freetime + goout + Walc + health + absences + Mjob_health +
## Mjob_other + Mjob_services + Mjob_teacher + Fjob_health +
## Fjob_other + Fjob_services + Fjob_teacher + reason_home +
## reason_other + reason_reputation + guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - Fjob_services 1 0.013 1939.5 749.61
## - Mjob_services 1 0.114 1939.6 749.64
## - Fjob_health 1 0.297 1939.8 749.69
## - traveltime 1 0.317 1939.8 749.69
## - internet 1 0.496 1940.0 749.74
## - famsize 1 0.560 1940.0 749.75
## - Mjob_other 1 0.579 1940.0 749.76
## - reason_other 1 1.101 1940.6 749.89
## - freetime 1 1.552 1941.0 750.00
## - Fjob_other 1 1.730 1941.2 750.05
## - nursery 1 2.221 1941.7 750.17
## - activities 1 2.766 1942.2 750.31
## - Pstatus 1 3.339 1942.8 750.46
## - famsup 1 4.554 1944.0 750.77
## - paid 1 5.191 1944.7 750.93
## - address 1 6.354 1945.8 751.23
## - Mjob_teacher 1 7.326 1946.8 751.47
## <none> 1939.5 751.61
## - Walc 1 8.425 1947.9 751.75
## - guardian_other 1 9.053 1948.5 751.91
## - reason_home 1 10.884 1950.3 752.37
## - Mjob_health 1 10.976 1950.4 752.40
## - goout 1 11.255 1950.7 752.47
## - health 1 11.514 1951.0 752.53
## - age 1 13.496 1953.0 753.03
## - Fjob_teacher 1 13.693 1953.2 753.08
## - guardian_mother 1 14.853 1954.3 753.38
## - Medu 1 15.523 1955.0 753.55
## - romantic 1 16.980 1956.4 753.92
## - famrel 1 21.832 1961.3 755.14
## - studytime 1 22.218 1961.7 755.24
## - reason_reputation 1 25.328 1964.8 756.02
## - absences 1 34.966 1974.4 758.44
## - sex 1 34.977 1974.4 758.44
## - school 1 38.764 1978.2 759.39
## - schoolsup 1 54.190 1993.7 763.22
## - higher 1 79.095 2018.6 769.36
## - failures 1 186.855 2126.3 795.05
##
## Step: AIC=749.61
## score ~ school + sex + age + address + famsize + Pstatus + Medu +
## traveltime + studytime + failures + schoolsup + famsup +
## paid + activities + nursery + higher + internet + romantic +
## famrel + freetime + goout + Walc + health + absences + Mjob_health +
## Mjob_other + Mjob_services + Mjob_teacher + Fjob_health +
## Fjob_other + Fjob_teacher + reason_home + reason_other +
## reason_reputation + guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - Mjob_services 1 0.125 1939.6 747.64
## - traveltime 1 0.325 1939.8 747.70
## - internet 1 0.507 1940.0 747.74
## - Fjob_health 1 0.514 1940.0 747.74
## - famsize 1 0.558 1940.0 747.75
## - Mjob_other 1 0.593 1940.1 747.76
## - reason_other 1 1.114 1940.6 747.90
## - freetime 1 1.541 1941.0 748.00
## - nursery 1 2.238 1941.7 748.18
## - activities 1 2.755 1942.2 748.31
## - Pstatus 1 3.344 1942.8 748.46
## - famsup 1 4.571 1944.0 748.78
## - Fjob_other 1 4.938 1944.4 748.87
## - paid 1 5.196 1944.7 748.93
## - address 1 6.352 1945.8 749.23
## - Mjob_teacher 1 7.385 1946.9 749.49
## <none> 1939.5 749.61
## - Walc 1 8.425 1947.9 749.75
## - guardian_other 1 9.044 1948.5 749.91
## - reason_home 1 10.896 1950.4 750.38
## - Mjob_health 1 11.114 1950.6 750.44
## - goout 1 11.283 1950.8 750.48
## - health 1 11.545 1951.0 750.54
## - age 1 13.488 1953.0 751.04
## - guardian_mother 1 14.860 1954.3 751.38
## - Medu 1 15.564 1955.0 751.56
## - romantic 1 16.968 1956.4 751.92
## - Fjob_teacher 1 20.274 1959.8 752.75
## - famrel 1 21.955 1961.4 753.17
## - studytime 1 22.223 1961.7 753.24
## - reason_reputation 1 25.388 1964.9 754.04
## - sex 1 34.977 1974.5 756.44
## - absences 1 35.216 1974.7 756.50
## - school 1 39.150 1978.6 757.48
## - schoolsup 1 54.356 1993.8 761.27
## - higher 1 79.086 2018.6 767.36
## - failures 1 187.089 2126.6 793.11
##
## Step: AIC=747.64
## score ~ school + sex + age + address + famsize + Pstatus + Medu +
## traveltime + studytime + failures + schoolsup + famsup +
## paid + activities + nursery + higher + internet + romantic +
## famrel + freetime + goout + Walc + health + absences + Mjob_health +
## Mjob_other + Mjob_teacher + Fjob_health + Fjob_other + Fjob_teacher +
## reason_home + reason_other + reason_reputation + guardian_mother +
## guardian_other
##
## Df Sum of Sq RSS AIC
## - traveltime 1 0.314 1939.9 745.72
## - Mjob_other 1 0.488 1940.1 745.77
## - Fjob_health 1 0.540 1940.1 745.78
## - famsize 1 0.561 1940.2 745.79
## - internet 1 0.660 1940.3 745.81
## - reason_other 1 1.129 1940.7 745.93
## - freetime 1 1.556 1941.2 746.04
## - nursery 1 2.278 1941.9 746.22
## - activities 1 2.777 1942.4 746.35
## - Pstatus 1 3.312 1942.9 746.49
## - famsup 1 4.519 1944.1 746.79
## - Fjob_other 1 4.823 1944.4 746.87
## - paid 1 5.396 1945.0 747.02
## - address 1 6.475 1946.1 747.29
## <none> 1939.6 747.64
## - Walc 1 8.407 1948.0 747.78
## - Mjob_teacher 1 8.844 1948.4 747.89
## - guardian_other 1 9.212 1948.8 747.99
## - reason_home 1 10.943 1950.5 748.42
## - goout 1 11.236 1950.8 748.50
## - health 1 11.433 1951.0 748.55
## - Mjob_health 1 13.334 1952.9 749.03
## - age 1 13.457 1953.1 749.06
## - guardian_mother 1 14.825 1954.4 749.41
## - romantic 1 17.167 1956.8 750.00
## - Medu 1 17.406 1957.0 750.06
## - Fjob_teacher 1 20.200 1959.8 750.76
## - famrel 1 21.896 1961.5 751.19
## - studytime 1 22.265 1961.9 751.28
## - reason_reputation 1 25.583 1965.2 752.12
## - sex 1 34.901 1974.5 754.45
## - absences 1 35.103 1974.7 754.50
## - school 1 39.636 1979.2 755.64
## - schoolsup 1 54.286 1993.9 759.28
## - higher 1 80.034 2019.6 765.62
## - failures 1 187.027 2126.6 791.12
##
## Step: AIC=745.72
## score ~ school + sex + age + address + famsize + Pstatus + Medu +
## studytime + failures + schoolsup + famsup + paid + activities +
## nursery + higher + internet + romantic + famrel + freetime +
## goout + Walc + health + absences + Mjob_health + Mjob_other +
## Mjob_teacher + Fjob_health + Fjob_other + Fjob_teacher +
## reason_home + reason_other + reason_reputation + guardian_mother +
## guardian_other
##
## Df Sum of Sq RSS AIC
## - Mjob_other 1 0.482 1940.4 743.85
## - famsize 1 0.551 1940.5 743.86
## - Fjob_health 1 0.563 1940.5 743.87
## - internet 1 0.612 1940.5 743.88
## - reason_other 1 1.043 1941.0 743.99
## - freetime 1 1.476 1941.4 744.10
## - nursery 1 2.173 1942.1 744.28
## - activities 1 2.845 1942.8 744.45
## - Pstatus 1 3.324 1943.2 744.57
## - famsup 1 4.418 1944.3 744.85
## - Fjob_other 1 5.159 1945.1 745.04
## - paid 1 5.523 1945.4 745.13
## - address 1 6.167 1946.1 745.29
## <none> 1939.9 745.72
## - Walc 1 8.337 1948.2 745.84
## - Mjob_teacher 1 8.904 1948.8 745.99
## - guardian_other 1 9.025 1948.9 746.02
## - reason_home 1 10.654 1950.6 746.43
## - goout 1 11.044 1951.0 746.53
## - health 1 11.737 1951.7 746.70
## - Mjob_health 1 13.321 1953.2 747.10
## - age 1 13.347 1953.3 747.11
## - guardian_mother 1 15.178 1955.1 747.57
## - Medu 1 17.125 1957.0 748.07
## - romantic 1 17.146 1957.1 748.07
## - Fjob_teacher 1 20.513 1960.4 748.92
## - famrel 1 21.968 1961.9 749.29
## - studytime 1 22.407 1962.3 749.40
## - reason_reputation 1 25.289 1965.2 750.12
## - sex 1 34.588 1974.5 752.45
## - absences 1 35.000 1974.9 752.56
## - school 1 39.337 1979.2 753.64
## - schoolsup 1 54.241 1994.2 757.35
## - higher 1 80.140 2020.0 763.72
## - failures 1 187.155 2127.1 789.22
##
## Step: AIC=743.85
## score ~ school + sex + age + address + famsize + Pstatus + Medu +
## studytime + failures + schoolsup + famsup + paid + activities +
## nursery + higher + internet + romantic + famrel + freetime +
## goout + Walc + health + absences + Mjob_health + Mjob_teacher +
## Fjob_health + Fjob_other + Fjob_teacher + reason_home + reason_other +
## reason_reputation + guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - famsize 1 0.486 1940.9 741.97
## - Fjob_health 1 0.553 1941.0 741.99
## - internet 1 0.581 1941.0 741.99
## - reason_other 1 1.041 1941.4 742.11
## - freetime 1 1.470 1941.9 742.22
## - nursery 1 2.277 1942.7 742.43
## - activities 1 2.797 1943.2 742.56
## - Pstatus 1 3.275 1943.7 742.68
## - famsup 1 4.762 1945.2 743.06
## - paid 1 5.632 1946.0 743.28
## - Fjob_other 1 6.110 1946.5 743.40
## - address 1 6.247 1946.6 743.43
## <none> 1940.4 743.85
## - Mjob_teacher 1 8.422 1948.8 743.99
## - Walc 1 8.654 1949.0 744.05
## - guardian_other 1 8.964 1949.4 744.12
## - goout 1 10.843 1951.2 744.60
## - reason_home 1 11.187 1951.6 744.69
## - health 1 11.724 1952.1 744.82
## - Mjob_health 1 12.890 1953.3 745.12
## - age 1 13.193 1953.6 745.19
## - guardian_mother 1 15.667 1956.1 745.82
## - romantic 1 16.874 1957.3 746.12
## - Medu 1 16.930 1957.3 746.14
## - Fjob_teacher 1 20.706 1961.1 747.09
## - famrel 1 21.793 1962.2 747.36
## - studytime 1 22.456 1962.8 747.53
## - reason_reputation 1 25.733 1966.1 748.36
## - sex 1 34.341 1974.7 750.51
## - absences 1 35.041 1975.4 750.69
## - school 1 39.282 1979.7 751.75
## - schoolsup 1 54.135 1994.5 755.44
## - higher 1 81.954 2022.3 762.28
## - failures 1 187.233 2127.6 787.35
##
## Step: AIC=741.97
## score ~ school + sex + age + address + Pstatus + Medu + studytime +
## failures + schoolsup + famsup + paid + activities + nursery +
## higher + internet + romantic + famrel + freetime + goout +
## Walc + health + absences + Mjob_health + Mjob_teacher + Fjob_health +
## Fjob_other + Fjob_teacher + reason_home + reason_other +
## reason_reputation + guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - Fjob_health 1 0.526 1941.4 740.10
## - internet 1 0.590 1941.5 740.12
## - reason_other 1 0.989 1941.9 740.22
## - freetime 1 1.475 1942.4 740.35
## - nursery 1 2.078 1943.0 740.50
## - activities 1 2.762 1943.6 740.67
## - Pstatus 1 2.869 1943.8 740.70
## - famsup 1 4.826 1945.7 741.20
## - paid 1 5.849 1946.7 741.46
## - Fjob_other 1 6.035 1946.9 741.50
## - address 1 6.342 1947.2 741.58
## <none> 1940.9 741.97
## - Walc 1 8.352 1949.2 742.09
## - Mjob_teacher 1 8.732 1949.6 742.19
## - guardian_other 1 8.955 1949.8 742.24
## - goout 1 11.020 1951.9 742.77
## - reason_home 1 11.287 1952.2 742.84
## - health 1 11.966 1952.8 743.01
## - Mjob_health 1 12.915 1953.8 743.25
## - age 1 13.306 1954.2 743.35
## - guardian_mother 1 15.707 1956.6 743.95
## - Medu 1 16.605 1957.5 744.18
## - romantic 1 17.104 1958.0 744.30
## - Fjob_teacher 1 20.375 1961.3 745.13
## - famrel 1 21.827 1962.7 745.50
## - studytime 1 22.451 1963.3 745.65
## - reason_reputation 1 25.882 1966.8 746.51
## - sex 1 33.910 1974.8 748.53
## - absences 1 35.957 1976.8 749.04
## - school 1 39.129 1980.0 749.83
## - schoolsup 1 54.325 1995.2 753.61
## - higher 1 81.808 2022.7 760.37
## - failures 1 188.560 2129.4 785.77
##
## Step: AIC=740.1
## score ~ school + sex + age + address + Pstatus + Medu + studytime +
## failures + schoolsup + famsup + paid + activities + nursery +
## higher + internet + romantic + famrel + freetime + goout +
## Walc + health + absences + Mjob_health + Mjob_teacher + Fjob_other +
## Fjob_teacher + reason_home + reason_other + reason_reputation +
## guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - internet 1 0.648 1942.0 738.27
## - reason_other 1 0.939 1942.3 738.34
## - freetime 1 1.534 1942.9 738.49
## - nursery 1 2.142 1943.5 738.65
## - activities 1 2.795 1944.2 738.81
## - Pstatus 1 2.945 1944.3 738.85
## - famsup 1 4.953 1946.4 739.36
## - paid 1 5.744 1947.2 739.56
## - address 1 6.216 1947.6 739.68
## - Fjob_other 1 7.311 1948.7 739.96
## <none> 1941.4 740.10
## - Walc 1 8.115 1949.5 740.17
## - Mjob_teacher 1 8.468 1949.9 740.25
## - guardian_other 1 8.862 1950.3 740.35
## - reason_home 1 11.330 1952.7 740.98
## - goout 1 11.459 1952.9 741.01
## - Mjob_health 1 12.400 1953.8 741.25
## - health 1 12.811 1954.2 741.35
## - age 1 13.554 1955.0 741.54
## - guardian_mother 1 15.472 1956.9 742.03
## - Medu 1 16.328 1957.7 742.24
## - romantic 1 17.263 1958.7 742.48
## - famrel 1 21.756 1963.2 743.61
## - Fjob_teacher 1 21.995 1963.4 743.67
## - studytime 1 22.004 1963.4 743.67
## - reason_reputation 1 25.649 1967.1 744.59
## - sex 1 34.011 1975.4 746.68
## - absences 1 36.608 1978.0 747.33
## - school 1 38.846 1980.2 747.89
## - schoolsup 1 56.168 1997.6 752.19
## - higher 1 82.760 2024.2 758.73
## - failures 1 188.088 2129.5 783.79
##
## Step: AIC=738.27
## score ~ school + sex + age + address + Pstatus + Medu + studytime +
## failures + schoolsup + famsup + paid + activities + nursery +
## higher + romantic + famrel + freetime + goout + Walc + health +
## absences + Mjob_health + Mjob_teacher + Fjob_other + Fjob_teacher +
## reason_home + reason_other + reason_reputation + guardian_mother +
## guardian_other
##
## Df Sum of Sq RSS AIC
## - reason_other 1 0.976 1943.0 736.52
## - freetime 1 1.608 1943.7 736.68
## - nursery 1 2.215 1944.3 736.83
## - activities 1 2.785 1944.8 736.98
## - Pstatus 1 3.190 1945.2 737.08
## - famsup 1 4.831 1946.9 737.50
## - paid 1 5.600 1947.7 737.69
## - address 1 6.620 1948.7 737.95
## - Fjob_other 1 7.211 1949.3 738.10
## <none> 1942.0 738.27
## - Walc 1 7.999 1950.0 738.30
## - Mjob_teacher 1 8.727 1950.8 738.48
## - guardian_other 1 8.805 1950.9 738.50
## - goout 1 11.060 1953.1 739.07
## - reason_home 1 11.392 1953.5 739.16
## - Mjob_health 1 12.522 1954.6 739.44
## - health 1 13.155 1955.2 739.60
## - age 1 14.076 1956.1 739.84
## - guardian_mother 1 15.844 1957.9 740.28
## - romantic 1 16.779 1958.8 740.52
## - Medu 1 17.889 1959.9 740.80
## - Fjob_teacher 1 21.503 1963.6 741.71
## - studytime 1 21.808 1963.9 741.79
## - famrel 1 22.424 1964.5 741.94
## - reason_reputation 1 26.605 1968.7 742.99
## - sex 1 33.947 1976.0 744.83
## - absences 1 36.832 1978.9 745.55
## - school 1 41.137 1983.2 746.62
## - schoolsup 1 56.683 1998.7 750.48
## - higher 1 83.899 2026.0 757.16
## - failures 1 188.521 2130.6 782.04
##
## Step: AIC=736.52
## score ~ school + sex + age + address + Pstatus + Medu + studytime +
## failures + schoolsup + famsup + paid + activities + nursery +
## higher + romantic + famrel + freetime + goout + Walc + health +
## absences + Mjob_health + Mjob_teacher + Fjob_other + Fjob_teacher +
## reason_home + reason_reputation + guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - freetime 1 1.442 1944.5 734.88
## - nursery 1 2.180 1945.2 735.07
## - activities 1 2.602 1945.6 735.18
## - Pstatus 1 3.126 1946.2 735.31
## - famsup 1 5.023 1948.0 735.79
## - paid 1 5.270 1948.3 735.86
## - address 1 6.441 1949.5 736.15
## - Fjob_other 1 7.124 1950.2 736.33
## - Walc 1 7.805 1950.8 736.50
## <none> 1943.0 736.52
## - Mjob_teacher 1 8.502 1951.5 736.67
## - guardian_other 1 9.237 1952.3 736.86
## - reason_home 1 10.451 1953.5 737.17
## - goout 1 11.168 1954.2 737.35
## - Mjob_health 1 13.045 1956.1 737.82
## - health 1 13.422 1956.5 737.92
## - age 1 14.029 1957.1 738.07
## - guardian_mother 1 15.915 1959.0 738.55
## - romantic 1 16.538 1959.6 738.70
## - Medu 1 17.952 1961.0 739.06
## - studytime 1 21.583 1964.6 739.97
## - Fjob_teacher 1 21.662 1964.7 739.99
## - famrel 1 23.018 1966.0 740.34
## - reason_reputation 1 25.669 1968.7 741.00
## - sex 1 33.579 1976.6 742.98
## - absences 1 36.739 1979.8 743.77
## - school 1 40.307 1983.3 744.66
## - schoolsup 1 56.015 1999.0 748.56
## - higher 1 83.125 2026.2 755.21
## - failures 1 190.917 2133.9 780.82
##
## Step: AIC=734.88
## score ~ school + sex + age + address + Pstatus + Medu + studytime +
## failures + schoolsup + famsup + paid + activities + nursery +
## higher + romantic + famrel + goout + Walc + health + absences +
## Mjob_health + Mjob_teacher + Fjob_other + Fjob_teacher +
## reason_home + reason_reputation + guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - nursery 1 2.282 1946.8 733.46
## - activities 1 3.165 1947.6 733.69
## - Pstatus 1 3.227 1947.7 733.70
## - famsup 1 4.921 1949.4 734.13
## - paid 1 5.793 1950.3 734.35
## - address 1 6.250 1950.7 734.47
## - Fjob_other 1 7.339 1951.8 734.75
## <none> 1944.5 734.88
## - Walc 1 8.353 1952.8 735.00
## - guardian_other 1 9.126 1953.6 735.20
## - Mjob_teacher 1 9.290 1953.8 735.24
## - goout 1 9.777 1954.2 735.36
## - reason_home 1 10.198 1954.7 735.47
## - health 1 12.815 1957.3 736.13
## - Mjob_health 1 13.052 1957.5 736.19
## - age 1 13.375 1957.8 736.27
## - guardian_mother 1 15.706 1960.2 736.86
## - romantic 1 16.103 1960.6 736.96
## - Medu 1 17.603 1962.1 737.34
## - studytime 1 21.483 1966.0 738.31
## - Fjob_teacher 1 21.503 1966.0 738.32
## - famrel 1 23.669 1968.1 738.86
## - reason_reputation 1 25.626 1970.1 739.35
## - sex 1 32.386 1976.9 741.04
## - absences 1 37.243 1981.7 742.26
## - school 1 40.059 1984.5 742.96
## - schoolsup 1 55.343 1999.8 746.75
## - higher 1 81.744 2026.2 753.23
## - failures 1 189.487 2134.0 778.82
##
## Step: AIC=733.46
## score ~ school + sex + age + address + Pstatus + Medu + studytime +
## failures + schoolsup + famsup + paid + activities + higher +
## romantic + famrel + goout + Walc + health + absences + Mjob_health +
## Mjob_teacher + Fjob_other + Fjob_teacher + reason_home +
## reason_reputation + guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - activities 1 3.132 1949.9 732.26
## - Pstatus 1 3.443 1950.2 732.34
## - famsup 1 5.093 1951.8 732.75
## - paid 1 6.232 1953.0 733.04
## - address 1 6.361 1953.1 733.07
## - Walc 1 7.635 1954.4 733.40
## <none> 1946.8 733.46
## - Fjob_other 1 7.929 1954.7 733.47
## - guardian_other 1 8.343 1955.1 733.58
## - Mjob_teacher 1 9.251 1956.0 733.81
## - goout 1 9.980 1956.7 733.99
## - reason_home 1 10.169 1956.9 734.04
## - health 1 12.989 1959.7 734.75
## - Mjob_health 1 13.071 1959.8 734.77
## - age 1 13.305 1960.1 734.83
## - guardian_mother 1 16.104 1962.9 735.53
## - romantic 1 16.656 1963.4 735.67
## - Medu 1 16.686 1963.4 735.68
## - studytime 1 21.755 1968.5 736.95
## - Fjob_teacher 1 22.842 1969.6 737.23
## - famrel 1 23.663 1970.4 737.43
## - reason_reputation 1 25.079 1971.8 737.79
## - sex 1 31.923 1978.7 739.50
## - absences 1 37.287 1984.0 740.84
## - school 1 40.604 1987.4 741.66
## - schoolsup 1 56.573 2003.3 745.61
## - higher 1 82.198 2029.0 751.89
## - failures 1 188.557 2135.3 777.13
##
## Step: AIC=732.26
## score ~ school + sex + age + address + Pstatus + Medu + studytime +
## failures + schoolsup + famsup + paid + higher + romantic +
## famrel + goout + Walc + health + absences + Mjob_health +
## Mjob_teacher + Fjob_other + Fjob_teacher + reason_home +
## reason_reputation + guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - Pstatus 1 4.115 1954.0 731.30
## - famsup 1 5.369 1955.3 731.62
## - paid 1 5.912 1955.8 731.75
## - address 1 6.136 1956.0 731.81
## - Fjob_other 1 7.145 1957.0 732.06
## <none> 1949.9 732.26
## - Walc 1 8.339 1958.2 732.37
## - guardian_other 1 8.564 1958.5 732.42
## - goout 1 8.961 1958.8 732.52
## - reason_home 1 9.310 1959.2 732.61
## - Mjob_teacher 1 9.657 1959.5 732.70
## - Mjob_health 1 12.320 1962.2 733.37
## - age 1 12.360 1962.2 733.38
## - health 1 12.503 1962.4 733.41
## - guardian_mother 1 15.533 1965.4 734.18
## - romantic 1 15.643 1965.5 734.20
## - Medu 1 17.914 1967.8 734.78
## - Fjob_teacher 1 22.421 1972.3 735.91
## - studytime 1 22.502 1972.4 735.93
## - famrel 1 23.936 1973.8 736.28
## - reason_reputation 1 28.785 1978.7 737.50
## - sex 1 30.333 1980.2 737.88
## - absences 1 37.095 1987.0 739.57
## - school 1 42.427 1992.3 740.89
## - schoolsup 1 58.704 2008.6 744.91
## - higher 1 81.434 2031.3 750.47
## - failures 1 187.661 2137.6 775.65
##
## Step: AIC=731.3
## score ~ school + sex + age + address + Medu + studytime + failures +
## schoolsup + famsup + paid + higher + romantic + famrel +
## goout + Walc + health + absences + Mjob_health + Mjob_teacher +
## Fjob_other + Fjob_teacher + reason_home + reason_reputation +
## guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - address 1 5.110 1959.1 730.59
## - famsup 1 5.339 1959.3 730.65
## - paid 1 5.757 1959.8 730.75
## - Fjob_other 1 6.052 1960.0 730.83
## - Walc 1 7.369 1961.4 731.16
## <none> 1954.0 731.30
## - goout 1 9.216 1963.2 731.62
## - reason_home 1 9.389 1963.4 731.67
## - guardian_other 1 10.165 1964.2 731.86
## - Mjob_teacher 1 10.408 1964.4 731.92
## - Mjob_health 1 12.481 1966.5 732.44
## - health 1 12.722 1966.7 732.50
## - age 1 13.117 1967.1 732.60
## - romantic 1 16.183 1970.2 733.37
## - Medu 1 16.711 1970.7 733.51
## - guardian_mother 1 18.464 1972.5 733.95
## - Fjob_teacher 1 21.290 1975.3 734.65
## - studytime 1 22.400 1976.4 734.93
## - famrel 1 24.840 1978.8 735.54
## - reason_reputation 1 27.776 1981.8 736.27
## - sex 1 29.856 1983.9 736.79
## - absences 1 38.713 1992.7 738.99
## - school 1 44.783 1998.8 740.49
## - schoolsup 1 59.916 2013.9 744.22
## - higher 1 81.552 2035.5 749.50
## - failures 1 186.638 2140.6 774.36
##
## Step: AIC=730.59
## score ~ school + sex + age + Medu + studytime + failures + schoolsup +
## famsup + paid + higher + romantic + famrel + goout + Walc +
## health + absences + Mjob_health + Mjob_teacher + Fjob_other +
## Fjob_teacher + reason_home + reason_reputation + guardian_mother +
## guardian_other
##
## Df Sum of Sq RSS AIC
## - Fjob_other 1 5.291 1964.4 729.92
## - paid 1 5.877 1965.0 730.07
## - famsup 1 5.914 1965.0 730.08
## <none> 1959.1 730.59
## - Walc 1 8.042 1967.2 730.61
## - goout 1 8.523 1967.6 730.73
## - Mjob_teacher 1 10.102 1969.2 731.13
## - guardian_other 1 10.780 1969.9 731.30
## - reason_home 1 11.290 1970.4 731.43
## - health 1 12.853 1972.0 731.82
## - Mjob_health 1 13.144 1972.3 731.89
## - age 1 14.164 1973.3 732.15
## - romantic 1 16.768 1975.9 732.80
## - guardian_mother 1 18.561 1977.7 733.25
## - Medu 1 18.962 1978.1 733.35
## - Fjob_teacher 1 20.448 1979.6 733.72
## - studytime 1 22.630 1981.7 734.26
## - famrel 1 23.602 1982.7 734.50
## - reason_reputation 1 26.863 1986.0 735.32
## - sex 1 30.400 1989.5 736.20
## - absences 1 37.928 1997.0 738.06
## - schoolsup 1 60.926 2020.0 743.72
## - school 1 61.001 2020.1 743.74
## - higher 1 80.838 2040.0 748.56
## - failures 1 185.970 2145.1 773.39
##
## Step: AIC=729.92
## score ~ school + sex + age + Medu + studytime + failures + schoolsup +
## famsup + paid + higher + romantic + famrel + goout + Walc +
## health + absences + Mjob_health + Mjob_teacher + Fjob_teacher +
## reason_home + reason_reputation + guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - paid 1 5.892 1970.3 729.40
## - famsup 1 7.003 1971.4 729.68
## - goout 1 7.891 1972.3 729.90
## <none> 1964.4 729.92
## - Walc 1 8.838 1973.2 730.14
## - guardian_other 1 9.036 1973.4 730.19
## - Mjob_teacher 1 9.538 1973.9 730.31
## - Mjob_health 1 11.992 1976.4 730.93
## - health 1 12.147 1976.5 730.97
## - reason_home 1 12.529 1976.9 731.06
## - age 1 13.631 1978.0 731.34
## - romantic 1 16.270 1980.7 732.00
## - guardian_mother 1 16.519 1980.9 732.06
## - Fjob_teacher 1 16.646 1981.0 732.09
## - Medu 1 18.330 1982.7 732.51
## - studytime 1 21.678 1986.1 733.34
## - famrel 1 23.608 1988.0 733.82
## - reason_reputation 1 28.451 1992.8 735.02
## - sex 1 31.186 1995.6 735.70
## - absences 1 38.156 2002.6 737.42
## - schoolsup 1 61.402 2025.8 743.13
## - school 1 64.330 2028.7 743.84
## - higher 1 81.099 2045.5 747.91
## - failures 1 185.142 2149.5 772.41
##
## Step: AIC=729.4
## score ~ school + sex + age + Medu + studytime + failures + schoolsup +
## famsup + higher + romantic + famrel + goout + Walc + health +
## absences + Mjob_health + Mjob_teacher + Fjob_teacher + reason_home +
## reason_reputation + guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## - goout 1 7.249 1977.5 729.22
## - famsup 1 7.910 1978.2 729.38
## <none> 1970.3 729.40
## - Walc 1 9.251 1979.5 729.71
## - guardian_other 1 9.578 1979.9 729.80
## - Mjob_teacher 1 10.195 1980.5 729.95
## - reason_home 1 12.084 1982.4 730.42
## - Mjob_health 1 12.592 1982.9 730.55
## - health 1 13.429 1983.7 730.76
## - age 1 13.472 1983.8 730.77
## - Medu 1 15.678 1986.0 731.32
## - romantic 1 16.229 1986.5 731.45
## - Fjob_teacher 1 17.856 1988.2 731.86
## - guardian_mother 1 17.873 1988.2 731.86
## - studytime 1 21.992 1992.3 732.88
## - famrel 1 22.668 1993.0 733.05
## - reason_reputation 1 31.113 2001.4 735.14
## - sex 1 33.036 2003.3 735.62
## - absences 1 37.254 2007.5 736.65
## - schoolsup 1 62.735 2033.0 742.88
## - school 1 64.801 2035.1 743.39
## - higher 1 78.730 2049.0 746.76
## - failures 1 192.125 2162.4 773.37
##
## Step: AIC=729.22
## score ~ school + sex + age + Medu + studytime + failures + schoolsup +
## famsup + higher + romantic + famrel + Walc + health + absences +
## Mjob_health + Mjob_teacher + Fjob_teacher + reason_home +
## reason_reputation + guardian_mother + guardian_other
##
## Df Sum of Sq RSS AIC
## <none> 1977.5 729.22
## - famsup 1 8.089 1985.6 729.23
## - guardian_other 1 10.084 1987.6 729.73
## - Mjob_teacher 1 11.699 1989.2 730.13
## - Mjob_health 1 11.969 1989.5 730.20
## - health 1 12.215 1989.8 730.26
## - reason_home 1 12.504 1990.0 730.33
## - age 1 12.679 1990.2 730.37
## - Medu 1 14.648 1992.2 730.86
## - romantic 1 15.796 1993.3 731.15
## - Fjob_teacher 1 17.481 1995.0 731.56
## - guardian_mother 1 18.660 1996.2 731.85
## - Walc 1 18.713 1996.3 731.87
## - famrel 1 19.641 1997.2 732.10
## - studytime 1 22.072 1999.6 732.70
## - sex 1 30.753 2008.3 734.84
## - reason_reputation 1 32.081 2009.6 735.16
## - absences 1 39.618 2017.2 737.01
## - schoolsup 1 62.405 2040.0 742.56
## - school 1 67.550 2045.1 743.81
## - higher 1 81.717 2059.3 747.22
## - failures 1 191.367 2168.9 772.85
### Sequential ANOVA table for back_m (defined outside this excerpt; presumably
### the backward-elimination fit -- confirm). Each term's sum of squares is
### computed after the terms listed before it, so the rows depend on term order.
anova(back_m)
## Analysis of Variance Table
##
## Response: score
## Df Sum Sq Mean Sq F value Pr(>F)
## school 1 192.96 192.96 46.0561 3.457e-11 ***
## sex 1 92.56 92.56 22.0910 3.417e-06 ***
## age 1 31.13 31.13 7.4290 0.0066568 **
## Medu 1 199.81 199.81 47.6910 1.619e-11 ***
## studytime 1 83.34 83.34 19.8915 1.026e-05 ***
## failures 1 389.60 389.60 92.9906 < 2.2e-16 ***
## schoolsup 1 43.96 43.96 10.4914 0.0012839 **
## famsup 1 11.06 11.06 2.6387 0.1049577
## higher 1 104.44 104.44 24.9281 8.389e-07 ***
## romantic 1 25.86 25.86 6.1731 0.0133164 *
## famrel 1 21.16 21.16 5.0513 0.0250683 *
## Walc 1 33.64 33.64 8.0293 0.0047998 **
## health 1 15.47 15.47 3.6921 0.0552733 .
## absences 1 52.95 52.95 12.6376 0.0004162 ***
## Mjob_health 1 10.43 10.43 2.4886 0.1153424
## Mjob_teacher 1 11.24 11.24 2.6830 0.1020922
## Fjob_teacher 1 19.80 19.80 4.7257 0.0302120 *
## reason_home 1 2.73 2.73 0.6513 0.4200659
## reason_reputation 1 33.88 33.88 8.0876 0.0046504 **
## guardian_mother 1 11.02 11.02 2.6291 0.1055860
## guardian_other 1 10.08 10.08 2.4068 0.1214817
## Residuals 472 1977.54 4.19
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Evaluate back_m with lm_result() (helper defined elsewhere in this file) on
### the train and test sets and keep the result in r3. The printed output
### suggests the helper reports train/test MSE -- confirm against its definition.
r3 <- lm_result(back_m, train, test)
## MSE_tr MSE_te
## 4.00 6.69
### Stepwise search in both directions, starting from null_m (score ~ 1).
### Candidate models are bounded below by null_m and above by full_m;
### trace = 1 prints the add/drop AIC table at every step.
step_m <- step(
  object = null_m,
  scope = list(lower = null_m, upper = full_m),
  direction = "both",
  trace = 1
)
## Start: AIC=951.23
## score ~ 1
##
## Df Sum of Sq RSS AIC
## + failures 1 618.60 2756.1 853.20
## + higher 1 419.64 2955.0 887.63
## + Medu 1 290.20 3084.5 908.81
## + studytime 1 196.07 3178.6 923.66
## + school 1 192.96 3181.7 924.14
## + reason_reputation 1 152.85 3221.8 930.33
## + Fedu 1 144.90 3229.8 931.55
## + absences 1 140.50 3234.2 932.22
## + Walc 1 132.00 3242.7 933.52
## + Dalc 1 119.88 3254.8 935.36
## + Mjob_teacher 1 98.81 3275.8 938.55
## + Fjob_teacher 1 92.88 3281.8 939.44
## + sex 1 70.39 3304.3 942.81
## + address 1 66.20 3308.5 943.44
## + traveltime 1 65.29 3309.4 943.58
## + internet 1 61.09 3313.6 944.20
## + goout 1 60.46 3314.2 944.30
## + Mjob_health 1 60.27 3314.4 944.33
## + guardian_other 1 53.70 3321.0 945.30
## + famrel 1 49.16 3325.5 945.98
## + health 1 40.92 3333.7 947.20
## + age 1 39.50 3335.2 947.41
## + romantic 1 36.06 3338.6 947.92
## + freetime 1 32.75 3341.9 948.41
## + schoolsup 1 24.67 3350.0 949.60
## + reason_other 1 22.16 3352.5 949.97
## + activities 1 20.66 3354.0 950.19
## + reason_home 1 18.32 3356.3 950.54
## + paid 1 14.58 3360.1 951.09
## <none> 3374.7 951.23
## + Fjob_health 1 5.98 3368.7 952.35
## + Fjob_services 1 5.44 3369.2 952.43
## + Mjob_other 1 3.22 3371.4 952.76
## + Fjob_other 1 1.73 3372.9 952.97
## + guardian_mother 1 1.67 3373.0 952.98
## + nursery 1 0.70 3374.0 953.13
## + famsize 1 0.51 3374.1 953.15
## + Pstatus 1 0.41 3374.2 953.17
## + Mjob_services 1 0.40 3374.3 953.17
## + famsup 1 0.07 3374.6 953.22
##
## Step: AIC=853.2
## score ~ failures
##
## Df Sum of Sq RSS AIC
## + higher 1 198.42 2557.6 818.29
## + Medu 1 179.68 2576.4 821.89
## + school 1 133.83 2622.2 830.61
## + studytime 1 120.57 2635.5 833.10
## + reason_reputation 1 97.17 2658.9 837.46
## + Walc 1 94.39 2661.7 837.98
## + Dalc 1 73.59 2682.5 841.83
## + absences 1 69.83 2686.2 842.52
## + Fedu 1 68.22 2687.8 842.81
## + Mjob_health 1 58.66 2697.4 844.57
## + Fjob_teacher 1 55.35 2700.7 845.17
## + address 1 54.70 2701.4 845.29
## + Mjob_teacher 1 54.08 2702.0 845.41
## + goout 1 44.53 2711.5 847.15
## + internet 1 43.65 2712.4 847.31
## + traveltime 1 41.08 2715.0 847.78
## + sex 1 31.63 2724.4 849.49
## + famrel 1 25.66 2730.4 850.58
## + health 1 24.79 2731.3 850.73
## + reason_other 1 22.22 2733.8 851.20
## + romantic 1 19.99 2736.1 851.60
## + schoolsup 1 19.20 2736.9 851.74
## + activities 1 15.67 2740.4 852.38
## <none> 2756.1 853.20
## + freetime 1 7.04 2749.0 853.93
## + guardian_mother 1 6.09 2750.0 854.10
## + guardian_other 1 5.13 2750.9 854.28
## + reason_home 1 4.74 2751.3 854.35
## + paid 1 3.52 2752.5 854.57
## + Fjob_services 1 3.29 2752.8 854.61
## + Mjob_other 1 2.87 2753.2 854.68
## + Pstatus 1 1.28 2754.8 854.97
## + Fjob_health 1 1.00 2755.1 855.02
## + nursery 1 0.36 2755.7 855.13
## + age 1 0.31 2755.7 855.14
## + famsize 1 0.28 2755.8 855.15
## + Mjob_services 1 0.18 2755.9 855.16
## + Fjob_other 1 0.06 2756.0 855.19
## + famsup 1 0.05 2756.0 855.19
## - failures 1 618.60 3374.7 951.23
##
## Step: AIC=818.29
## score ~ failures + higher
##
## Df Sum of Sq RSS AIC
## + Medu 1 119.32 2438.3 796.69
## + school 1 94.85 2462.8 801.62
## + studytime 1 79.56 2478.1 804.67
## + reason_reputation 1 77.24 2480.4 805.14
## + Walc 1 72.57 2485.1 806.07
## + absences 1 59.75 2497.9 808.61
## + Dalc 1 55.02 2502.6 809.54
## + address 1 46.57 2511.1 811.21
## + Fjob_teacher 1 43.51 2514.1 811.81
## + Mjob_teacher 1 38.53 2519.1 812.79
## + Mjob_health 1 38.37 2519.3 812.82
## + Fedu 1 36.26 2521.4 813.23
## + schoolsup 1 31.29 2526.3 814.20
## + traveltime 1 29.93 2527.7 814.47
## + internet 1 29.49 2528.2 814.56
## + goout 1 29.14 2528.5 814.62
## + health 1 24.06 2533.6 815.62
## + famrel 1 22.77 2534.9 815.87
## + sex 1 22.27 2535.4 815.97
## + activities 1 14.36 2543.3 817.50
## + reason_other 1 13.34 2544.3 817.70
## + romantic 1 11.02 2546.6 818.15
## <none> 2557.6 818.29
## + guardian_mother 1 9.73 2547.9 818.40
## + paid 1 9.24 2548.4 818.50
## + age 1 8.86 2548.8 818.57
## + Fjob_services 1 3.99 2553.6 819.51
## + reason_home 1 3.84 2553.8 819.54
## + Mjob_other 1 3.76 2553.9 819.56
## + famsup 1 2.29 2555.3 819.84
## + Pstatus 1 1.82 2555.8 819.93
## + guardian_other 1 1.02 2556.6 820.09
## + nursery 1 0.87 2556.8 820.12
## + freetime 1 0.76 2556.9 820.14
## + Mjob_services 1 0.68 2557.0 820.16
## + Fjob_health 1 0.51 2557.1 820.19
## + Fjob_other 1 0.02 2557.6 820.28
## + famsize 1 0.00 2557.6 820.29
## - higher 1 198.42 2756.1 853.20
## - failures 1 397.38 2955.0 887.63
##
## Step: AIC=796.69
## score ~ failures + higher + Medu
##
## Df Sum of Sq RSS AIC
## + Walc 1 69.90 2368.4 784.32
## + studytime 1 68.80 2369.5 784.55
## + reason_reputation 1 60.99 2377.3 786.17
## + Dalc 1 57.93 2380.4 786.81
## + absences 1 55.24 2383.1 787.36
## + school 1 53.48 2384.8 787.73
## + sex 1 39.61 2398.7 790.59
## + goout 1 29.70 2408.6 792.63
## + schoolsup 1 25.79 2412.5 793.43
## + health 1 25.35 2413.0 793.52
## + address 1 23.22 2415.1 793.96
## + paid 1 22.35 2416.0 794.14
## + Fjob_teacher 1 17.57 2420.7 795.11
## + famrel 1 17.43 2420.9 795.14
## + guardian_mother 1 14.33 2424.0 795.77
## + age 1 13.14 2425.2 796.01
## + Mjob_health 1 12.27 2426.1 796.19
## <none> 2438.3 796.69
## + reason_other 1 9.49 2428.8 796.76
## + romantic 1 9.31 2429.0 796.79
## + famsup 1 7.60 2430.7 797.14
## + internet 1 7.32 2431.0 797.20
## + traveltime 1 6.34 2432.0 797.40
## + activities 1 5.18 2433.1 797.63
## + Mjob_services 1 4.67 2433.7 797.74
## + Pstatus 1 4.60 2433.7 797.75
## + nursery 1 3.99 2434.3 797.88
## + Mjob_teacher 1 3.91 2434.4 797.89
## + Fjob_services 1 3.87 2434.4 797.90
## + Fjob_other 1 1.95 2436.4 798.29
## + reason_home 1 1.60 2436.7 798.36
## + Fedu 1 1.14 2437.2 798.45
## + Fjob_health 1 0.99 2437.3 798.48
## + freetime 1 0.97 2437.3 798.49
## + guardian_other 1 0.50 2437.8 798.58
## + Mjob_other 1 0.38 2437.9 798.61
## + famsize 1 0.08 2438.2 798.67
## - Medu 1 119.32 2557.6 818.29
## - higher 1 138.06 2576.4 821.89
## - failures 1 353.22 2791.5 861.52
##
## Step: AIC=784.32
## score ~ failures + higher + Medu + Walc
##
## Df Sum of Sq RSS AIC
## + reason_reputation 1 58.12 2310.3 774.04
## + school 1 56.69 2311.7 774.35
## + studytime 1 48.03 2320.4 776.19
## + absences 1 38.68 2329.7 778.18
## + schoolsup 1 34.18 2334.2 779.14
## + address 1 22.59 2345.8 781.58
## + paid 1 19.42 2349.0 782.25
## + age 1 16.67 2351.8 782.83
## + health 1 16.16 2352.3 782.93
## + Mjob_health 1 14.81 2353.6 783.22
## + sex 1 14.22 2354.2 783.34
## + guardian_mother 1 12.23 2356.2 783.76
## + internet 1 12.03 2356.4 783.80
## + famrel 1 11.70 2356.7 783.87
## + Fjob_teacher 1 11.69 2356.7 783.87
## + famsup 1 10.27 2358.2 784.17
## + Pstatus 1 9.66 2358.8 784.30
## + romantic 1 9.59 2358.8 784.31
## <none> 2368.4 784.32
## + Dalc 1 9.06 2359.3 784.42
## + reason_other 1 8.02 2360.4 784.64
## + nursery 1 8.00 2360.4 784.64
## + goout 1 6.99 2361.4 784.86
## + activities 1 5.49 2362.9 785.17
## + Mjob_teacher 1 5.28 2363.1 785.21
## + traveltime 1 5.01 2363.4 785.27
## + Mjob_services 1 2.68 2365.7 785.76
## + reason_home 1 2.65 2365.8 785.76
## + Fjob_other 1 1.52 2366.9 786.00
## + Fjob_health 1 1.50 2366.9 786.00
## + Fjob_services 1 1.15 2367.3 786.08
## + guardian_other 1 1.14 2367.3 786.08
## + famsize 1 0.82 2367.6 786.14
## + freetime 1 0.16 2368.2 786.28
## + Fedu 1 0.15 2368.3 786.29
## + Mjob_other 1 0.00 2368.4 786.32
## - Walc 1 69.90 2438.3 796.69
## - Medu 1 116.65 2485.1 806.07
## - higher 1 121.36 2489.8 807.00
## - failures 1 338.95 2707.4 848.39
##
## Step: AIC=774.04
## score ~ failures + higher + Medu + Walc + reason_reputation
##
## Df Sum of Sq RSS AIC
## + school 1 43.40 2266.9 766.67
## + absences 1 41.72 2268.6 767.04
## + schoolsup 1 36.57 2273.7 768.16
## + studytime 1 33.38 2276.9 768.85
## + address 1 24.56 2285.7 770.76
## + reason_home 1 17.70 2292.6 772.24
## + age 1 14.33 2296.0 772.97
## + Fjob_teacher 1 13.80 2296.5 773.08
## + sex 1 12.82 2297.5 773.29
## + paid 1 12.44 2297.8 773.37
## + Pstatus 1 11.68 2298.6 773.54
## + famrel 1 10.73 2299.6 773.74
## + health 1 10.43 2299.9 773.81
## + Mjob_health 1 10.42 2299.9 773.81
## + famsup 1 10.36 2299.9 773.82
## <none> 2310.3 774.04
## + guardian_mother 1 9.19 2301.1 774.07
## + nursery 1 9.07 2301.2 774.10
## + Mjob_teacher 1 8.76 2301.5 774.16
## + internet 1 7.38 2302.9 774.46
## + romantic 1 6.91 2303.4 774.56
## + goout 1 6.61 2303.7 774.63
## + Dalc 1 5.13 2305.2 774.94
## + traveltime 1 4.22 2306.1 775.14
## + Mjob_services 1 3.65 2306.6 775.26
## + Fjob_health 1 2.61 2307.7 775.48
## + reason_other 1 1.91 2308.4 775.63
## + guardian_other 1 1.61 2308.7 775.70
## + activities 1 1.01 2309.3 775.83
## + Fjob_services 1 0.90 2309.4 775.85
## + Fjob_other 1 0.88 2309.4 775.85
## + famsize 1 0.38 2309.9 775.96
## + Fedu 1 0.22 2310.1 775.99
## + freetime 1 0.21 2310.1 776.00
## + Mjob_other 1 0.04 2310.3 776.03
## - reason_reputation 1 58.12 2368.4 784.32
## - Walc 1 67.04 2377.3 786.17
## - Medu 1 100.99 2411.3 793.18
## - higher 1 111.59 2421.9 795.34
## - failures 1 318.74 2629.0 835.89
##
## Step: AIC=766.67
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school
##
## Df Sum of Sq RSS AIC
## + absences 1 51.477 2215.4 757.33
## + schoolsup 1 49.468 2217.4 757.77
## + studytime 1 29.300 2237.6 762.25
## + sex 1 16.814 2250.1 765.00
## + age 1 15.971 2250.9 765.18
## + health 1 14.690 2252.2 765.46
## + paid 1 13.079 2253.8 765.81
## + Fjob_teacher 1 12.614 2254.3 765.92
## + Pstatus 1 11.309 2255.6 766.20
## + guardian_mother 1 10.942 2255.9 766.28
## + reason_home 1 10.475 2256.4 766.39
## + Mjob_health 1 10.473 2256.4 766.39
## + famsup 1 10.234 2256.7 766.44
## + Mjob_teacher 1 9.685 2257.2 766.56
## <none> 2266.9 766.67
## + address 1 8.939 2257.9 766.72
## + famrel 1 8.677 2258.2 766.78
## + nursery 1 7.421 2259.5 767.05
## + romantic 1 6.458 2260.4 767.26
## + Mjob_services 1 6.302 2260.6 767.30
## + goout 1 4.817 2262.1 767.62
## + Dalc 1 4.389 2262.5 767.72
## + Fjob_health 1 3.459 2263.4 767.92
## + internet 1 2.899 2264.0 768.04
## + guardian_other 1 2.786 2264.1 768.07
## + traveltime 1 0.615 2266.3 768.54
## + famsize 1 0.577 2266.3 768.55
## + activities 1 0.479 2266.4 768.57
## + Fedu 1 0.236 2266.7 768.62
## + Fjob_services 1 0.225 2266.7 768.62
## + reason_other 1 0.191 2266.7 768.63
## + Mjob_other 1 0.189 2266.7 768.63
## + freetime 1 0.115 2266.8 768.65
## + Fjob_other 1 0.106 2266.8 768.65
## - school 1 43.400 2310.3 774.04
## - reason_reputation 1 44.832 2311.7 774.35
## - Medu 1 67.982 2334.9 779.27
## - Walc 1 70.118 2337.0 779.72
## - higher 1 99.098 2366.0 785.81
## - failures 1 312.856 2579.8 828.54
##
## Step: AIC=757.33
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences
##
## Df Sum of Sq RSS AIC
## + schoolsup 1 57.743 2157.7 746.28
## + studytime 1 26.607 2188.8 753.36
## + age 1 25.633 2189.8 753.58
## + sex 1 18.163 2197.2 755.26
## + health 1 16.186 2199.2 755.70
## + paid 1 14.313 2201.1 756.12
## + reason_home 1 13.486 2201.9 756.31
## + address 1 10.728 2204.7 756.93
## + guardian_mother 1 9.938 2205.5 757.11
## + Fjob_teacher 1 9.875 2205.5 757.12
## + Mjob_teacher 1 9.601 2205.8 757.18
## <none> 2215.4 757.33
## + famsup 1 8.044 2207.4 757.53
## + Pstatus 1 7.754 2207.7 757.59
## + nursery 1 7.468 2207.9 757.66
## + famrel 1 6.082 2209.3 757.97
## + Mjob_health 1 5.207 2210.2 758.16
## + Mjob_services 1 4.483 2210.9 758.33
## + romantic 1 3.780 2211.6 758.48
## + Fjob_health 1 3.133 2212.3 758.63
## + goout 1 3.010 2212.4 758.65
## + internet 1 2.963 2212.4 758.67
## + Dalc 1 2.266 2213.2 758.82
## + Fjob_services 1 0.761 2214.7 759.16
## + guardian_other 1 0.721 2214.7 759.17
## + traveltime 1 0.477 2214.9 759.22
## + activities 1 0.465 2214.9 759.22
## + reason_other 1 0.463 2214.9 759.22
## + Fjob_other 1 0.366 2215.1 759.24
## + freetime 1 0.298 2215.1 759.26
## + famsize 1 0.121 2215.3 759.30
## + Mjob_other 1 0.008 2215.4 759.32
## + Fedu 1 0.004 2215.4 759.33
## - reason_reputation 1 46.503 2261.9 765.59
## - absences 1 51.477 2266.9 766.67
## - Walc 1 51.573 2267.0 766.69
## - school 1 53.159 2268.6 767.04
## - Medu 1 61.833 2277.2 768.92
## - higher 1 93.730 2309.2 775.80
## - failures 1 277.993 2493.4 813.72
##
## Step: AIC=746.28
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup
##
## Df Sum of Sq RSS AIC
## + studytime 1 28.850 2128.8 741.63
## + sex 1 28.189 2129.5 741.78
## + age 1 17.308 2140.4 744.30
## + Fjob_teacher 1 14.341 2143.3 744.99
## + health 1 14.143 2143.5 745.03
## + reason_home 1 13.743 2143.9 745.12
## + paid 1 12.801 2144.9 745.34
## + guardian_mother 1 11.636 2146.0 745.61
## + address 1 9.144 2148.5 746.18
## <none> 2157.7 746.28
## + romantic 1 6.569 2151.1 746.77
## + Mjob_teacher 1 6.427 2151.2 746.81
## + Pstatus 1 6.308 2151.4 746.83
## + famrel 1 5.958 2151.7 746.91
## + famsup 1 5.721 2151.9 746.97
## + nursery 1 5.496 2152.2 747.02
## + Mjob_health 1 3.114 2154.6 747.57
## + Mjob_services 1 3.078 2154.6 747.57
## + goout 1 3.004 2154.7 747.59
## + Fjob_services 1 1.785 2155.9 747.87
## + internet 1 1.412 2156.3 747.96
## + guardian_other 1 1.260 2156.4 747.99
## + Fjob_health 1 1.068 2156.6 748.04
## + Dalc 1 1.023 2156.7 748.05
## + traveltime 1 0.490 2157.2 748.17
## + freetime 1 0.209 2157.5 748.23
## + Fedu 1 0.164 2157.5 748.24
## + Fjob_other 1 0.124 2157.6 748.25
## + reason_other 1 0.061 2157.6 748.27
## + Mjob_other 1 0.032 2157.6 748.27
## + famsize 1 0.019 2157.7 748.28
## + activities 1 0.009 2157.7 748.28
## - reason_reputation 1 47.447 2205.1 755.03
## - Medu 1 51.451 2209.1 755.92
## - schoolsup 1 57.743 2215.4 757.33
## - absences 1 59.752 2217.4 757.77
## - Walc 1 60.425 2218.1 757.92
## - school 1 69.317 2227.0 759.90
## - higher 1 103.706 2261.4 767.47
## - failures 1 261.929 2419.6 800.88
##
## Step: AIC=741.63
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime
##
## Df Sum of Sq RSS AIC
## + sex 1 19.584 2109.2 739.06
## + Fjob_teacher 1 17.150 2111.7 739.63
## + age 1 13.928 2114.9 740.39
## + health 1 12.633 2116.2 740.69
## + reason_home 1 12.610 2116.2 740.69
## + paid 1 12.061 2116.8 740.82
## + guardian_mother 1 9.913 2118.9 741.32
## <none> 2128.8 741.63
## + romantic 1 8.532 2120.3 741.65
## + address 1 8.441 2120.4 741.67
## + famsup 1 8.214 2120.6 741.72
## + famrel 1 7.068 2121.8 741.99
## + Mjob_teacher 1 6.588 2122.2 742.10
## + Pstatus 1 6.286 2122.5 742.17
## + nursery 1 5.334 2123.5 742.39
## + Mjob_health 1 4.314 2124.5 742.63
## + Mjob_services 1 3.525 2125.3 742.81
## + goout 1 3.154 2125.7 742.90
## + Fjob_health 1 2.310 2126.5 743.09
## + Fjob_services 1 2.053 2126.8 743.15
## + guardian_other 1 1.566 2127.3 743.27
## + internet 1 1.494 2127.3 743.28
## + Dalc 1 1.375 2127.4 743.31
## + traveltime 1 0.523 2128.3 743.51
## + Fjob_other 1 0.260 2128.6 743.57
## + Fedu 1 0.247 2128.6 743.57
## + freetime 1 0.077 2128.8 743.61
## + famsize 1 0.029 2128.8 743.62
## + Mjob_other 1 0.013 2128.8 743.63
## + activities 1 0.000 2128.8 743.63
## + reason_other 1 0.000 2128.8 743.63
## - studytime 1 28.850 2157.7 746.28
## - reason_reputation 1 35.796 2164.6 747.87
## - Walc 1 45.809 2174.6 750.15
## - Medu 1 49.236 2178.1 750.93
## - absences 1 56.913 2185.7 752.66
## - schoolsup 1 59.986 2188.8 753.36
## - school 1 64.245 2193.1 754.32
## - higher 1 90.630 2219.4 760.23
## - failures 1 252.407 2381.2 794.98
##
## Step: AIC=739.06
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex
##
## Df Sum of Sq RSS AIC
## + Fjob_teacher 1 19.056 2090.2 736.58
## + reason_home 1 13.916 2095.3 737.79
## + romantic 1 13.039 2096.2 738.00
## + famsup 1 11.007 2098.2 738.48
## + guardian_mother 1 10.598 2098.6 738.58
## + age 1 10.454 2098.8 738.61
## + health 1 10.414 2098.8 738.62
## + paid 1 10.173 2099.1 738.68
## + famrel 1 10.124 2099.1 738.69
## <none> 2109.2 739.06
## + address 1 7.761 2101.5 739.24
## + Mjob_teacher 1 7.755 2101.5 739.24
## + Pstatus 1 7.314 2101.9 739.35
## + nursery 1 6.050 2103.2 739.65
## + goout 1 4.645 2104.6 739.98
## + Mjob_health 1 3.562 2105.7 740.23
## + Mjob_services 1 3.400 2105.8 740.27
## + Fjob_health 1 2.291 2106.9 740.53
## + guardian_other 1 2.104 2107.1 740.57
## + Fjob_services 1 1.830 2107.4 740.64
## + internet 1 1.339 2107.9 740.75
## + Dalc 1 0.481 2108.8 740.95
## + Fedu 1 0.248 2109.0 741.01
## + famsize 1 0.213 2109.0 741.01
## + activities 1 0.200 2109.0 741.02
## + traveltime 1 0.148 2109.1 741.03
## + Fjob_other 1 0.113 2109.1 741.04
## + freetime 1 0.057 2109.2 741.05
## + Mjob_other 1 0.044 2109.2 741.05
## + reason_other 1 0.012 2109.2 741.06
## - sex 1 19.584 2128.8 741.63
## - studytime 1 20.245 2129.5 741.78
## - Walc 1 26.882 2136.1 743.32
## - reason_reputation 1 35.466 2144.7 745.30
## - Medu 1 56.574 2165.8 750.14
## - absences 1 59.467 2168.7 750.80
## - schoolsup 1 68.078 2177.3 752.76
## - school 1 71.418 2180.7 753.51
## - higher 1 86.692 2195.9 756.96
## - failures 1 236.378 2345.6 789.54
##
## Step: AIC=736.58
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher
##
## Df Sum of Sq RSS AIC
## + romantic 1 14.678 2075.5 735.10
## + reason_home 1 14.002 2076.2 735.26
## + famrel 1 13.108 2077.1 735.47
## + health 1 10.344 2079.8 736.13
## + famsup 1 10.313 2079.9 736.14
## + guardian_mother 1 10.192 2080.0 736.17
## + age 1 9.417 2080.8 736.35
## <none> 2090.2 736.58
## + paid 1 8.364 2081.8 736.60
## + address 1 8.345 2081.8 736.60
## + Pstatus 1 7.892 2082.3 736.71
## + Mjob_health 1 5.086 2085.1 737.38
## + nursery 1 4.706 2085.5 737.47
## + goout 1 4.370 2085.8 737.55
## + Mjob_teacher 1 4.201 2086.0 737.59
## + Mjob_services 1 2.895 2087.3 737.90
## + internet 1 2.325 2087.9 738.03
## + Fjob_other 1 2.217 2088.0 738.06
## + guardian_other 1 1.797 2088.4 738.16
## + Fjob_health 1 1.148 2089.0 738.31
## + Dalc 1 0.626 2089.6 738.43
## + Fjob_services 1 0.511 2089.7 738.46
## + famsize 1 0.488 2089.7 738.47
## + Fedu 1 0.406 2089.8 738.49
## + traveltime 1 0.285 2089.9 738.51
## + activities 1 0.192 2090.0 738.54
## + Mjob_other 1 0.115 2090.1 738.55
## + freetime 1 0.077 2090.1 738.56
## + reason_other 1 0.009 2090.2 738.58
## - Fjob_teacher 1 19.056 2109.2 739.06
## - sex 1 21.490 2111.7 739.63
## - Walc 1 22.316 2112.5 739.83
## - studytime 1 22.386 2112.6 739.84
## - reason_reputation 1 37.043 2127.2 743.26
## - Medu 1 40.669 2130.8 744.10
## - absences 1 55.873 2146.1 747.61
## - school 1 70.197 2160.4 750.90
## - schoolsup 1 74.182 2164.4 751.81
## - higher 1 85.868 2176.1 754.47
## - failures 1 228.828 2319.0 785.90
##
## Step: AIC=735.1
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher +
## romantic
##
## Df Sum of Sq RSS AIC
## + reason_home 1 14.287 2061.2 733.69
## + famrel 1 13.422 2062.1 733.89
## + age 1 12.014 2063.5 734.23
## + guardian_mother 1 11.038 2064.5 734.47
## + famsup 1 10.889 2064.6 734.50
## + health 1 10.021 2065.5 734.71
## <none> 2075.5 735.10
## + paid 1 8.314 2067.2 735.12
## + address 1 7.685 2067.8 735.27
## + Pstatus 1 7.052 2068.4 735.42
## + Mjob_health 1 5.733 2069.8 735.73
## + goout 1 4.524 2071.0 736.02
## + nursery 1 4.279 2071.2 736.08
## + Mjob_services 1 3.923 2071.6 736.16
## + Mjob_teacher 1 3.869 2071.6 736.18
## + internet 1 3.674 2071.8 736.22
## + Fjob_other 1 2.710 2072.8 736.45
## - romantic 1 14.678 2090.2 736.58
## + Fjob_health 1 1.072 2074.4 736.84
## + Fedu 1 1.035 2074.5 736.85
## + guardian_other 1 0.687 2074.8 736.94
## + Fjob_services 1 0.650 2074.9 736.95
## + activities 1 0.418 2075.1 737.00
## + famsize 1 0.372 2075.1 737.01
## + Mjob_other 1 0.314 2075.2 737.02
## + traveltime 1 0.226 2075.3 737.05
## + Dalc 1 0.208 2075.3 737.05
## + freetime 1 0.194 2075.3 737.05
## + reason_other 1 0.027 2075.5 737.09
## - Fjob_teacher 1 20.695 2096.2 738.00
## - Walc 1 21.070 2096.6 738.09
## - studytime 1 23.877 2099.4 738.75
## - sex 1 26.586 2102.1 739.39
## - reason_reputation 1 33.656 2109.2 741.05
## - Medu 1 40.082 2115.6 742.55
## - absences 1 51.165 2126.7 745.13
## - school 1 70.304 2145.8 749.56
## - higher 1 79.898 2155.4 751.76
## - schoolsup 1 80.444 2155.9 751.88
## - failures 1 223.191 2298.7 783.56
##
## Step: AIC=733.69
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher +
## romantic + reason_home
##
## Df Sum of Sq RSS AIC
## + famrel 1 14.354 2046.9 732.24
## + famsup 1 11.780 2049.4 732.86
## + guardian_mother 1 10.596 2050.6 733.14
## + age 1 9.982 2051.2 733.29
## + paid 1 8.873 2052.3 733.56
## + health 1 8.448 2052.8 733.66
## <none> 2061.2 733.69
## + Pstatus 1 7.331 2053.9 733.93
## + Mjob_health 1 5.616 2055.6 734.34
## + address 1 5.478 2055.7 734.37
## + goout 1 4.281 2056.9 734.66
## + nursery 1 4.160 2057.1 734.69
## + Mjob_teacher 1 4.091 2057.1 734.71
## + internet 1 3.376 2057.8 734.88
## + Mjob_services 1 3.187 2058.0 734.92
## - reason_home 1 14.287 2075.5 735.10
## + Fjob_other 1 1.942 2059.3 735.22
## - romantic 1 14.963 2076.2 735.26
## + reason_other 1 1.175 2060.0 735.41
## + activities 1 1.130 2060.1 735.42
## + guardian_other 1 0.997 2060.2 735.45
## + Fedu 1 0.860 2060.4 735.48
## + Fjob_health 1 0.795 2060.4 735.50
## + freetime 1 0.489 2060.7 735.57
## + Dalc 1 0.407 2060.8 735.59
## + Fjob_services 1 0.389 2060.8 735.59
## + famsize 1 0.222 2061.0 735.63
## + Mjob_other 1 0.028 2061.2 735.68
## + traveltime 1 0.000 2061.2 735.69
## - Fjob_teacher 1 20.802 2082.0 736.65
## - Walc 1 21.631 2082.8 736.84
## - studytime 1 22.560 2083.8 737.06
## - sex 1 28.160 2089.4 738.39
## - Medu 1 37.284 2098.5 740.54
## - reason_reputation 1 45.198 2106.4 742.40
## - absences 1 54.338 2115.6 744.54
## - school 1 60.584 2121.8 746.00
## - higher 1 78.981 2140.2 750.26
## - schoolsup 1 81.120 2142.3 750.76
## - failures 1 209.141 2270.4 779.43
##
## Step: AIC=732.24
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher +
## romantic + reason_home + famrel
##
## Df Sum of Sq RSS AIC
## + famsup 1 12.315 2034.5 731.25
## + health 1 11.747 2035.1 731.39
## + guardian_mother 1 11.121 2035.7 731.54
## + paid 1 10.059 2036.8 731.80
## + age 1 9.419 2037.5 731.96
## <none> 2046.9 732.24
## + goout 1 6.726 2040.1 732.61
## + address 1 6.522 2040.3 732.66
## + Pstatus 1 6.493 2040.4 732.67
## + Mjob_health 1 6.445 2040.4 732.68
## + Mjob_teacher 1 4.581 2042.3 733.13
## + nursery 1 4.330 2042.5 733.19
## + Mjob_services 1 3.910 2043.0 733.29
## + internet 1 2.349 2044.5 733.67
## - famrel 1 14.354 2061.2 733.69
## + Fjob_other 1 1.804 2045.1 733.80
## - reason_home 1 15.218 2062.1 733.89
## - romantic 1 15.301 2062.2 733.91
## + Fedu 1 1.124 2045.7 733.96
## + Fjob_health 1 0.862 2046.0 734.03
## + reason_other 1 0.844 2046.0 734.03
## + activities 1 0.825 2046.0 734.04
## + guardian_other 1 0.803 2046.1 734.04
## + Fjob_services 1 0.442 2046.4 734.13
## + famsize 1 0.311 2046.5 734.16
## + Dalc 1 0.140 2046.7 734.20
## + freetime 1 0.113 2046.8 734.21
## + Mjob_other 1 0.044 2046.8 734.22
## + traveltime 1 0.004 2046.9 734.23
## - Walc 1 17.100 2064.0 734.35
## - studytime 1 23.424 2070.3 735.86
## - Fjob_teacher 1 24.085 2070.9 736.01
## - sex 1 32.761 2079.6 738.08
## - Medu 1 35.175 2082.0 738.65
## - reason_reputation 1 44.859 2091.7 740.94
## - absences 1 50.316 2097.2 742.23
## - school 1 57.344 2104.2 743.88
## - higher 1 78.534 2125.4 748.83
## - schoolsup 1 82.637 2129.5 749.79
## - failures 1 200.374 2247.2 776.37
##
## Step: AIC=731.25
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher +
## romantic + reason_home + famrel + famsup
##
## Df Sum of Sq RSS AIC
## + guardian_mother 1 11.177 2023.4 730.53
## + health 1 10.891 2023.7 730.60
## + paid 1 8.631 2025.9 731.15
## <none> 2034.5 731.25
## + age 1 7.311 2027.2 731.48
## + Mjob_health 1 6.950 2027.6 731.56
## + Pstatus 1 6.635 2027.9 731.64
## + goout 1 6.566 2028.0 731.66
## + address 1 5.611 2028.9 731.89
## - famsup 1 12.315 2046.9 732.24
## + Mjob_teacher 1 4.166 2030.4 732.24
## + nursery 1 3.892 2030.7 732.31
## + Mjob_services 1 2.736 2031.8 732.59
## + internet 1 2.637 2031.9 732.61
## - famrel 1 14.888 2049.4 732.86
## + Fjob_other 1 1.141 2033.4 732.98
## + activities 1 0.739 2033.8 733.07
## + reason_other 1 0.642 2033.9 733.10
## + guardian_other 1 0.618 2033.9 733.10
## - romantic 1 15.944 2050.5 733.11
## + Fjob_health 1 0.515 2034.0 733.13
## + Fedu 1 0.440 2034.1 733.15
## - reason_home 1 16.175 2050.7 733.17
## + Fjob_services 1 0.272 2034.3 733.19
## + famsize 1 0.223 2034.3 733.20
## + freetime 1 0.171 2034.4 733.21
## + Dalc 1 0.072 2034.5 733.24
## + Mjob_other 1 0.030 2034.5 733.25
## + traveltime 1 0.013 2034.5 733.25
## - Walc 1 17.371 2051.9 733.45
## - Fjob_teacher 1 23.348 2057.9 734.89
## - studytime 1 25.633 2060.2 735.44
## - sex 1 36.642 2071.2 738.07
## - Medu 1 39.976 2074.5 738.87
## - reason_reputation 1 44.698 2079.2 739.99
## - absences 1 47.502 2082.1 740.66
## - school 1 56.482 2091.0 742.78
## - schoolsup 1 79.797 2114.3 748.26
## - higher 1 81.248 2115.8 748.60
## - failures 1 195.019 2229.6 774.47
##
## Step: AIC=730.53
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher +
## romantic + reason_home + famrel + famsup + guardian_mother
##
## Df Sum of Sq RSS AIC
## + health 1 10.532 2012.8 729.95
## <none> 2023.4 730.53
## + paid 1 7.655 2015.7 730.66
## + age 1 6.937 2016.4 730.84
## + goout 1 6.391 2017.0 730.97
## + Mjob_health 1 5.881 2017.5 731.10
## + Mjob_teacher 1 5.824 2017.5 731.11
## + address 1 5.621 2017.8 731.16
## - guardian_mother 1 11.177 2034.5 731.25
## + guardian_other 1 5.162 2018.2 731.27
## + Pstatus 1 4.836 2018.5 731.35
## - famsup 1 12.370 2035.7 731.54
## + nursery 1 2.903 2020.5 731.82
## + Mjob_services 1 2.306 2021.1 731.97
## + internet 1 1.940 2021.4 732.06
## + Fjob_other 1 1.741 2021.6 732.11
## + Fedu 1 1.420 2022.0 732.19
## + activities 1 1.073 2022.3 732.27
## - famrel 1 15.426 2038.8 732.28
## + Fjob_health 1 0.812 2022.6 732.33
## - reason_home 1 15.713 2039.1 732.35
## + reason_other 1 0.699 2022.7 732.36
## + Fjob_services 1 0.533 2022.8 732.40
## + Dalc 1 0.340 2023.0 732.45
## + famsize 1 0.311 2023.1 732.46
## + freetime 1 0.275 2023.1 732.47
## + Mjob_other 1 0.189 2023.2 732.49
## + traveltime 1 0.025 2023.3 732.53
## - Walc 1 16.652 2040.0 732.58
## - romantic 1 16.833 2040.2 732.63
## - Fjob_teacher 1 22.996 2046.4 734.12
## - studytime 1 23.871 2047.2 734.33
## - sex 1 37.818 2061.2 737.68
## - reason_reputation 1 41.699 2065.1 738.61
## - Medu 1 42.342 2065.7 738.76
## - absences 1 46.522 2069.9 739.76
## - school 1 58.872 2082.2 742.70
## - schoolsup 1 81.930 2105.3 748.14
## - higher 1 83.610 2107.0 748.54
## - failures 1 197.732 2221.1 774.59
##
## Step: AIC=729.95
## score ~ failures + higher + Medu + Walc + reason_reputation +
## school + absences + schoolsup + studytime + sex + Fjob_teacher +
## romantic + reason_home + famrel + famsup + guardian_mother +
## health
##
## Df Sum of Sq RSS AIC
## <none> 2012.8 729.95
## + goout 1 7.556 2005.3 730.10
## + Mjob_health 1 6.878 2006.0 730.26
## + age 1 6.818 2006.0 730.28
## + paid 1 6.474 2006.4 730.36
## + Mjob_teacher 1 6.352 2006.5 730.39
## - health 1 10.532 2023.4 730.53
## - guardian_mother 1 10.818 2023.7 730.60
## + address 1 5.402 2007.4 730.63
## + guardian_other 1 4.979 2007.9 730.73
## - famsup 1 11.524 2024.4 730.78
## + Pstatus 1 4.635 2008.2 730.82
## + nursery 1 2.736 2010.1 731.28
## - Walc 1 13.729 2026.6 731.31
## - reason_home 1 13.949 2026.8 731.37
## + Fjob_other 1 2.080 2010.8 731.44
## + Mjob_services 1 1.908 2010.9 731.49
## + internet 1 1.471 2011.4 731.59
## + activities 1 1.277 2011.6 731.64
## + Fjob_services 1 1.036 2011.8 731.70
## + Fedu 1 1.034 2011.8 731.70
## + reason_other 1 0.537 2012.3 731.82
## + freetime 1 0.510 2012.3 731.83
## + Dalc 1 0.434 2012.4 731.85
## + Fjob_health 1 0.268 2012.6 731.89
## + Mjob_other 1 0.229 2012.6 731.90
## + famsize 1 0.194 2012.7 731.91
## + traveltime 1 0.127 2012.7 731.92
## - romantic 1 16.458 2029.3 731.98
## - famrel 1 18.601 2031.4 732.50
## - studytime 1 23.178 2036.0 733.61
## - Fjob_teacher 1 23.315 2036.2 733.64
## - sex 1 34.957 2047.8 736.46
## - reason_reputation 1 35.952 2048.8 736.70
## - Medu 1 41.864 2054.7 738.12
## - absences 1 47.059 2059.9 739.37
## - school 1 62.606 2075.4 743.09
## - schoolsup 1 79.211 2092.1 747.02
## - higher 1 83.885 2096.7 748.12
## - failures 1 194.240 2207.1 773.46
# Sequential analysis-of-variance table for the selected stepwise model
# (each term is tested in the order it entered the model).
anova(step_m)
## Analysis of Variance Table
##
## Response: score
## Df Sum Sq Mean Sq F value Pr(>F)
## failures 1 618.60 618.60 146.2868 < 2.2e-16 ***
## higher 1 198.42 198.42 46.9236 2.290e-11 ***
## Medu 1 119.32 119.32 28.2165 1.669e-07 ***
## Walc 1 69.90 69.90 16.5308 5.600e-05 ***
## reason_reputation 1 58.12 58.12 13.7447 0.0002340 ***
## school 1 43.40 43.40 10.2633 0.0014481 **
## absences 1 51.48 51.48 12.1734 0.0005298 ***
## schoolsup 1 57.74 57.74 13.6551 0.0002452 ***
## studytime 1 28.85 28.85 6.8226 0.0092857 **
## sex 1 19.58 19.58 4.6313 0.0318958 *
## Fjob_teacher 1 19.06 19.06 4.5063 0.0342860 *
## romantic 1 14.68 14.68 3.4712 0.0630620 .
## reason_home 1 14.29 14.29 3.3786 0.0666712 .
## famrel 1 14.35 14.35 3.3944 0.0660389 .
## famsup 1 12.31 12.31 2.9122 0.0885662 .
## guardian_mother 1 11.18 11.18 2.6431 0.1046636
## health 1 10.53 10.53 2.4907 0.1151855
## Residuals 476 2012.84 4.23
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Evaluate the stepwise model with lm_result(), a helper defined outside this
# section. Judging by the printed output it reports the training and test MSE
# (MSE_tr, MSE_te) -- confirm against its definition.
r4 <- lm_result(step_m, train, test)
## MSE_tr MSE_te
## 4.07 6.58
# Stack the per-model evaluation results (one row per model) and label each
# row with the variable-selection strategy that produced it.
model_labels <- c("full", "forward", "backward", "stepwise")
result <- rbind(r1, r2, r3, r4)
rownames(result) <- model_labels
result # forward and stepwise give the best (lowest) test MSE
## MSE_tr MSE_te
## full 3.926032 6.907939
## forward 4.074577 6.577594
## backward 4.003125 6.690604
## stepwise 4.074577 6.577594
- stepwise regression 모델의 AIC 변화
## AIC trajectory of the stepwise regression model
par(mfrow = c(1, 1)) # single-panel layout (1 row, 1 column)
step_m$anova$AIC # AIC after each variable addition / removal
## [1] 951.2276 853.1969 818.2858 796.6850 784.3158 774.0416 766.6733
## [8] 757.3261 746.2798 741.6299 739.0643 736.5811 735.0997 733.6874
## [15] 732.2353 731.2543 730.5330 729.9549
step_m$anova$Step # variable added / removed at each step
## [1] "" "+ failures" "+ higher"
## [4] "+ Medu" "+ Walc" "+ reason_reputation"
## [7] "+ school" "+ absences" "+ schoolsup"
## [10] "+ studytime" "+ sex" "+ Fjob_teacher"
## [13] "+ romantic" "+ reason_home" "+ famrel"
## [16] "+ famsup" "+ guardian_mother" "+ health"
# AIC against step index, one point per step.
plot(
  step_m$anova$AIC,
  pch = 19,
  main = "AIC (stepwise regression)",
  xlab = "Step",
  ylab = "AIC"
)
# Annotate every point with the variable that was added or removed; with no
# explicit y, text() places the labels at (step index, AIC), above each point.
text(
  step_m$anova$AIC,
  labels = step_m$anova$Step,
  pos = 3,
  col = "red",
  cex = 0.8
)
- feature importance
## Relative importance of the regressors in the stepwise model
# rela = TRUE normalises the importance metrics so that they sum to 100%.
imp <- calc.relimp(step_m, rela = TRUE)
plot(imp)
imp
## Response variable: score
## Total response variance: 6.845146
## Analysis based on 494 observations
##
## 17 Regressors:
## failures higher Medu Walc reason_reputation school absences schoolsup studytime sex Fjob_teacher romantic reason_home famrel famsup guardian_mother health
## Proportion of variance explained by model: 40.35%
## Metrics are normalized to sum to 100% (rela=TRUE).
##
## Relative importance metrics:
##
## lmg
## failures 0.266115242
## higher 0.150577530
## Medu 0.096897186
## Walc 0.040111380
## reason_reputation 0.057980745
## school 0.079592331
## absences 0.059429889
## schoolsup 0.042064926
## studytime 0.059718225
## sex 0.034703422
## Fjob_teacher 0.034261209
## romantic 0.016636493
## reason_home 0.013572503
## famrel 0.021687831
## famsup 0.005286136
## guardian_mother 0.004927594
## health 0.016437358
##
## Average coefficients for different model sizes:
##
## 1X 2Xs 3Xs 4Xs 5Xs
## failures -1.83602495 -1.77120362 -1.7103768 -1.6531626 -1.5992117
## higher 3.02912406 2.87511065 2.7309804 2.5961422 2.4700364
## Medu 0.68005695 0.64363935 0.6095608 0.5775912 0.5475308
## Walc -0.39984766 -0.37360641 -0.3496158 -0.3276138 -0.3073652
## reason_reputation 1.28632788 1.22250295 1.1644309 1.1114462 1.0629623
## school -1.31191921 -1.24959784 -1.1936743 -1.1434810 -1.0984342
## absences -0.15018065 -0.14311599 -0.1367777 -0.1310755 -0.1259318
## schoolsup -0.72210100 -0.79769434 -0.8645489 -0.9239534 -0.9770035
## studytime 0.74604214 0.69720444 0.6521105 0.6104329 0.5718757
## sex -0.77227796 -0.73746114 -0.7085303 -0.6846257 -0.6650294
## Fjob_teacher 1.87517883 1.75757217 1.6528062 1.5594641 1.4763144
## romantic -0.55881253 -0.53073062 -0.5062478 -0.4849667 -0.4665425
## reason_home 0.47923858 0.47915100 0.4785364 0.4775267 0.4762220
## famrel 0.33225443 0.31676062 0.3028976 0.2904857 0.2793674
## famsup -0.02545908 -0.06916087 -0.1073702 -0.1407702 -0.1699502
## guardian_mother -0.12728967 -0.14668998 -0.1647496 -0.1815801 -0.1972868
## health -0.20061028 -0.19099730 -0.1821856 -0.1740623 -0.1665352
## 6Xs 7Xs 8Xs 9Xs 10Xs
## failures -1.5482035 -1.4998434 -1.4538604 -1.4100044 -1.3680449
## higher 2.3521338 2.2419341 2.1389650 2.0427813 1.9529633
## Medu 0.5192061 0.4924658 0.4671774 0.4432245 0.4205038
## Walc -0.2886596 -0.2713084 -0.2551432 -0.2400133 -0.2257839
## reason_reputation 1.0184619 0.9774886 0.9396392 0.9045574 0.8719284
## school -1.0580215 -1.0217914 -0.9893444 -0.9603260 -0.9344201
## absences -0.1212797 -0.1170622 -0.1132303 -0.1097421 -0.1065619
## schoolsup -1.0246297 -1.0676222 -1.1066520 -1.1422899 -1.1750220
## studytime 0.5361713 0.5030778 0.4723768 0.4438711 0.4173829
## sex -0.6491421 -0.6364636 -0.6265764 -0.6191315 -0.6138373
## Fjob_teacher 1.4022857 1.3364444 1.2779759 1.2261686 1.1804006
## romantic -0.4506751 -0.4371028 -0.4255966 -0.4159564 -0.4080070
## reason_home 0.4746965 0.4730047 0.4711851 0.4692641 0.4672594
## famrel 0.2694057 0.2604811 0.2524903 0.2453440 0.2389656
## famsup -0.1954197 -0.2176212 -0.2369400 -0.2537126 -0.2682341
## guardian_mother -0.2119685 -0.2257177 -0.2386205 -0.2507568 -0.2622002
## health -0.1595288 -0.1529816 -0.1468436 -0.1410745 -0.1356420
## 11Xs 12Xs 13Xs 14Xs
## failures -1.3277692 -1.2889814 -1.25150087 -1.21516149
## higher 1.8691170 1.7908725 1.71788435 1.64983009
## Medu 0.3989233 0.3784003 0.35885953 0.34023218
## Walc -0.2123341 -0.1995554 -0.18735025 -0.17563060
## reason_reputation 0.8414736 0.8129465 0.78612883 0.76082748
## school -0.9113444 -0.8908460 -0.87269805 -0.85669687
## absences -0.1036593 -0.1010083 -0.09858665 -0.09637545
## schoolsup -1.2052637 -1.2333706 -1.25964855 -1.28436147
## studytime 0.3927517 0.3698332 0.34849740 0.32862746
## sex -0.6104498 -0.6087647 -0.60861089 -0.60984520
## Fjob_teacher 1.1401276 1.1048737 1.07422298 1.04781199
## romantic -0.4015950 -0.3965861 -0.39286304 -0.39032323
## reason_home 0.4651813 0.4630356 0.46082426 0.45854696
## famrel 0.2332901 0.2282625 0.22383668 0.21997496
## famsup -0.2807638 -0.2915302 -0.30073540 -0.30855811
## guardian_mother -0.2730182 -0.2832726 -0.29301938 -0.30230927
## health -0.1305204 -0.1256896 -0.12113415 -0.11684280
## 15Xs 16Xs 17Xs
## failures -1.17981091 -1.14530957 -1.1115300
## higher 1.58641030 1.52734777 1.4723868
## Medu 0.32245443 0.30546656 0.2892121
## Walc -0.16431681 -0.15333662 -0.1426242
## reason_reputation 0.73687137 0.71410915 0.6924070
## school -0.84265956 -0.83042213 -0.8198378
## absences -0.09435847 -0.09252184 -0.0908536
## schoolsup -1.30773833 -1.32997883 -1.3512583
## studytime 0.31011832 0.29287558 0.2768143
## sex -0.61234781 -0.61601880 -0.6207752
## Fjob_teacher 1.02532404 1.00648372 0.9910523
## romantic -0.38887721 -0.38844706 -0.3889651
## reason_home 0.45620172 0.45378559 0.4512951
## famrel 0.21664670 0.21382773 0.2114996
## famsup -0.31515702 -0.32067311 -0.3252318
## guardian_mother -0.31118805 -0.31969688 -0.3278727
## health -0.11280750 -0.10902312 -0.1054869
# Ridge regression (alpha = 0) with cross-validation over the lambda path,
# using columns 1:39 as predictors and `score` as the response.
cvfit <- cv.glmnet(
  x = as.matrix(train[, 1:39]),
  y = train$score,
  alpha = 0
)
plot(cvfit)
# Lambda value that minimises the cross-validation error.
lambda <- cvfit$lambda.min
- ridge regression의 계수
# Ridge coefficients (intercept + one per predictor) at the selected lambda.
coef_ridge <- coef(cvfit, s = lambda)
coef_ridge
## 40 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 9.167737155
## school -0.556418300
## sex -0.460344328
## age 0.076928269
## address 0.244592945
## famsize 0.063560505
## Pstatus 0.226990972
## Medu 0.188941562
## Fedu 0.052239686
## traveltime -0.017614395
## studytime 0.264365505
## failures -0.937852536
## schoolsup -0.849064862
## famsup -0.175809489
## paid -0.458864006
## activities 0.136721937
## nursery -0.134694970
## higher 1.254116533
## internet 0.162342371
## romantic -0.296269262
## famrel 0.187311926
## freetime -0.002181064
## goout -0.113656915
## Dalc -0.054947299
## Walc -0.112253238
## health -0.096697434
## absences -0.069186924
## Mjob_health 0.531777798
## Mjob_other 0.042627055
## Mjob_services -0.026825340
## Mjob_teacher 0.488728706
## Fjob_health -0.215685064
## Fjob_other 0.110702703
## Fjob_services -0.080892755
## Fjob_teacher 0.729175303
## reason_home 0.320186924
## reason_other 0.033538667
## reason_reputation 0.527084198
## guardian_mother -0.288609519
## guardian_other -0.515290392
- ridge regression을 이용한 예측값 계산
# Fitted values on the training data at the selected lambda.
pred_ridge_tr <- predict(cvfit, newx = as.matrix(train[, 1:39]), s = lambda)
# Actual vs. predicted training scores, both axes fixed to 5..18.
plot(
  train$score, pred_ridge_tr,
  pch = 19,
  xlab = "Actual score", ylab = "Predicted score",
  xlim = c(5, 18), ylim = c(5, 18)
)
# Dashed red reference along y = x, drawn over the observed score range.
lines(train$score, train$score, col = "red", lty = 2)
# Predictions for the held-out test data.
pred_ridge_te <- predict(cvfit, newx = as.matrix(test[, 1:39]), s = lambda)
- performance evaluation
# Test-set mean squared error of the ridge model.
MSE_ridte <- mean((test$score - pred_ridge_te)^2)
MSE_ridte
## [1] 6.619485
# LASSO regression (alpha = 1) with cross-validation over the lambda path.
# Note: this overwrites the ridge `cvfit` and `lambda` objects.
cvfit <- cv.glmnet(
  x = as.matrix(train[, 1:39]),
  y = train$score,
  alpha = 1
)
plot(cvfit)
# Lambda value that minimises the cross-validation error.
lambda <- cvfit$lambda.min
## LASSO coefficients at the selected lambda
coef_lasso <- coef(cvfit, s = lambda)
coef_lasso
## 40 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 8.77356292
## school -0.66338499
## sex -0.51134488
## age 0.10188416
## address 0.22146570
## famsize .
## Pstatus 0.17369125
## Medu 0.21327165
## Fedu .
## traveltime .
## studytime 0.25855717
## failures -1.13724762
## schoolsup -1.03649021
## famsup -0.16601759
## paid -0.38425736
## activities 0.08759245
## nursery -0.09217803
## higher 1.42044312
## internet 0.06355573
## romantic -0.31532981
## famrel 0.19839417
## freetime .
## goout -0.10765529
## Dalc .
## Walc -0.12101969
## health -0.09643229
## absences -0.07801459
## Mjob_health 0.50853631
## Mjob_other .
## Mjob_services .
## Mjob_teacher 0.44210011
## Fjob_health -0.03276677
## Fjob_other 0.14138765
## Fjob_services .
## Fjob_teacher 0.85101068
## reason_home 0.29936051
## reason_other .
## reason_reputation 0.56675028
## guardian_mother -0.30978159
## guardian_other -0.42207321
## Predictions from the LASSO model
# Fitted values on the training data at the selected lambda.
pred_lasso_tr <- predict(cvfit, newx = as.matrix(train[, 1:39]), s = lambda)
# Actual vs. predicted training scores, both axes fixed to 5..18.
plot(
  train$score, pred_lasso_tr,
  pch = 19,
  xlab = "Actual score", ylab = "Predicted score",
  xlim = c(5, 18), ylim = c(5, 18)
)
# Dashed red reference along y = x, drawn over the observed score range.
lines(train$score, train$score, col = "red", lty = 2)
# Predictions for the held-out test data.
pred_lasso_te <- predict(cvfit, newx = as.matrix(test[, 1:39]), s = lambda)
# Test-set mean squared error of the LASSO model.
MSE_laste <- mean((test$score - pred_lasso_te)^2)
MSE_laste
## [1] 6.66937
# Side-by-side comparison of the test MSE of the two penalised models.
result <- c(Ridge = MSE_ridte, LASSO = MSE_laste)
result
## Ridge LASSO
## 6.619485 6.669370
# Reload the data so this section starts from the raw, unscaled table.
st <- read.csv("./input/student.csv", row.names = 1)
# Randomly label every row as train / validation / test with weights 6:2:2.
set.seed(555)
flag <- sample(
  c("tr", "va", "te"),
  size = nrow(st), replace = TRUE, prob = c(6, 2, 2)
)
train <- st[which(flag == "tr"), ]
valid <- st[which(flag == "va"), ]
test <- st[which(flag == "te"), ]
# Outlier removal (training data only), based on the boxplot whiskers:
# absences above the upper whisker and scores below the lower whisker.
boxplot(train)
b1 <- boxplot(train$absences)
b2 <- boxplot(train$score)
out1 <- which(train$absences > b1$stats[5])
out2 <- which(train$score < b2$stats[1])
# Select the rows to keep instead of negating the outlier indices: when no
# outlier is found, train[-integer(0), ] would return zero rows rather than
# leaving the data untouched.
keep <- setdiff(seq_len(nrow(train)), c(out1, out2))
train <- train[keep, ]
boxplot(train)
### Standardization
# Centring / scaling parameters are estimated on the training predictors only
# (column 40 is left out -- presumably `score`, since columns 1:39 are used as
# predictors below) and then applied to all three sets.
pp_model <- preProcess(train[, -40], method = c("center", "scale"))
train <- predict(pp_model, train)
valid <- predict(pp_model, valid)
test <- predict(pp_model, test)
# Predictor matrices and response vectors for each set.
tr_x <- train[, 1:39]
va_x <- valid[, 1:39]
te_x <- test[, 1:39]
tr_y <- train$score
va_y <- valid$score
te_y <- test$score
# find optimal k
# Validation MSE of a k-nearest-neighbour regression for every k in 1..100.
MSE_k <- vapply(
  seq_len(100),
  function(k) {
    fit_k <- knnreg(tr_x, tr_y, k = k)
    mean((predict(fit_k, va_x) - va_y)^2)
  },
  numeric(1)
)
# k with the lowest validation MSE.
which.min(MSE_k)
## [1] 17
# Validation MSE as a function of k, with the minimum marked and labelled.
best_k <- which.min(MSE_k)
plot(MSE_k, type = "l", ylab = "MSE", xlab = "k")
abline(v = best_k, lty = 2, col = "red")
text(
  best_k, min(MSE_k),
  labels = round(MSE_k[best_k], 2),
  pos = 3, col = "red"
)
# test error
# Refit with the selected k and score the model on the test set.
m_knn <- knnreg(tr_x, tr_y, k = best_k)
pred_te <- predict(m_knn, te_x)
MSE_te <- mean((te_y - pred_te)^2)
MSE_te
## [1] 6.899605
# Actual vs. predicted test scores for the KNN regression fitted above.
# The title previously read "Result of Regression Tree", which labelled this
# KNN plot as belonging to a different model.
plot(
  te_y, pred_te,
  pch = 19,
  xlab = "Actual score", ylab = "Predicted score",
  main = "Result of KNN Regression",
  xlim = c(5, 18), ylim = c(5, 18)
)
# Dashed red reference along y = x, drawn over the observed score range.
lines(te_y, te_y, lty = 2, col = "red")
# Reload the data so this section starts from the raw, unscaled table.
st <- read.csv("./input/student.csv", row.names = 1)
# Randomly label every row as train / test with weights 8:2.
set.seed(555)
flag <- sample(c("tr", "te"), size = nrow(st), replace = TRUE, prob = c(8, 2))
train <- st[which(flag == "tr"), ]
test <- st[which(flag == "te"), ]
# Outlier removal (training data only), based on the boxplot whiskers:
# absences above the upper whisker and scores below the lower whisker.
boxplot(train)
b1 <- boxplot(train$absences)
b2 <- boxplot(train$score)
out1 <- which(train$absences > b1$stats[5])
out2 <- which(train$score < b2$stats[1])
# Select the rows to keep instead of negating the outlier indices: when no
# outlier is found, train[-integer(0), ] would return zero rows rather than
# leaving the data untouched.
keep <- setdiff(seq_len(nrow(train)), c(out1, out2))
train <- train[keep, ]
boxplot(train)
##### rpart
set.seed(111)
# Grow a full regression tree on all predictors; the near-zero complexity
# parameter effectively disables the cp-based stopping rule.
dt <- rpart(score ~ ., data = train, cp = 0.1^20)
printcp(dt) # print the complexity-parameter table
##
## Regression tree:
## rpart(formula = score ~ ., data = train, cp = 0.1^20)
##
## Variables actually used in tree construction:
## [1] absences activities address
## [4] age Dalc failures
## [7] famrel famsize famsup
## [10] Fedu freetime goout
## [13] health higher internet
## [16] Medu Mjob_health reason_home
## [19] reason_reputation school schoolsup
## [22] studytime traveltime Walc
##
## Root node error: 3374.7/494 = 6.8313
##
## n= 494
##
## CP nsplit rel error xerror xstd
## 1 2.0697e-01 0 1.00000 1.00301 0.056645
## 2 7.3186e-02 1 0.79303 0.79635 0.048106
## 3 3.4056e-02 2 0.71984 0.72309 0.043693
## 4 2.4437e-02 3 0.68578 0.71183 0.044176
## 5 1.5088e-02 4 0.66135 0.70006 0.044504
## 6 1.5036e-02 5 0.64626 0.70919 0.046023
## 7 1.1735e-02 7 0.61619 0.72050 0.047333
## 8 1.0291e-02 8 0.60445 0.74127 0.050471
## 9 8.8407e-03 9 0.59416 0.75355 0.053274
## 10 7.4938e-03 10 0.58532 0.76567 0.055839
## 11 7.2498e-03 12 0.57033 0.77521 0.056217
## 12 6.8552e-03 13 0.56308 0.77959 0.056078
## 13 6.8511e-03 16 0.54252 0.79388 0.057735
## 14 6.6068e-03 17 0.53567 0.80562 0.057956
## 15 6.3976e-03 18 0.52906 0.80582 0.057948
## 16 6.2038e-03 20 0.51626 0.80934 0.058074
## 17 6.1408e-03 21 0.51006 0.80665 0.057846
## 18 6.1328e-03 23 0.49778 0.80739 0.057837
## 19 5.7126e-03 24 0.49165 0.80395 0.057706
## 20 5.0460e-03 25 0.48593 0.80728 0.058304
## 21 4.8599e-03 26 0.48089 0.82015 0.058132
## 22 4.4255e-03 27 0.47603 0.82425 0.057562
## 23 4.3704e-03 29 0.46718 0.82259 0.057384
## 24 3.9915e-03 30 0.46281 0.81782 0.057253
## 25 3.4565e-03 33 0.45083 0.82966 0.058005
## 26 3.1668e-03 35 0.44392 0.82624 0.057279
## 27 2.7740e-03 36 0.44075 0.82531 0.057404
## 28 2.2344e-03 37 0.43798 0.82984 0.057332
## 29 1.7808e-03 40 0.43128 0.83230 0.057367
## 30 1.0000e-20 41 0.42949 0.83381 0.057703
plotcp(dt) # cross-validated error against the complexity parameter
### tree pruning
# Prune at the CP value of the row with the smallest cross-validated error.
cp_table <- dt$cptable
best_cp <- cp_table[which.min(cp_table[, "xerror"]), "CP"]
dt_prune <- prune(dt, cp = best_cp)
### plotting
plot(dt_prune, margin = 0.1) # draw the pruned tree
text(dt_prune, use.n = TRUE)
### variable importance
dt_prune$variable.importance # importance scores of the pruned tree
## failures higher Walc Medu age
## 698.462257 246.978734 114.927613 82.467687 57.407857
## Dalc Fedu Mjob_teacher absences Fjob_teacher
## 38.309204 31.337721 25.564983 19.135952 13.194830
## Mjob_health guardian_other paid
## 12.370153 9.567976 3.298707
# Bar chart of the variable-importance scores of the pruned tree.
barplot(dt_prune$variable.importance, ylim = c(0, 800))
### performance evaluation
# training MSE
# With no new data, predict() returns the fitted values on the training set.
pred_tr <- predict(dt_prune)
MSE_tr <- mean((train$score - pred_tr)^2)
MSE_tr
## [1] 4.517856
# test MSE of the pruned tree
pred_te <- predict(dt_prune, newdata = test)
MSE_te <- mean((test$score - pred_te)^2)
MSE_te
## [1] 7.648207
# Reload the data so this section starts from the raw, unscaled table.
st <- read.csv("./input/student.csv", row.names = 1)
# Randomly label every row as train / test with weights 8:2.
set.seed(555)
flag <- sample(c("tr", "te"), size = nrow(st), replace = TRUE, prob = c(8, 2))
train <- st[flag == "tr", ]
test <- st[flag == "te", ]
### Standardization
# Centring / scaling parameters come from the training predictors only
# (column 40 is left out) and are then applied to both sets.
pp_model <- preProcess(train[, -40], method = c("center", "scale"))
train <- predict(pp_model, train)
test <- predict(pp_model, test)
### ann modeling
set.seed(1004)
# formula: score ~ <all 39 predictor names joined with "+">
predictor_terms <- paste(colnames(train[, 1:39]), collapse = "+")
fml <- as.formula(paste("score~", predictor_terms))
### neuralnet tuning (disabled)
# NOTE(review): the disabled search below scores each candidate on `test`
# (there is no separate validation set here) and marks which.max(MSE);
# which.min() is presumably what was intended -- confirm before re-enabling.
# ptm <- proc.time() # system time when the search starts
# # vary the number of hidden nodes from 2 to 8 and record the validation MSE
# MSE <- NULL
# for(h in 2:8){
# ann <- neuralnet(fml, data = train, stepmax = 100000000, threshold = 0.01, linear.output = T, hidden =h, err.fct = "sse", lifesign = "full") # fit the network
# pred <- compute(ann, test[, 1:39])$net.result # predictions for the validation data
# MSE_v <- mean((pred - test$score)^2)
# MSE <- c(MSE, MSE_v)
# }
#
# proc.time() - ptm # total run time in seconds (check "elapsed")
# plot(2:8, MSE, xlab = "Hidden node", ylab = "MSE", type = "l", main = "Validation set MSE")
# abline(v = which.max(MSE) + 1, lty = 2)
# Train the network with 2 hidden nodes, a linear output unit and SSE loss;
# lifesign = "full" prints the training progress.
m_ann <- neuralnet(
  fml,
  data = train,
  stepmax = 1e6,
  threshold = 0.01,
  linear.output = TRUE,
  hidden = 2,
  err.fct = "sse",
  lifesign = "full"
)
## hidden: 2 thresh: 0.01 rep: 1/1 steps: 1000 min thresh: 0.2335553271
## 2000 min thresh: 0.1505807638
## 3000 min thresh: 0.09653231758
## 4000 min thresh: 0.08927646165
## 5000 min thresh: 0.07649720845
## 6000 min thresh: 0.06023353057
## 7000 min thresh: 0.04868439858
## 8000 min thresh: 0.04722636276
## 9000 min thresh: 0.03894188661
## 10000 min thresh: 0.03562505974
## 11000 min thresh: 0.03519242481
## 12000 min thresh: 0.03233752237
## 13000 min thresh: 0.02933776676
## 14000 min thresh: 0.0258825741
## 15000 min thresh: 0.0258825741
## 16000 min thresh: 0.02311210518
## 17000 min thresh: 0.02163785798
## 18000 min thresh: 0.0201117989
## 19000 min thresh: 0.0201117989
## 20000 min thresh: 0.01944093671
## 21000 min thresh: 0.01482905607
## 22000 min thresh: 0.01482905607
## 23000 min thresh: 0.01482905607
## 24000 min thresh: 0.01482905607
## 25000 min thresh: 0.01331030992
## 26000 min thresh: 0.01331030992
## 27000 min thresh: 0.01331030992
## 28000 min thresh: 0.01156519501
## 29000 min thresh: 0.01156519501
## 30000 min thresh: 0.0114995452
## 31000 min thresh: 0.0114995452
## 32000 min thresh: 0.0114995452
## 33000 min thresh: 0.0109319609
## 34000 min thresh: 0.0109319609
## 35000 min thresh: 0.0109319609
## 36000 min thresh: 0.01046851022
## 37000 min thresh: 0.01005458883
## 38000 min thresh: 0.01005458883
## 38027 error: 954.64028 time: 9.68 secs
# Diagram of the fitted network.
plot(m_ann)
### Prediction
# training MSE
ann_out_tr <- compute(m_ann, train[, 1:39])
pred_tr <- ann_out_tr$net.result
MSE_tr <- mean((train$score - pred_tr)^2)
MSE_tr
## [1] 3.671693373
# test MSE of the neural network
ann_out_te <- compute(m_ann, test[, 1:39])
pred_te <- ann_out_te$net.result
MSE_te <- mean((test$score - pred_te)^2)
MSE_te
## [1] 9.383553398
# Actual vs. predicted test scores.
plot(
  test$score, pred_te,
  pch = 19,
  xlab = "Actual value", ylab = "Predicted value"
)
# Identity reference line y = x (intercept 0, slope 1): points on it are
# perfect predictions. The previous abline(1, 1) drew y = x + 1 instead.
abline(a = 0, b = 1, col = "red", lty = 2, lwd = 2)
# Reload the data so this section starts from the raw, unscaled table.
st <- read.csv("./input/student.csv", row.names = 1)
# Randomly label every row as train / test with weights 8:2.
set.seed(555)
flag <- sample(c("tr", "te"), size = nrow(st), replace = TRUE, prob = c(8, 2))
train <- st[flag == "tr", ]
test <- st[flag == "te", ]
# Standardization: parameters come from the training predictors only
# (column 40 is left out) and are then applied to both sets.
pp_model <- preProcess(train[, -40], method = c("center", "scale"))
train <- predict(pp_model, train)
test <- predict(pp_model, test)
##### SVR
### SVR tuning
# Grid search over epsilon (0 to 1 in steps of 0.1) and cost (2^-3 ... 2^3).
svr_grid <- list(epsilon = seq(0, 1, 0.1), cost = 2^(-3:3))
tune.svr <- tune(svm, score ~ ., data = train, ranges = svr_grid)
plot(tune.svr)
# Best model found by the grid search
svr <- tune.svr$best.model
### performance evaluation
# Training MSE (with no new data, predict() returns the fitted values)
pred_tr <- predict(svr)
MSE_tr <- mean((train$score - pred_tr)^2)
MSE_tr
## [1] 2.264410905
# Test MSE of the tuned SVR model
pred_te <- predict(svr, newdata = test)
MSE_te <- mean((test$score - pred_te)^2)
MSE_te
## [1] 6.176742383
# Actual vs. predicted test scores.
plot(
  test$score, pred_te,
  pch = 19,
  xlab = "Actual value", ylab = "Predicted value"
)
# Identity reference line y = x (intercept 0, slope 1): points on it are
# perfect predictions. The previous abline(1, 1) drew y = x + 1 instead.
abline(a = 0, b = 1, col = "red", lty = 2, lwd = 2)
# Reload the data so this section starts from the raw table.
st <- read.csv("./input/student.csv", row.names = 1)
# Randomly label every row as train / test with weights 8:2.
set.seed(555)
flag <- sample(c("tr", "te"), size = nrow(st), replace = TRUE, prob = c(8, 2))
train <- st[flag == "tr", ]
test <- st[flag == "te", ]
# Distribution of the students' scores
h <- hist(train$score, breaks = 12)
# Split the histogram bars at 12.9: bars whose left break is below the cut are
# orange, the others sky blue.
cuts <- cut(h$breaks, c(-Inf, 12.9, Inf))
bar_cols <- c("orange", "skyblue")[cuts]
plot(h, col = bar_cols, main = "Histogram of Score", xlab = "Score")
abline(v = 13, col = "red", lwd = 2)
# Build a binary class for the training data from the score:
# class = 1 when score is strictly greater than 13, otherwise 0
# (0: lower group, 1: upper group).
class_tr <- numeric(nrow(train))
class_tr[which(train$score > 13)] <- 1
# Replace the numeric score column with the class label.
train_cls <- data.frame(train[, names(train) != "score"], "class" = class_tr)
train_cls[seq_len(10), ]
## school sex age address famsize Pstatus Medu Fedu traveltime studytime
## 1 0 0 18 1 0 0 4 4 2 2
## 3 0 0 15 1 1 1 1 1 1 2
## 4 0 0 15 1 0 1 4 2 1 3
## 5 0 0 16 1 0 1 3 3 1 2
## 6 0 1 16 1 1 1 4 3 1 2
## 8 0 0 17 1 0 0 4 4 2 2
## 9 0 1 15 1 1 0 3 2 1 2
## 12 0 0 15 1 0 1 2 1 3 3
## 13 0 1 15 1 1 1 4 4 1 1
## 14 0 1 15 1 0 1 4 3 2 2
## failures schoolsup famsup paid activities nursery higher internet
## 1 0 1 0 0 0 1 1 0
## 3 0 1 0 0 0 1 1 1
## 4 0 0 1 0 1 1 1 1
## 5 0 0 1 0 0 1 1 0
## 6 0 0 1 0 1 1 1 1
## 8 0 1 1 0 0 1 1 0
## 9 0 0 1 0 0 1 1 1
## 12 0 0 1 0 1 1 1 1
## 13 0 0 1 0 1 1 1 1
## 14 0 0 1 0 0 1 1 1
## romantic famrel freetime goout Dalc Walc health absences Mjob_health
## 1 0 4 3 4 1 1 3 4 0
## 3 0 4 3 2 2 3 3 6 0
## 4 1 3 2 2 1 1 5 0 1
## 5 0 4 3 2 1 2 5 0 0
## 6 0 5 4 2 1 2 5 6 0
## 8 0 4 1 4 1 1 1 2 0
## 9 0 4 2 2 1 1 1 0 0
## 12 0 5 2 2 1 1 4 0 0
## 13 0 4 3 3 1 3 5 0 1
## 14 0 5 4 3 1 2 3 0 0
## Mjob_other Mjob_services Mjob_teacher Fjob_health Fjob_other
## 1 0 0 0 0 0
## 3 0 0 0 0 1
## 4 0 0 0 0 0
## 5 1 0 0 0 1
## 6 0 1 0 0 1
## 8 1 0 0 0 0
## 9 0 1 0 0 1
## 12 0 1 0 0 1
## 13 0 0 0 0 0
## 14 0 0 1 0 1
## Fjob_services Fjob_teacher reason_home reason_other reason_reputation
## 1 0 1 0 0 0
## 3 0 0 0 1 0
## 4 1 0 1 0 0
## 5 0 0 1 0 0
## 6 0 0 0 0 1
## 8 0 1 1 0 0
## 9 0 0 1 0 0
## 12 0 0 0 0 1
## 13 1 0 0 0 0
## 14 0 0 0 0 0
## guardian_mother guardian_other class
## 1 1 0 0
## 3 1 0 0
## 4 1 0 1
## 5 0 0 0
## 6 1 0 0
## 8 1 0 0
## 9 1 0 1
## 12 0 0 0
## 13 0 0 0
## 14 1 0 0
# Build the same binary class for the test data:
# class = 1 when score is strictly greater than 13, otherwise 0
# (0: lower group, 1: upper group).
class_te <- numeric(nrow(test))
class_te[which(test$score > 13)] <- 1
# Replace the numeric score column with the class label.
test_cls <- data.frame(test[, names(test) != "score"], "class" = class_te)
test_cls[seq_len(10), ]
## school sex age address famsize Pstatus Medu Fedu traveltime studytime
## 2 0 0 17 1 0 1 1 1 1 2
## 7 0 1 16 1 1 1 2 2 1 2
## 10 0 1 15 1 0 1 3 4 1 2
## 11 0 0 15 1 0 1 4 4 1 2
## 15 0 1 15 1 0 0 2 2 1 3
## 20 0 1 16 1 1 1 4 3 1 1
## 28 0 1 15 1 0 1 4 2 1 1
## 47 0 0 16 1 1 0 3 3 1 2
## 59 0 1 15 1 1 1 1 2 1 2
## 66 0 0 16 1 1 1 4 3 3 2
## failures schoolsup famsup paid activities nursery higher internet
## 2 0 0 1 0 0 0 1 1
## 7 0 0 0 0 0 1 1 1
## 10 0 0 1 0 1 1 1 1
## 11 0 0 1 0 0 1 1 1
## 15 0 0 1 0 0 1 1 1
## 20 0 0 0 0 1 1 1 1
## 28 0 0 0 0 0 1 1 1
## 47 0 0 1 0 0 1 1 1
## 59 0 1 1 0 1 1 1 1
## 66 0 0 1 0 1 1 1 1
## romantic famrel freetime goout Dalc Walc health absences Mjob_health
## 2 0 5 3 3 1 1 3 2 0
## 7 0 4 4 4 1 1 3 0 0
## 10 0 5 5 1 1 1 5 0 0
## 11 0 3 3 3 1 2 2 2 0
## 15 1 4 5 2 1 1 3 0 0
## 20 0 3 1 3 1 3 5 6 1
## 28 0 2 2 4 2 4 1 0 1
## 47 0 2 3 5 1 4 3 6 0
## 59 0 4 3 2 1 1 5 0 0
## 66 0 5 4 3 1 2 1 2 0
## Mjob_other Mjob_services Mjob_teacher Fjob_health Fjob_other
## 2 0 0 0 0 1
## 7 1 0 0 0 1
## 10 1 0 0 0 1
## 11 0 0 1 1 0
## 15 1 0 0 0 1
## 20 0 0 0 0 1
## 28 0 0 0 0 0
## 47 1 0 0 0 0
## 59 1 0 0 0 0
## 66 0 0 1 0 0
## Fjob_services Fjob_teacher reason_home reason_other reason_reputation
## 2 0 0 0 0 0
## 7 0 0 1 0 0
## 10 0 0 1 0 0
## 11 0 0 0 0 1
## 15 0 0 1 0 0
## 20 0 0 1 0 0
## 28 1 0 0 1 0
## 47 1 0 1 0 0
## 59 0 0 1 0 0
## 66 1 0 0 0 0
## guardian_mother guardian_other class
## 2 0 0 0
## 7 1 0 0
## 10 1 0 0
## 11 1 0 1
## 15 0 1 1
## 20 0 0 0
## 28 1 0 0
## 47 1 0 0
## 59 0 0 1
## 66 1 0 1
##### logistic regression
# Full logistic model: class regressed on every other column of train_cls.
m_logis <- glm(class ~ ., data = train_cls, family = "binomial")
# test set prediction (predicted probability of class 1)
pred_te <- predict(m_logis, test_cls, type = "response")
# test set accuracy at a 0.5 cut-off
t <- table(test_cls$class, pred_te > 0.5)
# Compare predictions with labels directly instead of summing diag(t): when
# every prediction falls on the same side of the cut-off the table has a
# single column and its "diagonal" no longer holds the correct counts.
acc <- mean((pred_te > 0.5) == (test_cls$class == 1), na.rm = TRUE)
### stepwise logistic regression
# The upper scope is the full model fitted above (no need to fit it twice);
# the lower scope is the intercept-only model.
full_m <- m_logis
null_m <- glm(class ~ 1, data = train_cls, family = "binomial")
# Bidirectional AIC search starting from the intercept-only model.
m_logis_step <- step(
  null_m,
  direction = "both",
  trace = FALSE,
  scope = list(lower = null_m, upper = full_m)
)
m_logis_step
##
## Call: glm(formula = class ~ failures + higher + schoolsup + studytime +
## Medu + sex + absences + Fjob_teacher + health + romantic +
## paid + reason_home + reason_reputation + famrel, family = "binomial",
## data = train_cls)
##
## Coefficients:
## (Intercept) failures higher
## -4.36909238 -2.56551880 2.33791649
## schoolsup studytime Medu
## -2.09631351 0.30253953 0.34358680
## sex absences Fjob_teacher
## -0.78946338 -0.07735731 1.09973044
## health romantic paid
## -0.13732595 -0.39770436 -0.86197535
## reason_home reason_reputation famrel
## 0.64838051 0.50040822 0.20136710
##
## Degrees of Freedom: 519 Total (i.e. Null); 505 Residual
## Null Deviance: 611.6955
## Residual Deviance: 462.2943 AIC: 492.2943
# Coefficient table (estimates, standard errors, z-tests) and deviance
# summary of the stepwise logistic model.
summary(m_logis_step)
##
## Call:
## glm(formula = class ~ failures + higher + schoolsup + studytime +
## Medu + sex + absences + Fjob_teacher + health + romantic +
## paid + reason_home + reason_reputation + famrel, family = "binomial",
## data = train_cls)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0376043 -0.7851192 -0.2899791 0.7855093 2.4544009
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.36909238 1.21458880 -3.59718 0.00032169 ***
## failures -2.56551880 1.01301914 -2.53255 0.01132371 *
## higher 2.33791649 1.03663340 2.25530 0.02411467 *
## schoolsup -2.09631351 0.53246202 -3.93702 0.0000825 ***
## studytime 0.30253953 0.14011290 2.15926 0.03083036 *
## Medu 0.34358680 0.11230351 3.05945 0.00221745 **
## sex -0.78946338 0.26000885 -3.03629 0.00239506 **
## absences -0.07735731 0.03085085 -2.50746 0.01216019 *
## Fjob_teacher 1.09973044 0.46626611 2.35859 0.01834453 *
## health -0.13732595 0.08013480 -1.71369 0.08658624 .
## romantic -0.39770436 0.24740580 -1.60750 0.10794514
## paid -0.86197535 0.60265526 -1.43030 0.15263211
## reason_home 0.64838051 0.29192043 2.22109 0.02634511 *
## reason_reputation 0.50040822 0.27736578 1.80415 0.07120851 .
## famrel 0.20136710 0.13133223 1.53326 0.12521060
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 611.69553 on 519 degrees of freedom
## Residual deviance: 462.29431 on 505 degrees of freedom
## AIC: 492.29431
##
## Number of Fisher Scoring iterations: 7
# Predicted class-1 probabilities for the training data (fitted values of the
# stepwise model on the response scale).
pred_tr <- fitted(m_logis_step)
# Training-set confusion matrix at a 0.5 threshold
# (rows: true class, columns: predicted probability > 0.5).
t <- table(train_cls$class, pred_tr > 0.5)
t
##
## FALSE TRUE
## 0 338 39
## 1 80 63
# Training accuracy at the 0.5 threshold: share of rows whose thresholded
# prediction equals the true class. Computed from the vectors rather than
# from diag() of the confusion table, which is only valid when the table is
# a full 2 x 2 (it is not when all predictions fall on one side).
acc_tr <- mean((pred_tr > 0.5) == (train_cls$class == 1), na.rm = TRUE)
acc_tr
## [1] 0.7711538462
# Classification performance on the test data
# Predicted class-1 probabilities from the stepwise model.
pred_te <- predict(m_logis_step, newdata = test, type = "response")
# Confusion matrix at a 0.5 threshold
# (rows: true class, columns: predicted probability > 0.5).
t_te <- table(test_cls$class, pred_te > 0.5)
t_te
##
## FALSE TRUE
## 0 84 8
## 1 26 11
# Test accuracy at the 0.5 threshold: share of rows whose thresholded
# prediction equals the true class. Computed from the vectors rather than
# from diag() of the confusion table, which is only valid when the table is
# a full 2 x 2 (it is not when all predictions fall on one side).
acc_te <- mean((pred_te > 0.5) == (test_cls$class == 1), na.rm = TRUE)
acc_te
## [1] 0.7364341085
### Search for the threshold that gives the highest accuracy
# NOTE(review): the threshold is selected on the test set itself, so the
# accuracy reported for it is optimistic -- consider a validation split.
# Candidate thresholds: 0.1 to 0.9 in steps of 0.1.
threshold <- seq(0.1, 0.9, by = 0.1)
# The predicted probabilities do not depend on the threshold: compute once.
pred_te <- predict(m_logis_step, test, type = "response") # class-1 probability
# Accuracy per threshold = share of test rows whose thresholded prediction
# equals the true class. Comparing the vectors directly stays correct at
# extreme thresholds, where every prediction lands on the same side and a
# confusion table would have a single column (so summing its diag() would
# count the wrong cells).
acc_th <- vapply(
  threshold,
  function(th) mean((pred_te > th) == (test_cls$class == 1), na.rm = TRUE),
  numeric(1)
)
# plotting: accuracy against threshold, with the best threshold marked
best_idx <- which.max(acc_th)
plot(threshold, acc_th, type = "l", xlab = "Threshold", ylab = "Test accuracy")
abline(v = threshold[best_idx], col = "red", lty = 2)
points(threshold[best_idx], max(acc_th), pch = 19, col = "red")
text(
  threshold[best_idx], max(acc_th),
  pos = 4, labels = round(max(acc_th), 2), col = "red", cex = 0.8
)
##### threshold = 0.6 test result
t_te <- table(test_cls$class, pred_te > 0.6) # confusion matrix
t_te
##
## FALSE TRUE
## 0 90 2
## 1 31 6
# Test accuracy at the 0.6 threshold: share of rows whose thresholded
# prediction equals the true class. Computed from the vectors rather than
# from diag() of the confusion table, which is only valid when the table is
# a full 2 x 2 (it is not when all predictions fall on one side).
acc_te <- mean((pred_te > 0.6) == (test_cls$class == 1), na.rm = TRUE)
acc_te # test accuracy
## [1] 0.7441860465
# Predicted class-1 probability of every test row (in row order), coloured by
# the true class, with the 0.6 decision threshold drawn as a dashed line.
true_class <- as.factor(test_cls$class)
plot(
  pred_te,
  col = true_class,
  pch = 19,
  ylim = c(0, 1),
  ylab = "Predicted class"
)
abline(h = 0.6, lty = 2)