1. Introduction

1.1. Aim

  • 중학생들의 사회적 정보, 성별, 학습 정보 등의 다양한 데이터를 기반으로 학생의 시험 성적을 예측
    • 포르투갈어 과목의 총 3차 시험 성적을 예측(각 시험당 20점 만점)

1.2. Data description

  • Number of Attributes: 33 (Independent: 30 / Dependent: G1, G2, G3)
  • Number of Instances: 649
  • Attribute Characteristics: Numeric / Binary / Categorical
  • Associated Tasks: Regression / Classification

2. Preprocessing

2.1. Setting

- Import libraries

suppressMessages(library(car))
suppressMessages(library(relaimpo))
suppressMessages(library(glmnet))
suppressMessages(library(corrplot))
suppressMessages(library(rpart))
suppressMessages(library(neuralnet))
suppressMessages(library(caret))
suppressMessages(library(e1071))
suppressMessages(library(DT))

- Load Dataset

### Load the dataset (the first CSV column is used as the row names)
st <- read.csv("./input/student.csv", row.names = 1)

- Split the data

# Randomly label every row of `st` as training ("tr") or test ("te") with
# sampling weights 8:2, then split on that label. The fixed seed makes the
# split reproducible. `prob` is now passed by name (it was previously matched
# positionally after a named argument) and `TRUE` replaces the reassignable `T`.
set.seed(555)
flag <- sample(c("tr", "te"), size = nrow(st), replace = TRUE, prob = c(8, 2))
train <- st[which(flag == "tr"), ]
test <- st[which(flag == "te"), ]

2.2. Peek into the Data

# Interactive, paginated table of the full dataset with horizontal scrolling enabled.
datatable(st, style="bootstrap", class="table-condensed", options = list(dom = 'tp',scrollX = TRUE))
# Per-column summary statistics.
summary(st)
##      school            sex              age           address      
##  Min.   :0.0000   Min.   :0.0000   Min.   :15.00   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:16.00   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :17.00   Median :1.0000  
##  Mean   :0.3482   Mean   :0.4099   Mean   :16.74   Mean   :0.6965  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:18.00   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :22.00   Max.   :1.0000  
##     famsize          Pstatus            Medu            Fedu      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :0.0000   Median :1.0000   Median :2.000   Median :2.000  
##  Mean   :0.2958   Mean   :0.8767   Mean   :2.515   Mean   :2.307  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:3.000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :4.000   Max.   :4.000  
##    traveltime      studytime        failures        schoolsup     
##  Min.   :1.000   Min.   :1.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :1.000   Median :2.000   Median :0.0000   Median :0.0000  
##  Mean   :1.569   Mean   :1.931   Mean   :0.2219   Mean   :0.1048  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :4.000   Max.   :4.000   Max.   :3.0000   Max.   :1.0000  
##      famsup            paid           activities        nursery      
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:1.0000  
##  Median :1.0000   Median :0.00000   Median :0.0000   Median :1.0000  
##  Mean   :0.6133   Mean   :0.06009   Mean   :0.4854   Mean   :0.8028  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.0000   Max.   :1.0000  
##      higher          internet         romantic          famrel     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :1.000  
##  1st Qu.:1.0000   1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:4.000  
##  Median :1.0000   Median :1.0000   Median :0.0000   Median :4.000  
##  Mean   :0.8937   Mean   :0.7673   Mean   :0.3683   Mean   :3.931  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:5.000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :5.000  
##     freetime        goout            Dalc            Walc     
##  Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.00  
##  1st Qu.:3.00   1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.00  
##  Median :3.00   Median :3.000   Median :1.000   Median :2.00  
##  Mean   :3.18   Mean   :3.185   Mean   :1.502   Mean   :2.28  
##  3rd Qu.:4.00   3rd Qu.:4.000   3rd Qu.:2.000   3rd Qu.:3.00  
##  Max.   :5.00   Max.   :5.000   Max.   :5.000   Max.   :5.00  
##      health         absences       Mjob_health        Mjob_other    
##  Min.   :1.000   Min.   : 0.000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:2.000   1st Qu.: 0.000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :4.000   Median : 2.000   Median :0.00000   Median :0.0000  
##  Mean   :3.536   Mean   : 3.659   Mean   :0.07396   Mean   :0.3975  
##  3rd Qu.:5.000   3rd Qu.: 6.000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :5.000   Max.   :32.000   Max.   :1.00000   Max.   :1.0000  
##  Mjob_services     Mjob_teacher     Fjob_health        Fjob_other    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :1.0000  
##  Mean   :0.2096   Mean   :0.1109   Mean   :0.03544   Mean   :0.5655  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.0000  
##  Fjob_services     Fjob_teacher      reason_home      reason_other   
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.00000   Median :0.0000   Median :0.0000  
##  Mean   :0.2789   Mean   :0.05547   Mean   :0.2296   Mean   :0.1109  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.0000   Max.   :1.0000  
##  reason_reputation guardian_mother  guardian_other        score       
##  Min.   :0.0000    Min.   :0.0000   Min.   :0.00000   Min.   : 1.333  
##  1st Qu.:0.0000    1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:10.000  
##  Median :0.0000    Median :1.0000   Median :0.00000   Median :11.667  
##  Mean   :0.2203    Mean   :0.7011   Mean   :0.06317   Mean   :11.625  
##  3rd Qu.:0.0000    3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:13.333  
##  Max.   :1.0000    Max.   :1.0000   Max.   :1.00000   Max.   :18.667
str(st)
## 'data.frame':    649 obs. of  40 variables:
##  $ school           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ sex              : int  0 0 0 0 0 1 1 0 1 1 ...
##  $ age              : int  18 17 15 15 16 16 16 17 15 15 ...
##  $ address          : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ famsize          : int  0 0 1 0 0 1 1 0 1 0 ...
##  $ Pstatus          : int  0 1 1 1 1 1 1 0 0 1 ...
##  $ Medu             : int  4 1 1 4 3 4 2 4 3 3 ...
##  $ Fedu             : int  4 1 1 2 3 3 2 4 2 4 ...
##  $ traveltime       : int  2 1 1 1 1 1 1 2 1 1 ...
##  $ studytime        : int  2 2 2 3 2 2 2 2 2 2 ...
##  $ failures         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ schoolsup        : int  1 0 1 0 0 0 0 1 0 0 ...
##  $ famsup           : int  0 1 0 1 1 1 0 1 1 1 ...
##  $ paid             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ activities       : int  0 0 0 1 0 1 0 0 0 1 ...
##  $ nursery          : int  1 0 1 1 1 1 1 1 1 1 ...
##  $ higher           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ internet         : int  0 1 1 1 0 1 1 0 1 1 ...
##  $ romantic         : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ famrel           : int  4 5 4 3 4 5 4 4 4 5 ...
##  $ freetime         : int  3 3 3 2 3 4 4 1 2 5 ...
##  $ goout            : int  4 3 2 2 2 2 4 4 2 1 ...
##  $ Dalc             : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ Walc             : int  1 1 3 1 2 2 1 1 1 1 ...
##  $ health           : int  3 3 3 5 5 5 3 1 1 5 ...
##  $ absences         : int  4 2 6 0 0 6 0 2 0 0 ...
##  $ Mjob_health      : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ Mjob_other       : int  0 0 0 0 1 0 1 1 0 1 ...
##  $ Mjob_services    : int  0 0 0 0 0 1 0 0 1 0 ...
##  $ Mjob_teacher     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fjob_health      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fjob_other       : int  0 1 1 0 1 1 1 0 1 1 ...
##  $ Fjob_services    : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ Fjob_teacher     : int  1 0 0 0 0 0 0 1 0 0 ...
##  $ reason_home      : int  0 0 0 1 1 0 1 1 1 1 ...
##  $ reason_other     : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ reason_reputation: int  0 0 0 0 0 1 0 0 0 0 ...
##  $ guardian_mother  : int  1 0 1 1 0 1 1 1 1 1 ...
##  $ guardian_other   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ score            : num  7.33 10.33 12.33 14 12.33 ...

2.3. Outlier Treatment

- outlier 제거

# Overview boxplot of every training column before outlier removal.
boxplot(train)

# boxplot() returns the whisker/hinge summary in $stats:
# stats[1] is the end of the lower whisker, stats[5] the end of the upper one.
b1 <- boxplot(train$absences)

b2 <- boxplot(train$score)

out1 <- which(train$absences > b1$stats[5]) # rows above the upper whisker of absences
out2 <- which(train$score < b2$stats[1]) # rows below the lower whisker of score
# Remove the absences/score outliers from the training data.
# Select the rows to keep instead of using negative indexing: when no outlier
# is found, train[-integer(0), ] would silently drop every row.
train <- train[setdiff(seq_len(nrow(train)), c(out1, out2)), ]
boxplot(train)

2.4. Correlation Analysis

# Pairwise correlation matrix of all training columns.
# NOTE(review): the variable name `c` reuses the name of base::c(); calls to
# c(...) still resolve to the function, but a distinct name would be clearer.
c <- cor(train)
# Draw each correlation as a circle, ordering variables by hierarchical clustering.
corrplot(c, method = "circle", order = "hclust")

3. Modeling

3.1. Linear Regression

3.1.1. Simple linear regression

m <- lm(score~Dalc, data = train) # simple linear regression of score on Dalc
plot(train$Dalc, train$score, xlab = "Dalc", ylab = "score", pch = 19)
abline(m, col = "red") # fitted regression line

summary(m)
## 
## Call:
## lm(formula = score ~ Dalc, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.4107 -1.8762 -0.0774  1.7276  6.9918 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  12.6120     0.2202  57.272  < 2e-16 ***
## Dalc         -0.5346     0.1256  -4.257 2.48e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.572 on 492 degrees of freedom
## Multiple R-squared:  0.03552,    Adjusted R-squared:  0.03356 
## F-statistic: 18.12 on 1 and 492 DF,  p-value: 2.483e-05
## Prediction on the test dataset
pred <- predict(m, newdata= test) # simple regression predictions for the test set
plot(test$score, pred, xlab = "Actual score", ylab = "Predicted score", pch = 19, xlim = c(5, 18), ylim = c(5, 18))
lines(test$score, test$score, col = "red", lty = 2) # dashed y = x reference line

result <- data.frame("actual" = test$score, "pred" = pred, "resid" = test$score - pred) # result table of actual values, predictions, and residuals
result[1:10, ]
##      actual     pred       resid
## 2  10.33333 12.07741 -1.74407183
## 7  12.66667 12.07741  0.58926150
## 10 12.33333 12.07741  0.25592817
## 11 14.00000 12.07741  1.92259483
## 15 14.33333 12.07741  2.25592817
## 20 12.00000 12.07741 -0.07740517
## 28 11.00000 11.54282 -0.54281743
## 47 12.66667 12.07741  0.58926150
## 59 13.66667 12.07741  1.58926150
## 66 15.66667 12.07741  3.58926150
## Measure predictive performance
MSE <- mean(result$resid^2) # compute the test MSE
MSE
## [1] 7.26915

3.1.2. Multiple linear regression

# lm_result: compute training/test MSE for a fitted model and visualize the fit.
#
# Draws two side-by-side scatter plots of actual vs. predicted score (training
# set on the left, test set on the right), each with a dashed red y = x
# reference line and the rounded MSE in the title. Prints the MSEs rounded to
# two decimals and returns the unrounded values.
#
# Args:
#   m:     fitted model; m$fitted.values is compared element-wise with
#          train$score, and predict(m, newdata = test) is used for the test set.
#   train: data frame with a `score` column (assumed to be the data `m` was
#          fitted on, so that fitted values align with its rows).
#   test:  data frame with a `score` column and the predictors `m` needs.
#
# Returns:
#   Named numeric vector with elements "MSE_tr" and "MSE_te".
lm_result <- function(m, train, test){
  # Arrange plots in 1 row x 2 columns and restore the caller's layout on exit.
  # The original reset was placed after return() and therefore never executed,
  # leaving the 1 x 2 layout active for every later plot.
  old_par <- par(mfrow = c(1, 2))
  on.exit(par(old_par), add = TRUE)

  MSE_tr <- mean((train$score - m$fitted.values)^2) # training MSE
  plot(train$score, m$fitted.values, pch = 19, xlab = "Actual score", ylab = "Predicted score", xlim = c(5, 18), ylim = c(5, 18), main = paste0("Training MSE = ", round(MSE_tr, 2)))
  lines(train$score, train$score, col = "red", lty = 2)

  pred <- predict(m, newdata = test) # predictions for the test dataset
  MSE_te <- mean((test$score - pred)^2) # test MSE
  plot(test$score, pred, xlab = "Actual score", ylab = "Predicted score", pch = 19, xlim = c(5, 18), ylim = c(5, 18), main = paste0("Test MSE = ", round(MSE_te, 2)))
  lines(test$score, test$score, col = "red", lty = 2)

  MSE <- c(MSE_tr, MSE_te)
  names(MSE) <- c("MSE_tr", "MSE_te")
  print(round(MSE, 2))

  MSE # return the (unrounded) MSE results
}
### Fit the multiple linear regression model
m <- lm(score~., data = train) # multiple regression of score on all other columns
summary(m)
## 
## Call:
## lm(formula = score ~ ., data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5217 -1.3320 -0.0949  1.2366  6.6196 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        7.393389   1.815848   4.072 5.51e-05 ***
## school            -0.710581   0.235919  -3.012 0.002740 ** 
## sex               -0.643999   0.226488  -2.843 0.004665 ** 
## age                0.161976   0.091202   1.776 0.076399 .  
## address            0.284466   0.233532   1.218 0.223818    
## famsize            0.078951   0.217709   0.363 0.717039    
## Pstatus            0.277959   0.314945   0.883 0.377939    
## Medu               0.218263   0.134044   1.628 0.104156    
## Fedu               0.001694   0.124744   0.014 0.989174    
## traveltime         0.038551   0.141203   0.273 0.784963    
## studytime          0.274661   0.120463   2.280 0.023068 *  
## failures          -1.149888   0.174476  -6.591 1.22e-10 ***
## schoolsup         -1.153984   0.324786  -3.553 0.000421 ***
## famsup            -0.210148   0.204113  -1.030 0.303762    
## paid              -0.463470   0.424199  -1.093 0.275160    
## activities         0.162197   0.201561   0.805 0.421413    
## nursery           -0.172622   0.239559  -0.721 0.471536    
## higher             1.479837   0.344927   4.290 2.18e-05 ***
## internet           0.083312   0.244341   0.341 0.733288    
## romantic          -0.406032   0.205598  -1.975 0.048886 *  
## famrel             0.234623   0.104326   2.249 0.024995 *  
## freetime           0.061513   0.102871   0.598 0.550163    
## goout             -0.154058   0.094984  -1.622 0.105511    
## Dalc              -0.001842   0.140606  -0.013 0.989553    
## Walc              -0.123880   0.106589  -1.162 0.245755    
## health            -0.114383   0.069683  -1.641 0.101391    
## absences          -0.081225   0.028573  -2.843 0.004674 ** 
## Mjob_health        0.733595   0.460703   1.592 0.112005    
## Mjob_other         0.101480   0.275725   0.368 0.713010    
## Mjob_services      0.054489   0.331363   0.164 0.869458    
## Mjob_teacher       0.605537   0.463051   1.308 0.191633    
## Fjob_health       -0.166462   0.639204  -0.260 0.794658    
## Fjob_other         0.251658   0.395911   0.636 0.525330    
## Fjob_services      0.021904   0.414190   0.053 0.957847    
## Fjob_teacher       1.017871   0.590807   1.723 0.085597 .  
## reason_home        0.424029   0.266360   1.592 0.112094    
## reason_other       0.166663   0.329066   0.506 0.612771    
## reason_reputation  0.622347   0.255886   2.432 0.015396 *  
## guardian_mother   -0.435901   0.236828  -1.841 0.066336 .  
## guardian_other    -0.704618   0.484698  -1.454 0.146713    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.067 on 454 degrees of freedom
## Multiple R-squared:  0.4253, Adjusted R-squared:  0.3759 
## F-statistic: 8.614 on 39 and 454 DF,  p-value: < 2.2e-16
## Pass the model and the training/test datasets to lm_result() to compute the MSEs and plot
lm_result(m, train, test) # evaluate and visualize the regression results

## MSE_tr MSE_te 
##   3.93   6.91
##   MSE_tr   MSE_te 
## 3.926032 6.907939

- 다중공선성

### Check for multicollinearity
vif(m) # compute the variance inflation factors (VIF)
##            school               sex               age           address 
##          1.460686          1.417245          1.377845          1.343418 
##           famsize           Pstatus              Medu              Fedu 
##          1.127313          1.188589          2.639189          2.222232 
##        traveltime         studytime          failures         schoolsup 
##          1.337530          1.196624          1.307672          1.168310 
##            famsup              paid        activities           nursery 
##          1.118441          1.149843          1.172178          1.095182 
##            higher          internet          romantic            famrel 
##          1.273725          1.297974          1.142523          1.134523 
##          freetime             goout              Dalc              Walc 
##          1.274660          1.391473          1.941243          2.195803 
##            health          absences       Mjob_health        Mjob_other 
##          1.155724          1.190499          1.909317          2.100425 
##     Mjob_services      Mjob_teacher       Fjob_health        Fjob_other 
##          2.226067          2.093833          1.835422          4.485031 
##     Fjob_services      Fjob_teacher       reason_home      reason_other 
##          4.028827          2.158158          1.324591          1.219163 
## reason_reputation   guardian_mother    guardian_other 
##          1.415861          1.350352          1.501213

3.1.3. Regression with variable selection

###### Regression with variable selection
### forward, backward, stepwise regression
full_m <- lm(score~., data = train) # full model using every variable
null_m <- lm(score~1, data = train) # null model using no variables (intercept only)

3.1.4. full model

# Store the full model's training/test MSE in r1 (also draws the two plots).
r1 <- lm_result(full_m, train, test)

## MSE_tr MSE_te 
##   3.93   6.91

3.1.5. forward method

# Forward selection by AIC: start from the null model and add one predictor per
# step, with the full model as the upper bound; trace = 1 prints each step.
forw_m <- step(null_m, direction = "forward", trace = 1, scope = list(lower = null_m, upper = full_m))
## Start:  AIC=951.23
## score ~ 1
## 
##                     Df Sum of Sq    RSS    AIC
## + failures           1    618.60 2756.1 853.20
## + higher             1    419.64 2955.0 887.63
## + Medu               1    290.20 3084.5 908.81
## + studytime          1    196.07 3178.6 923.66
## + school             1    192.96 3181.7 924.14
## + reason_reputation  1    152.85 3221.8 930.33
## + Fedu               1    144.90 3229.8 931.55
## + absences           1    140.50 3234.2 932.22
## + Walc               1    132.00 3242.7 933.52
## + Dalc               1    119.88 3254.8 935.36
## + Mjob_teacher       1     98.81 3275.8 938.55
## + Fjob_teacher       1     92.88 3281.8 939.44
## + sex                1     70.39 3304.3 942.81
## + address            1     66.20 3308.5 943.44
## + traveltime         1     65.29 3309.4 943.58
## + internet           1     61.09 3313.6 944.20
## + goout              1     60.46 3314.2 944.30
## + Mjob_health        1     60.27 3314.4 944.33
## + guardian_other     1     53.70 3321.0 945.30
## + famrel             1     49.16 3325.5 945.98
## + health             1     40.92 3333.7 947.20
## + age                1     39.50 3335.2 947.41
## + romantic           1     36.06 3338.6 947.92
## + freetime           1     32.75 3341.9 948.41
## + schoolsup          1     24.67 3350.0 949.60
## + reason_other       1     22.16 3352.5 949.97
## + activities         1     20.66 3354.0 950.19
## + reason_home        1     18.32 3356.3 950.54
## + paid               1     14.58 3360.1 951.09
## <none>                           3374.7 951.23
## + Fjob_health        1      5.98 3368.7 952.35
## + Fjob_services      1      5.44 3369.2 952.43
## + Mjob_other         1      3.22 3371.4 952.76
## + Fjob_other         1      1.73 3372.9 952.97
## + guardian_mother    1      1.67 3373.0 952.98
## + nursery            1      0.70 3374.0 953.13
## + famsize            1      0.51 3374.1 953.15
## + Pstatus            1      0.41 3374.2 953.17
## + Mjob_services      1      0.40 3374.3 953.17
## + famsup             1      0.07 3374.6 953.22
## 
## Step:  AIC=853.2
## score ~ failures
## 
##                     Df Sum of Sq    RSS    AIC
## + higher             1   198.424 2557.6 818.29
## + Medu               1   179.678 2576.4 821.89
## + school             1   133.826 2622.2 830.61
## + studytime          1   120.574 2635.5 833.10
## + reason_reputation  1    97.175 2658.9 837.46
## + Walc               1    94.387 2661.7 837.98
## + Dalc               1    73.592 2682.5 841.83
## + absences           1    69.834 2686.2 842.52
## + Fedu               1    68.224 2687.8 842.81
## + Mjob_health        1    58.664 2697.4 844.57
## + Fjob_teacher       1    55.349 2700.7 845.17
## + address            1    54.700 2701.4 845.29
## + Mjob_teacher       1    54.081 2702.0 845.41
## + goout              1    44.527 2711.5 847.15
## + internet           1    43.649 2712.4 847.31
## + traveltime         1    41.081 2715.0 847.78
## + sex                1    31.629 2724.4 849.49
## + famrel             1    25.661 2730.4 850.58
## + health             1    24.789 2731.3 850.73
## + reason_other       1    22.220 2733.8 851.20
## + romantic           1    19.987 2736.1 851.60
## + schoolsup          1    19.197 2736.9 851.74
## + activities         1    15.673 2740.4 852.38
## <none>                           2756.1 853.20
## + freetime           1     7.040 2749.0 853.93
## + guardian_mother    1     6.094 2750.0 854.10
## + guardian_other     1     5.133 2750.9 854.28
## + reason_home        1     4.739 2751.3 854.35
## + paid               1     3.523 2752.5 854.57
## + Fjob_services      1     3.291 2752.8 854.61
## + Mjob_other         1     2.875 2753.2 854.68
## + Pstatus            1     1.278 2754.8 854.97
## + Fjob_health        1     0.999 2755.1 855.02
## + nursery            1     0.361 2755.7 855.13
## + age                1     0.311 2755.8 855.14
## + famsize            1     0.284 2755.8 855.15
## + Mjob_services      1     0.179 2755.9 855.16
## + Fjob_other         1     0.060 2756.0 855.19
## + famsup             1     0.049 2756.0 855.19
## 
## Step:  AIC=818.29
## score ~ failures + higher
## 
##                     Df Sum of Sq    RSS    AIC
## + Medu               1   119.318 2438.3 796.69
## + school             1    94.848 2462.8 801.62
## + studytime          1    79.563 2478.1 804.67
## + reason_reputation  1    77.243 2480.4 805.14
## + Walc               1    72.573 2485.1 806.07
## + absences           1    59.755 2497.9 808.61
## + Dalc               1    55.022 2502.6 809.54
## + address            1    46.575 2511.1 811.21
## + Fjob_teacher       1    43.506 2514.1 811.81
## + Mjob_teacher       1    38.532 2519.1 812.79
## + Mjob_health        1    38.367 2519.3 812.82
## + Fedu               1    36.261 2521.4 813.23
## + schoolsup          1    31.290 2526.3 814.20
## + traveltime         1    29.927 2527.7 814.47
## + internet           1    29.490 2528.2 814.56
## + goout              1    29.142 2528.5 814.62
## + health             1    24.064 2533.6 815.62
## + famrel             1    22.774 2534.9 815.87
## + sex                1    22.270 2535.4 815.97
## + activities         1    14.360 2543.3 817.50
## + reason_other       1    13.336 2544.3 817.70
## + romantic           1    11.024 2546.6 818.15
## <none>                           2557.6 818.29
## + guardian_mother    1     9.727 2547.9 818.40
## + paid               1     9.243 2548.4 818.50
## + age                1     8.864 2548.8 818.57
## + Fjob_services      1     3.992 2553.6 819.51
## + reason_home        1     3.843 2553.8 819.54
## + Mjob_other         1     3.755 2553.9 819.56
## + famsup             1     2.294 2555.3 819.84
## + Pstatus            1     1.820 2555.8 819.93
## + guardian_other     1     1.016 2556.6 820.09
## + nursery            1     0.871 2556.8 820.12
## + freetime           1     0.761 2556.9 820.14
## + Mjob_services      1     0.676 2557.0 820.16
## + Fjob_health        1     0.505 2557.1 820.19
## + Fjob_other         1     0.023 2557.6 820.28
## + famsize            1     0.004 2557.6 820.29
## 
## Step:  AIC=796.69
## score ~ failures + higher + Medu
## 
##                     Df Sum of Sq    RSS    AIC
## + Walc               1    69.903 2368.4 784.32
## + studytime          1    68.803 2369.5 784.55
## + reason_reputation  1    60.990 2377.3 786.17
## + Dalc               1    57.930 2380.4 786.81
## + absences           1    55.241 2383.1 787.36
## + school             1    53.477 2384.8 787.73
## + sex                1    39.612 2398.7 790.59
## + goout              1    29.704 2408.6 792.63
## + schoolsup          1    25.790 2412.5 793.43
## + health             1    25.350 2413.0 793.52
## + address            1    23.216 2415.1 793.96
## + paid               1    22.350 2416.0 794.14
## + Fjob_teacher       1    17.574 2420.7 795.11
## + famrel             1    17.428 2420.9 795.14
## + guardian_mother    1    14.334 2424.0 795.77
## + age                1    13.144 2425.2 796.01
## + Mjob_health        1    12.266 2426.1 796.19
## <none>                           2438.3 796.69
## + reason_other       1     9.493 2428.8 796.76
## + romantic           1     9.313 2429.0 796.79
## + famsup             1     7.603 2430.7 797.14
## + internet           1     7.320 2431.0 797.20
## + traveltime         1     6.339 2432.0 797.40
## + activities         1     5.179 2433.1 797.63
## + Mjob_services      1     4.669 2433.7 797.74
## + Pstatus            1     4.603 2433.7 797.75
## + nursery            1     3.986 2434.3 797.88
## + Mjob_teacher       1     3.915 2434.4 797.89
## + Fjob_services      1     3.873 2434.4 797.90
## + Fjob_other         1     1.950 2436.4 798.29
## + reason_home        1     1.595 2436.7 798.36
## + Fedu               1     1.141 2437.2 798.45
## + Fjob_health        1     0.991 2437.3 798.48
## + freetime           1     0.971 2437.3 798.49
## + guardian_other     1     0.498 2437.8 798.58
## + Mjob_other         1     0.384 2437.9 798.61
## + famsize            1     0.081 2438.2 798.67
## 
## Step:  AIC=784.32
## score ~ failures + higher + Medu + Walc
## 
##                     Df Sum of Sq    RSS    AIC
## + reason_reputation  1    58.122 2310.3 774.04
## + school             1    56.690 2311.7 774.35
## + studytime          1    48.031 2320.4 776.19
## + absences           1    38.683 2329.7 778.18
## + schoolsup          1    34.176 2334.2 779.14
## + address            1    22.587 2345.8 781.58
## + paid               1    19.424 2349.0 782.25
## + age                1    16.668 2351.8 782.83
## + health             1    16.157 2352.3 782.93
## + Mjob_health        1    14.809 2353.6 783.22
## + sex                1    14.218 2354.2 783.34
## + guardian_mother    1    12.234 2356.2 783.76
## + internet           1    12.028 2356.4 783.80
## + famrel             1    11.702 2356.7 783.87
## + Fjob_teacher       1    11.694 2356.7 783.87
## + famsup             1    10.267 2358.2 784.17
## + Pstatus            1     9.656 2358.8 784.30
## + romantic           1     9.586 2358.8 784.31
## <none>                           2368.4 784.32
## + Dalc               1     9.064 2359.3 784.42
## + reason_other       1     8.025 2360.4 784.64
## + nursery            1     8.005 2360.4 784.64
## + goout              1     6.989 2361.4 784.86
## + activities         1     5.489 2362.9 785.17
## + Mjob_teacher       1     5.277 2363.1 785.21
## + traveltime         1     5.010 2363.4 785.27
## + Mjob_services      1     2.683 2365.7 785.76
## + reason_home        1     2.652 2365.8 785.76
## + Fjob_other         1     1.524 2366.9 786.00
## + Fjob_health        1     1.503 2366.9 786.00
## + Fjob_services      1     1.146 2367.3 786.08
## + guardian_other     1     1.144 2367.3 786.08
## + famsize            1     0.822 2367.6 786.14
## + freetime           1     0.162 2368.2 786.28
## + Fedu               1     0.147 2368.3 786.29
## + Mjob_other         1     0.000 2368.4 786.32
## 
## Step:  AIC=774.04
## score ~ failures + higher + Medu + Walc + reason_reputation
## 
##                   Df Sum of Sq    RSS    AIC
## + school           1    43.400 2266.9 766.67
## + absences         1    41.718 2268.6 767.04
## + schoolsup        1    36.570 2273.7 768.16
## + studytime        1    33.380 2276.9 768.85
## + address          1    24.561 2285.7 770.76
## + reason_home      1    17.705 2292.6 772.24
## + age              1    14.330 2296.0 772.97
## + Fjob_teacher     1    13.796 2296.5 773.08
## + sex              1    12.821 2297.5 773.29
## + paid             1    12.440 2297.8 773.37
## + Pstatus          1    11.677 2298.6 773.54
## + famrel           1    10.726 2299.6 773.74
## + health           1    10.427 2299.9 773.81
## + Mjob_health      1    10.419 2299.9 773.81
## + famsup           1    10.356 2299.9 773.82
## <none>                         2310.3 774.04
## + guardian_mother  1     9.188 2301.1 774.07
## + nursery          1     9.070 2301.2 774.10
## + Mjob_teacher     1     8.764 2301.5 774.16
## + internet         1     7.379 2302.9 774.46
## + romantic         1     6.913 2303.4 774.56
## + goout            1     6.606 2303.7 774.63
## + Dalc             1     5.128 2305.2 774.94
## + traveltime       1     4.223 2306.1 775.14
## + Mjob_services    1     3.654 2306.6 775.26
## + Fjob_health      1     2.607 2307.7 775.48
## + reason_other     1     1.909 2308.4 775.63
## + guardian_other   1     1.610 2308.7 775.70
## + activities       1     1.009 2309.3 775.83
## + Fjob_services    1     0.902 2309.4 775.85
## + Fjob_other       1     0.883 2309.4 775.85
## + famsize          1     0.384 2309.9 775.96
## + Fedu             1     0.224 2310.1 775.99
## + freetime         1     0.213 2310.1 776.00
## + Mjob_other       1     0.036 2310.3 776.03
## 
## Step:  AIC=766.67
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school
## 
##                   Df Sum of Sq    RSS    AIC
## + absences         1    51.477 2215.4 757.33
## + schoolsup        1    49.468 2217.4 757.77
## + studytime        1    29.300 2237.6 762.25
## + sex              1    16.814 2250.1 765.00
## + age              1    15.971 2250.9 765.18
## + health           1    14.690 2252.2 765.46
## + paid             1    13.079 2253.8 765.81
## + Fjob_teacher     1    12.614 2254.3 765.92
## + Pstatus          1    11.309 2255.6 766.20
## + guardian_mother  1    10.942 2255.9 766.28
## + reason_home      1    10.475 2256.4 766.39
## + Mjob_health      1    10.473 2256.4 766.39
## + famsup           1    10.234 2256.7 766.44
## + Mjob_teacher     1     9.685 2257.2 766.56
## <none>                         2266.9 766.67
## + address          1     8.939 2257.9 766.72
## + famrel           1     8.677 2258.2 766.78
## + nursery          1     7.421 2259.5 767.05
## + romantic         1     6.458 2260.4 767.26
## + Mjob_services    1     6.302 2260.6 767.30
## + goout            1     4.817 2262.1 767.62
## + Dalc             1     4.389 2262.5 767.72
## + Fjob_health      1     3.459 2263.4 767.92
## + internet         1     2.899 2264.0 768.04
## + guardian_other   1     2.786 2264.1 768.07
## + traveltime       1     0.615 2266.3 768.54
## + famsize          1     0.577 2266.3 768.55
## + activities       1     0.479 2266.4 768.57
## + Fedu             1     0.236 2266.7 768.62
## + Fjob_services    1     0.225 2266.7 768.62
## + reason_other     1     0.191 2266.7 768.63
## + Mjob_other       1     0.189 2266.7 768.63
## + freetime         1     0.115 2266.8 768.65
## + Fjob_other       1     0.106 2266.8 768.65
## 
## Step:  AIC=757.33
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences
## 
##                   Df Sum of Sq    RSS    AIC
## + schoolsup        1    57.743 2157.7 746.28
## + studytime        1    26.607 2188.8 753.36
## + age              1    25.633 2189.8 753.58
## + sex              1    18.163 2197.2 755.26
## + health           1    16.186 2199.2 755.70
## + paid             1    14.313 2201.1 756.12
## + reason_home      1    13.486 2201.9 756.31
## + address          1    10.728 2204.7 756.93
## + guardian_mother  1     9.938 2205.5 757.11
## + Fjob_teacher     1     9.875 2205.5 757.12
## + Mjob_teacher     1     9.601 2205.8 757.18
## <none>                         2215.4 757.33
## + famsup           1     8.044 2207.4 757.53
## + Pstatus          1     7.754 2207.7 757.59
## + nursery          1     7.468 2207.9 757.66
## + famrel           1     6.082 2209.3 757.97
## + Mjob_health      1     5.207 2210.2 758.16
## + Mjob_services    1     4.483 2210.9 758.33
## + romantic         1     3.780 2211.6 758.48
## + Fjob_health      1     3.133 2212.3 758.63
## + goout            1     3.010 2212.4 758.65
## + internet         1     2.963 2212.4 758.67
## + Dalc             1     2.266 2213.2 758.82
## + Fjob_services    1     0.761 2214.7 759.16
## + guardian_other   1     0.721 2214.7 759.17
## + traveltime       1     0.477 2214.9 759.22
## + activities       1     0.465 2214.9 759.22
## + reason_other     1     0.463 2214.9 759.22
## + Fjob_other       1     0.366 2215.1 759.24
## + freetime         1     0.298 2215.1 759.26
## + famsize          1     0.121 2215.3 759.30
## + Mjob_other       1     0.008 2215.4 759.32
## + Fedu             1     0.004 2215.4 759.33
## 
## Step:  AIC=746.28
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup
## 
##                   Df Sum of Sq    RSS    AIC
## + studytime        1   28.8503 2128.8 741.63
## + sex              1   28.1894 2129.5 741.78
## + age              1   17.3077 2140.4 744.30
## + Fjob_teacher     1   14.3411 2143.3 744.99
## + health           1   14.1435 2143.5 745.03
## + reason_home      1   13.7433 2143.9 745.12
## + paid             1   12.8005 2144.9 745.34
## + guardian_mother  1   11.6364 2146.0 745.61
## + address          1    9.1436 2148.5 746.18
## <none>                         2157.7 746.28
## + romantic         1    6.5686 2151.1 746.77
## + Mjob_teacher     1    6.4273 2151.2 746.81
## + Pstatus          1    6.3082 2151.4 746.83
## + famrel           1    5.9579 2151.7 746.91
## + famsup           1    5.7214 2151.9 746.97
## + nursery          1    5.4965 2152.2 747.02
## + Mjob_health      1    3.1144 2154.6 747.57
## + Mjob_services    1    3.0778 2154.6 747.57
## + goout            1    3.0045 2154.7 747.59
## + Fjob_services    1    1.7852 2155.9 747.87
## + internet         1    1.4121 2156.3 747.96
## + guardian_other   1    1.2597 2156.4 747.99
## + Fjob_health      1    1.0677 2156.6 748.04
## + Dalc             1    1.0227 2156.7 748.05
## + traveltime       1    0.4903 2157.2 748.17
## + freetime         1    0.2086 2157.5 748.23
## + Fedu             1    0.1643 2157.5 748.24
## + Fjob_other       1    0.1239 2157.6 748.25
## + reason_other     1    0.0613 2157.6 748.27
## + Mjob_other       1    0.0322 2157.6 748.27
## + famsize          1    0.0186 2157.7 748.28
## + activities       1    0.0087 2157.7 748.28
## 
## Step:  AIC=741.63
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime
## 
##                   Df Sum of Sq    RSS    AIC
## + sex              1   19.5842 2109.2 739.06
## + Fjob_teacher     1   17.1497 2111.7 739.63
## + age              1   13.9279 2114.9 740.39
## + health           1   12.6332 2116.2 740.69
## + reason_home      1   12.6104 2116.2 740.69
## + paid             1   12.0612 2116.8 740.82
## + guardian_mother  1    9.9126 2118.9 741.32
## <none>                         2128.8 741.63
## + romantic         1    8.5316 2120.3 741.65
## + address          1    8.4414 2120.4 741.67
## + famsup           1    8.2137 2120.6 741.72
## + famrel           1    7.0683 2121.8 741.99
## + Mjob_teacher     1    6.5884 2122.2 742.10
## + Pstatus          1    6.2860 2122.5 742.17
## + nursery          1    5.3345 2123.5 742.39
## + Mjob_health      1    4.3144 2124.5 742.63
## + Mjob_services    1    3.5252 2125.3 742.81
## + goout            1    3.1539 2125.7 742.90
## + Fjob_health      1    2.3100 2126.5 743.09
## + Fjob_services    1    2.0532 2126.8 743.15
## + guardian_other   1    1.5663 2127.3 743.27
## + internet         1    1.4943 2127.3 743.28
## + Dalc             1    1.3747 2127.4 743.31
## + traveltime       1    0.5234 2128.3 743.51
## + Fjob_other       1    0.2601 2128.6 743.57
## + Fedu             1    0.2467 2128.6 743.57
## + freetime         1    0.0767 2128.8 743.61
## + famsize          1    0.0285 2128.8 743.62
## + Mjob_other       1    0.0133 2128.8 743.63
## + activities       1    0.0002 2128.8 743.63
## + reason_other     1    0.0002 2128.8 743.63
## 
## Step:  AIC=739.06
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex
## 
##                   Df Sum of Sq    RSS    AIC
## + Fjob_teacher     1   19.0555 2090.2 736.58
## + reason_home      1   13.9159 2095.3 737.79
## + romantic         1   13.0390 2096.2 738.00
## + famsup           1   11.0070 2098.2 738.48
## + guardian_mother  1   10.5977 2098.6 738.58
## + age              1   10.4541 2098.8 738.61
## + health           1   10.4139 2098.8 738.62
## + paid             1   10.1726 2099.1 738.68
## + famrel           1   10.1237 2099.1 738.69
## <none>                         2109.2 739.06
## + address          1    7.7607 2101.5 739.24
## + Mjob_teacher     1    7.7548 2101.5 739.24
## + Pstatus          1    7.3145 2101.9 739.35
## + nursery          1    6.0498 2103.2 739.65
## + goout            1    4.6447 2104.6 739.98
## + Mjob_health      1    3.5620 2105.7 740.23
## + Mjob_services    1    3.3999 2105.8 740.27
## + Fjob_health      1    2.2907 2106.9 740.53
## + guardian_other   1    2.1035 2107.1 740.57
## + Fjob_services    1    1.8303 2107.4 740.64
## + internet         1    1.3386 2107.9 740.75
## + Dalc             1    0.4812 2108.8 740.95
## + Fedu             1    0.2477 2109.0 741.01
## + famsize          1    0.2133 2109.0 741.01
## + activities       1    0.2003 2109.0 741.02
## + traveltime       1    0.1480 2109.1 741.03
## + Fjob_other       1    0.1130 2109.1 741.04
## + freetime         1    0.0570 2109.2 741.05
## + Mjob_other       1    0.0444 2109.2 741.05
## + reason_other     1    0.0125 2109.2 741.06
## 
## Step:  AIC=736.58
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher
## 
##                   Df Sum of Sq    RSS    AIC
## + romantic         1   14.6785 2075.5 735.10
## + reason_home      1   14.0021 2076.2 735.26
## + famrel           1   13.1083 2077.1 735.47
## + health           1   10.3436 2079.8 736.13
## + famsup           1   10.3127 2079.9 736.14
## + guardian_mother  1   10.1923 2080.0 736.17
## + age              1    9.4170 2080.8 736.35
## <none>                         2090.2 736.58
## + paid             1    8.3641 2081.8 736.60
## + address          1    8.3452 2081.8 736.60
## + Pstatus          1    7.8920 2082.3 736.71
## + Mjob_health      1    5.0862 2085.1 737.38
## + nursery          1    4.7057 2085.5 737.47
## + goout            1    4.3702 2085.8 737.55
## + Mjob_teacher     1    4.2012 2086.0 737.59
## + Mjob_services    1    2.8948 2087.3 737.90
## + internet         1    2.3251 2087.9 738.03
## + Fjob_other       1    2.2171 2088.0 738.06
## + guardian_other   1    1.7967 2088.4 738.16
## + Fjob_health      1    1.1479 2089.0 738.31
## + Dalc             1    0.6265 2089.6 738.43
## + Fjob_services    1    0.5105 2089.7 738.46
## + famsize          1    0.4882 2089.7 738.47
## + Fedu             1    0.4056 2089.8 738.49
## + traveltime       1    0.2846 2089.9 738.51
## + activities       1    0.1923 2090.0 738.54
## + Mjob_other       1    0.1149 2090.1 738.55
## + freetime         1    0.0775 2090.1 738.56
## + reason_other     1    0.0088 2090.2 738.58
## 
## Step:  AIC=735.1
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher + 
##     romantic
## 
##                   Df Sum of Sq    RSS    AIC
## + reason_home      1   14.2869 2061.2 733.69
## + famrel           1   13.4225 2062.1 733.89
## + age              1   12.0141 2063.5 734.23
## + guardian_mother  1   11.0379 2064.5 734.47
## + famsup           1   10.8894 2064.6 734.50
## + health           1   10.0211 2065.5 734.71
## <none>                         2075.5 735.10
## + paid             1    8.3136 2067.2 735.12
## + address          1    7.6853 2067.8 735.27
## + Pstatus          1    7.0522 2068.4 735.42
## + Mjob_health      1    5.7327 2069.8 735.73
## + goout            1    4.5240 2071.0 736.02
## + nursery          1    4.2793 2071.2 736.08
## + Mjob_services    1    3.9233 2071.6 736.16
## + Mjob_teacher     1    3.8692 2071.6 736.18
## + internet         1    3.6742 2071.8 736.22
## + Fjob_other       1    2.7096 2072.8 736.45
## + Fjob_health      1    1.0717 2074.4 736.84
## + Fedu             1    1.0347 2074.5 736.85
## + guardian_other   1    0.6873 2074.8 736.94
## + Fjob_services    1    0.6496 2074.9 736.95
## + activities       1    0.4184 2075.1 737.00
## + famsize          1    0.3716 2075.1 737.01
## + Mjob_other       1    0.3138 2075.2 737.02
## + traveltime       1    0.2257 2075.3 737.05
## + Dalc             1    0.2082 2075.3 737.05
## + freetime         1    0.1939 2075.3 737.05
## + reason_other     1    0.0270 2075.5 737.09
## 
## Step:  AIC=733.69
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher + 
##     romantic + reason_home
## 
##                   Df Sum of Sq    RSS    AIC
## + famrel           1   14.3539 2046.9 732.24
## + famsup           1   11.7799 2049.4 732.86
## + guardian_mother  1   10.5960 2050.6 733.14
## + age              1    9.9816 2051.2 733.29
## + paid             1    8.8730 2052.3 733.56
## + health           1    8.4483 2052.8 733.66
## <none>                         2061.2 733.69
## + Pstatus          1    7.3310 2053.9 733.93
## + Mjob_health      1    5.6158 2055.6 734.34
## + address          1    5.4782 2055.7 734.37
## + goout            1    4.2813 2056.9 734.66
## + nursery          1    4.1599 2057.1 734.69
## + Mjob_teacher     1    4.0911 2057.1 734.71
## + internet         1    3.3757 2057.8 734.88
## + Mjob_services    1    3.1868 2058.0 734.92
## + Fjob_other       1    1.9419 2059.3 735.22
## + reason_other     1    1.1747 2060.0 735.41
## + activities       1    1.1295 2060.1 735.42
## + guardian_other   1    0.9968 2060.2 735.45
## + Fedu             1    0.8600 2060.4 735.48
## + Fjob_health      1    0.7946 2060.4 735.50
## + freetime         1    0.4890 2060.7 735.57
## + Dalc             1    0.4070 2060.8 735.59
## + Fjob_services    1    0.3894 2060.8 735.59
## + famsize          1    0.2225 2061.0 735.63
## + Mjob_other       1    0.0283 2061.2 735.68
## + traveltime       1    0.0005 2061.2 735.69
## 
## Step:  AIC=732.24
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher + 
##     romantic + reason_home + famrel
## 
##                   Df Sum of Sq    RSS    AIC
## + famsup           1   12.3145 2034.5 731.25
## + health           1   11.7472 2035.1 731.39
## + guardian_mother  1   11.1215 2035.7 731.54
## + paid             1   10.0591 2036.8 731.80
## + age              1    9.4192 2037.5 731.96
## <none>                         2046.9 732.24
## + goout            1    6.7259 2040.1 732.61
## + address          1    6.5218 2040.3 732.66
## + Pstatus          1    6.4926 2040.4 732.67
## + Mjob_health      1    6.4454 2040.4 732.68
## + Mjob_teacher     1    4.5811 2042.3 733.13
## + nursery          1    4.3304 2042.5 733.19
## + Mjob_services    1    3.9104 2043.0 733.29
## + internet         1    2.3491 2044.5 733.67
## + Fjob_other       1    1.8040 2045.1 733.80
## + Fedu             1    1.1237 2045.7 733.96
## + Fjob_health      1    0.8621 2046.0 734.03
## + reason_other     1    0.8439 2046.0 734.03
## + activities       1    0.8246 2046.0 734.04
## + guardian_other   1    0.8034 2046.1 734.04
## + Fjob_services    1    0.4423 2046.4 734.13
## + famsize          1    0.3110 2046.5 734.16
## + Dalc             1    0.1399 2046.7 734.20
## + freetime         1    0.1132 2046.8 734.21
## + Mjob_other       1    0.0442 2046.8 734.22
## + traveltime       1    0.0039 2046.9 734.23
## 
## Step:  AIC=731.25
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher + 
##     romantic + reason_home + famrel + famsup
## 
##                   Df Sum of Sq    RSS    AIC
## + guardian_mother  1   11.1767 2023.4 730.53
## + health           1   10.8910 2023.7 730.60
## + paid             1    8.6312 2025.9 731.15
## <none>                         2034.5 731.25
## + age              1    7.3106 2027.2 731.48
## + Mjob_health      1    6.9503 2027.6 731.56
## + Pstatus          1    6.6350 2027.9 731.64
## + goout            1    6.5660 2028.0 731.66
## + address          1    5.6111 2028.9 731.89
## + Mjob_teacher     1    4.1664 2030.4 732.24
## + nursery          1    3.8916 2030.7 732.31
## + Mjob_services    1    2.7363 2031.8 732.59
## + internet         1    2.6368 2031.9 732.61
## + Fjob_other       1    1.1411 2033.4 732.98
## + activities       1    0.7390 2033.8 733.07
## + reason_other     1    0.6419 2033.9 733.10
## + guardian_other   1    0.6183 2033.9 733.10
## + Fjob_health      1    0.5150 2034.0 733.13
## + Fedu             1    0.4401 2034.1 733.15
## + Fjob_services    1    0.2716 2034.3 733.19
## + famsize          1    0.2230 2034.3 733.20
## + freetime         1    0.1711 2034.4 733.21
## + Dalc             1    0.0722 2034.5 733.24
## + Mjob_other       1    0.0297 2034.5 733.25
## + traveltime       1    0.0135 2034.5 733.25
## 
## Step:  AIC=730.53
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher + 
##     romantic + reason_home + famrel + famsup + guardian_mother
## 
##                  Df Sum of Sq    RSS    AIC
## + health          1   10.5323 2012.8 729.95
## <none>                        2023.4 730.53
## + paid            1    7.6551 2015.7 730.66
## + age             1    6.9369 2016.4 730.84
## + goout           1    6.3907 2017.0 730.97
## + Mjob_health     1    5.8808 2017.5 731.10
## + Mjob_teacher    1    5.8235 2017.5 731.11
## + address         1    5.6207 2017.8 731.16
## + guardian_other  1    5.1616 2018.2 731.27
## + Pstatus         1    4.8357 2018.5 731.35
## + nursery         1    2.9034 2020.5 731.82
## + Mjob_services   1    2.3059 2021.1 731.97
## + internet        1    1.9397 2021.4 732.06
## + Fjob_other      1    1.7413 2021.6 732.11
## + Fedu            1    1.4197 2022.0 732.19
## + activities      1    1.0732 2022.3 732.27
## + Fjob_health     1    0.8116 2022.6 732.33
## + reason_other    1    0.6990 2022.7 732.36
## + Fjob_services   1    0.5334 2022.8 732.40
## + Dalc            1    0.3401 2023.0 732.45
## + famsize         1    0.3110 2023.1 732.46
## + freetime        1    0.2754 2023.1 732.47
## + Mjob_other      1    0.1894 2023.2 732.49
## + traveltime      1    0.0249 2023.3 732.53
## 
## Step:  AIC=729.95
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher + 
##     romantic + reason_home + famrel + famsup + guardian_mother + 
##     health
## 
##                  Df Sum of Sq    RSS    AIC
## <none>                        2012.8 729.95
## + goout           1    7.5555 2005.3 730.10
## + Mjob_health     1    6.8778 2006.0 730.26
## + age             1    6.8180 2006.0 730.28
## + paid            1    6.4735 2006.4 730.36
## + Mjob_teacher    1    6.3522 2006.5 730.39
## + address         1    5.4019 2007.4 730.63
## + guardian_other  1    4.9789 2007.9 730.73
## + Pstatus         1    4.6350 2008.2 730.82
## + nursery         1    2.7357 2010.1 731.28
## + Fjob_other      1    2.0797 2010.8 731.44
## + Mjob_services   1    1.9080 2010.9 731.49
## + internet        1    1.4709 2011.4 731.59
## + activities      1    1.2775 2011.6 731.64
## + Fjob_services   1    1.0361 2011.8 731.70
## + Fedu            1    1.0342 2011.8 731.70
## + reason_other    1    0.5371 2012.3 731.82
## + freetime        1    0.5098 2012.3 731.83
## + Dalc            1    0.4343 2012.4 731.85
## + Fjob_health     1    0.2683 2012.6 731.89
## + Mjob_other      1    0.2294 2012.6 731.90
## + famsize         1    0.1945 2012.7 731.91
## + traveltime      1    0.1272 2012.7 731.92
# Sequential ANOVA table for the forward-selected model; terms are listed in
# the order in which the forward search added them.
anova(forw_m)
## Analysis of Variance Table
## 
## Response: score
##                    Df  Sum Sq Mean Sq  F value    Pr(>F)    
## failures            1  618.60  618.60 146.2868 < 2.2e-16 ***
## higher              1  198.42  198.42  46.9236 2.290e-11 ***
## Medu                1  119.32  119.32  28.2165 1.669e-07 ***
## Walc                1   69.90   69.90  16.5308 5.600e-05 ***
## reason_reputation   1   58.12   58.12  13.7447 0.0002340 ***
## school              1   43.40   43.40  10.2633 0.0014481 ** 
## absences            1   51.48   51.48  12.1734 0.0005298 ***
## schoolsup           1   57.74   57.74  13.6551 0.0002452 ***
## studytime           1   28.85   28.85   6.8226 0.0092857 ** 
## sex                 1   19.58   19.58   4.6313 0.0318958 *  
## Fjob_teacher        1   19.06   19.06   4.5063 0.0342860 *  
## romantic            1   14.68   14.68   3.4712 0.0630620 .  
## reason_home         1   14.29   14.29   3.3786 0.0666712 .  
## famrel              1   14.35   14.35   3.3944 0.0660389 .  
## famsup              1   12.31   12.31   2.9122 0.0885662 .  
## guardian_mother     1   11.18   11.18   2.6431 0.1046636    
## health              1   10.53   10.53   2.4907 0.1151855    
## Residuals         476 2012.84    4.23                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Evaluate the forward-selected model on the train and test sets with the
# lm_result() helper (not shown in this section) and keep its return value
# in r2. NOTE(review): the MSE_tr / MSE_te values printed below presumably
# come from lm_result() itself -- confirm against its definition.
r2 <- lm_result(forw_m, train, test)

## MSE_tr MSE_te 
##   4.07   6.58

3.1.6. backward method

# Backward elimination: start from the full model and let step() remove one
# term at a time by AIC, printing every step (trace = 1). The search scope is
# bounded by null_m (lower) and full_m (upper).
back_m <- step(
  full_m,
  scope = list(lower = null_m, upper = full_m),
  direction = "backward",
  trace = 1
)
## Start:  AIC=755.61
## score ~ school + sex + age + address + famsize + Pstatus + Medu + 
##     Fedu + traveltime + studytime + failures + schoolsup + famsup + 
##     paid + activities + nursery + higher + internet + romantic + 
##     famrel + freetime + goout + Dalc + Walc + health + absences + 
##     Mjob_health + Mjob_other + Mjob_services + Mjob_teacher + 
##     Fjob_health + Fjob_other + Fjob_services + Fjob_teacher + 
##     reason_home + reason_other + reason_reputation + guardian_mother + 
##     guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - Dalc               1     0.001 1939.5 753.61
## - Fedu               1     0.001 1939.5 753.61
## - Fjob_services      1     0.012 1939.5 753.61
## - Mjob_services      1     0.116 1939.6 753.64
## - Fjob_health        1     0.290 1939.8 753.68
## - traveltime         1     0.318 1939.8 753.69
## - internet           1     0.497 1940.0 753.74
## - famsize            1     0.562 1940.0 753.75
## - Mjob_other         1     0.579 1940.0 753.76
## - reason_other       1     1.096 1940.6 753.89
## - freetime           1     1.527 1941.0 754.00
## - Fjob_other         1     1.726 1941.2 754.05
## - nursery            1     2.218 1941.7 754.17
## - activities         1     2.766 1942.2 754.31
## - Pstatus            1     3.327 1942.8 754.46
## - famsup             1     4.528 1944.0 754.76
## - paid               1     5.100 1944.6 754.91
## - Walc               1     5.770 1945.2 755.08
## - address            1     6.339 1945.8 755.22
## - Mjob_teacher       1     7.305 1946.8 755.47
## <none>                           1939.5 755.61
## - guardian_other     1     9.028 1948.5 755.90
## - reason_home        1    10.826 1950.3 756.36
## - Mjob_health        1    10.832 1950.3 756.36
## - goout              1    11.238 1950.7 756.46
## - Medu               1    11.326 1950.8 756.49
## - health             1    11.510 1951.0 756.53
## - Fjob_teacher       1    12.680 1952.1 756.83
## - age                1    13.475 1952.9 757.03
## - guardian_mother    1    14.472 1953.9 757.28
## - romantic           1    16.661 1956.1 757.83
## - famrel             1    21.606 1961.1 759.08
## - studytime          1    22.208 1961.7 759.23
## - reason_reputation  1    25.269 1964.7 760.00
## - absences           1    34.523 1974.0 762.32
## - sex                1    34.539 1974.0 762.33
## - school             1    38.755 1978.2 763.38
## - schoolsup          1    53.930 1993.4 767.16
## - higher             1    78.632 2018.1 773.24
## - failures           1   185.551 2125.0 798.74
## 
## Step:  AIC=753.61
## score ~ school + sex + age + address + famsize + Pstatus + Medu + 
##     Fedu + traveltime + studytime + failures + schoolsup + famsup + 
##     paid + activities + nursery + higher + internet + romantic + 
##     famrel + freetime + goout + Walc + health + absences + Mjob_health + 
##     Mjob_other + Mjob_services + Mjob_teacher + Fjob_health + 
##     Fjob_other + Fjob_services + Fjob_teacher + reason_home + 
##     reason_other + reason_reputation + guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - Fedu               1     0.001 1939.5 751.61
## - Fjob_services      1     0.012 1939.5 751.61
## - Mjob_services      1     0.115 1939.6 751.64
## - Fjob_health        1     0.294 1939.8 751.68
## - traveltime         1     0.318 1939.8 751.69
## - internet           1     0.497 1940.0 751.74
## - famsize            1     0.561 1940.0 751.75
## - Mjob_other         1     0.578 1940.0 751.76
## - reason_other       1     1.102 1940.6 751.89
## - freetime           1     1.534 1941.0 752.00
## - Fjob_other         1     1.728 1941.2 752.05
## - nursery            1     2.222 1941.7 752.17
## - activities         1     2.766 1942.2 752.31
## - Pstatus            1     3.336 1942.8 752.46
## - famsup             1     4.537 1944.0 752.76
## - paid               1     5.137 1944.6 752.92
## - address            1     6.355 1945.8 753.23
## - Mjob_teacher       1     7.308 1946.8 753.47
## <none>                           1939.5 753.61
## - Walc               1     8.360 1947.8 753.73
## - guardian_other     1     9.050 1948.5 753.91
## - reason_home        1    10.885 1950.3 754.37
## - Mjob_health        1    10.952 1950.4 754.39
## - goout              1    11.255 1950.7 754.47
## - Medu               1    11.461 1950.9 754.52
## - health             1    11.511 1951.0 754.53
## - Fjob_teacher       1    12.689 1952.2 754.83
## - age                1    13.494 1953.0 755.03
## - guardian_mother    1    14.561 1954.0 755.30
## - romantic           1    16.744 1956.2 755.86
## - famrel             1    21.821 1961.3 757.14
## - studytime          1    22.210 1961.7 757.23
## - reason_reputation  1    25.326 1964.8 758.02
## - absences           1    34.678 1974.1 760.36
## - sex                1    34.905 1974.4 760.42
## - school             1    38.765 1978.2 761.39
## - schoolsup          1    54.182 1993.6 765.22
## - higher             1    78.635 2018.1 771.24
## - failures           1   185.550 2125.0 796.74
## 
## Step:  AIC=751.61
## score ~ school + sex + age + address + famsize + Pstatus + Medu + 
##     traveltime + studytime + failures + schoolsup + famsup + 
##     paid + activities + nursery + higher + internet + romantic + 
##     famrel + freetime + goout + Walc + health + absences + Mjob_health + 
##     Mjob_other + Mjob_services + Mjob_teacher + Fjob_health + 
##     Fjob_other + Fjob_services + Fjob_teacher + reason_home + 
##     reason_other + reason_reputation + guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - Fjob_services      1     0.013 1939.5 749.61
## - Mjob_services      1     0.114 1939.6 749.64
## - Fjob_health        1     0.297 1939.8 749.69
## - traveltime         1     0.317 1939.8 749.69
## - internet           1     0.496 1940.0 749.74
## - famsize            1     0.560 1940.0 749.75
## - Mjob_other         1     0.579 1940.0 749.76
## - reason_other       1     1.101 1940.6 749.89
## - freetime           1     1.552 1941.0 750.00
## - Fjob_other         1     1.730 1941.2 750.05
## - nursery            1     2.221 1941.7 750.17
## - activities         1     2.766 1942.2 750.31
## - Pstatus            1     3.339 1942.8 750.46
## - famsup             1     4.554 1944.0 750.77
## - paid               1     5.191 1944.7 750.93
## - address            1     6.354 1945.8 751.23
## - Mjob_teacher       1     7.326 1946.8 751.47
## <none>                           1939.5 751.61
## - Walc               1     8.425 1947.9 751.75
## - guardian_other     1     9.053 1948.5 751.91
## - reason_home        1    10.884 1950.3 752.37
## - Mjob_health        1    10.976 1950.4 752.40
## - goout              1    11.255 1950.7 752.47
## - health             1    11.514 1951.0 752.53
## - age                1    13.496 1953.0 753.03
## - Fjob_teacher       1    13.693 1953.2 753.08
## - guardian_mother    1    14.853 1954.3 753.38
## - Medu               1    15.523 1955.0 753.55
## - romantic           1    16.980 1956.4 753.92
## - famrel             1    21.832 1961.3 755.14
## - studytime          1    22.218 1961.7 755.24
## - reason_reputation  1    25.328 1964.8 756.02
## - absences           1    34.966 1974.4 758.44
## - sex                1    34.977 1974.4 758.44
## - school             1    38.764 1978.2 759.39
## - schoolsup          1    54.190 1993.7 763.22
## - higher             1    79.095 2018.6 769.36
## - failures           1   186.855 2126.3 795.05
## 
## Step:  AIC=749.61
## score ~ school + sex + age + address + famsize + Pstatus + Medu + 
##     traveltime + studytime + failures + schoolsup + famsup + 
##     paid + activities + nursery + higher + internet + romantic + 
##     famrel + freetime + goout + Walc + health + absences + Mjob_health + 
##     Mjob_other + Mjob_services + Mjob_teacher + Fjob_health + 
##     Fjob_other + Fjob_teacher + reason_home + reason_other + 
##     reason_reputation + guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - Mjob_services      1     0.125 1939.6 747.64
## - traveltime         1     0.325 1939.8 747.70
## - internet           1     0.507 1940.0 747.74
## - Fjob_health        1     0.514 1940.0 747.74
## - famsize            1     0.558 1940.0 747.75
## - Mjob_other         1     0.593 1940.1 747.76
## - reason_other       1     1.114 1940.6 747.90
## - freetime           1     1.541 1941.0 748.00
## - nursery            1     2.238 1941.7 748.18
## - activities         1     2.755 1942.2 748.31
## - Pstatus            1     3.344 1942.8 748.46
## - famsup             1     4.571 1944.0 748.78
## - Fjob_other         1     4.938 1944.4 748.87
## - paid               1     5.196 1944.7 748.93
## - address            1     6.352 1945.8 749.23
## - Mjob_teacher       1     7.385 1946.9 749.49
## <none>                           1939.5 749.61
## - Walc               1     8.425 1947.9 749.75
## - guardian_other     1     9.044 1948.5 749.91
## - reason_home        1    10.896 1950.4 750.38
## - Mjob_health        1    11.114 1950.6 750.44
## - goout              1    11.283 1950.8 750.48
## - health             1    11.545 1951.0 750.54
## - age                1    13.488 1953.0 751.04
## - guardian_mother    1    14.860 1954.3 751.38
## - Medu               1    15.564 1955.0 751.56
## - romantic           1    16.968 1956.4 751.92
## - Fjob_teacher       1    20.274 1959.8 752.75
## - famrel             1    21.955 1961.4 753.17
## - studytime          1    22.223 1961.7 753.24
## - reason_reputation  1    25.388 1964.9 754.04
## - sex                1    34.977 1974.5 756.44
## - absences           1    35.216 1974.7 756.50
## - school             1    39.150 1978.6 757.48
## - schoolsup          1    54.356 1993.8 761.27
## - higher             1    79.086 2018.6 767.36
## - failures           1   187.089 2126.6 793.11
## 
## Step:  AIC=747.64
## score ~ school + sex + age + address + famsize + Pstatus + Medu + 
##     traveltime + studytime + failures + schoolsup + famsup + 
##     paid + activities + nursery + higher + internet + romantic + 
##     famrel + freetime + goout + Walc + health + absences + Mjob_health + 
##     Mjob_other + Mjob_teacher + Fjob_health + Fjob_other + Fjob_teacher + 
##     reason_home + reason_other + reason_reputation + guardian_mother + 
##     guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - traveltime         1     0.314 1939.9 745.72
## - Mjob_other         1     0.488 1940.1 745.77
## - Fjob_health        1     0.540 1940.1 745.78
## - famsize            1     0.561 1940.2 745.79
## - internet           1     0.660 1940.3 745.81
## - reason_other       1     1.129 1940.7 745.93
## - freetime           1     1.556 1941.2 746.04
## - nursery            1     2.278 1941.9 746.22
## - activities         1     2.777 1942.4 746.35
## - Pstatus            1     3.312 1942.9 746.49
## - famsup             1     4.519 1944.1 746.79
## - Fjob_other         1     4.823 1944.4 746.87
## - paid               1     5.396 1945.0 747.02
## - address            1     6.475 1946.1 747.29
## <none>                           1939.6 747.64
## - Walc               1     8.407 1948.0 747.78
## - Mjob_teacher       1     8.844 1948.4 747.89
## - guardian_other     1     9.212 1948.8 747.99
## - reason_home        1    10.943 1950.5 748.42
## - goout              1    11.236 1950.8 748.50
## - health             1    11.433 1951.0 748.55
## - Mjob_health        1    13.334 1952.9 749.03
## - age                1    13.457 1953.1 749.06
## - guardian_mother    1    14.825 1954.4 749.41
## - romantic           1    17.167 1956.8 750.00
## - Medu               1    17.406 1957.0 750.06
## - Fjob_teacher       1    20.200 1959.8 750.76
## - famrel             1    21.896 1961.5 751.19
## - studytime          1    22.265 1961.9 751.28
## - reason_reputation  1    25.583 1965.2 752.12
## - sex                1    34.901 1974.5 754.45
## - absences           1    35.103 1974.7 754.50
## - school             1    39.636 1979.2 755.64
## - schoolsup          1    54.286 1993.9 759.28
## - higher             1    80.034 2019.6 765.62
## - failures           1   187.027 2126.6 791.12
## 
## Step:  AIC=745.72
## score ~ school + sex + age + address + famsize + Pstatus + Medu + 
##     studytime + failures + schoolsup + famsup + paid + activities + 
##     nursery + higher + internet + romantic + famrel + freetime + 
##     goout + Walc + health + absences + Mjob_health + Mjob_other + 
##     Mjob_teacher + Fjob_health + Fjob_other + Fjob_teacher + 
##     reason_home + reason_other + reason_reputation + guardian_mother + 
##     guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - Mjob_other         1     0.482 1940.4 743.85
## - famsize            1     0.551 1940.5 743.86
## - Fjob_health        1     0.563 1940.5 743.87
## - internet           1     0.612 1940.5 743.88
## - reason_other       1     1.043 1941.0 743.99
## - freetime           1     1.476 1941.4 744.10
## - nursery            1     2.173 1942.1 744.28
## - activities         1     2.845 1942.8 744.45
## - Pstatus            1     3.324 1943.2 744.57
## - famsup             1     4.418 1944.3 744.85
## - Fjob_other         1     5.159 1945.1 745.04
## - paid               1     5.523 1945.4 745.13
## - address            1     6.167 1946.1 745.29
## <none>                           1939.9 745.72
## - Walc               1     8.337 1948.2 745.84
## - Mjob_teacher       1     8.904 1948.8 745.99
## - guardian_other     1     9.025 1948.9 746.02
## - reason_home        1    10.654 1950.6 746.43
## - goout              1    11.044 1951.0 746.53
## - health             1    11.737 1951.7 746.70
## - Mjob_health        1    13.321 1953.2 747.10
## - age                1    13.347 1953.3 747.11
## - guardian_mother    1    15.178 1955.1 747.57
## - Medu               1    17.125 1957.0 748.07
## - romantic           1    17.146 1957.1 748.07
## - Fjob_teacher       1    20.513 1960.4 748.92
## - famrel             1    21.968 1961.9 749.29
## - studytime          1    22.407 1962.3 749.40
## - reason_reputation  1    25.289 1965.2 750.12
## - sex                1    34.588 1974.5 752.45
## - absences           1    35.000 1974.9 752.56
## - school             1    39.337 1979.2 753.64
## - schoolsup          1    54.241 1994.2 757.35
## - higher             1    80.140 2020.0 763.72
## - failures           1   187.155 2127.1 789.22
## 
## Step:  AIC=743.85
## score ~ school + sex + age + address + famsize + Pstatus + Medu + 
##     studytime + failures + schoolsup + famsup + paid + activities + 
##     nursery + higher + internet + romantic + famrel + freetime + 
##     goout + Walc + health + absences + Mjob_health + Mjob_teacher + 
##     Fjob_health + Fjob_other + Fjob_teacher + reason_home + reason_other + 
##     reason_reputation + guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - famsize            1     0.486 1940.9 741.97
## - Fjob_health        1     0.553 1941.0 741.99
## - internet           1     0.581 1941.0 741.99
## - reason_other       1     1.041 1941.4 742.11
## - freetime           1     1.470 1941.9 742.22
## - nursery            1     2.277 1942.7 742.43
## - activities         1     2.797 1943.2 742.56
## - Pstatus            1     3.275 1943.7 742.68
## - famsup             1     4.762 1945.2 743.06
## - paid               1     5.632 1946.0 743.28
## - Fjob_other         1     6.110 1946.5 743.40
## - address            1     6.247 1946.6 743.43
## <none>                           1940.4 743.85
## - Mjob_teacher       1     8.422 1948.8 743.99
## - Walc               1     8.654 1949.0 744.05
## - guardian_other     1     8.964 1949.4 744.12
## - goout              1    10.843 1951.2 744.60
## - reason_home        1    11.187 1951.6 744.69
## - health             1    11.724 1952.1 744.82
## - Mjob_health        1    12.890 1953.3 745.12
## - age                1    13.193 1953.6 745.19
## - guardian_mother    1    15.667 1956.1 745.82
## - romantic           1    16.874 1957.3 746.12
## - Medu               1    16.930 1957.3 746.14
## - Fjob_teacher       1    20.706 1961.1 747.09
## - famrel             1    21.793 1962.2 747.36
## - studytime          1    22.456 1962.8 747.53
## - reason_reputation  1    25.733 1966.1 748.36
## - sex                1    34.341 1974.7 750.51
## - absences           1    35.041 1975.4 750.69
## - school             1    39.282 1979.7 751.75
## - schoolsup          1    54.135 1994.5 755.44
## - higher             1    81.954 2022.3 762.28
## - failures           1   187.233 2127.6 787.35
## 
## Step:  AIC=741.97
## score ~ school + sex + age + address + Pstatus + Medu + studytime + 
##     failures + schoolsup + famsup + paid + activities + nursery + 
##     higher + internet + romantic + famrel + freetime + goout + 
##     Walc + health + absences + Mjob_health + Mjob_teacher + Fjob_health + 
##     Fjob_other + Fjob_teacher + reason_home + reason_other + 
##     reason_reputation + guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - Fjob_health        1     0.526 1941.4 740.10
## - internet           1     0.590 1941.5 740.12
## - reason_other       1     0.989 1941.9 740.22
## - freetime           1     1.475 1942.4 740.35
## - nursery            1     2.078 1943.0 740.50
## - activities         1     2.762 1943.6 740.67
## - Pstatus            1     2.869 1943.8 740.70
## - famsup             1     4.826 1945.7 741.20
## - paid               1     5.849 1946.7 741.46
## - Fjob_other         1     6.035 1946.9 741.50
## - address            1     6.342 1947.2 741.58
## <none>                           1940.9 741.97
## - Walc               1     8.352 1949.2 742.09
## - Mjob_teacher       1     8.732 1949.6 742.19
## - guardian_other     1     8.955 1949.8 742.24
## - goout              1    11.020 1951.9 742.77
## - reason_home        1    11.287 1952.2 742.84
## - health             1    11.966 1952.8 743.01
## - Mjob_health        1    12.915 1953.8 743.25
## - age                1    13.306 1954.2 743.35
## - guardian_mother    1    15.707 1956.6 743.95
## - Medu               1    16.605 1957.5 744.18
## - romantic           1    17.104 1958.0 744.30
## - Fjob_teacher       1    20.375 1961.3 745.13
## - famrel             1    21.827 1962.7 745.50
## - studytime          1    22.451 1963.3 745.65
## - reason_reputation  1    25.882 1966.8 746.51
## - sex                1    33.910 1974.8 748.53
## - absences           1    35.957 1976.8 749.04
## - school             1    39.129 1980.0 749.83
## - schoolsup          1    54.325 1995.2 753.61
## - higher             1    81.808 2022.7 760.37
## - failures           1   188.560 2129.4 785.77
## 
## Step:  AIC=740.1
## score ~ school + sex + age + address + Pstatus + Medu + studytime + 
##     failures + schoolsup + famsup + paid + activities + nursery + 
##     higher + internet + romantic + famrel + freetime + goout + 
##     Walc + health + absences + Mjob_health + Mjob_teacher + Fjob_other + 
##     Fjob_teacher + reason_home + reason_other + reason_reputation + 
##     guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - internet           1     0.648 1942.0 738.27
## - reason_other       1     0.939 1942.3 738.34
## - freetime           1     1.534 1942.9 738.49
## - nursery            1     2.142 1943.5 738.65
## - activities         1     2.795 1944.2 738.81
## - Pstatus            1     2.945 1944.3 738.85
## - famsup             1     4.953 1946.4 739.36
## - paid               1     5.744 1947.2 739.56
## - address            1     6.216 1947.6 739.68
## - Fjob_other         1     7.311 1948.7 739.96
## <none>                           1941.4 740.10
## - Walc               1     8.115 1949.5 740.17
## - Mjob_teacher       1     8.468 1949.9 740.25
## - guardian_other     1     8.862 1950.3 740.35
## - reason_home        1    11.330 1952.7 740.98
## - goout              1    11.459 1952.9 741.01
## - Mjob_health        1    12.400 1953.8 741.25
## - health             1    12.811 1954.2 741.35
## - age                1    13.554 1955.0 741.54
## - guardian_mother    1    15.472 1956.9 742.03
## - Medu               1    16.328 1957.7 742.24
## - romantic           1    17.263 1958.7 742.48
## - famrel             1    21.756 1963.2 743.61
## - Fjob_teacher       1    21.995 1963.4 743.67
## - studytime          1    22.004 1963.4 743.67
## - reason_reputation  1    25.649 1967.1 744.59
## - sex                1    34.011 1975.4 746.68
## - absences           1    36.608 1978.0 747.33
## - school             1    38.846 1980.2 747.89
## - schoolsup          1    56.168 1997.6 752.19
## - higher             1    82.760 2024.2 758.73
## - failures           1   188.088 2129.5 783.79
## 
## Step:  AIC=738.27
## score ~ school + sex + age + address + Pstatus + Medu + studytime + 
##     failures + schoolsup + famsup + paid + activities + nursery + 
##     higher + romantic + famrel + freetime + goout + Walc + health + 
##     absences + Mjob_health + Mjob_teacher + Fjob_other + Fjob_teacher + 
##     reason_home + reason_other + reason_reputation + guardian_mother + 
##     guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - reason_other       1     0.976 1943.0 736.52
## - freetime           1     1.608 1943.7 736.68
## - nursery            1     2.215 1944.3 736.83
## - activities         1     2.785 1944.8 736.98
## - Pstatus            1     3.190 1945.2 737.08
## - famsup             1     4.831 1946.9 737.50
## - paid               1     5.600 1947.7 737.69
## - address            1     6.620 1948.7 737.95
## - Fjob_other         1     7.211 1949.3 738.10
## <none>                           1942.0 738.27
## - Walc               1     7.999 1950.0 738.30
## - Mjob_teacher       1     8.727 1950.8 738.48
## - guardian_other     1     8.805 1950.9 738.50
## - goout              1    11.060 1953.1 739.07
## - reason_home        1    11.392 1953.5 739.16
## - Mjob_health        1    12.522 1954.6 739.44
## - health             1    13.155 1955.2 739.60
## - age                1    14.076 1956.1 739.84
## - guardian_mother    1    15.844 1957.9 740.28
## - romantic           1    16.779 1958.8 740.52
## - Medu               1    17.889 1959.9 740.80
## - Fjob_teacher       1    21.503 1963.6 741.71
## - studytime          1    21.808 1963.9 741.79
## - famrel             1    22.424 1964.5 741.94
## - reason_reputation  1    26.605 1968.7 742.99
## - sex                1    33.947 1976.0 744.83
## - absences           1    36.832 1978.9 745.55
## - school             1    41.137 1983.2 746.62
## - schoolsup          1    56.683 1998.7 750.48
## - higher             1    83.899 2026.0 757.16
## - failures           1   188.521 2130.6 782.04
## 
## Step:  AIC=736.52
## score ~ school + sex + age + address + Pstatus + Medu + studytime + 
##     failures + schoolsup + famsup + paid + activities + nursery + 
##     higher + romantic + famrel + freetime + goout + Walc + health + 
##     absences + Mjob_health + Mjob_teacher + Fjob_other + Fjob_teacher + 
##     reason_home + reason_reputation + guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - freetime           1     1.442 1944.5 734.88
## - nursery            1     2.180 1945.2 735.07
## - activities         1     2.602 1945.6 735.18
## - Pstatus            1     3.126 1946.2 735.31
## - famsup             1     5.023 1948.0 735.79
## - paid               1     5.270 1948.3 735.86
## - address            1     6.441 1949.5 736.15
## - Fjob_other         1     7.124 1950.2 736.33
## - Walc               1     7.805 1950.8 736.50
## <none>                           1943.0 736.52
## - Mjob_teacher       1     8.502 1951.5 736.67
## - guardian_other     1     9.237 1952.3 736.86
## - reason_home        1    10.451 1953.5 737.17
## - goout              1    11.168 1954.2 737.35
## - Mjob_health        1    13.045 1956.1 737.82
## - health             1    13.422 1956.5 737.92
## - age                1    14.029 1957.1 738.07
## - guardian_mother    1    15.915 1959.0 738.55
## - romantic           1    16.538 1959.6 738.70
## - Medu               1    17.952 1961.0 739.06
## - studytime          1    21.583 1964.6 739.97
## - Fjob_teacher       1    21.662 1964.7 739.99
## - famrel             1    23.018 1966.0 740.34
## - reason_reputation  1    25.669 1968.7 741.00
## - sex                1    33.579 1976.6 742.98
## - absences           1    36.739 1979.8 743.77
## - school             1    40.307 1983.3 744.66
## - schoolsup          1    56.015 1999.0 748.56
## - higher             1    83.125 2026.2 755.21
## - failures           1   190.917 2133.9 780.82
## 
## Step:  AIC=734.88
## score ~ school + sex + age + address + Pstatus + Medu + studytime + 
##     failures + schoolsup + famsup + paid + activities + nursery + 
##     higher + romantic + famrel + goout + Walc + health + absences + 
##     Mjob_health + Mjob_teacher + Fjob_other + Fjob_teacher + 
##     reason_home + reason_reputation + guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - nursery            1     2.282 1946.8 733.46
## - activities         1     3.165 1947.6 733.69
## - Pstatus            1     3.227 1947.7 733.70
## - famsup             1     4.921 1949.4 734.13
## - paid               1     5.793 1950.3 734.35
## - address            1     6.250 1950.7 734.47
## - Fjob_other         1     7.339 1951.8 734.75
## <none>                           1944.5 734.88
## - Walc               1     8.353 1952.8 735.00
## - guardian_other     1     9.126 1953.6 735.20
## - Mjob_teacher       1     9.290 1953.8 735.24
## - goout              1     9.777 1954.2 735.36
## - reason_home        1    10.198 1954.7 735.47
## - health             1    12.815 1957.3 736.13
## - Mjob_health        1    13.052 1957.5 736.19
## - age                1    13.375 1957.8 736.27
## - guardian_mother    1    15.706 1960.2 736.86
## - romantic           1    16.103 1960.6 736.96
## - Medu               1    17.603 1962.1 737.34
## - studytime          1    21.483 1966.0 738.31
## - Fjob_teacher       1    21.503 1966.0 738.32
## - famrel             1    23.669 1968.1 738.86
## - reason_reputation  1    25.626 1970.1 739.35
## - sex                1    32.386 1976.9 741.04
## - absences           1    37.243 1981.7 742.26
## - school             1    40.059 1984.5 742.96
## - schoolsup          1    55.343 1999.8 746.75
## - higher             1    81.744 2026.2 753.23
## - failures           1   189.487 2134.0 778.82
## 
## Step:  AIC=733.46
## score ~ school + sex + age + address + Pstatus + Medu + studytime + 
##     failures + schoolsup + famsup + paid + activities + higher + 
##     romantic + famrel + goout + Walc + health + absences + Mjob_health + 
##     Mjob_teacher + Fjob_other + Fjob_teacher + reason_home + 
##     reason_reputation + guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - activities         1     3.132 1949.9 732.26
## - Pstatus            1     3.443 1950.2 732.34
## - famsup             1     5.093 1951.8 732.75
## - paid               1     6.232 1953.0 733.04
## - address            1     6.361 1953.1 733.07
## - Walc               1     7.635 1954.4 733.40
## <none>                           1946.8 733.46
## - Fjob_other         1     7.929 1954.7 733.47
## - guardian_other     1     8.343 1955.1 733.58
## - Mjob_teacher       1     9.251 1956.0 733.81
## - goout              1     9.980 1956.7 733.99
## - reason_home        1    10.169 1956.9 734.04
## - health             1    12.989 1959.7 734.75
## - Mjob_health        1    13.071 1959.8 734.77
## - age                1    13.305 1960.1 734.83
## - guardian_mother    1    16.104 1962.9 735.53
## - romantic           1    16.656 1963.4 735.67
## - Medu               1    16.686 1963.4 735.68
## - studytime          1    21.755 1968.5 736.95
## - Fjob_teacher       1    22.842 1969.6 737.23
## - famrel             1    23.663 1970.4 737.43
## - reason_reputation  1    25.079 1971.8 737.79
## - sex                1    31.923 1978.7 739.50
## - absences           1    37.287 1984.0 740.84
## - school             1    40.604 1987.4 741.66
## - schoolsup          1    56.573 2003.3 745.61
## - higher             1    82.198 2029.0 751.89
## - failures           1   188.557 2135.3 777.13
## 
## Step:  AIC=732.26
## score ~ school + sex + age + address + Pstatus + Medu + studytime + 
##     failures + schoolsup + famsup + paid + higher + romantic + 
##     famrel + goout + Walc + health + absences + Mjob_health + 
##     Mjob_teacher + Fjob_other + Fjob_teacher + reason_home + 
##     reason_reputation + guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - Pstatus            1     4.115 1954.0 731.30
## - famsup             1     5.369 1955.3 731.62
## - paid               1     5.912 1955.8 731.75
## - address            1     6.136 1956.0 731.81
## - Fjob_other         1     7.145 1957.0 732.06
## <none>                           1949.9 732.26
## - Walc               1     8.339 1958.2 732.37
## - guardian_other     1     8.564 1958.5 732.42
## - goout              1     8.961 1958.8 732.52
## - reason_home        1     9.310 1959.2 732.61
## - Mjob_teacher       1     9.657 1959.5 732.70
## - Mjob_health        1    12.320 1962.2 733.37
## - age                1    12.360 1962.2 733.38
## - health             1    12.503 1962.4 733.41
## - guardian_mother    1    15.533 1965.4 734.18
## - romantic           1    15.643 1965.5 734.20
## - Medu               1    17.914 1967.8 734.78
## - Fjob_teacher       1    22.421 1972.3 735.91
## - studytime          1    22.502 1972.4 735.93
## - famrel             1    23.936 1973.8 736.28
## - reason_reputation  1    28.785 1978.7 737.50
## - sex                1    30.333 1980.2 737.88
## - absences           1    37.095 1987.0 739.57
## - school             1    42.427 1992.3 740.89
## - schoolsup          1    58.704 2008.6 744.91
## - higher             1    81.434 2031.3 750.47
## - failures           1   187.661 2137.6 775.65
## 
## Step:  AIC=731.3
## score ~ school + sex + age + address + Medu + studytime + failures + 
##     schoolsup + famsup + paid + higher + romantic + famrel + 
##     goout + Walc + health + absences + Mjob_health + Mjob_teacher + 
##     Fjob_other + Fjob_teacher + reason_home + reason_reputation + 
##     guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - address            1     5.110 1959.1 730.59
## - famsup             1     5.339 1959.3 730.65
## - paid               1     5.757 1959.8 730.75
## - Fjob_other         1     6.052 1960.0 730.83
## - Walc               1     7.369 1961.4 731.16
## <none>                           1954.0 731.30
## - goout              1     9.216 1963.2 731.62
## - reason_home        1     9.389 1963.4 731.67
## - guardian_other     1    10.165 1964.2 731.86
## - Mjob_teacher       1    10.408 1964.4 731.92
## - Mjob_health        1    12.481 1966.5 732.44
## - health             1    12.722 1966.7 732.50
## - age                1    13.117 1967.1 732.60
## - romantic           1    16.183 1970.2 733.37
## - Medu               1    16.711 1970.7 733.51
## - guardian_mother    1    18.464 1972.5 733.95
## - Fjob_teacher       1    21.290 1975.3 734.65
## - studytime          1    22.400 1976.4 734.93
## - famrel             1    24.840 1978.8 735.54
## - reason_reputation  1    27.776 1981.8 736.27
## - sex                1    29.856 1983.9 736.79
## - absences           1    38.713 1992.7 738.99
## - school             1    44.783 1998.8 740.49
## - schoolsup          1    59.916 2013.9 744.22
## - higher             1    81.552 2035.5 749.50
## - failures           1   186.638 2140.6 774.36
## 
## Step:  AIC=730.59
## score ~ school + sex + age + Medu + studytime + failures + schoolsup + 
##     famsup + paid + higher + romantic + famrel + goout + Walc + 
##     health + absences + Mjob_health + Mjob_teacher + Fjob_other + 
##     Fjob_teacher + reason_home + reason_reputation + guardian_mother + 
##     guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - Fjob_other         1     5.291 1964.4 729.92
## - paid               1     5.877 1965.0 730.07
## - famsup             1     5.914 1965.0 730.08
## <none>                           1959.1 730.59
## - Walc               1     8.042 1967.2 730.61
## - goout              1     8.523 1967.6 730.73
## - Mjob_teacher       1    10.102 1969.2 731.13
## - guardian_other     1    10.780 1969.9 731.30
## - reason_home        1    11.290 1970.4 731.43
## - health             1    12.853 1972.0 731.82
## - Mjob_health        1    13.144 1972.3 731.89
## - age                1    14.164 1973.3 732.15
## - romantic           1    16.768 1975.9 732.80
## - guardian_mother    1    18.561 1977.7 733.25
## - Medu               1    18.962 1978.1 733.35
## - Fjob_teacher       1    20.448 1979.6 733.72
## - studytime          1    22.630 1981.7 734.26
## - famrel             1    23.602 1982.7 734.50
## - reason_reputation  1    26.863 1986.0 735.32
## - sex                1    30.400 1989.5 736.20
## - absences           1    37.928 1997.0 738.06
## - schoolsup          1    60.926 2020.0 743.72
## - school             1    61.001 2020.1 743.74
## - higher             1    80.838 2040.0 748.56
## - failures           1   185.970 2145.1 773.39
## 
## Step:  AIC=729.92
## score ~ school + sex + age + Medu + studytime + failures + schoolsup + 
##     famsup + paid + higher + romantic + famrel + goout + Walc + 
##     health + absences + Mjob_health + Mjob_teacher + Fjob_teacher + 
##     reason_home + reason_reputation + guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - paid               1     5.892 1970.3 729.40
## - famsup             1     7.003 1971.4 729.68
## - goout              1     7.891 1972.3 729.90
## <none>                           1964.4 729.92
## - Walc               1     8.838 1973.2 730.14
## - guardian_other     1     9.036 1973.4 730.19
## - Mjob_teacher       1     9.538 1973.9 730.31
## - Mjob_health        1    11.992 1976.4 730.93
## - health             1    12.147 1976.5 730.97
## - reason_home        1    12.529 1976.9 731.06
## - age                1    13.631 1978.0 731.34
## - romantic           1    16.270 1980.7 732.00
## - guardian_mother    1    16.519 1980.9 732.06
## - Fjob_teacher       1    16.646 1981.0 732.09
## - Medu               1    18.330 1982.7 732.51
## - studytime          1    21.678 1986.1 733.34
## - famrel             1    23.608 1988.0 733.82
## - reason_reputation  1    28.451 1992.8 735.02
## - sex                1    31.186 1995.6 735.70
## - absences           1    38.156 2002.6 737.42
## - schoolsup          1    61.402 2025.8 743.13
## - school             1    64.330 2028.7 743.84
## - higher             1    81.099 2045.5 747.91
## - failures           1   185.142 2149.5 772.41
## 
## Step:  AIC=729.4
## score ~ school + sex + age + Medu + studytime + failures + schoolsup + 
##     famsup + higher + romantic + famrel + goout + Walc + health + 
##     absences + Mjob_health + Mjob_teacher + Fjob_teacher + reason_home + 
##     reason_reputation + guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## - goout              1     7.249 1977.5 729.22
## - famsup             1     7.910 1978.2 729.38
## <none>                           1970.3 729.40
## - Walc               1     9.251 1979.5 729.71
## - guardian_other     1     9.578 1979.9 729.80
## - Mjob_teacher       1    10.195 1980.5 729.95
## - reason_home        1    12.084 1982.4 730.42
## - Mjob_health        1    12.592 1982.9 730.55
## - health             1    13.429 1983.7 730.76
## - age                1    13.472 1983.8 730.77
## - Medu               1    15.678 1986.0 731.32
## - romantic           1    16.229 1986.5 731.45
## - Fjob_teacher       1    17.856 1988.2 731.86
## - guardian_mother    1    17.873 1988.2 731.86
## - studytime          1    21.992 1992.3 732.88
## - famrel             1    22.668 1993.0 733.05
## - reason_reputation  1    31.113 2001.4 735.14
## - sex                1    33.036 2003.3 735.62
## - absences           1    37.254 2007.5 736.65
## - schoolsup          1    62.735 2033.0 742.88
## - school             1    64.801 2035.1 743.39
## - higher             1    78.730 2049.0 746.76
## - failures           1   192.125 2162.4 773.37
## 
## Step:  AIC=729.22
## score ~ school + sex + age + Medu + studytime + failures + schoolsup + 
##     famsup + higher + romantic + famrel + Walc + health + absences + 
##     Mjob_health + Mjob_teacher + Fjob_teacher + reason_home + 
##     reason_reputation + guardian_mother + guardian_other
## 
##                     Df Sum of Sq    RSS    AIC
## <none>                           1977.5 729.22
## - famsup             1     8.089 1985.6 729.23
## - guardian_other     1    10.084 1987.6 729.73
## - Mjob_teacher       1    11.699 1989.2 730.13
## - Mjob_health        1    11.969 1989.5 730.20
## - health             1    12.215 1989.8 730.26
## - reason_home        1    12.504 1990.0 730.33
## - age                1    12.679 1990.2 730.37
## - Medu               1    14.648 1992.2 730.86
## - romantic           1    15.796 1993.3 731.15
## - Fjob_teacher       1    17.481 1995.0 731.56
## - guardian_mother    1    18.660 1996.2 731.85
## - Walc               1    18.713 1996.3 731.87
## - famrel             1    19.641 1997.2 732.10
## - studytime          1    22.072 1999.6 732.70
## - sex                1    30.753 2008.3 734.84
## - reason_reputation  1    32.081 2009.6 735.16
## - absences           1    39.618 2017.2 737.01
## - schoolsup          1    62.405 2040.0 742.56
## - school             1    67.550 2045.1 743.81
## - higher             1    81.717 2059.3 747.22
## - failures           1   191.367 2168.9 772.85
# ANOVA table for back_m, the model left after the backward step() search
# (its terms match the final "Step: AIC=729.22" formula in the trace above).
# NOTE(review): anova() on a single lm fit reports sequential sums of squares,
# so each row depends on the order in which terms entered the formula.
anova(back_m)
## Analysis of Variance Table
## 
## Response: score
##                    Df  Sum Sq Mean Sq F value    Pr(>F)    
## school              1  192.96  192.96 46.0561 3.457e-11 ***
## sex                 1   92.56   92.56 22.0910 3.417e-06 ***
## age                 1   31.13   31.13  7.4290 0.0066568 ** 
## Medu                1  199.81  199.81 47.6910 1.619e-11 ***
## studytime           1   83.34   83.34 19.8915 1.026e-05 ***
## failures            1  389.60  389.60 92.9906 < 2.2e-16 ***
## schoolsup           1   43.96   43.96 10.4914 0.0012839 ** 
## famsup              1   11.06   11.06  2.6387 0.1049577    
## higher              1  104.44  104.44 24.9281 8.389e-07 ***
## romantic            1   25.86   25.86  6.1731 0.0133164 *  
## famrel              1   21.16   21.16  5.0513 0.0250683 *  
## Walc                1   33.64   33.64  8.0293 0.0047998 ** 
## health              1   15.47   15.47  3.6921 0.0552733 .  
## absences            1   52.95   52.95 12.6376 0.0004162 ***
## Mjob_health         1   10.43   10.43  2.4886 0.1153424    
## Mjob_teacher        1   11.24   11.24  2.6830 0.1020922    
## Fjob_teacher        1   19.80   19.80  4.7257 0.0302120 *  
## reason_home         1    2.73    2.73  0.6513 0.4200659    
## reason_reputation   1   33.88   33.88  8.0876 0.0046504 ** 
## guardian_mother     1   11.02   11.02  2.6291 0.1055860    
## guardian_other      1   10.08   10.08  2.4068 0.1214817    
## Residuals         472 1977.54    4.19                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Evaluate the backward-selected model with the train and test splits and
# keep the result as r3. lm_result() is defined elsewhere in this document;
# judging by the printed output it reports MSE_tr / MSE_te
# (presumably train and test mean squared error -- confirm against its definition).
r3 <- lm_result(back_m, train, test)

## MSE_tr MSE_te 
##   4.00   6.69

3.1.7. stepwise method

# Stepwise (bidirectional) selection by AIC: the search starts at null_m
# (score ~ 1, per the trace) and may add or drop one term per step, bounded
# below by null_m and above by full_m. trace = 1 prints every candidate table.
step_m <- step(
  object = null_m,
  scope = list(upper = full_m, lower = null_m),
  direction = "both",
  trace = 1
)
## Start:  AIC=951.23
## score ~ 1
## 
##                     Df Sum of Sq    RSS    AIC
## + failures           1    618.60 2756.1 853.20
## + higher             1    419.64 2955.0 887.63
## + Medu               1    290.20 3084.5 908.81
## + studytime          1    196.07 3178.6 923.66
## + school             1    192.96 3181.7 924.14
## + reason_reputation  1    152.85 3221.8 930.33
## + Fedu               1    144.90 3229.8 931.55
## + absences           1    140.50 3234.2 932.22
## + Walc               1    132.00 3242.7 933.52
## + Dalc               1    119.88 3254.8 935.36
## + Mjob_teacher       1     98.81 3275.8 938.55
## + Fjob_teacher       1     92.88 3281.8 939.44
## + sex                1     70.39 3304.3 942.81
## + address            1     66.20 3308.5 943.44
## + traveltime         1     65.29 3309.4 943.58
## + internet           1     61.09 3313.6 944.20
## + goout              1     60.46 3314.2 944.30
## + Mjob_health        1     60.27 3314.4 944.33
## + guardian_other     1     53.70 3321.0 945.30
## + famrel             1     49.16 3325.5 945.98
## + health             1     40.92 3333.7 947.20
## + age                1     39.50 3335.2 947.41
## + romantic           1     36.06 3338.6 947.92
## + freetime           1     32.75 3341.9 948.41
## + schoolsup          1     24.67 3350.0 949.60
## + reason_other       1     22.16 3352.5 949.97
## + activities         1     20.66 3354.0 950.19
## + reason_home        1     18.32 3356.3 950.54
## + paid               1     14.58 3360.1 951.09
## <none>                           3374.7 951.23
## + Fjob_health        1      5.98 3368.7 952.35
## + Fjob_services      1      5.44 3369.2 952.43
## + Mjob_other         1      3.22 3371.4 952.76
## + Fjob_other         1      1.73 3372.9 952.97
## + guardian_mother    1      1.67 3373.0 952.98
## + nursery            1      0.70 3374.0 953.13
## + famsize            1      0.51 3374.1 953.15
## + Pstatus            1      0.41 3374.2 953.17
## + Mjob_services      1      0.40 3374.3 953.17
## + famsup             1      0.07 3374.6 953.22
## 
## Step:  AIC=853.2
## score ~ failures
## 
##                     Df Sum of Sq    RSS    AIC
## + higher             1    198.42 2557.6 818.29
## + Medu               1    179.68 2576.4 821.89
## + school             1    133.83 2622.2 830.61
## + studytime          1    120.57 2635.5 833.10
## + reason_reputation  1     97.17 2658.9 837.46
## + Walc               1     94.39 2661.7 837.98
## + Dalc               1     73.59 2682.5 841.83
## + absences           1     69.83 2686.2 842.52
## + Fedu               1     68.22 2687.8 842.81
## + Mjob_health        1     58.66 2697.4 844.57
## + Fjob_teacher       1     55.35 2700.7 845.17
## + address            1     54.70 2701.4 845.29
## + Mjob_teacher       1     54.08 2702.0 845.41
## + goout              1     44.53 2711.5 847.15
## + internet           1     43.65 2712.4 847.31
## + traveltime         1     41.08 2715.0 847.78
## + sex                1     31.63 2724.4 849.49
## + famrel             1     25.66 2730.4 850.58
## + health             1     24.79 2731.3 850.73
## + reason_other       1     22.22 2733.8 851.20
## + romantic           1     19.99 2736.1 851.60
## + schoolsup          1     19.20 2736.9 851.74
## + activities         1     15.67 2740.4 852.38
## <none>                           2756.1 853.20
## + freetime           1      7.04 2749.0 853.93
## + guardian_mother    1      6.09 2750.0 854.10
## + guardian_other     1      5.13 2750.9 854.28
## + reason_home        1      4.74 2751.3 854.35
## + paid               1      3.52 2752.5 854.57
## + Fjob_services      1      3.29 2752.8 854.61
## + Mjob_other         1      2.87 2753.2 854.68
## + Pstatus            1      1.28 2754.8 854.97
## + Fjob_health        1      1.00 2755.1 855.02
## + nursery            1      0.36 2755.7 855.13
## + age                1      0.31 2755.7 855.14
## + famsize            1      0.28 2755.8 855.15
## + Mjob_services      1      0.18 2755.9 855.16
## + Fjob_other         1      0.06 2756.0 855.19
## + famsup             1      0.05 2756.0 855.19
## - failures           1    618.60 3374.7 951.23
## 
## Step:  AIC=818.29
## score ~ failures + higher
## 
##                     Df Sum of Sq    RSS    AIC
## + Medu               1    119.32 2438.3 796.69
## + school             1     94.85 2462.8 801.62
## + studytime          1     79.56 2478.1 804.67
## + reason_reputation  1     77.24 2480.4 805.14
## + Walc               1     72.57 2485.1 806.07
## + absences           1     59.75 2497.9 808.61
## + Dalc               1     55.02 2502.6 809.54
## + address            1     46.57 2511.1 811.21
## + Fjob_teacher       1     43.51 2514.1 811.81
## + Mjob_teacher       1     38.53 2519.1 812.79
## + Mjob_health        1     38.37 2519.3 812.82
## + Fedu               1     36.26 2521.4 813.23
## + schoolsup          1     31.29 2526.3 814.20
## + traveltime         1     29.93 2527.7 814.47
## + internet           1     29.49 2528.2 814.56
## + goout              1     29.14 2528.5 814.62
## + health             1     24.06 2533.6 815.62
## + famrel             1     22.77 2534.9 815.87
## + sex                1     22.27 2535.4 815.97
## + activities         1     14.36 2543.3 817.50
## + reason_other       1     13.34 2544.3 817.70
## + romantic           1     11.02 2546.6 818.15
## <none>                           2557.6 818.29
## + guardian_mother    1      9.73 2547.9 818.40
## + paid               1      9.24 2548.4 818.50
## + age                1      8.86 2548.8 818.57
## + Fjob_services      1      3.99 2553.6 819.51
## + reason_home        1      3.84 2553.8 819.54
## + Mjob_other         1      3.76 2553.9 819.56
## + famsup             1      2.29 2555.3 819.84
## + Pstatus            1      1.82 2555.8 819.93
## + guardian_other     1      1.02 2556.6 820.09
## + nursery            1      0.87 2556.8 820.12
## + freetime           1      0.76 2556.9 820.14
## + Mjob_services      1      0.68 2557.0 820.16
## + Fjob_health        1      0.51 2557.1 820.19
## + Fjob_other         1      0.02 2557.6 820.28
## + famsize            1      0.00 2557.6 820.29
## - higher             1    198.42 2756.1 853.20
## - failures           1    397.38 2955.0 887.63
## 
## Step:  AIC=796.69
## score ~ failures + higher + Medu
## 
##                     Df Sum of Sq    RSS    AIC
## + Walc               1     69.90 2368.4 784.32
## + studytime          1     68.80 2369.5 784.55
## + reason_reputation  1     60.99 2377.3 786.17
## + Dalc               1     57.93 2380.4 786.81
## + absences           1     55.24 2383.1 787.36
## + school             1     53.48 2384.8 787.73
## + sex                1     39.61 2398.7 790.59
## + goout              1     29.70 2408.6 792.63
## + schoolsup          1     25.79 2412.5 793.43
## + health             1     25.35 2413.0 793.52
## + address            1     23.22 2415.1 793.96
## + paid               1     22.35 2416.0 794.14
## + Fjob_teacher       1     17.57 2420.7 795.11
## + famrel             1     17.43 2420.9 795.14
## + guardian_mother    1     14.33 2424.0 795.77
## + age                1     13.14 2425.2 796.01
## + Mjob_health        1     12.27 2426.1 796.19
## <none>                           2438.3 796.69
## + reason_other       1      9.49 2428.8 796.76
## + romantic           1      9.31 2429.0 796.79
## + famsup             1      7.60 2430.7 797.14
## + internet           1      7.32 2431.0 797.20
## + traveltime         1      6.34 2432.0 797.40
## + activities         1      5.18 2433.1 797.63
## + Mjob_services      1      4.67 2433.7 797.74
## + Pstatus            1      4.60 2433.7 797.75
## + nursery            1      3.99 2434.3 797.88
## + Mjob_teacher       1      3.91 2434.4 797.89
## + Fjob_services      1      3.87 2434.4 797.90
## + Fjob_other         1      1.95 2436.4 798.29
## + reason_home        1      1.60 2436.7 798.36
## + Fedu               1      1.14 2437.2 798.45
## + Fjob_health        1      0.99 2437.3 798.48
## + freetime           1      0.97 2437.3 798.49
## + guardian_other     1      0.50 2437.8 798.58
## + Mjob_other         1      0.38 2437.9 798.61
## + famsize            1      0.08 2438.2 798.67
## - Medu               1    119.32 2557.6 818.29
## - higher             1    138.06 2576.4 821.89
## - failures           1    353.22 2791.5 861.52
## 
## Step:  AIC=784.32
## score ~ failures + higher + Medu + Walc
## 
##                     Df Sum of Sq    RSS    AIC
## + reason_reputation  1     58.12 2310.3 774.04
## + school             1     56.69 2311.7 774.35
## + studytime          1     48.03 2320.4 776.19
## + absences           1     38.68 2329.7 778.18
## + schoolsup          1     34.18 2334.2 779.14
## + address            1     22.59 2345.8 781.58
## + paid               1     19.42 2349.0 782.25
## + age                1     16.67 2351.8 782.83
## + health             1     16.16 2352.3 782.93
## + Mjob_health        1     14.81 2353.6 783.22
## + sex                1     14.22 2354.2 783.34
## + guardian_mother    1     12.23 2356.2 783.76
## + internet           1     12.03 2356.4 783.80
## + famrel             1     11.70 2356.7 783.87
## + Fjob_teacher       1     11.69 2356.7 783.87
## + famsup             1     10.27 2358.2 784.17
## + Pstatus            1      9.66 2358.8 784.30
## + romantic           1      9.59 2358.8 784.31
## <none>                           2368.4 784.32
## + Dalc               1      9.06 2359.3 784.42
## + reason_other       1      8.02 2360.4 784.64
## + nursery            1      8.00 2360.4 784.64
## + goout              1      6.99 2361.4 784.86
## + activities         1      5.49 2362.9 785.17
## + Mjob_teacher       1      5.28 2363.1 785.21
## + traveltime         1      5.01 2363.4 785.27
## + Mjob_services      1      2.68 2365.7 785.76
## + reason_home        1      2.65 2365.8 785.76
## + Fjob_other         1      1.52 2366.9 786.00
## + Fjob_health        1      1.50 2366.9 786.00
## + Fjob_services      1      1.15 2367.3 786.08
## + guardian_other     1      1.14 2367.3 786.08
## + famsize            1      0.82 2367.6 786.14
## + freetime           1      0.16 2368.2 786.28
## + Fedu               1      0.15 2368.3 786.29
## + Mjob_other         1      0.00 2368.4 786.32
## - Walc               1     69.90 2438.3 796.69
## - Medu               1    116.65 2485.1 806.07
## - higher             1    121.36 2489.8 807.00
## - failures           1    338.95 2707.4 848.39
## 
## Step:  AIC=774.04
## score ~ failures + higher + Medu + Walc + reason_reputation
## 
##                     Df Sum of Sq    RSS    AIC
## + school             1     43.40 2266.9 766.67
## + absences           1     41.72 2268.6 767.04
## + schoolsup          1     36.57 2273.7 768.16
## + studytime          1     33.38 2276.9 768.85
## + address            1     24.56 2285.7 770.76
## + reason_home        1     17.70 2292.6 772.24
## + age                1     14.33 2296.0 772.97
## + Fjob_teacher       1     13.80 2296.5 773.08
## + sex                1     12.82 2297.5 773.29
## + paid               1     12.44 2297.8 773.37
## + Pstatus            1     11.68 2298.6 773.54
## + famrel             1     10.73 2299.6 773.74
## + health             1     10.43 2299.9 773.81
## + Mjob_health        1     10.42 2299.9 773.81
## + famsup             1     10.36 2299.9 773.82
## <none>                           2310.3 774.04
## + guardian_mother    1      9.19 2301.1 774.07
## + nursery            1      9.07 2301.2 774.10
## + Mjob_teacher       1      8.76 2301.5 774.16
## + internet           1      7.38 2302.9 774.46
## + romantic           1      6.91 2303.4 774.56
## + goout              1      6.61 2303.7 774.63
## + Dalc               1      5.13 2305.2 774.94
## + traveltime         1      4.22 2306.1 775.14
## + Mjob_services      1      3.65 2306.6 775.26
## + Fjob_health        1      2.61 2307.7 775.48
## + reason_other       1      1.91 2308.4 775.63
## + guardian_other     1      1.61 2308.7 775.70
## + activities         1      1.01 2309.3 775.83
## + Fjob_services      1      0.90 2309.4 775.85
## + Fjob_other         1      0.88 2309.4 775.85
## + famsize            1      0.38 2309.9 775.96
## + Fedu               1      0.22 2310.1 775.99
## + freetime           1      0.21 2310.1 776.00
## + Mjob_other         1      0.04 2310.3 776.03
## - reason_reputation  1     58.12 2368.4 784.32
## - Walc               1     67.04 2377.3 786.17
## - Medu               1    100.99 2411.3 793.18
## - higher             1    111.59 2421.9 795.34
## - failures           1    318.74 2629.0 835.89
## 
## Step:  AIC=766.67
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school
## 
##                     Df Sum of Sq    RSS    AIC
## + absences           1    51.477 2215.4 757.33
## + schoolsup          1    49.468 2217.4 757.77
## + studytime          1    29.300 2237.6 762.25
## + sex                1    16.814 2250.1 765.00
## + age                1    15.971 2250.9 765.18
## + health             1    14.690 2252.2 765.46
## + paid               1    13.079 2253.8 765.81
## + Fjob_teacher       1    12.614 2254.3 765.92
## + Pstatus            1    11.309 2255.6 766.20
## + guardian_mother    1    10.942 2255.9 766.28
## + reason_home        1    10.475 2256.4 766.39
## + Mjob_health        1    10.473 2256.4 766.39
## + famsup             1    10.234 2256.7 766.44
## + Mjob_teacher       1     9.685 2257.2 766.56
## <none>                           2266.9 766.67
## + address            1     8.939 2257.9 766.72
## + famrel             1     8.677 2258.2 766.78
## + nursery            1     7.421 2259.5 767.05
## + romantic           1     6.458 2260.4 767.26
## + Mjob_services      1     6.302 2260.6 767.30
## + goout              1     4.817 2262.1 767.62
## + Dalc               1     4.389 2262.5 767.72
## + Fjob_health        1     3.459 2263.4 767.92
## + internet           1     2.899 2264.0 768.04
## + guardian_other     1     2.786 2264.1 768.07
## + traveltime         1     0.615 2266.3 768.54
## + famsize            1     0.577 2266.3 768.55
## + activities         1     0.479 2266.4 768.57
## + Fedu               1     0.236 2266.7 768.62
## + Fjob_services      1     0.225 2266.7 768.62
## + reason_other       1     0.191 2266.7 768.63
## + Mjob_other         1     0.189 2266.7 768.63
## + freetime           1     0.115 2266.8 768.65
## + Fjob_other         1     0.106 2266.8 768.65
## - school             1    43.400 2310.3 774.04
## - reason_reputation  1    44.832 2311.7 774.35
## - Medu               1    67.982 2334.9 779.27
## - Walc               1    70.118 2337.0 779.72
## - higher             1    99.098 2366.0 785.81
## - failures           1   312.856 2579.8 828.54
## 
## Step:  AIC=757.33
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences
## 
##                     Df Sum of Sq    RSS    AIC
## + schoolsup          1    57.743 2157.7 746.28
## + studytime          1    26.607 2188.8 753.36
## + age                1    25.633 2189.8 753.58
## + sex                1    18.163 2197.2 755.26
## + health             1    16.186 2199.2 755.70
## + paid               1    14.313 2201.1 756.12
## + reason_home        1    13.486 2201.9 756.31
## + address            1    10.728 2204.7 756.93
## + guardian_mother    1     9.938 2205.5 757.11
## + Fjob_teacher       1     9.875 2205.5 757.12
## + Mjob_teacher       1     9.601 2205.8 757.18
## <none>                           2215.4 757.33
## + famsup             1     8.044 2207.4 757.53
## + Pstatus            1     7.754 2207.7 757.59
## + nursery            1     7.468 2207.9 757.66
## + famrel             1     6.082 2209.3 757.97
## + Mjob_health        1     5.207 2210.2 758.16
## + Mjob_services      1     4.483 2210.9 758.33
## + romantic           1     3.780 2211.6 758.48
## + Fjob_health        1     3.133 2212.3 758.63
## + goout              1     3.010 2212.4 758.65
## + internet           1     2.963 2212.4 758.67
## + Dalc               1     2.266 2213.2 758.82
## + Fjob_services      1     0.761 2214.7 759.16
## + guardian_other     1     0.721 2214.7 759.17
## + traveltime         1     0.477 2214.9 759.22
## + activities         1     0.465 2214.9 759.22
## + reason_other       1     0.463 2214.9 759.22
## + Fjob_other         1     0.366 2215.1 759.24
## + freetime           1     0.298 2215.1 759.26
## + famsize            1     0.121 2215.3 759.30
## + Mjob_other         1     0.008 2215.4 759.32
## + Fedu               1     0.004 2215.4 759.33
## - reason_reputation  1    46.503 2261.9 765.59
## - absences           1    51.477 2266.9 766.67
## - Walc               1    51.573 2267.0 766.69
## - school             1    53.159 2268.6 767.04
## - Medu               1    61.833 2277.2 768.92
## - higher             1    93.730 2309.2 775.80
## - failures           1   277.993 2493.4 813.72
## 
## Step:  AIC=746.28
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup
## 
##                     Df Sum of Sq    RSS    AIC
## + studytime          1    28.850 2128.8 741.63
## + sex                1    28.189 2129.5 741.78
## + age                1    17.308 2140.4 744.30
## + Fjob_teacher       1    14.341 2143.3 744.99
## + health             1    14.143 2143.5 745.03
## + reason_home        1    13.743 2143.9 745.12
## + paid               1    12.801 2144.9 745.34
## + guardian_mother    1    11.636 2146.0 745.61
## + address            1     9.144 2148.5 746.18
## <none>                           2157.7 746.28
## + romantic           1     6.569 2151.1 746.77
## + Mjob_teacher       1     6.427 2151.2 746.81
## + Pstatus            1     6.308 2151.4 746.83
## + famrel             1     5.958 2151.7 746.91
## + famsup             1     5.721 2151.9 746.97
## + nursery            1     5.496 2152.2 747.02
## + Mjob_health        1     3.114 2154.6 747.57
## + Mjob_services      1     3.078 2154.6 747.57
## + goout              1     3.004 2154.7 747.59
## + Fjob_services      1     1.785 2155.9 747.87
## + internet           1     1.412 2156.3 747.96
## + guardian_other     1     1.260 2156.4 747.99
## + Fjob_health        1     1.068 2156.6 748.04
## + Dalc               1     1.023 2156.7 748.05
## + traveltime         1     0.490 2157.2 748.17
## + freetime           1     0.209 2157.5 748.23
## + Fedu               1     0.164 2157.5 748.24
## + Fjob_other         1     0.124 2157.6 748.25
## + reason_other       1     0.061 2157.6 748.27
## + Mjob_other         1     0.032 2157.6 748.27
## + famsize            1     0.019 2157.7 748.28
## + activities         1     0.009 2157.7 748.28
## - reason_reputation  1    47.447 2205.1 755.03
## - Medu               1    51.451 2209.1 755.92
## - schoolsup          1    57.743 2215.4 757.33
## - absences           1    59.752 2217.4 757.77
## - Walc               1    60.425 2218.1 757.92
## - school             1    69.317 2227.0 759.90
## - higher             1   103.706 2261.4 767.47
## - failures           1   261.929 2419.6 800.88
## 
## Step:  AIC=741.63
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime
## 
##                     Df Sum of Sq    RSS    AIC
## + sex                1    19.584 2109.2 739.06
## + Fjob_teacher       1    17.150 2111.7 739.63
## + age                1    13.928 2114.9 740.39
## + health             1    12.633 2116.2 740.69
## + reason_home        1    12.610 2116.2 740.69
## + paid               1    12.061 2116.8 740.82
## + guardian_mother    1     9.913 2118.9 741.32
## <none>                           2128.8 741.63
## + romantic           1     8.532 2120.3 741.65
## + address            1     8.441 2120.4 741.67
## + famsup             1     8.214 2120.6 741.72
## + famrel             1     7.068 2121.8 741.99
## + Mjob_teacher       1     6.588 2122.2 742.10
## + Pstatus            1     6.286 2122.5 742.17
## + nursery            1     5.334 2123.5 742.39
## + Mjob_health        1     4.314 2124.5 742.63
## + Mjob_services      1     3.525 2125.3 742.81
## + goout              1     3.154 2125.7 742.90
## + Fjob_health        1     2.310 2126.5 743.09
## + Fjob_services      1     2.053 2126.8 743.15
## + guardian_other     1     1.566 2127.3 743.27
## + internet           1     1.494 2127.3 743.28
## + Dalc               1     1.375 2127.4 743.31
## + traveltime         1     0.523 2128.3 743.51
## + Fjob_other         1     0.260 2128.6 743.57
## + Fedu               1     0.247 2128.6 743.57
## + freetime           1     0.077 2128.8 743.61
## + famsize            1     0.029 2128.8 743.62
## + Mjob_other         1     0.013 2128.8 743.63
## + activities         1     0.000 2128.8 743.63
## + reason_other       1     0.000 2128.8 743.63
## - studytime          1    28.850 2157.7 746.28
## - reason_reputation  1    35.796 2164.6 747.87
## - Walc               1    45.809 2174.6 750.15
## - Medu               1    49.236 2178.1 750.93
## - absences           1    56.913 2185.7 752.66
## - schoolsup          1    59.986 2188.8 753.36
## - school             1    64.245 2193.1 754.32
## - higher             1    90.630 2219.4 760.23
## - failures           1   252.407 2381.2 794.98
## 
## Step:  AIC=739.06
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex
## 
##                     Df Sum of Sq    RSS    AIC
## + Fjob_teacher       1    19.056 2090.2 736.58
## + reason_home        1    13.916 2095.3 737.79
## + romantic           1    13.039 2096.2 738.00
## + famsup             1    11.007 2098.2 738.48
## + guardian_mother    1    10.598 2098.6 738.58
## + age                1    10.454 2098.8 738.61
## + health             1    10.414 2098.8 738.62
## + paid               1    10.173 2099.1 738.68
## + famrel             1    10.124 2099.1 738.69
## <none>                           2109.2 739.06
## + address            1     7.761 2101.5 739.24
## + Mjob_teacher       1     7.755 2101.5 739.24
## + Pstatus            1     7.314 2101.9 739.35
## + nursery            1     6.050 2103.2 739.65
## + goout              1     4.645 2104.6 739.98
## + Mjob_health        1     3.562 2105.7 740.23
## + Mjob_services      1     3.400 2105.8 740.27
## + Fjob_health        1     2.291 2106.9 740.53
## + guardian_other     1     2.104 2107.1 740.57
## + Fjob_services      1     1.830 2107.4 740.64
## + internet           1     1.339 2107.9 740.75
## + Dalc               1     0.481 2108.8 740.95
## + Fedu               1     0.248 2109.0 741.01
## + famsize            1     0.213 2109.0 741.01
## + activities         1     0.200 2109.0 741.02
## + traveltime         1     0.148 2109.1 741.03
## + Fjob_other         1     0.113 2109.1 741.04
## + freetime           1     0.057 2109.2 741.05
## + Mjob_other         1     0.044 2109.2 741.05
## + reason_other       1     0.012 2109.2 741.06
## - sex                1    19.584 2128.8 741.63
## - studytime          1    20.245 2129.5 741.78
## - Walc               1    26.882 2136.1 743.32
## - reason_reputation  1    35.466 2144.7 745.30
## - Medu               1    56.574 2165.8 750.14
## - absences           1    59.467 2168.7 750.80
## - schoolsup          1    68.078 2177.3 752.76
## - school             1    71.418 2180.7 753.51
## - higher             1    86.692 2195.9 756.96
## - failures           1   236.378 2345.6 789.54
## 
## Step:  AIC=736.58
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher
## 
##                     Df Sum of Sq    RSS    AIC
## + romantic           1    14.678 2075.5 735.10
## + reason_home        1    14.002 2076.2 735.26
## + famrel             1    13.108 2077.1 735.47
## + health             1    10.344 2079.8 736.13
## + famsup             1    10.313 2079.9 736.14
## + guardian_mother    1    10.192 2080.0 736.17
## + age                1     9.417 2080.8 736.35
## <none>                           2090.2 736.58
## + paid               1     8.364 2081.8 736.60
## + address            1     8.345 2081.8 736.60
## + Pstatus            1     7.892 2082.3 736.71
## + Mjob_health        1     5.086 2085.1 737.38
## + nursery            1     4.706 2085.5 737.47
## + goout              1     4.370 2085.8 737.55
## + Mjob_teacher       1     4.201 2086.0 737.59
## + Mjob_services      1     2.895 2087.3 737.90
## + internet           1     2.325 2087.9 738.03
## + Fjob_other         1     2.217 2088.0 738.06
## + guardian_other     1     1.797 2088.4 738.16
## + Fjob_health        1     1.148 2089.0 738.31
## + Dalc               1     0.626 2089.6 738.43
## + Fjob_services      1     0.511 2089.7 738.46
## + famsize            1     0.488 2089.7 738.47
## + Fedu               1     0.406 2089.8 738.49
## + traveltime         1     0.285 2089.9 738.51
## + activities         1     0.192 2090.0 738.54
## + Mjob_other         1     0.115 2090.1 738.55
## + freetime           1     0.077 2090.1 738.56
## + reason_other       1     0.009 2090.2 738.58
## - Fjob_teacher       1    19.056 2109.2 739.06
## - sex                1    21.490 2111.7 739.63
## - Walc               1    22.316 2112.5 739.83
## - studytime          1    22.386 2112.6 739.84
## - reason_reputation  1    37.043 2127.2 743.26
## - Medu               1    40.669 2130.8 744.10
## - absences           1    55.873 2146.1 747.61
## - school             1    70.197 2160.4 750.90
## - schoolsup          1    74.182 2164.4 751.81
## - higher             1    85.868 2176.1 754.47
## - failures           1   228.828 2319.0 785.90
## 
## Step:  AIC=735.1
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher + 
##     romantic
## 
##                     Df Sum of Sq    RSS    AIC
## + reason_home        1    14.287 2061.2 733.69
## + famrel             1    13.422 2062.1 733.89
## + age                1    12.014 2063.5 734.23
## + guardian_mother    1    11.038 2064.5 734.47
## + famsup             1    10.889 2064.6 734.50
## + health             1    10.021 2065.5 734.71
## <none>                           2075.5 735.10
## + paid               1     8.314 2067.2 735.12
## + address            1     7.685 2067.8 735.27
## + Pstatus            1     7.052 2068.4 735.42
## + Mjob_health        1     5.733 2069.8 735.73
## + goout              1     4.524 2071.0 736.02
## + nursery            1     4.279 2071.2 736.08
## + Mjob_services      1     3.923 2071.6 736.16
## + Mjob_teacher       1     3.869 2071.6 736.18
## + internet           1     3.674 2071.8 736.22
## + Fjob_other         1     2.710 2072.8 736.45
## - romantic           1    14.678 2090.2 736.58
## + Fjob_health        1     1.072 2074.4 736.84
## + Fedu               1     1.035 2074.5 736.85
## + guardian_other     1     0.687 2074.8 736.94
## + Fjob_services      1     0.650 2074.9 736.95
## + activities         1     0.418 2075.1 737.00
## + famsize            1     0.372 2075.1 737.01
## + Mjob_other         1     0.314 2075.2 737.02
## + traveltime         1     0.226 2075.3 737.05
## + Dalc               1     0.208 2075.3 737.05
## + freetime           1     0.194 2075.3 737.05
## + reason_other       1     0.027 2075.5 737.09
## - Fjob_teacher       1    20.695 2096.2 738.00
## - Walc               1    21.070 2096.6 738.09
## - studytime          1    23.877 2099.4 738.75
## - sex                1    26.586 2102.1 739.39
## - reason_reputation  1    33.656 2109.2 741.05
## - Medu               1    40.082 2115.6 742.55
## - absences           1    51.165 2126.7 745.13
## - school             1    70.304 2145.8 749.56
## - higher             1    79.898 2155.4 751.76
## - schoolsup          1    80.444 2155.9 751.88
## - failures           1   223.191 2298.7 783.56
## 
## Step:  AIC=733.69
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher + 
##     romantic + reason_home
## 
##                     Df Sum of Sq    RSS    AIC
## + famrel             1    14.354 2046.9 732.24
## + famsup             1    11.780 2049.4 732.86
## + guardian_mother    1    10.596 2050.6 733.14
## + age                1     9.982 2051.2 733.29
## + paid               1     8.873 2052.3 733.56
## + health             1     8.448 2052.8 733.66
## <none>                           2061.2 733.69
## + Pstatus            1     7.331 2053.9 733.93
## + Mjob_health        1     5.616 2055.6 734.34
## + address            1     5.478 2055.7 734.37
## + goout              1     4.281 2056.9 734.66
## + nursery            1     4.160 2057.1 734.69
## + Mjob_teacher       1     4.091 2057.1 734.71
## + internet           1     3.376 2057.8 734.88
## + Mjob_services      1     3.187 2058.0 734.92
## - reason_home        1    14.287 2075.5 735.10
## + Fjob_other         1     1.942 2059.3 735.22
## - romantic           1    14.963 2076.2 735.26
## + reason_other       1     1.175 2060.0 735.41
## + activities         1     1.130 2060.1 735.42
## + guardian_other     1     0.997 2060.2 735.45
## + Fedu               1     0.860 2060.4 735.48
## + Fjob_health        1     0.795 2060.4 735.50
## + freetime           1     0.489 2060.7 735.57
## + Dalc               1     0.407 2060.8 735.59
## + Fjob_services      1     0.389 2060.8 735.59
## + famsize            1     0.222 2061.0 735.63
## + Mjob_other         1     0.028 2061.2 735.68
## + traveltime         1     0.000 2061.2 735.69
## - Fjob_teacher       1    20.802 2082.0 736.65
## - Walc               1    21.631 2082.8 736.84
## - studytime          1    22.560 2083.8 737.06
## - sex                1    28.160 2089.4 738.39
## - Medu               1    37.284 2098.5 740.54
## - reason_reputation  1    45.198 2106.4 742.40
## - absences           1    54.338 2115.6 744.54
## - school             1    60.584 2121.8 746.00
## - higher             1    78.981 2140.2 750.26
## - schoolsup          1    81.120 2142.3 750.76
## - failures           1   209.141 2270.4 779.43
## 
## Step:  AIC=732.24
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher + 
##     romantic + reason_home + famrel
## 
##                     Df Sum of Sq    RSS    AIC
## + famsup             1    12.315 2034.5 731.25
## + health             1    11.747 2035.1 731.39
## + guardian_mother    1    11.121 2035.7 731.54
## + paid               1    10.059 2036.8 731.80
## + age                1     9.419 2037.5 731.96
## <none>                           2046.9 732.24
## + goout              1     6.726 2040.1 732.61
## + address            1     6.522 2040.3 732.66
## + Pstatus            1     6.493 2040.4 732.67
## + Mjob_health        1     6.445 2040.4 732.68
## + Mjob_teacher       1     4.581 2042.3 733.13
## + nursery            1     4.330 2042.5 733.19
## + Mjob_services      1     3.910 2043.0 733.29
## + internet           1     2.349 2044.5 733.67
## - famrel             1    14.354 2061.2 733.69
## + Fjob_other         1     1.804 2045.1 733.80
## - reason_home        1    15.218 2062.1 733.89
## - romantic           1    15.301 2062.2 733.91
## + Fedu               1     1.124 2045.7 733.96
## + Fjob_health        1     0.862 2046.0 734.03
## + reason_other       1     0.844 2046.0 734.03
## + activities         1     0.825 2046.0 734.04
## + guardian_other     1     0.803 2046.1 734.04
## + Fjob_services      1     0.442 2046.4 734.13
## + famsize            1     0.311 2046.5 734.16
## + Dalc               1     0.140 2046.7 734.20
## + freetime           1     0.113 2046.8 734.21
## + Mjob_other         1     0.044 2046.8 734.22
## + traveltime         1     0.004 2046.9 734.23
## - Walc               1    17.100 2064.0 734.35
## - studytime          1    23.424 2070.3 735.86
## - Fjob_teacher       1    24.085 2070.9 736.01
## - sex                1    32.761 2079.6 738.08
## - Medu               1    35.175 2082.0 738.65
## - reason_reputation  1    44.859 2091.7 740.94
## - absences           1    50.316 2097.2 742.23
## - school             1    57.344 2104.2 743.88
## - higher             1    78.534 2125.4 748.83
## - schoolsup          1    82.637 2129.5 749.79
## - failures           1   200.374 2247.2 776.37
## 
## Step:  AIC=731.25
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher + 
##     romantic + reason_home + famrel + famsup
## 
##                     Df Sum of Sq    RSS    AIC
## + guardian_mother    1    11.177 2023.4 730.53
## + health             1    10.891 2023.7 730.60
## + paid               1     8.631 2025.9 731.15
## <none>                           2034.5 731.25
## + age                1     7.311 2027.2 731.48
## + Mjob_health        1     6.950 2027.6 731.56
## + Pstatus            1     6.635 2027.9 731.64
## + goout              1     6.566 2028.0 731.66
## + address            1     5.611 2028.9 731.89
## - famsup             1    12.315 2046.9 732.24
## + Mjob_teacher       1     4.166 2030.4 732.24
## + nursery            1     3.892 2030.7 732.31
## + Mjob_services      1     2.736 2031.8 732.59
## + internet           1     2.637 2031.9 732.61
## - famrel             1    14.888 2049.4 732.86
## + Fjob_other         1     1.141 2033.4 732.98
## + activities         1     0.739 2033.8 733.07
## + reason_other       1     0.642 2033.9 733.10
## + guardian_other     1     0.618 2033.9 733.10
## - romantic           1    15.944 2050.5 733.11
## + Fjob_health        1     0.515 2034.0 733.13
## + Fedu               1     0.440 2034.1 733.15
## - reason_home        1    16.175 2050.7 733.17
## + Fjob_services      1     0.272 2034.3 733.19
## + famsize            1     0.223 2034.3 733.20
## + freetime           1     0.171 2034.4 733.21
## + Dalc               1     0.072 2034.5 733.24
## + Mjob_other         1     0.030 2034.5 733.25
## + traveltime         1     0.013 2034.5 733.25
## - Walc               1    17.371 2051.9 733.45
## - Fjob_teacher       1    23.348 2057.9 734.89
## - studytime          1    25.633 2060.2 735.44
## - sex                1    36.642 2071.2 738.07
## - Medu               1    39.976 2074.5 738.87
## - reason_reputation  1    44.698 2079.2 739.99
## - absences           1    47.502 2082.1 740.66
## - school             1    56.482 2091.0 742.78
## - schoolsup          1    79.797 2114.3 748.26
## - higher             1    81.248 2115.8 748.60
## - failures           1   195.019 2229.6 774.47
## 
## Step:  AIC=730.53
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher + 
##     romantic + reason_home + famrel + famsup + guardian_mother
## 
##                     Df Sum of Sq    RSS    AIC
## + health             1    10.532 2012.8 729.95
## <none>                           2023.4 730.53
## + paid               1     7.655 2015.7 730.66
## + age                1     6.937 2016.4 730.84
## + goout              1     6.391 2017.0 730.97
## + Mjob_health        1     5.881 2017.5 731.10
## + Mjob_teacher       1     5.824 2017.5 731.11
## + address            1     5.621 2017.8 731.16
## - guardian_mother    1    11.177 2034.5 731.25
## + guardian_other     1     5.162 2018.2 731.27
## + Pstatus            1     4.836 2018.5 731.35
## - famsup             1    12.370 2035.7 731.54
## + nursery            1     2.903 2020.5 731.82
## + Mjob_services      1     2.306 2021.1 731.97
## + internet           1     1.940 2021.4 732.06
## + Fjob_other         1     1.741 2021.6 732.11
## + Fedu               1     1.420 2022.0 732.19
## + activities         1     1.073 2022.3 732.27
## - famrel             1    15.426 2038.8 732.28
## + Fjob_health        1     0.812 2022.6 732.33
## - reason_home        1    15.713 2039.1 732.35
## + reason_other       1     0.699 2022.7 732.36
## + Fjob_services      1     0.533 2022.8 732.40
## + Dalc               1     0.340 2023.0 732.45
## + famsize            1     0.311 2023.1 732.46
## + freetime           1     0.275 2023.1 732.47
## + Mjob_other         1     0.189 2023.2 732.49
## + traveltime         1     0.025 2023.3 732.53
## - Walc               1    16.652 2040.0 732.58
## - romantic           1    16.833 2040.2 732.63
## - Fjob_teacher       1    22.996 2046.4 734.12
## - studytime          1    23.871 2047.2 734.33
## - sex                1    37.818 2061.2 737.68
## - reason_reputation  1    41.699 2065.1 738.61
## - Medu               1    42.342 2065.7 738.76
## - absences           1    46.522 2069.9 739.76
## - school             1    58.872 2082.2 742.70
## - schoolsup          1    81.930 2105.3 748.14
## - higher             1    83.610 2107.0 748.54
## - failures           1   197.732 2221.1 774.59
## 
## Step:  AIC=729.95
## score ~ failures + higher + Medu + Walc + reason_reputation + 
##     school + absences + schoolsup + studytime + sex + Fjob_teacher + 
##     romantic + reason_home + famrel + famsup + guardian_mother + 
##     health
## 
##                     Df Sum of Sq    RSS    AIC
## <none>                           2012.8 729.95
## + goout              1     7.556 2005.3 730.10
## + Mjob_health        1     6.878 2006.0 730.26
## + age                1     6.818 2006.0 730.28
## + paid               1     6.474 2006.4 730.36
## + Mjob_teacher       1     6.352 2006.5 730.39
## - health             1    10.532 2023.4 730.53
## - guardian_mother    1    10.818 2023.7 730.60
## + address            1     5.402 2007.4 730.63
## + guardian_other     1     4.979 2007.9 730.73
## - famsup             1    11.524 2024.4 730.78
## + Pstatus            1     4.635 2008.2 730.82
## + nursery            1     2.736 2010.1 731.28
## - Walc               1    13.729 2026.6 731.31
## - reason_home        1    13.949 2026.8 731.37
## + Fjob_other         1     2.080 2010.8 731.44
## + Mjob_services      1     1.908 2010.9 731.49
## + internet           1     1.471 2011.4 731.59
## + activities         1     1.277 2011.6 731.64
## + Fjob_services      1     1.036 2011.8 731.70
## + Fedu               1     1.034 2011.8 731.70
## + reason_other       1     0.537 2012.3 731.82
## + freetime           1     0.510 2012.3 731.83
## + Dalc               1     0.434 2012.4 731.85
## + Fjob_health        1     0.268 2012.6 731.89
## + Mjob_other         1     0.229 2012.6 731.90
## + famsize            1     0.194 2012.7 731.91
## + traveltime         1     0.127 2012.7 731.92
## - romantic           1    16.458 2029.3 731.98
## - famrel             1    18.601 2031.4 732.50
## - studytime          1    23.178 2036.0 733.61
## - Fjob_teacher       1    23.315 2036.2 733.64
## - sex                1    34.957 2047.8 736.46
## - reason_reputation  1    35.952 2048.8 736.70
## - Medu               1    41.864 2054.7 738.12
## - absences           1    47.059 2059.9 739.37
## - school             1    62.606 2075.4 743.09
## - schoolsup          1    79.211 2092.1 747.02
## - higher             1    83.885 2096.7 748.12
## - failures           1   194.240 2207.1 773.46
# Analysis-of-variance table for the terms kept by the stepwise selection
anova(step_m)
## Analysis of Variance Table
## 
## Response: score
##                    Df  Sum Sq Mean Sq  F value    Pr(>F)    
## failures            1  618.60  618.60 146.2868 < 2.2e-16 ***
## higher              1  198.42  198.42  46.9236 2.290e-11 ***
## Medu                1  119.32  119.32  28.2165 1.669e-07 ***
## Walc                1   69.90   69.90  16.5308 5.600e-05 ***
## reason_reputation   1   58.12   58.12  13.7447 0.0002340 ***
## school              1   43.40   43.40  10.2633 0.0014481 ** 
## absences            1   51.48   51.48  12.1734 0.0005298 ***
## schoolsup           1   57.74   57.74  13.6551 0.0002452 ***
## studytime           1   28.85   28.85   6.8226 0.0092857 ** 
## sex                 1   19.58   19.58   4.6313 0.0318958 *  
## Fjob_teacher        1   19.06   19.06   4.5063 0.0342860 *  
## romantic            1   14.68   14.68   3.4712 0.0630620 .  
## reason_home         1   14.29   14.29   3.3786 0.0666712 .  
## famrel              1   14.35   14.35   3.3944 0.0660389 .  
## famsup              1   12.31   12.31   2.9122 0.0885662 .  
## guardian_mother     1   11.18   11.18   2.6431 0.1046636    
## health              1   10.53   10.53   2.4907 0.1151855    
## Residuals         476 2012.84    4.23                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Evaluate the stepwise model with lm_result() (defined earlier in the file);
# the printed output below reports the training and test MSE.
r4 <- lm_result(step_m, train, test)

## MSE_tr MSE_te 
##   4.07   6.58

3.1.8 Summary

# Stack the train/test MSE of the four linear-model variants into one table
result <- do.call(rbind, list(r1, r2, r3, r4))
rownames(result) <- c(
  "full",
  "forward",
  "backward",
  "stepwise"
)
result # forward and stepwise give the lowest test MSE
##            MSE_tr   MSE_te
## full     3.926032 6.907939
## forward  4.074577 6.577594
## backward 4.003125 6.690604
## stepwise 4.074577 6.577594

- stepwise regression 모델의 AIC 변화

## AIC trajectory of the stepwise regression model
par(mfrow = c(1,1)) # draw figures in a 1-row, 1-column layout
step_m$anova$AIC # AIC recorded after each variable addition / removal
##  [1] 951.2276 853.1969 818.2858 796.6850 784.3158 774.0416 766.6733
##  [8] 757.3261 746.2798 741.6299 739.0643 736.5811 735.0997 733.6874
## [15] 732.2353 731.2543 730.5330 729.9549
step_m$anova$Step # variable added (+) or removed (-) at each step
##  [1] ""                    "+ failures"          "+ higher"           
##  [4] "+ Medu"              "+ Walc"              "+ reason_reputation"
##  [7] "+ school"            "+ absences"          "+ schoolsup"        
## [10] "+ studytime"         "+ sex"               "+ Fjob_teacher"     
## [13] "+ romantic"          "+ reason_home"       "+ famrel"           
## [16] "+ famsup"            "+ guardian_mother"   "+ health"
# Plot the AIC reached at every step and annotate each point with the
# action (variable added / removed) taken at that step.
plot(
  x = step_m$anova$AIC,
  main = "AIC (stepwise regression)",
  xlab = "Step",
  ylab = "AIC",
  pch = 19
)
text(
  x = step_m$anova$AIC,
  labels = step_m$anova$Step,
  cex = 0.8,
  col = "red",
  pos = 3
)

- feature importance

## Relative importance of each predictor in the stepwise model

# rela = TRUE normalises the importance metrics so that they sum to 100%.
# TRUE is spelled out because T is an ordinary, reassignable variable.
imp <- calc.relimp(step_m, rela = TRUE)
plot(imp)

imp
## Response variable: score 
## Total response variance: 6.845146 
## Analysis based on 494 observations 
## 
## 17 Regressors: 
## failures higher Medu Walc reason_reputation school absences schoolsup studytime sex Fjob_teacher romantic reason_home famrel famsup guardian_mother health 
## Proportion of variance explained by model: 40.35%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##                           lmg
## failures          0.266115242
## higher            0.150577530
## Medu              0.096897186
## Walc              0.040111380
## reason_reputation 0.057980745
## school            0.079592331
## absences          0.059429889
## schoolsup         0.042064926
## studytime         0.059718225
## sex               0.034703422
## Fjob_teacher      0.034261209
## romantic          0.016636493
## reason_home       0.013572503
## famrel            0.021687831
## famsup            0.005286136
## guardian_mother   0.004927594
## health            0.016437358
## 
## Average coefficients for different model sizes: 
## 
##                            1X         2Xs        3Xs        4Xs        5Xs
## failures          -1.83602495 -1.77120362 -1.7103768 -1.6531626 -1.5992117
## higher             3.02912406  2.87511065  2.7309804  2.5961422  2.4700364
## Medu               0.68005695  0.64363935  0.6095608  0.5775912  0.5475308
## Walc              -0.39984766 -0.37360641 -0.3496158 -0.3276138 -0.3073652
## reason_reputation  1.28632788  1.22250295  1.1644309  1.1114462  1.0629623
## school            -1.31191921 -1.24959784 -1.1936743 -1.1434810 -1.0984342
## absences          -0.15018065 -0.14311599 -0.1367777 -0.1310755 -0.1259318
## schoolsup         -0.72210100 -0.79769434 -0.8645489 -0.9239534 -0.9770035
## studytime          0.74604214  0.69720444  0.6521105  0.6104329  0.5718757
## sex               -0.77227796 -0.73746114 -0.7085303 -0.6846257 -0.6650294
## Fjob_teacher       1.87517883  1.75757217  1.6528062  1.5594641  1.4763144
## romantic          -0.55881253 -0.53073062 -0.5062478 -0.4849667 -0.4665425
## reason_home        0.47923858  0.47915100  0.4785364  0.4775267  0.4762220
## famrel             0.33225443  0.31676062  0.3028976  0.2904857  0.2793674
## famsup            -0.02545908 -0.06916087 -0.1073702 -0.1407702 -0.1699502
## guardian_mother   -0.12728967 -0.14668998 -0.1647496 -0.1815801 -0.1972868
## health            -0.20061028 -0.19099730 -0.1821856 -0.1740623 -0.1665352
##                          6Xs        7Xs        8Xs        9Xs       10Xs
## failures          -1.5482035 -1.4998434 -1.4538604 -1.4100044 -1.3680449
## higher             2.3521338  2.2419341  2.1389650  2.0427813  1.9529633
## Medu               0.5192061  0.4924658  0.4671774  0.4432245  0.4205038
## Walc              -0.2886596 -0.2713084 -0.2551432 -0.2400133 -0.2257839
## reason_reputation  1.0184619  0.9774886  0.9396392  0.9045574  0.8719284
## school            -1.0580215 -1.0217914 -0.9893444 -0.9603260 -0.9344201
## absences          -0.1212797 -0.1170622 -0.1132303 -0.1097421 -0.1065619
## schoolsup         -1.0246297 -1.0676222 -1.1066520 -1.1422899 -1.1750220
## studytime          0.5361713  0.5030778  0.4723768  0.4438711  0.4173829
## sex               -0.6491421 -0.6364636 -0.6265764 -0.6191315 -0.6138373
## Fjob_teacher       1.4022857  1.3364444  1.2779759  1.2261686  1.1804006
## romantic          -0.4506751 -0.4371028 -0.4255966 -0.4159564 -0.4080070
## reason_home        0.4746965  0.4730047  0.4711851  0.4692641  0.4672594
## famrel             0.2694057  0.2604811  0.2524903  0.2453440  0.2389656
## famsup            -0.1954197 -0.2176212 -0.2369400 -0.2537126 -0.2682341
## guardian_mother   -0.2119685 -0.2257177 -0.2386205 -0.2507568 -0.2622002
## health            -0.1595288 -0.1529816 -0.1468436 -0.1410745 -0.1356420
##                         11Xs       12Xs        13Xs        14Xs
## failures          -1.3277692 -1.2889814 -1.25150087 -1.21516149
## higher             1.8691170  1.7908725  1.71788435  1.64983009
## Medu               0.3989233  0.3784003  0.35885953  0.34023218
## Walc              -0.2123341 -0.1995554 -0.18735025 -0.17563060
## reason_reputation  0.8414736  0.8129465  0.78612883  0.76082748
## school            -0.9113444 -0.8908460 -0.87269805 -0.85669687
## absences          -0.1036593 -0.1010083 -0.09858665 -0.09637545
## schoolsup         -1.2052637 -1.2333706 -1.25964855 -1.28436147
## studytime          0.3927517  0.3698332  0.34849740  0.32862746
## sex               -0.6104498 -0.6087647 -0.60861089 -0.60984520
## Fjob_teacher       1.1401276  1.1048737  1.07422298  1.04781199
## romantic          -0.4015950 -0.3965861 -0.39286304 -0.39032323
## reason_home        0.4651813  0.4630356  0.46082426  0.45854696
## famrel             0.2332901  0.2282625  0.22383668  0.21997496
## famsup            -0.2807638 -0.2915302 -0.30073540 -0.30855811
## guardian_mother   -0.2730182 -0.2832726 -0.29301938 -0.30230927
## health            -0.1305204 -0.1256896 -0.12113415 -0.11684280
##                          15Xs        16Xs       17Xs
## failures          -1.17981091 -1.14530957 -1.1115300
## higher             1.58641030  1.52734777  1.4723868
## Medu               0.32245443  0.30546656  0.2892121
## Walc              -0.16431681 -0.15333662 -0.1426242
## reason_reputation  0.73687137  0.71410915  0.6924070
## school            -0.84265956 -0.83042213 -0.8198378
## absences          -0.09435847 -0.09252184 -0.0908536
## schoolsup         -1.30773833 -1.32997883 -1.3512583
## studytime          0.31011832  0.29287558  0.2768143
## sex               -0.61234781 -0.61601880 -0.6207752
## Fjob_teacher       1.02532404  1.00648372  0.9910523
## romantic          -0.38887721 -0.38844706 -0.3889651
## reason_home        0.45620172  0.45378559  0.4512951
## famrel             0.21664670  0.21382773  0.2114996
## famsup            -0.31515702 -0.32067311 -0.3252318
## guardian_mother   -0.31118805 -0.31969688 -0.3278727
## health            -0.11280750 -0.10902312 -0.1054869

3.2. Shrinkage method (Ridge / LASSO)

3.2.1. ridge regression

# Ridge regression (alpha = 0); the penalty strength is chosen by
# cross-validation on the training predictors (columns 1 to 39).
cvfit <- cv.glmnet(
  x = as.matrix(train[, 1:39]),
  y = train$score,
  alpha = 0
)
plot(cvfit)

# Lambda value that minimises the cross-validation error
lambda <- cvfit[["lambda.min"]]

- ridge regression의 계수

# Ridge coefficients (intercept included) at the selected lambda
coef_ridge <- coef(cvfit, s = lambda)
coef_ridge
## 40 x 1 sparse Matrix of class "dgCMatrix"
##                              1
## (Intercept)        9.167737155
## school            -0.556418300
## sex               -0.460344328
## age                0.076928269
## address            0.244592945
## famsize            0.063560505
## Pstatus            0.226990972
## Medu               0.188941562
## Fedu               0.052239686
## traveltime        -0.017614395
## studytime          0.264365505
## failures          -0.937852536
## schoolsup         -0.849064862
## famsup            -0.175809489
## paid              -0.458864006
## activities         0.136721937
## nursery           -0.134694970
## higher             1.254116533
## internet           0.162342371
## romantic          -0.296269262
## famrel             0.187311926
## freetime          -0.002181064
## goout             -0.113656915
## Dalc              -0.054947299
## Walc              -0.112253238
## health            -0.096697434
## absences          -0.069186924
## Mjob_health        0.531777798
## Mjob_other         0.042627055
## Mjob_services     -0.026825340
## Mjob_teacher       0.488728706
## Fjob_health       -0.215685064
## Fjob_other         0.110702703
## Fjob_services     -0.080892755
## Fjob_teacher       0.729175303
## reason_home        0.320186924
## reason_other       0.033538667
## reason_reputation  0.527084198
## guardian_mother   -0.288609519
## guardian_other    -0.515290392

- ridge regression을 이용한 예측값 계산

# Fitted values on the training data
pred_ridge_tr <- predict(cvfit, newx = as.matrix(train[, 1:39]), s = lambda)
# Actual vs. predicted scores; the dashed red line marks perfect prediction
plot(
  x = train$score,
  y = pred_ridge_tr,
  xlim = c(5, 18),
  ylim = c(5, 18),
  xlab = "Actual score",
  ylab = "Predicted score",
  pch = 19
)
lines(train$score, train$score, lty = 2, col = "red")

# Predictions on the held-out test data
pred_ridge_te <- predict(cvfit, newx = as.matrix(test[, 1:39]), s = lambda)

- performance evaluation

# Test-set mean squared error of the ridge model
MSE_ridte <- mean((test$score - pred_ridge_te)^2)
MSE_ridte
## [1] 6.619485

3.2.2. LASSO regression

# LASSO regression (alpha = 1); the penalty strength is chosen by
# cross-validation on the training predictors (columns 1 to 39).
cvfit <- cv.glmnet(
  x = as.matrix(train[, 1:39]),
  y = train$score,
  alpha = 1
)
plot(cvfit)

# Lambda value that minimises the cross-validation error
lambda <- cvfit[["lambda.min"]]
## LASSO coefficients at the selected lambda ("." marks a coefficient shrunk to zero)
coef_lasso <- coef(cvfit, s = lambda)
coef_lasso
## 40 x 1 sparse Matrix of class "dgCMatrix"
##                             1
## (Intercept)        8.77356292
## school            -0.66338499
## sex               -0.51134488
## age                0.10188416
## address            0.22146570
## famsize            .         
## Pstatus            0.17369125
## Medu               0.21327165
## Fedu               .         
## traveltime         .         
## studytime          0.25855717
## failures          -1.13724762
## schoolsup         -1.03649021
## famsup            -0.16601759
## paid              -0.38425736
## activities         0.08759245
## nursery           -0.09217803
## higher             1.42044312
## internet           0.06355573
## romantic          -0.31532981
## famrel             0.19839417
## freetime           .         
## goout             -0.10765529
## Dalc               .         
## Walc              -0.12101969
## health            -0.09643229
## absences          -0.07801459
## Mjob_health        0.50853631
## Mjob_other         .         
## Mjob_services      .         
## Mjob_teacher       0.44210011
## Fjob_health       -0.03276677
## Fjob_other         0.14138765
## Fjob_services      .         
## Fjob_teacher       0.85101068
## reason_home        0.29936051
## reason_other       .         
## reason_reputation  0.56675028
## guardian_mother   -0.30978159
## guardian_other    -0.42207321
## Predictions of the LASSO model
# Fitted values on the training data, plotted against the actual scores;
# the dashed red line marks perfect prediction.
pred_lasso_tr <- predict(cvfit, newx = as.matrix(train[, 1:39]), s = lambda)
plot(
  x = train$score,
  y = pred_lasso_tr,
  xlim = c(5, 18),
  ylim = c(5, 18),
  xlab = "Actual score",
  ylab = "Predicted score",
  pch = 19
)
lines(train$score, train$score, lty = 2, col = "red")

# Predictions on the held-out test data
pred_lasso_te <- predict(cvfit, newx = as.matrix(test[, 1:39]), s = lambda)
# Test-set mean squared error
MSE_laste <- mean((test$score - pred_lasso_te)^2)
MSE_laste
## [1] 6.66937
# Side-by-side comparison of the test MSE of the two shrinkage models
result <- c(Ridge = MSE_ridte, LASSO = MSE_laste)
result
##    Ridge    LASSO 
## 6.619485 6.669370

3.3. Knn regression

# Reload the data
st <- read.csv("./input/student.csv", row.names = 1)

# train / validation / test split with sampling weights 6 : 2 : 2.
# The weights are passed explicitly as `prob` (the original relied on
# positional matching) and TRUE is spelled out instead of T.
set.seed(555)
flag <- sample(c("tr", "va", "te"), size = nrow(st), replace = TRUE, prob = c(6, 2, 2))
train <- st[which(flag == "tr"), ]
valid <- st[which(flag == "va"), ]
test <- st[which(flag == "te"), ]



# Outlier removal (training data only)
boxplot(train)

b1 <- boxplot(train$absences)

b2 <- boxplot(train$score)

# stats[5] is the end of the upper whisker, stats[1] the end of the lower one
out1 <- which(train$absences > b1$stats[5])
out2 <- which(train$score < b2$stats[1])
# Drop rows with outlying absences / score. The guard matters: with no
# outliers, train[-integer(0), ] would select zero rows and empty the set.
out_rows <- unique(c(out1, out2))
if (length(out_rows) > 0) {
  train <- train[-out_rows, ]
}
boxplot(train)

### Standardization
# Centre / scale parameters are estimated on the training data with column 40
# excluded (presumably the response, score) and then applied to all three sets.
pp_model <- preProcess(train[, -40], method = c("center", "scale"))
train <- predict(pp_model, train)
valid <- predict(pp_model, valid)
test <- predict(pp_model, test)

tr_x <- train[, 1:39]
va_x <- valid[, 1:39]
te_x <- test[, 1:39]

tr_y <- train$score
va_y <- valid$score
te_y <- test$score



# Find the optimal k: fit k = 1..100 and record the validation MSE of each.
# The result vector is preallocated instead of being grown with c().
MSE_k <- numeric(100)
for (i in seq_along(MSE_k)) {
  m_knn <- knnreg(tr_x, tr_y, k = i)
  MSE_va <- mean((predict(m_knn, va_x) - va_y)^2)
  MSE_k[i] <- MSE_va
}
which.min(MSE_k)
## [1] 17
# Validation MSE as a function of k; the best k is marked with a dashed
# red line and labelled with its (rounded) MSE.
plot(MSE_k, xlab = "k", ylab = "MSE", type = "l")
abline(v = which.min(MSE_k), col = "red", lty = 2)
text(
  x = which.min(MSE_k),
  y = min(MSE_k),
  labels = round(MSE_k[which.min(MSE_k)], 2),
  col = "red",
  pos = 3
)

# Refit with the best k and measure the error on the test set
m_knn <- knnreg(tr_x, tr_y, k = which.min(MSE_k))
pred_te <- predict(m_knn, te_x)
MSE_te <- mean((te_y - pred_te)^2)
MSE_te
## [1] 6.899605
# Actual vs. predicted test scores for the KNN model; the dashed red line
# marks perfect prediction. (The title previously said "Regression Tree",
# copied from another section.)
plot(te_y, pred_te, pch = 19, xlab = "Actual score", ylab = "Predicted score", main = "Result of KNN Regression", xlim = c(5, 18), ylim = c(5, 18))
lines(te_y, te_y, lty = 2, col = "red")

3.4. Regression Tree

# Reload the data
st <- read.csv("./input/student.csv", row.names = 1)

# train / test split with sampling weights 8 : 2.
# The weights are passed explicitly as `prob` (the original relied on
# positional matching) and TRUE is spelled out instead of T.
set.seed(555)
flag <- sample(c("tr", "te"), size = nrow(st), replace = TRUE, prob = c(8, 2))
train <- st[which(flag == "tr"), ]
test <- st[which(flag == "te"), ]



# Outlier removal (training data only)
boxplot(train)

b1 <- boxplot(train$absences)

b2 <- boxplot(train$score)

# stats[5] is the end of the upper whisker, stats[1] the end of the lower one
out1 <- which(train$absences > b1$stats[5])
out2 <- which(train$score < b2$stats[1])
# Drop rows with outlying absences / score. The guard matters: with no
# outliers, train[-integer(0), ] would select zero rows and empty the set.
out_rows <- unique(c(out1, out2))
if (length(out_rows) > 0) {
  train <- train[-out_rows, ]
}
boxplot(train)

##### rpart
set.seed(111)
# A practically-zero cp grows the full tree using every predictor
dt <- rpart(score~., data = train, cp = 0.1^20)
printcp(dt) # print the cp table
## 
## Regression tree:
## rpart(formula = score ~ ., data = train, cp = 0.1^20)
## 
## Variables actually used in tree construction:
##  [1] absences          activities        address          
##  [4] age               Dalc              failures         
##  [7] famrel            famsize           famsup           
## [10] Fedu              freetime          goout            
## [13] health            higher            internet         
## [16] Medu              Mjob_health       reason_home      
## [19] reason_reputation school            schoolsup        
## [22] studytime         traveltime        Walc             
## 
## Root node error: 3374.7/494 = 6.8313
## 
## n= 494 
## 
##            CP nsplit rel error  xerror     xstd
## 1  2.0697e-01      0   1.00000 1.00301 0.056645
## 2  7.3186e-02      1   0.79303 0.79635 0.048106
## 3  3.4056e-02      2   0.71984 0.72309 0.043693
## 4  2.4437e-02      3   0.68578 0.71183 0.044176
## 5  1.5088e-02      4   0.66135 0.70006 0.044504
## 6  1.5036e-02      5   0.64626 0.70919 0.046023
## 7  1.1735e-02      7   0.61619 0.72050 0.047333
## 8  1.0291e-02      8   0.60445 0.74127 0.050471
## 9  8.8407e-03      9   0.59416 0.75355 0.053274
## 10 7.4938e-03     10   0.58532 0.76567 0.055839
## 11 7.2498e-03     12   0.57033 0.77521 0.056217
## 12 6.8552e-03     13   0.56308 0.77959 0.056078
## 13 6.8511e-03     16   0.54252 0.79388 0.057735
## 14 6.6068e-03     17   0.53567 0.80562 0.057956
## 15 6.3976e-03     18   0.52906 0.80582 0.057948
## 16 6.2038e-03     20   0.51626 0.80934 0.058074
## 17 6.1408e-03     21   0.51006 0.80665 0.057846
## 18 6.1328e-03     23   0.49778 0.80739 0.057837
## 19 5.7126e-03     24   0.49165 0.80395 0.057706
## 20 5.0460e-03     25   0.48593 0.80728 0.058304
## 21 4.8599e-03     26   0.48089 0.82015 0.058132
## 22 4.4255e-03     27   0.47603 0.82425 0.057562
## 23 4.3704e-03     29   0.46718 0.82259 0.057384
## 24 3.9915e-03     30   0.46281 0.81782 0.057253
## 25 3.4565e-03     33   0.45083 0.82966 0.058005
## 26 3.1668e-03     35   0.44392 0.82624 0.057279
## 27 2.7740e-03     36   0.44075 0.82531 0.057404
## 28 2.2344e-03     37   0.43798 0.82984 0.057332
## 29 1.7808e-03     40   0.43128 0.83230 0.057367
## 30 1.0000e-20     41   0.42949 0.83381 0.057703
plotcp(dt) # plot the cp table

### tree pruning
# Prune back to the cp value whose cross-validated error (xerror) is lowest
dt_prune <- prune(dt, cp = dt$cptable[which.min(dt$cptable[, "xerror"]), "CP"])


### plotting
plot(dt_prune, margin = 0.1) # draw the pruned tree
# TRUE is spelled out because T is an ordinary, reassignable variable
text(dt_prune, use.n = TRUE)

### variable importance
dt_prune$variable.importance
##       failures         higher           Walc           Medu            age 
##     698.462257     246.978734     114.927613      82.467687      57.407857 
##           Dalc           Fedu   Mjob_teacher       absences   Fjob_teacher 
##      38.309204      31.337721      25.564983      19.135952      13.194830 
##    Mjob_health guardian_other           paid 
##      12.370153       9.567976       3.298707
# Bar chart of the pruned tree's variable-importance scores
barplot(height = dt_prune$variable.importance, ylim = c(0, 800))

### performance evaluation
# Training MSE (predict() without new data returns the fitted values)
pred_tr <- predict(dt_prune)
MSE_tr <- mean((train$score - pred_tr)^2)
MSE_tr
## [1] 4.517856
# Test MSE of the pruned tree
pred_te <- predict(dt_prune, newdata = test)
MSE_te <- mean((test$score - pred_te)^2)
MSE_te
## [1] 7.648207

3.5. ANN

# Reload the data
st <- read.csv("./input/student.csv", row.names = 1)

# train / test split with sampling weights 8 : 2.
# The weights are passed explicitly as `prob` (the original relied on
# positional matching) and TRUE is spelled out instead of T.
set.seed(555)
flag <- sample(c("tr", "te"), size = nrow(st), replace = TRUE, prob = c(8, 2))
train <- st[which(flag == "tr"), ]
test <- st[which(flag == "te"), ]




### Standardization
# Centre / scale parameters are estimated on the training data with column 40
# excluded (presumably the response, score) and then applied to both sets.
pp_model <- preProcess(train[, -40], method = c("center", "scale"))
train <- predict(pp_model, train)
test <- predict(pp_model, test)



### ann modeling

set.seed(1004)

# Build the model formula from the names of the 39 predictor columns
fml <- as.formula(paste("score~", paste(colnames(train[, 1:39]), collapse = "+")))


### neuralnet tuning (disabled)
# NOTE(review): this disabled search scores each candidate on `test`, not on a
# separate validation set. The marker line now uses which.min() because the
# best size is the one with the LOWEST MSE (the original marked which.max()).
# ptm <- proc.time() # system time when the search starts
# # vary the number of hidden nodes from 2 to 8 and record the MSE of each
# MSE <- NULL
# for(h in 2:8){
#   ann <- neuralnet(fml, data = train, stepmax = 100000000, threshold = 0.01, linear.output = TRUE, hidden = h, err.fct = "sse", lifesign = "full")
#   pred <- compute(ann, test[, 1:39])$net.result # predictions for the evaluation data
#   MSE_v <- mean((pred - test$score)^2)
#   MSE <- c(MSE, MSE_v)
# }
#
# proc.time() - ptm # total run time in seconds (see "elapsed")
# plot(2:8, MSE, xlab = "Hidden node", ylab = "MSE", type = "l", main = "Validation set MSE")
# abline(v = which.min(MSE) + 1, lty = 2)



# Train the ANN with 2 hidden nodes; linear.output = TRUE gives a linear
# (regression) output node.
m_ann <- neuralnet(fml, data = train, stepmax = 1e6, threshold = 0.01, linear.output = TRUE, hidden = 2, err.fct = "sse", lifesign = "full")
## hidden: 2    thresh: 0.01    rep: 1/1    steps:    1000  min thresh: 0.2335553271
##                                                    2000  min thresh: 0.1505807638
##                                                    3000  min thresh: 0.09653231758
##                                                    4000  min thresh: 0.08927646165
##                                                    5000  min thresh: 0.07649720845
##                                                    6000  min thresh: 0.06023353057
##                                                    7000  min thresh: 0.04868439858
##                                                    8000  min thresh: 0.04722636276
##                                                    9000  min thresh: 0.03894188661
##                                                   10000  min thresh: 0.03562505974
##                                                   11000  min thresh: 0.03519242481
##                                                   12000  min thresh: 0.03233752237
##                                                   13000  min thresh: 0.02933776676
##                                                   14000  min thresh: 0.0258825741
##                                                   15000  min thresh: 0.0258825741
##                                                   16000  min thresh: 0.02311210518
##                                                   17000  min thresh: 0.02163785798
##                                                   18000  min thresh: 0.0201117989
##                                                   19000  min thresh: 0.0201117989
##                                                   20000  min thresh: 0.01944093671
##                                                   21000  min thresh: 0.01482905607
##                                                   22000  min thresh: 0.01482905607
##                                                   23000  min thresh: 0.01482905607
##                                                   24000  min thresh: 0.01482905607
##                                                   25000  min thresh: 0.01331030992
##                                                   26000  min thresh: 0.01331030992
##                                                   27000  min thresh: 0.01331030992
##                                                   28000  min thresh: 0.01156519501
##                                                   29000  min thresh: 0.01156519501
##                                                   30000  min thresh: 0.0114995452
##                                                   31000  min thresh: 0.0114995452
##                                                   32000  min thresh: 0.0114995452
##                                                   33000  min thresh: 0.0109319609
##                                                   34000  min thresh: 0.0109319609
##                                                   35000  min thresh: 0.0109319609
##                                                   36000  min thresh: 0.01046851022
##                                                   37000  min thresh: 0.01005458883
##                                                   38000  min thresh: 0.01005458883
##                                                   38027  error: 954.64028    time: 9.68 secs
# Draw the fitted network
plot(m_ann)




### Prediction
# Training MSE
pred_tr <- compute(m_ann, train[, 1:39])[["net.result"]]
MSE_tr <- mean((train$score - pred_tr)^2)
MSE_tr
## [1] 3.671693373
# Test MSE of the neural network
pred_te <- compute(m_ann, test[, 1:39])[["net.result"]]
MSE_te <- mean((test$score - pred_te)^2)
MSE_te
## [1] 9.383553398
# Actual vs. predicted test scores for the neural network.
# The reference line is the identity y = x (intercept 0, slope 1);
# abline(1, 1) would draw y = 1 + x and misplace the "perfect" line.
plot(test$score, pred_te, pch = 19, xlab = "Actual value", ylab = "Predicted value")
abline(0, 1, col = "red", lty = 2, lwd = 2)

3.6. Support Vector Regression

# Reload the data
st <- read.csv("./input/student.csv", row.names = 1)


# train / test split with sampling weights 8 : 2.
# The weights are passed explicitly as `prob` (the original relied on
# positional matching) and TRUE is spelled out instead of T.
set.seed(555)
flag <- sample(c("tr", "te"), size = nrow(st), replace = TRUE, prob = c(8, 2))
train <- st[which(flag == "tr"), ]
test <- st[which(flag == "te"), ]




# Standardization
# Centre / scale parameters are estimated on the training data with column 40
# excluded (presumably the response, score) and then applied to both sets.
pp_model <- preProcess(train[, -40], method = c("center", "scale"))
train <- predict(pp_model, train)
test <- predict(pp_model, test)



##### SVR
### SVR tuning
# Grid search over epsilon in {0, 0.1, ..., 1} and cost in {2^-3, ..., 2^3}
tune.svr <- tune(svm, score~., data = train, ranges = list(epsilon = seq(0, 1, 0.1), cost = 2^(-3:3)))
plot(tune.svr)

# Best model found by the grid search
svr <- tune.svr$best.model




### performance evaluation
# Training MSE (predict() without new data returns the fitted values)
pred_tr <- predict(svr)
MSE_tr <- mean((pred_tr - train$score)^2)
MSE_tr
## [1] 2.264410905
# Test MSE of the tuned support vector regression
pred_te <- predict(svr, newdata = test)
MSE_te <- mean((test$score - pred_te)^2)
MSE_te
## [1] 6.176742383
# Actual vs. predicted test scores for the SVR model.
# The reference line is the identity y = x (intercept 0, slope 1);
# abline(1, 1) would draw y = 1 + x and misplace the "perfect" line.
plot(test$score, pred_te, pch = 19, xlab = "Actual value", ylab = "Predicted value")
abline(0, 1, col = "red", lty = 2, lwd = 2)

3.7. Logistic Regression

# Reload the data
st <- read.csv("./input/student.csv", row.names = 1)

# train / test split with sampling weights 8 : 2.
# The weights are passed explicitly as `prob` (the original relied on
# positional matching) and TRUE is spelled out instead of T.
set.seed(555)
flag <- sample(c("tr", "te"), size = nrow(st), replace = TRUE, prob = c(8, 2))
train <- st[which(flag == "tr"), ]
test <- st[which(flag == "te"), ]



# Distribution of the students' scores
h <- hist(train$score, breaks = 12)

# Colour the bins by their left edge: edges up to 12.9 are orange, edges
# from 13 upward are sky blue; the red line marks the threshold of 13.
cuts <- cut(h$breaks, c(-Inf, 12.9, Inf))
plot(h, col = c("orange", "skyblue")[cuts], main = "Histogram of Score", xlab = "Score")
abline(v = 13, col = "red", lwd = 2)

# Build a binary class for the training data using 13 as the threshold.
# The comparison is strict: class = 1 only when score > 13, so a score of
# exactly 13 stays in class 0 (0: lower group, 1: upper group).
class_tr <- rep(0, dim(train)[1])
class_tr[which(train$score > 13)] <- 1
train_cls <- data.frame(train, "class" = class_tr) # append the class column
train_cls <- subset(train_cls, select = -(score)) # drop the original score column
train_cls[1:10, ]
##    school sex age address famsize Pstatus Medu Fedu traveltime studytime
## 1       0   0  18       1       0       0    4    4          2         2
## 3       0   0  15       1       1       1    1    1          1         2
## 4       0   0  15       1       0       1    4    2          1         3
## 5       0   0  16       1       0       1    3    3          1         2
## 6       0   1  16       1       1       1    4    3          1         2
## 8       0   0  17       1       0       0    4    4          2         2
## 9       0   1  15       1       1       0    3    2          1         2
## 12      0   0  15       1       0       1    2    1          3         3
## 13      0   1  15       1       1       1    4    4          1         1
## 14      0   1  15       1       0       1    4    3          2         2
##    failures schoolsup famsup paid activities nursery higher internet
## 1         0         1      0    0          0       1      1        0
## 3         0         1      0    0          0       1      1        1
## 4         0         0      1    0          1       1      1        1
## 5         0         0      1    0          0       1      1        0
## 6         0         0      1    0          1       1      1        1
## 8         0         1      1    0          0       1      1        0
## 9         0         0      1    0          0       1      1        1
## 12        0         0      1    0          1       1      1        1
## 13        0         0      1    0          1       1      1        1
## 14        0         0      1    0          0       1      1        1
##    romantic famrel freetime goout Dalc Walc health absences Mjob_health
## 1         0      4        3     4    1    1      3        4           0
## 3         0      4        3     2    2    3      3        6           0
## 4         1      3        2     2    1    1      5        0           1
## 5         0      4        3     2    1    2      5        0           0
## 6         0      5        4     2    1    2      5        6           0
## 8         0      4        1     4    1    1      1        2           0
## 9         0      4        2     2    1    1      1        0           0
## 12        0      5        2     2    1    1      4        0           0
## 13        0      4        3     3    1    3      5        0           1
## 14        0      5        4     3    1    2      3        0           0
##    Mjob_other Mjob_services Mjob_teacher Fjob_health Fjob_other
## 1           0             0            0           0          0
## 3           0             0            0           0          1
## 4           0             0            0           0          0
## 5           1             0            0           0          1
## 6           0             1            0           0          1
## 8           1             0            0           0          0
## 9           0             1            0           0          1
## 12          0             1            0           0          1
## 13          0             0            0           0          0
## 14          0             0            1           0          1
##    Fjob_services Fjob_teacher reason_home reason_other reason_reputation
## 1              0            1           0            0                 0
## 3              0            0           0            1                 0
## 4              1            0           1            0                 0
## 5              0            0           1            0                 0
## 6              0            0           0            0                 1
## 8              0            1           1            0                 0
## 9              0            0           1            0                 0
## 12             0            0           0            0                 1
## 13             1            0           0            0                 0
## 14             0            0           0            0                 0
##    guardian_mother guardian_other class
## 1                1              0     0
## 3                1              0     0
## 4                1              0     1
## 5                0              0     0
## 6                1              0     0
## 8                1              0     0
## 9                1              0     1
## 12               0              0     0
## 13               0              0     0
## 14               1              0     0
# Build the same binary class for the test data using 13 as the threshold.
# The comparison is strict: class = 1 only when score > 13
# (0: lower group, 1: upper group).
class_te <- numeric(nrow(test))
class_te[which(test$score > 13)] <- 1
# Append the class column, then drop the original score column
test_cls <- data.frame(test, class = class_te)
test_cls$score <- NULL
test_cls[1:10, ]
##    school sex age address famsize Pstatus Medu Fedu traveltime studytime
## 2       0   0  17       1       0       1    1    1          1         2
## 7       0   1  16       1       1       1    2    2          1         2
## 10      0   1  15       1       0       1    3    4          1         2
## 11      0   0  15       1       0       1    4    4          1         2
## 15      0   1  15       1       0       0    2    2          1         3
## 20      0   1  16       1       1       1    4    3          1         1
## 28      0   1  15       1       0       1    4    2          1         1
## 47      0   0  16       1       1       0    3    3          1         2
## 59      0   1  15       1       1       1    1    2          1         2
## 66      0   0  16       1       1       1    4    3          3         2
##    failures schoolsup famsup paid activities nursery higher internet
## 2         0         0      1    0          0       0      1        1
## 7         0         0      0    0          0       1      1        1
## 10        0         0      1    0          1       1      1        1
## 11        0         0      1    0          0       1      1        1
## 15        0         0      1    0          0       1      1        1
## 20        0         0      0    0          1       1      1        1
## 28        0         0      0    0          0       1      1        1
## 47        0         0      1    0          0       1      1        1
## 59        0         1      1    0          1       1      1        1
## 66        0         0      1    0          1       1      1        1
##    romantic famrel freetime goout Dalc Walc health absences Mjob_health
## 2         0      5        3     3    1    1      3        2           0
## 7         0      4        4     4    1    1      3        0           0
## 10        0      5        5     1    1    1      5        0           0
## 11        0      3        3     3    1    2      2        2           0
## 15        1      4        5     2    1    1      3        0           0
## 20        0      3        1     3    1    3      5        6           1
## 28        0      2        2     4    2    4      1        0           1
## 47        0      2        3     5    1    4      3        6           0
## 59        0      4        3     2    1    1      5        0           0
## 66        0      5        4     3    1    2      1        2           0
##    Mjob_other Mjob_services Mjob_teacher Fjob_health Fjob_other
## 2           0             0            0           0          1
## 7           1             0            0           0          1
## 10          1             0            0           0          1
## 11          0             0            1           1          0
## 15          1             0            0           0          1
## 20          0             0            0           0          1
## 28          0             0            0           0          0
## 47          1             0            0           0          0
## 59          1             0            0           0          0
## 66          0             0            1           0          0
##    Fjob_services Fjob_teacher reason_home reason_other reason_reputation
## 2              0            0           0            0                 0
## 7              0            0           1            0                 0
## 10             0            0           1            0                 0
## 11             0            0           0            0                 1
## 15             0            0           1            0                 0
## 20             0            0           1            0                 0
## 28             1            0           0            1                 0
## 47             1            0           1            0                 0
## 59             0            0           1            0                 0
## 66             1            0           0            0                 0
##    guardian_mother guardian_other class
## 2                0              0     0
## 7                1              0     0
## 10               1              0     0
## 11               1              0     1
## 15               0              1     1
## 20               0              0     0
## 28               1              0     0
## 47               1              0     0
## 59               0              0     1
## 66               1              0     1
##### Logistic regression (all predictors)
# Binomial GLM of `class` on every other column of train_cls.
m_logis <- glm(class ~ ., data = train_cls, family = "binomial")

# Predicted probabilities for the test rows.
pred_te <- predict(m_logis, newdata = test_cls, type = "response")

# Test accuracy at a 0.5 cut-off: confusion-matrix diagonal over the total count.
# NOTE(review): `t` masks base::t() for the rest of the session.
t <- table(test_cls$class, pred_te > 0.5)
acc <- sum(diag(t)) / sum(t)




### Stepwise logistic regression
# Upper end of the search scope: the model containing every predictor.
full_m <- glm(class ~ ., data = train_cls, family = "binomial")
# Lower end of the search scope and starting point: the intercept-only model.
null_m <- glm(class ~ 1, data = train_cls, family = "binomial")

# Bidirectional step() search from the null model, bounded by the two fits above;
# trace = FALSE suppresses the per-step log.
m_logis_step <- step(
  null_m,
  scope = list(lower = null_m, upper = full_m),
  direction = "both",
  trace = FALSE
)
m_logis_step
## 
## Call:  glm(formula = class ~ failures + higher + schoolsup + studytime + 
##     Medu + sex + absences + Fjob_teacher + health + romantic + 
##     paid + reason_home + reason_reputation + famrel, family = "binomial", 
##     data = train_cls)
## 
## Coefficients:
##       (Intercept)           failures             higher  
##       -4.36909238        -2.56551880         2.33791649  
##         schoolsup          studytime               Medu  
##       -2.09631351         0.30253953         0.34358680  
##               sex           absences       Fjob_teacher  
##       -0.78946338        -0.07735731         1.09973044  
##            health           romantic               paid  
##       -0.13732595        -0.39770436        -0.86197535  
##       reason_home  reason_reputation             famrel  
##        0.64838051         0.50040822         0.20136710  
## 
## Degrees of Freedom: 519 Total (i.e. Null);  505 Residual
## Null Deviance:       611.6955 
## Residual Deviance: 462.2943  AIC: 492.2943
# Coefficient table (estimates, standard errors, z tests) and deviance summary
# for the stepwise-selected model.
summary(m_logis_step)
## 
## Call:
## glm(formula = class ~ failures + higher + schoolsup + studytime + 
##     Medu + sex + absences + Fjob_teacher + health + romantic + 
##     paid + reason_home + reason_reputation + famrel, family = "binomial", 
##     data = train_cls)
## 
## Deviance Residuals: 
##        Min          1Q      Median          3Q         Max  
## -2.0376043  -0.7851192  -0.2899791   0.7855093   2.4544009  
## 
## Coefficients:
##                      Estimate  Std. Error  z value   Pr(>|z|)    
## (Intercept)       -4.36909238  1.21458880 -3.59718 0.00032169 ***
## failures          -2.56551880  1.01301914 -2.53255 0.01132371 *  
## higher             2.33791649  1.03663340  2.25530 0.02411467 *  
## schoolsup         -2.09631351  0.53246202 -3.93702  0.0000825 ***
## studytime          0.30253953  0.14011290  2.15926 0.03083036 *  
## Medu               0.34358680  0.11230351  3.05945 0.00221745 ** 
## sex               -0.78946338  0.26000885 -3.03629 0.00239506 ** 
## absences          -0.07735731  0.03085085 -2.50746 0.01216019 *  
## Fjob_teacher       1.09973044  0.46626611  2.35859 0.01834453 *  
## health            -0.13732595  0.08013480 -1.71369 0.08658624 .  
## romantic          -0.39770436  0.24740580 -1.60750 0.10794514    
## paid              -0.86197535  0.60265526 -1.43030 0.15263211    
## reason_home        0.64838051  0.29192043  2.22109 0.02634511 *  
## reason_reputation  0.50040822  0.27736578  1.80415 0.07120851 .  
## famrel             0.20136710  0.13133223  1.53326 0.12521060    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 611.69553  on 519  degrees of freedom
## Residual deviance: 462.29431  on 505  degrees of freedom
## AIC: 492.29431
## 
## Number of Fisher Scoring iterations: 7
# Predicted probabilities on the training data: predict() is called without
# new data, so it returns predictions for the rows the model was fit on.
pred_tr <- predict(m_logis_step, type = "response")



# Training-set classification performance
# NOTE(review): `t` masks base::t(); the next chunk reads this object to compute accuracy.
t <- table(train_cls$class, pred_tr > 0.5) # confusion matrix at a 0.5 threshold: rows = true class, columns = (probability > 0.5)
t
##    
##     FALSE TRUE
##   0   338   39
##   1    80   63
# Training accuracy = diagonal of the confusion matrix `t` divided by the total count.
# NOTE(review): the diagonal only corresponds to correct predictions while `t`
# has both a FALSE and a TRUE column.
acc_tr <- sum(diag(t)) / sum(t) # training accuracy
acc_tr
## [1] 0.7711538462
# Test-set classification performance of the stepwise model.
# Predictions are made from test_cls, the same frame that supplies the labels
# below; it holds the columns of `test` minus `score` plus `class`, and the
# selected model uses none of those two, so the probabilities are unchanged.
pred_te <- predict(m_logis_step, newdata = test_cls, type = "response")


# Confusion matrix at a 0.5 threshold: rows = true class, columns = (probability > 0.5).
t_te <- table(test_cls$class, pred_te > 0.5)
t_te
##    
##     FALSE TRUE
##   0    84    8
##   1    26   11
# Test accuracy = diagonal of the confusion matrix `t_te` divided by the total count.
# NOTE(review): the diagonal only corresponds to correct predictions while `t_te`
# has both a FALSE and a TRUE column.
acc_te <- sum(diag(t_te)) / sum(t_te) # test accuracy
acc_te
## [1] 0.7364341085
### Search for the threshold that gives the highest test accuracy
# NOTE(review): the threshold is selected on the same test set that is used to
# report accuracy, so the reported figure is optimistic — consider tuning on a
# separate validation split.
threshold <- seq(0.1, 0.9, by = 0.1) # candidate thresholds: 0.1 to 0.9 in steps of 0.1

# The predicted probabilities do not depend on the threshold, so they are
# computed once. test_cls is used so features and labels come from one frame.
# pred_te is kept at top level because later chunks read it.
pred_te <- predict(m_logis_step, newdata = test_cls, type = "response")
actual_pos <- test_cls$class == 1

# Accuracy for each threshold, computed by comparing predictions with labels
# directly. A table()/diag() computation picks the wrong cell when a threshold
# puts every prediction on the TRUE side (the table collapses to one column).
# Pairs with a missing value are excluded, as table() would do by default.
acc_th <- vapply(
  threshold,
  function(cutoff) {
    hit <- (pred_te > cutoff) == actual_pos
    sum(hit, na.rm = TRUE) / sum(!is.na(hit))
  },
  numeric(1)
)


# plotting: accuracy curve with the best threshold highlighted in red
best_idx <- which.max(acc_th)
plot(threshold, acc_th, type = "l", xlab = "Threshold", ylab = "Test accuracy")
abline(v = threshold[best_idx], col = "red", lty = 2)
points(threshold[best_idx], acc_th[best_idx], pch = 19, col = "red")
text(threshold[best_idx], acc_th[best_idx], pos = 4, labels = round(acc_th[best_idx], 2), col = "red", cex = 0.8)

##### threshold = 0.6 test result
# Reuses pred_te, the stepwise-model test probabilities left over from the
# threshold search chunk; the 0.6 cut-off is hard-coded here.
t_te <- table(test_cls$class, pred_te > 0.6) # confusion matrix: rows = true class, columns = (probability > 0.6)
t_te
##    
##     FALSE TRUE
##   0    90    2
##   1    31    6
# Test accuracy at the 0.6 threshold = diagonal of `t_te` divided by the total count.
acc_te <- sum(diag(t_te)) / sum(t_te) # test accuracy
acc_te # test accuracy
## [1] 0.7441860465
# Result plot: one point per test row, in row order, showing the value stored
# in pred_te and coloured by the true class. The dashed horizontal line marks
# the 0.6 threshold.
plot(
  pred_te,
  ylim = c(0, 1),
  ylab = "Predicted class",
  pch = 19,
  col = as.factor(test_cls$class)
)
abline(h = 0.6, lty = 2)