# 2025-06-02

# mtcars 데이터 불러오기
data(mtcars)

# 종속변수: mpg
# 모든 변수 포함한 전체 모형
full_model <- lm(mpg ~ ., data = mtcars)

# 절편만 포함한 시작 모형
null_model <- lm(mpg ~ 1, data = mtcars)

# 후진제거법 (Backward Elimination)
backward_model <- step(full_model, direction = "backward")
## Start:  AIC=70.9
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb
## 
##        Df Sum of Sq    RSS    AIC
## - cyl   1    0.0799 147.57 68.915
## - vs    1    0.1601 147.66 68.932
## - carb  1    0.4067 147.90 68.986
## - gear  1    1.3531 148.85 69.190
## - drat  1    1.6270 149.12 69.249
## - disp  1    3.9167 151.41 69.736
## - hp    1    6.8399 154.33 70.348
## - qsec  1    8.8641 156.36 70.765
## <none>              147.49 70.898
## - am    1   10.5467 158.04 71.108
## - wt    1   27.0144 174.51 74.280
## 
## Step:  AIC=68.92
## mpg ~ disp + hp + drat + wt + qsec + vs + am + gear + carb
## 
##        Df Sum of Sq    RSS    AIC
## - vs    1    0.2685 147.84 66.973
## - carb  1    0.5201 148.09 67.028
## - gear  1    1.8211 149.40 67.308
## - drat  1    1.9826 149.56 67.342
## - disp  1    3.9009 151.47 67.750
## - hp    1    7.3632 154.94 68.473
## <none>              147.57 68.915
## - qsec  1   10.0933 157.67 69.032
## - am    1   11.8359 159.41 69.384
## - wt    1   27.0280 174.60 72.297
## 
## Step:  AIC=66.97
## mpg ~ disp + hp + drat + wt + qsec + am + gear + carb
## 
##        Df Sum of Sq    RSS    AIC
## - carb  1    0.6855 148.53 65.121
## - gear  1    2.1437 149.99 65.434
## - drat  1    2.2139 150.06 65.449
## - disp  1    3.6467 151.49 65.753
## - hp    1    7.1060 154.95 66.475
## <none>              147.84 66.973
## - am    1   11.5694 159.41 67.384
## - qsec  1   15.6830 163.53 68.200
## - wt    1   27.3799 175.22 70.410
## 
## Step:  AIC=65.12
## mpg ~ disp + hp + drat + wt + qsec + am + gear
## 
##        Df Sum of Sq    RSS    AIC
## - gear  1     1.565 150.09 63.457
## - drat  1     1.932 150.46 63.535
## <none>              148.53 65.121
## - disp  1    10.110 158.64 65.229
## - am    1    12.323 160.85 65.672
## - hp    1    14.826 163.35 66.166
## - qsec  1    26.408 174.94 68.358
## - wt    1    69.127 217.66 75.350
## 
## Step:  AIC=63.46
## mpg ~ disp + hp + drat + wt + qsec + am
## 
##        Df Sum of Sq    RSS    AIC
## - drat  1     3.345 153.44 62.162
## - disp  1     8.545 158.64 63.229
## <none>              150.09 63.457
## - hp    1    13.285 163.38 64.171
## - am    1    20.036 170.13 65.466
## - qsec  1    25.574 175.67 66.491
## - wt    1    67.572 217.66 73.351
## 
## Step:  AIC=62.16
## mpg ~ disp + hp + wt + qsec + am
## 
##        Df Sum of Sq    RSS    AIC
## - disp  1     6.629 160.07 61.515
## <none>              153.44 62.162
## - hp    1    12.572 166.01 62.682
## - qsec  1    26.470 179.91 65.255
## - am    1    32.198 185.63 66.258
## - wt    1    69.043 222.48 72.051
## 
## Step:  AIC=61.52
## mpg ~ hp + wt + qsec + am
## 
##        Df Sum of Sq    RSS    AIC
## - hp    1     9.219 169.29 61.307
## <none>              160.07 61.515
## - qsec  1    20.225 180.29 63.323
## - am    1    25.993 186.06 64.331
## - wt    1    78.494 238.56 72.284
## 
## Step:  AIC=61.31
## mpg ~ wt + qsec + am
## 
##        Df Sum of Sq    RSS    AIC
## <none>              169.29 61.307
## - am    1    26.178 195.46 63.908
## - qsec  1   109.034 278.32 75.217
## - wt    1   183.347 352.63 82.790
summary(backward_model)
## 
## Call:
## lm(formula = mpg ~ wt + qsec + am, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4811 -1.5555 -0.7257  1.4110  4.6610 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.6178     6.9596   1.382 0.177915    
## wt           -3.9165     0.7112  -5.507 6.95e-06 ***
## qsec          1.2259     0.2887   4.247 0.000216 ***
## am            2.9358     1.4109   2.081 0.046716 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.459 on 28 degrees of freedom
## Multiple R-squared:  0.8497, Adjusted R-squared:  0.8336 
## F-statistic: 52.75 on 3 and 28 DF,  p-value: 1.21e-11
# ✔ full_model부터 시작해서 AIC 기준으로 설명력이 낮은 변수를 하나씩 제거합니다.

# 전진선택법 (Forward Selection)
forward_model <- step(null_model,
                      scope = list(lower = null_model, upper = full_model),
                      direction = "forward")
## Start:  AIC=115.94
## mpg ~ 1
## 
##        Df Sum of Sq     RSS     AIC
## + wt    1    847.73  278.32  73.217
## + cyl   1    817.71  308.33  76.494
## + disp  1    808.89  317.16  77.397
## + hp    1    678.37  447.67  88.427
## + drat  1    522.48  603.57  97.988
## + vs    1    496.53  629.52  99.335
## + am    1    405.15  720.90 103.672
## + carb  1    341.78  784.27 106.369
## + gear  1    259.75  866.30 109.552
## + qsec  1    197.39  928.66 111.776
## <none>              1126.05 115.943
## 
## Step:  AIC=73.22
## mpg ~ wt
## 
##        Df Sum of Sq    RSS    AIC
## + cyl   1    87.150 191.17 63.198
## + hp    1    83.274 195.05 63.840
## + qsec  1    82.858 195.46 63.908
## + vs    1    54.228 224.09 68.283
## + carb  1    44.602 233.72 69.628
## + disp  1    31.639 246.68 71.356
## <none>              278.32 73.217
## + drat  1     9.081 269.24 74.156
## + gear  1     1.137 277.19 75.086
## + am    1     0.002 278.32 75.217
## 
## Step:  AIC=63.2
## mpg ~ wt + cyl
## 
##        Df Sum of Sq    RSS    AIC
## + hp    1   14.5514 176.62 62.665
## + carb  1   13.7724 177.40 62.805
## <none>              191.17 63.198
## + qsec  1   10.5674 180.60 63.378
## + gear  1    3.0281 188.14 64.687
## + disp  1    2.6796 188.49 64.746
## + vs    1    0.7059 190.47 65.080
## + am    1    0.1249 191.05 65.177
## + drat  1    0.0010 191.17 65.198
## 
## Step:  AIC=62.66
## mpg ~ wt + cyl + hp
## 
##        Df Sum of Sq    RSS    AIC
## <none>              176.62 62.665
## + am    1    6.6228 170.00 63.442
## + disp  1    6.1762 170.44 63.526
## + carb  1    2.5187 174.10 64.205
## + drat  1    2.2453 174.38 64.255
## + qsec  1    1.4010 175.22 64.410
## + gear  1    0.8558 175.76 64.509
## + vs    1    0.0599 176.56 64.654
summary(forward_model)
## 
## Call:
## lm(formula = mpg ~ wt + cyl + hp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9290 -1.5598 -0.5311  1.1850  5.8986 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 38.75179    1.78686  21.687  < 2e-16 ***
## wt          -3.16697    0.74058  -4.276 0.000199 ***
## cyl         -0.94162    0.55092  -1.709 0.098480 .  
## hp          -0.01804    0.01188  -1.519 0.140015    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.512 on 28 degrees of freedom
## Multiple R-squared:  0.8431, Adjusted R-squared:  0.8263 
## F-statistic: 50.17 on 3 and 28 DF,  p-value: 2.184e-11
# null_model부터 시작해서 AIC 기준으로 설명력이 높은 변수를 하나씩 추가합니다.

# 단계적 선택 수행 (전진 + 후진 동시에)
stepwise_model <- step(null_model,
                       scope = list(lower = null_model, upper = full_model),
                       direction = "both")
## Start:  AIC=115.94
## mpg ~ 1
## 
##        Df Sum of Sq     RSS     AIC
## + wt    1    847.73  278.32  73.217
## + cyl   1    817.71  308.33  76.494
## + disp  1    808.89  317.16  77.397
## + hp    1    678.37  447.67  88.427
## + drat  1    522.48  603.57  97.988
## + vs    1    496.53  629.52  99.335
## + am    1    405.15  720.90 103.672
## + carb  1    341.78  784.27 106.369
## + gear  1    259.75  866.30 109.552
## + qsec  1    197.39  928.66 111.776
## <none>              1126.05 115.943
## 
## Step:  AIC=73.22
## mpg ~ wt
## 
##        Df Sum of Sq     RSS     AIC
## + cyl   1     87.15  191.17  63.198
## + hp    1     83.27  195.05  63.840
## + qsec  1     82.86  195.46  63.908
## + vs    1     54.23  224.09  68.283
## + carb  1     44.60  233.72  69.628
## + disp  1     31.64  246.68  71.356
## <none>               278.32  73.217
## + drat  1      9.08  269.24  74.156
## + gear  1      1.14  277.19  75.086
## + am    1      0.00  278.32  75.217
## - wt    1    847.73 1126.05 115.943
## 
## Step:  AIC=63.2
## mpg ~ wt + cyl
## 
##        Df Sum of Sq    RSS    AIC
## + hp    1    14.551 176.62 62.665
## + carb  1    13.772 177.40 62.805
## <none>              191.17 63.198
## + qsec  1    10.567 180.60 63.378
## + gear  1     3.028 188.14 64.687
## + disp  1     2.680 188.49 64.746
## + vs    1     0.706 190.47 65.080
## + am    1     0.125 191.05 65.177
## + drat  1     0.001 191.17 65.198
## - cyl   1    87.150 278.32 73.217
## - wt    1   117.162 308.33 76.494
## 
## Step:  AIC=62.66
## mpg ~ wt + cyl + hp
## 
##        Df Sum of Sq    RSS    AIC
## <none>              176.62 62.665
## - hp    1    14.551 191.17 63.198
## + am    1     6.623 170.00 63.442
## + disp  1     6.176 170.44 63.526
## - cyl   1    18.427 195.05 63.840
## + carb  1     2.519 174.10 64.205
## + drat  1     2.245 174.38 64.255
## + qsec  1     1.401 175.22 64.410
## + gear  1     0.856 175.76 64.509
## + vs    1     0.060 176.56 64.654
## - wt    1   115.354 291.98 76.750
# 결과 요약
summary(stepwise_model)
## 
## Call:
## lm(formula = mpg ~ wt + cyl + hp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9290 -1.5598 -0.5311  1.1850  5.8986 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 38.75179    1.78686  21.687  < 2e-16 ***
## wt          -3.16697    0.74058  -4.276 0.000199 ***
## cyl         -0.94162    0.55092  -1.709 0.098480 .  
## hp          -0.01804    0.01188  -1.519 0.140015    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.512 on 28 degrees of freedom
## Multiple R-squared:  0.8431, Adjusted R-squared:  0.8263 
## F-statistic: 50.17 on 3 and 28 DF,  p-value: 2.184e-11
# 전진선택법과 후진제거법을 모두 수행
# step() ㅎ마수는 AIC 기준으로 변수를 추가하거나 제거하며 최적의 조합을 찾습니다.