## 실행시간 측정
time1 <- Sys.time()

library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ purrr::lift()   masks caret::lift()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.1 ──
## ✔ broom        1.0.5      ✔ rsample      1.2.0 
## ✔ dials        1.2.1      ✔ tune         1.1.2 
## ✔ infer        1.0.6      ✔ workflows    1.1.4 
## ✔ modeldata    1.3.0      ✔ workflowsets 1.0.1 
## ✔ parsnip      1.2.0      ✔ yardstick    1.3.0 
## ✔ recipes      1.0.10     
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard()        masks purrr::discard()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ recipes::fixed()         masks stringr::fixed()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ purrr::lift()            masks caret::lift()
## ✖ yardstick::precision()   masks caret::precision()
## ✖ yardstick::recall()      masks caret::recall()
## ✖ yardstick::sensitivity() masks caret::sensitivity()
## ✖ yardstick::spec()        masks readr::spec()
## ✖ yardstick::specificity() masks caret::specificity()
## ✖ recipes::step()          masks stats::step()
## • Learn how to get started at https://www.tidymodels.org/start/
library(skimr)
library(naniar)
## 
## Attaching package: 'naniar'
## 
## The following object is masked from 'package:skimr':
## 
##     n_complete
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggpubr)
library(scales)

# 읽기
## 파일 읽기

# Read the raw data set from CSV into the tibble DF.
# NOTE(review): the path is absolute and specific to one user's machine, so
# the script only runs where the file exists at exactly this location.
DF <- read_csv("C:/Users/top15/OneDrive - 동덕여자대학교/대학교/4학년 1학기/비데마/df2015na.csv")
## Rows: 300 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): gnd, bld
## dbl (11): age, ht, wt, wa, hdln, hdwd, ftln, ftwd, lft, smk, alc
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dim(DF)
## [1] 300  13
str(DF)
## spc_tbl_ [300 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ gnd : chr [1:300] "M" "M" "F" "F" ...
##  $ age : num [1:300] 42 23 32 30 NA 26 20 24 18 58 ...
##  $ ht  : num [1:300] 165 188 162 162 160 ...
##  $ wt  : num [1:300] 79 77.8 59.6 52 58.9 76.3 61.4 63 65.2 58.3 ...
##  $ wa  : num [1:300] 96.4 76.6 83 65.8 75 83.8 70.1 72.4 73.5 83.1 ...
##  $ hdln: num [1:300] 17.9 20.4 17.5 16.4 17.4 19 19.8 18.5 20.1 15.6 ...
##  $ hdwd: num [1:300] 8.6 8.3 7.7 6.6 7.7 8.6 7.8 7.9 8 7.6 ...
##  $ ftln: num [1:300] 24.5 28.9 23.3 23.7 24.1 25.7 26.5 25.7 25.8 22.1 ...
##  $ ftwd: num [1:300] 9.6 10.6 10.4 8.4 9.4 10.7 9.6 10.2 10.3 8.7 ...
##  $ bld : chr [1:300] "O" "O" "A" "B" ...
##  $ lft : num [1:300] 0 0 0 0 0 0 0 0 0 0 ...
##  $ smk : num [1:300] 0 1 0 0 0 0 0 0 0 0 ...
##  $ alc : num [1:300] 1 0 0 0 0 0 0 0 0 0 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   gnd = col_character(),
##   ..   age = col_double(),
##   ..   ht = col_double(),
##   ..   wt = col_double(),
##   ..   wa = col_double(),
##   ..   hdln = col_double(),
##   ..   hdwd = col_double(),
##   ..   ftln = col_double(),
##   ..   ftwd = col_double(),
##   ..   bld = col_character(),
##   ..   lft = col_double(),
##   ..   smk = col_double(),
##   ..   alc = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
head(DF)
## # A tibble: 6 × 13
##   gnd     age    ht    wt    wa  hdln  hdwd  ftln  ftwd bld     lft   smk   alc
##   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 M        42  165   79    96.4  17.9   8.6  24.5   9.6 O         0     0     1
## 2 M        23  188.  77.8  76.6  20.4   8.3  28.9  10.6 O         0     1     0
## 3 F        32  162.  59.6  83    17.5   7.7  23.3  10.4 A         0     0     0
## 4 F        30  162   52    65.8  16.4   6.6  23.7   8.4 B         0     0     0
## 5 F        NA  160   58.9  75    17.4   7.7  24.1   9.4 A         0     0     0
## 6 M        26  179   76.3  83.8  19     8.6  25.7  10.7 B         0     0     0
# 변수 조정
## 문자 변수(gnd, bld)를 factor화
## {0,1}로 코딩된 이산형 변수를 숫자로 처리하거나 factor해서 사용 가능

# Convert the character columns (gnd, bld) to factors and recode the 0/1
# indicator columns (lft, smk, alc) as factors labelled N/Y.
# The levels are pinned to c(0, 1) explicitly: without `levels`, factor()
# derives them from the values that happen to be present, so a column that
# contains only 0s (or only 1s) would fail on the two labels, and a column
# coded differently would be silently mislabelled. With explicit levels, any
# value other than 0/1 becomes NA instead.
DF <-
  DF %>%
  mutate(
    gnd = factor(gnd),
    bld = factor(bld),
    lft = factor(lft, levels = c(0, 1), labels = c('N', 'Y')),
    smk = factor(smk, levels = c(0, 1), labels = c('N', 'Y')),
    alc = factor(alc, levels = c(0, 1), labels = c('N', 'Y'))
  )
str(DF)
## tibble [300 × 13] (S3: tbl_df/tbl/data.frame)
##  $ gnd : Factor w/ 2 levels "F","M": 2 2 1 1 1 2 2 2 2 1 ...
##  $ age : num [1:300] 42 23 32 30 NA 26 20 24 18 58 ...
##  $ ht  : num [1:300] 165 188 162 162 160 ...
##  $ wt  : num [1:300] 79 77.8 59.6 52 58.9 76.3 61.4 63 65.2 58.3 ...
##  $ wa  : num [1:300] 96.4 76.6 83 65.8 75 83.8 70.1 72.4 73.5 83.1 ...
##  $ hdln: num [1:300] 17.9 20.4 17.5 16.4 17.4 19 19.8 18.5 20.1 15.6 ...
##  $ hdwd: num [1:300] 8.6 8.3 7.7 6.6 7.7 8.6 7.8 7.9 8 7.6 ...
##  $ ftln: num [1:300] 24.5 28.9 23.3 23.7 24.1 25.7 26.5 25.7 25.8 22.1 ...
##  $ ftwd: num [1:300] 9.6 10.6 10.4 8.4 9.4 10.7 9.6 10.2 10.3 8.7 ...
##  $ bld : Factor w/ 4 levels "A","AB","B","O": 4 4 1 3 1 3 2 1 4 3 ...
##  $ lft : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
##  $ smk : Factor w/ 2 levels "N","Y": 1 2 1 1 1 1 1 1 1 1 ...
##  $ alc : Factor w/ 2 levels "N","Y": 2 1 1 1 1 1 1 1 1 1 ...
# 결측
## skimr::skim(data, ...) : summary()에 결측정보를 추가, group_by와 연결
## 결측 현황: skim이나 naniar로 확인

DF %>% skim()
Data summary
Name Piped data
Number of rows 300
Number of columns 13
_______________________
Column type frequency:
factor 5
numeric 8
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
gnd 0 1.00 FALSE 2 M: 155, F: 145
bld 0 1.00 FALSE 4 B: 110, A: 84, O: 68, AB: 38
lft 5 0.98 FALSE 2 N: 283, Y: 12
smk 2 0.99 FALSE 2 N: 237, Y: 61
alc 6 0.98 FALSE 2 N: 256, Y: 38

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age 7 0.98 29.91 13.33 15.0 19.0 26.00 37.00 68.0 ▇▃▂▂▁
ht 0 1.00 165.64 9.03 144.3 158.0 165.85 172.83 190.5 ▂▇▇▆▁
wt 3 0.99 63.77 12.38 39.9 55.6 61.80 71.20 106.1 ▃▇▅▂▁
wa 5 0.98 78.23 9.72 59.0 71.0 77.20 84.15 111.6 ▅▇▆▂▁
hdln 0 1.00 17.57 1.09 14.9 16.8 17.50 18.30 20.8 ▂▆▇▃▁
hdwd 1 1.00 7.80 0.55 6.4 7.4 7.80 8.20 9.2 ▁▇▇▇▂
ftln 4 0.99 24.37 1.51 19.9 23.3 24.30 25.50 28.9 ▁▅▇▅▁
ftwd 1 1.00 9.68 0.70 8.0 9.2 9.70 10.15 12.2 ▂▇▇▂▁
DF %>% group_by(gnd) %>% skim()
Data summary
Name Piped data
Number of rows 300
Number of columns 13
_______________________
Column type frequency:
factor 4
numeric 8
________________________
Group variables gnd

Variable type: factor

skim_variable gnd n_missing complete_rate ordered n_unique top_counts
bld F 0 1.00 FALSE 4 B: 59, A: 41, O: 27, AB: 18
bld M 0 1.00 FALSE 4 B: 51, A: 43, O: 41, AB: 20
lft F 1 0.99 FALSE 2 N: 141, Y: 3
lft M 4 0.97 FALSE 2 N: 142, Y: 9
smk F 1 0.99 FALSE 2 N: 133, Y: 11
smk M 1 0.99 FALSE 2 N: 104, Y: 50
alc F 2 0.99 FALSE 2 N: 130, Y: 13
alc M 4 0.97 FALSE 2 N: 126, Y: 25

Variable type: numeric

skim_variable gnd n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age F 4 0.97 32.60 14.29 16.0 19.00 31.00 45.00 68.0 ▇▅▃▂▂
age M 3 0.98 27.41 11.88 15.0 19.00 23.50 33.00 66.0 ▇▃▂▁▁
ht F 0 1.00 158.27 5.76 144.3 154.20 158.00 162.10 172.0 ▂▅▇▆▂
ht M 0 1.00 172.53 5.33 156.2 169.45 172.70 175.80 190.5 ▁▅▇▃▁
wt F 2 0.99 57.24 10.11 39.9 49.90 56.30 61.20 98.2 ▅▇▂▁▁
wt M 1 0.99 69.84 11.15 45.6 61.65 68.55 75.88 106.1 ▂▇▆▂▁
wa F 3 0.98 76.00 10.02 59.0 68.32 75.05 81.25 111.6 ▆▇▅▁▁
wa M 2 0.99 80.29 8.98 63.1 73.90 80.00 86.50 109.0 ▅▇▇▂▁
hdln F 0 1.00 16.82 0.75 14.9 16.30 16.90 17.40 18.8 ▂▅▇▅▁
hdln M 0 1.00 18.28 0.87 15.9 17.80 18.20 18.80 20.8 ▁▅▇▃▁
hdwd F 0 1.00 7.41 0.41 6.4 7.10 7.40 7.70 8.4 ▂▇▇▅▁
hdwd M 1 0.99 8.17 0.40 7.2 7.90 8.20 8.50 9.2 ▂▇▇▇▂
ftln F 2 0.99 23.26 1.02 19.9 22.60 23.30 23.90 25.9 ▁▃▇▇▁
ftln M 2 0.99 25.42 1.08 22.8 24.60 25.50 26.10 28.9 ▂▇▇▂▁
ftwd F 0 1.00 9.26 0.47 8.0 8.90 9.30 9.60 10.4 ▁▅▇▅▂
ftwd M 1 0.99 10.08 0.65 8.0 9.80 10.00 10.50 12.2 ▁▂▇▃▁
### 완측한 관측값 비율=완전한 관측값/n
sum(complete.cases(DF))/nrow(DF)*100
## [1] 89.66667
### 변수별 결측비율, Missing=결측셀비율, Present=비결측셀비율
naniar::vis_miss(DF)

naniar::miss_var_summary(DF)
## # A tibble: 13 × 3
##    variable n_miss pct_miss
##    <chr>     <int>    <num>
##  1 age           7    2.33 
##  2 alc           6    2    
##  3 wa            5    1.67 
##  4 lft           5    1.67 
##  5 ftln          4    1.33 
##  6 wt            3    1    
##  7 smk           2    0.667
##  8 hdwd          1    0.333
##  9 ftwd          1    0.333
## 10 gnd           0    0    
## 11 ht            0    0    
## 12 hdln          0    0    
## 13 bld           0    0
#간단탐색
# Box plots of every numeric column, drawn once per grouping factor.
# The numeric columns are selected a single time with select(where(...))
# (select_if() is superseded), and the five previously copy-pasted calls are
# replaced by one loop. print() is required because lattice objects are not
# auto-printed inside a for loop. dplyr::select is namespaced so the call
# still resolves to dplyr if another attached package masks select().
num_vars <- DF %>% dplyr::select(where(is.numeric))
for (grp in c('gnd', 'bld', 'lft', 'smk', 'alc')) {
  print(
    featurePlot(x = num_vars, y = DF[[grp]],
                plot = 'box',
                scales = list(x = list(relation = 'free'),
                              y = list(relation = 'free')))
  )
}

## 연속 ~ 연속
### use='pairwise.complete.obs' 지정해야 상관계수가 NA가 안 됨
# Correlation matrix of the numeric columns, computed on pairwise-complete
# observations, then displayed rounded to 4 decimals.
# select(where(is.numeric)) replaces the superseded select_if(); the dplyr::
# prefix keeps the call unambiguous if another package masks select().
R <- DF %>%
  dplyr::select(where(is.numeric)) %>%
  cor(use = 'pairwise.complete.obs')
round(R, 4)
##          age      ht     wt     wa    hdln   hdwd    ftln    ftwd
## age   1.0000 -0.3257 0.0054 0.3421 -0.1058 0.1819 -0.1508 -0.0759
## ht   -0.3257  1.0000 0.5817 0.1617  0.8158 0.6029  0.8391  0.6021
## wt    0.0054  0.5817 1.0000 0.8310  0.5336 0.6578  0.6000  0.5966
## wa    0.3421  0.1617 0.8310 1.0000  0.2288 0.5044  0.2740  0.3764
## hdln -0.1058  0.8158 0.5336 0.2288  1.0000 0.6434  0.8635  0.6505
## hdwd  0.1819  0.6029 0.6578 0.5044  0.6434 1.0000  0.6231  0.6212
## ftln -0.1508  0.8391 0.6000 0.2740  0.8635 0.6231  1.0000  0.7289
## ftwd -0.0759  0.6021 0.5966 0.3764  0.6505 0.6212  0.7289  1.0000
sort(R['ht',], decreasing=TRUE)
##         ht       ftln       hdln       hdwd       ftwd         wt         wa 
##  1.0000000  0.8390865  0.8157977  0.6029119  0.6021319  0.5817457  0.1617029 
##        age 
## -0.3256508
corrplot::corrplot.mixed(R, upper = 'ellipse', order='FPC')

# Tile-style correlation plot, with coefficient labels, of the numeric columns.
# select(where(is.numeric)) replaces the superseded select_if(); the dplyr::
# prefix keeps the call unambiguous if another package masks select().
library(GGally)
DF %>%
  dplyr::select(where(is.numeric)) %>%
  ggcorr(geom = 'tile', label = TRUE)

### ggpairs: 산점도행렬과 상관계수
# Scatter-plot matrix for the six listed columns: faint blue points in the
# lower triangle and bar-style histograms on the diagonal.
ggpairs(
  DF,
  columns = c('ht', 'ftln', 'hdln', 'ftwd', 'hdwd', 'wt'),
  lower = list(continuous = wrap('points', alpha = 0.05, col = 'blue')),
  diag = list(continuous = 'barDiag')
)
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 4 rows containing missing values
## Warning: Removing 1 row that contained a missing value
## Removing 1 row that contained a missing value
## Warning: Removed 3 rows containing missing values
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 4 rows containing missing values
## Warning: Removed 5 rows containing missing values
## Removed 5 rows containing missing values
## Warning: Removed 7 rows containing missing values
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removing 1 row that contained a missing value
## Warning: Removing 1 row that contained a missing value
## Warning: Removed 3 rows containing missing values
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values
## Warning: Removed 4 rows containing missing values
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 4 rows containing missing values
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Height against weight: 2-D density contours with the individual points
# overlaid, coloured and shaped by gnd.
ggplot(DF, aes(x = wt, y = ht)) +
  geom_density2d() +
  geom_point(aes(col = gnd, shape = gnd))
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_density2d()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

# 분할/예측값 저장소 준비
## TR:TS를 0.75:0.25로 1회 분할
# One random 75% / 25% split of DF into training (TR) and test (TS) sets.
# The seed literal 0488 in the original is the number 488.
set.seed(488)
IS <- initial_split(DF, prop = 0.75)
TR <- training(IS)
TS <- testing(IS)

# Containers for predictions: start with just the observed response ht.
TROUT <- dplyr::select(TR, ht)
TSOUT <- dplyr::select(TS, ht)

# 전처리
# Preprocessing recipe with ht as the outcome and all other columns as
# predictors, built up one step at a time:
#   1. median imputation for the numeric predictors
#   2. mode imputation for the nominal predictors
#   3. dummy variables from the nominal predictors
RC <- recipe(ht ~ ., data = TR)
RC <- step_impute_median(RC, all_numeric_predictors())
RC <- step_impute_mode(RC, all_nominal_predictors())
RC <- step_dummy(RC, all_nominal_predictors())
RC
## 
## ── Recipe ──────────────────────────────────────────────────────────────────────
## 
## ── Inputs 
## Number of variables by role
## outcome:    1
## predictor: 12
## 
## ── Operations 
## • Median imputation for: all_numeric_predictors()
## • Mode imputation for: all_nominal_predictors()
## • Dummy variables from: all_nominal_predictors()
# 튜닝계획 지정
## 5-fold CV (반복 1회)
### method='boot', 'repeatedcv', ...; repeats = 1, 
### returnResamp = 'final'이므로 M$resample에는 최적모수에 대한 CV결과만 저장됨
# Resampling plan shared by the train() calls: 5-fold cross-validation.
trCtrl <- trainControl(
  method = 'cv',
  number = 5
)

# lm: 선형회귀모형
## 튜닝모수 없음. intercept는 튜닝 안 함
modelLookup('lm')
##   model parameter     label forReg forClass probModel
## 1    lm intercept intercept   TRUE    FALSE     FALSE
## 적합
# Fit the linear model through caret, preprocessing with recipe RC and
# resampling according to trCtrl. The seed (0488 in the original, i.e. 488)
# is reset immediately before fitting.
set.seed(488)
Mlm <- train(
  RC,
  data = TR,
  method = 'lm',
  trControl = trCtrl
)
Mlm
## Linear Regression 
## 
## 225 samples
##  12 predictor
## 
## Recipe steps: impute_median, impute_mode, dummy 
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 180, 179, 181, 180, 180 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   3.398719  0.8562773  2.703031
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
Mlm$results
##   intercept     RMSE  Rsquared      MAE    RMSESD RsquaredSD     MAESD
## 1      TRUE 3.398719 0.8562773 2.703031 0.3088128 0.03487682 0.2951742
### (X) plot(Mlm)
summary(Mlm)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6418 -2.3208  0.0593  1.7938  9.3590 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 108.55188    6.96967  15.575  < 2e-16 ***
## age          -0.04780    0.02252  -2.123 0.034950 *  
## wt            0.40611    0.05128   7.920 1.36e-13 ***
## wa           -0.39631    0.05547  -7.145 1.46e-11 ***
## hdln          1.95873    0.42356   4.624 6.56e-06 ***
## hdwd          0.30494    0.74670   0.408 0.683406    
## ftln          1.36924    0.34847   3.929 0.000116 ***
## ftwd         -1.01132    0.50519  -2.002 0.046587 *  
## gnd_M         5.42690    0.79414   6.834 8.81e-11 ***
## bld_AB        1.00394    0.74327   1.351 0.178243    
## bld_B         0.45131    0.56788   0.795 0.427677    
## bld_O         0.59163    0.65087   0.909 0.364403    
## lft_Y        -0.75251    1.08103  -0.696 0.487133    
## smk_Y         0.22907    0.60866   0.376 0.707035    
## alc_Y        -0.23412    0.74691  -0.313 0.754246    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.289 on 210 degrees of freedom
## Multiple R-squared:  0.872,  Adjusted R-squared:  0.8634 
## F-statistic: 102.2 on 14 and 210 DF,  p-value: < 2.2e-16
plot(varImp(Mlm))

Mlm$bestTune
##   intercept
## 1      TRUE
Mlm$finalModel #lm객체
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Coefficients:
## (Intercept)          age           wt           wa         hdln         hdwd  
##    108.5519      -0.0478       0.4061      -0.3963       1.9587       0.3049  
##        ftln         ftwd        gnd_M       bld_AB        bld_B        bld_O  
##      1.3692      -1.0113       5.4269       1.0039       0.4513       0.5916  
##       lft_Y        smk_Y        alc_Y  
##     -0.7525       0.2291      -0.2341
Mlm$resample #최적모수값에 대한 CV통계량
##       RMSE  Rsquared      MAE Resample
## 1 3.783464 0.8041820 3.055840    Fold1
## 2 3.116350 0.8902070 2.374710    Fold2
## 3 3.426790 0.8540874 2.839293    Fold3
## 4 3.064790 0.8863103 2.416799    Fold4
## 5 3.602199 0.8466001 2.828514    Fold5
### 예측값 저장
# Rebuild the prediction tables from scratch (observed ht only) and attach
# the lm predictions as column yhlm, for the training and test sets.
TROUT <- bind_cols(dplyr::select(TR, ht), yhlm = predict(Mlm, newdata = TR))
TSOUT <- bind_cols(dplyr::select(TS, ht), yhlm = predict(Mlm, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 2
##      ht  yhlm
##   <dbl> <dbl>
## 1  188.  186.
## 2  174.  173.
## 3  176.  176.
## 4  150.  153.
## 5  153.  155.
## 6  149.  156.
### 성능평가 일괄 계산 사용자 함수
# Compute the regression metrics for one set of predictions in a single call.
#   y  : observed values
#   yh : predicted values
# Returns a named numeric vector with elements rmse, mae and rsq, each
# obtained by calling the corresponding *_vec function as f(y, yh).
metreg <- function(y, yh) {
  metric_fns <- list(rmse = rmse_vec, mae = mae_vec, rsq = rsq_vec)
  vapply(metric_fns, function(metric_fn) metric_fn(y, yh), numeric(1))
}
metreg(TSOUT$ht, TSOUT$yhlm)
##      rmse       mae       rsq 
## 3.7332197 2.9327742 0.8424828
# Two-row metric summary for the lm model: row 1 is the training set, row 2
# the test set, with identifying columns model and TRTS appended.
METlm <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhlm),
    metreg(TSOUT$ht, TSOUT$yhlm)
  ),
  data.frame(model = c('lm', 'lm'), TRTS = c('TR', 'TS'))
)
METlm
## # A tibble: 2 × 5
##    rmse   mae   rsq model TRTS 
##   <dbl> <dbl> <dbl> <chr> <chr>
## 1  3.18  2.49 0.872 lm    TR   
## 2  3.73  2.93 0.842 lm    TS
# Diagnostic plots for the lm fit, arranged in a 2-column grid:
#   g1 / g3: observed ht against predicted yhlm (training / test)
#   g2 / g4: residual ht - yhlm against predicted yhlm (training / test)
g1 <- ggplot(TROUT, aes(x = yhlm, y = ht)) + geom_point()
g2 <- ggplot(TROUT, aes(x = yhlm, y = ht - yhlm)) + geom_point()
g3 <- ggplot(TSOUT, aes(x = yhlm, y = ht)) + geom_point()
g4 <- ggplot(TSOUT, aes(x = yhlm, y = ht - yhlm)) + geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# lmStepAIC: AIC 변수 선택
## 튜닝모수 없음. modelLookup에는 형식적인 'parameter'만 표시되며 튜닝하지 않음
## parsnip에 없음
modelLookup('lmStepAIC')
##       model parameter     label forReg forClass probModel
## 1 lmStepAIC parameter parameter   TRUE    FALSE     FALSE
## 적합
# Fit the AIC-based stepwise linear model through caret with the same recipe
# and resampling plan, requesting backward elimination. The seed (0488 in the
# original, i.e. 488) is reset immediately before fitting.
# Running this attaches MASS, whose select() masks dplyr::select (see the
# console output that follows).
set.seed(488)
Mstep <- train(
  RC,
  data = TR,
  method = 'lmStepAIC',
  direction = 'backward',
  trControl = trCtrl
)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## Start:  AIC=432.27
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_B + bld_O + lft_Y + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - bld_B   1      0.19 1682.3 430.29
## - alc_Y   1      1.06 1683.2 430.39
## - smk_Y   1      4.83 1687.0 430.79
## - bld_AB  1      5.07 1687.2 430.82
## - bld_O   1      8.22 1690.3 431.15
## - lft_Y   1      8.96 1691.1 431.23
## - hdwd    1     11.11 1693.2 431.46
## <none>                1682.1 432.27
## - age     1     32.95 1715.1 433.77
## - ftwd    1     43.46 1725.6 434.87
## - ftln    1    160.88 1843.0 446.72
## - hdln    1    169.68 1851.8 447.57
## - gnd_M   1    384.71 2066.8 467.35
## - wa      1    419.97 2102.1 470.39
## - wt      1    515.33 2197.4 478.38
## 
## Step:  AIC=430.29
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_O + lft_Y + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - alc_Y   1      0.99 1683.3 428.40
## - smk_Y   1      5.15 1687.5 428.84
## - bld_AB  1      5.41 1687.7 428.87
## - lft_Y   1      8.94 1691.3 429.25
## - bld_O   1      9.33 1691.7 429.29
## - hdwd    1     11.73 1694.0 429.55
## <none>                1682.3 430.29
## - age     1     32.76 1715.1 431.77
## - ftwd    1     43.60 1725.9 432.90
## - ftln    1    163.29 1845.6 444.97
## - hdln    1    169.51 1851.8 445.57
## - gnd_M   1    393.89 2076.2 466.16
## - wa      1    425.41 2107.7 468.87
## - wt      1    518.06 2200.4 476.62
## 
## Step:  AIC=428.4
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_O + lft_Y + smk_Y
## 
##          Df Sum of Sq    RSS    AIC
## - smk_Y   1      4.48 1687.8 426.88
## - bld_AB  1      5.22 1688.5 426.96
## - bld_O   1      8.80 1692.1 427.34
## - lft_Y   1      9.38 1692.7 427.40
## - hdwd    1     11.54 1694.8 427.63
## <none>                1683.3 428.40
## - age     1     33.28 1716.6 429.92
## - ftwd    1     43.09 1726.4 430.95
## - ftln    1    162.50 1845.8 442.99
## - hdln    1    169.78 1853.1 443.70
## - gnd_M   1    396.09 2079.4 464.44
## - wa      1    425.67 2109.0 466.98
## - wt      1    517.09 2200.4 474.62
## 
## Step:  AIC=426.88
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_O + lft_Y
## 
##          Df Sum of Sq    RSS    AIC
## - bld_AB  1      5.27 1693.1 425.44
## - lft_Y   1      8.56 1696.3 425.79
## - bld_O   1      8.86 1696.7 425.82
## - hdwd    1     14.26 1702.1 426.39
## <none>                1687.8 426.88
## - age     1     38.24 1726.0 428.91
## - ftwd    1     46.12 1733.9 429.73
## - ftln    1    159.42 1847.2 441.13
## - hdln    1    177.18 1865.0 442.85
## - gnd_M   1    400.48 2088.3 463.20
## - wa      1    427.11 2114.9 465.49
## - wt      1    513.58 2201.4 472.70
## 
## Step:  AIC=425.44
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_O + lft_Y
## 
##         Df Sum of Sq    RSS    AIC
## - bld_O  1      6.37 1699.4 424.12
## - lft_Y  1      8.99 1702.1 424.39
## - hdwd   1     14.58 1707.6 424.98
## <none>               1693.1 425.44
## - age    1     39.12 1732.2 427.55
## - ftwd   1     44.45 1737.5 428.11
## - ftln   1    155.59 1848.7 439.27
## - hdln   1    181.02 1874.1 441.73
## - gnd_M  1    409.92 2103.0 462.47
## - wa     1    427.21 2120.3 463.94
## - wt     1    510.38 2203.4 470.87
## 
## Step:  AIC=424.12
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     lft_Y
## 
##         Df Sum of Sq    RSS    AIC
## - lft_Y  1      9.82 1709.3 423.15
## - hdwd   1     11.76 1711.2 423.36
## <none>               1699.4 424.12
## - age    1     40.15 1739.6 426.32
## - ftwd   1     44.44 1743.9 426.76
## - ftln   1    153.66 1853.1 437.70
## - hdln   1    188.56 1888.0 441.06
## - wa     1    424.26 2123.7 462.23
## - gnd_M  1    446.44 2145.9 464.10
## - wt     1    507.49 2206.9 469.15
## 
## Step:  AIC=423.15
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M
## 
##         Df Sum of Sq    RSS    AIC
## - hdwd   1     10.34 1719.6 422.24
## <none>               1709.3 423.15
## - age    1     38.41 1747.7 425.16
## - ftwd   1     45.36 1754.6 425.87
## - ftln   1    151.96 1861.2 436.49
## - hdln   1    191.24 1900.5 440.24
## - wa     1    426.19 2135.4 461.23
## - gnd_M  1    445.01 2154.3 462.81
## - wt     1    514.27 2223.5 468.50
## 
## Step:  AIC=422.24
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M
## 
##         Df Sum of Sq    RSS    AIC
## <none>               1719.6 422.24
## - age    1     28.47 1748.1 423.20
## - ftwd   1     38.86 1758.5 424.26
## - ftln   1    145.66 1865.2 434.88
## - hdln   1    205.97 1925.6 440.60
## - wa     1    443.75 2163.3 461.56
## - wt     1    627.63 2347.2 476.25
## - gnd_M  1    674.32 2393.9 479.79
## Start:  AIC=446.54
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_B + bld_O + lft_Y + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - hdwd    1      0.27 1834.6 444.56
## - lft_Y   1      0.59 1834.9 444.60
## - smk_Y   1      0.85 1835.1 444.62
## - alc_Y   1      1.68 1836.0 444.70
## - bld_O   1      4.61 1838.9 444.99
## - bld_B   1      5.55 1839.8 445.08
## - bld_AB  1     14.88 1849.2 445.98
## <none>                1834.3 446.54
## - age     1     39.08 1873.4 448.31
## - ftwd    1     43.58 1877.9 448.74
## - ftln    1    137.13 1971.4 457.44
## - hdln    1    169.02 2003.3 460.32
## - wa      1    387.41 2221.7 478.84
## - gnd_M   1    454.98 2289.3 484.20
## - wt      1    545.47 2379.8 491.14
## 
## Step:  AIC=444.56
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B + bld_O + lft_Y + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - lft_Y   1      0.66 1835.2 442.63
## - smk_Y   1      1.00 1835.5 442.66
## - alc_Y   1      1.62 1836.2 442.72
## - bld_O   1      4.86 1839.4 443.04
## - bld_B   1      5.44 1840.0 443.09
## - bld_AB  1     14.70 1849.3 443.99
## <none>                1834.6 444.56
## - ftwd    1     47.46 1882.0 447.14
## - age     1     49.37 1883.9 447.32
## - ftln    1    143.98 1978.5 456.09
## - hdln    1    177.18 2011.7 459.07
## - wa      1    387.29 2221.8 476.85
## - wt      1    575.29 2409.8 491.39
## - gnd_M   1    617.78 2452.3 494.52
## 
## Step:  AIC=442.63
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B + bld_O + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - smk_Y   1      0.91 1836.1 440.72
## - alc_Y   1      1.59 1836.8 440.78
## - bld_O   1      5.13 1840.3 441.13
## - bld_B   1      5.31 1840.5 441.15
## - bld_AB  1     14.38 1849.6 442.03
## <none>                1835.2 442.63
## - ftwd    1     47.68 1882.9 445.22
## - age     1     49.31 1884.5 445.37
## - ftln    1    143.63 1978.8 454.12
## - hdln    1    177.45 2012.7 457.15
## - wa      1    387.38 2222.6 474.91
## - wt      1    577.43 2412.7 489.60
## - gnd_M   1    623.16 2458.4 492.96
## 
## Step:  AIC=440.72
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B + bld_O + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - alc_Y   1      1.19 1837.3 438.83
## - bld_O   1      4.71 1840.8 439.17
## - bld_B   1      5.13 1841.3 439.22
## - bld_AB  1     13.98 1850.1 440.07
## <none>                1836.1 440.72
## - ftwd    1     46.86 1883.0 443.23
## - age     1     48.55 1884.7 443.39
## - ftln    1    144.49 1980.6 452.28
## - hdln    1    176.65 2012.8 455.16
## - wa      1    387.47 2223.6 472.99
## - wt      1    578.50 2414.6 487.74
## - gnd_M   1    645.97 2482.1 492.68
## 
## Step:  AIC=438.83
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B + bld_O
## 
##          Df Sum of Sq    RSS    AIC
## - bld_O   1      4.86 1842.2 437.31
## - bld_B   1      5.55 1842.9 437.37
## - bld_AB  1     14.74 1852.0 438.26
## <none>                1837.3 438.83
## - ftwd    1     48.45 1885.8 441.49
## - age     1     49.81 1887.1 441.62
## - ftln    1    144.62 1981.9 450.40
## - hdln    1    176.46 2013.8 453.25
## - wa      1    388.30 2225.6 471.15
## - wt      1    582.15 2419.5 486.10
## - gnd_M   1    648.27 2485.6 490.93
## 
## Step:  AIC=437.31
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B
## 
##          Df Sum of Sq    RSS    AIC
## - bld_B   1      2.21 1844.4 435.52
## - bld_AB  1     10.50 1852.7 436.32
## <none>                1842.2 437.31
## - ftwd    1     48.54 1890.7 439.96
## - age     1     53.31 1895.5 440.41
## - ftln    1    140.97 1983.1 448.50
## - hdln    1    183.86 2026.0 452.33
## - wa      1    385.78 2228.0 469.34
## - wt      1    578.31 2420.5 484.18
## - gnd_M   1    651.89 2494.1 489.54
## 
## Step:  AIC=435.52
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB
## 
##          Df Sum of Sq    RSS    AIC
## - bld_AB  1      8.44 1852.8 434.34
## <none>                1844.4 435.52
## - ftwd    1     48.22 1892.6 438.14
## - age     1     51.41 1895.8 438.44
## - ftln    1    140.72 1985.1 446.68
## - hdln    1    186.44 2030.8 450.76
## - wa      1    402.81 2247.2 468.88
## - wt      1    610.70 2455.1 484.72
## - gnd_M   1    666.66 2511.1 488.75
## 
## Step:  AIC=434.34
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M
## 
##         Df Sum of Sq    RSS    AIC
## <none>               1852.8 434.34
## - ftwd   1     44.05 1896.9 436.54
## - age    1     50.96 1903.8 437.19
## - ftln   1    138.17 1991.0 445.21
## - hdln   1    187.01 2039.8 449.55
## - wa     1    406.33 2259.2 467.83
## - wt     1    612.69 2465.5 483.48
## - gnd_M  1    665.42 2518.2 487.26
## Start:  AIC=443.91
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_B + bld_O + lft_Y + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - hdwd    1      0.16 1781.8 441.93
## - alc_Y   1      1.33 1783.0 442.05
## - lft_Y   1     12.33 1794.0 443.16
## - bld_B   1     13.42 1795.1 443.27
## - smk_Y   1     15.76 1797.5 443.51
## <none>                1781.7 443.91
## - bld_O   1     23.48 1805.2 444.28
## - ftwd    1     24.66 1806.3 444.40
## - bld_AB  1     28.18 1809.9 444.75
## - age     1     71.46 1853.1 449.03
## - ftln    1    146.51 1928.2 456.22
## - hdln    1    199.30 1981.0 461.11
## - wa      1    372.60 2154.3 476.29
## - gnd_M   1    380.93 2162.6 476.98
## - wt      1    466.39 2248.1 484.00
## 
## Step:  AIC=441.93
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B + bld_O + lft_Y + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - alc_Y   1      1.27 1783.1 440.06
## - lft_Y   1     12.17 1794.0 441.16
## - bld_B   1     13.89 1795.7 441.34
## - smk_Y   1     15.64 1797.5 441.51
## <none>                1781.8 441.93
## - bld_O   1     23.45 1805.3 442.30
## - ftwd    1     25.09 1806.9 442.46
## - bld_AB  1     28.51 1810.4 442.80
## - age     1     80.41 1862.2 447.92
## - ftln    1    146.40 1928.2 454.22
## - hdln    1    209.57 1991.4 460.06
## - wa      1    372.45 2154.3 474.29
## - gnd_M   1    470.71 2252.6 482.36
## - wt      1    485.59 2267.4 483.55
## 
## Step:  AIC=440.06
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B + bld_O + lft_Y + smk_Y
## 
##          Df Sum of Sq    RSS    AIC
## - lft_Y   1     12.07 1795.2 439.28
## - smk_Y   1     14.46 1797.6 439.52
## - bld_B   1     15.01 1798.1 439.58
## <none>                1783.1 440.06
## - ftwd    1     25.10 1808.2 440.59
## - bld_O   1     25.53 1808.7 440.63
## - bld_AB  1     28.81 1811.9 440.96
## - age     1     80.06 1863.2 446.01
## - ftln    1    147.21 1930.3 452.42
## - hdln    1    210.01 1993.1 458.21
## - wa      1    377.46 2160.6 472.81
## - gnd_M   1    471.48 2254.6 480.52
## - wt      1    492.11 2275.2 482.17
## 
## Step:  AIC=439.28
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B + bld_O + smk_Y
## 
##          Df Sum of Sq    RSS    AIC
## - bld_B   1     12.55 1807.7 438.54
## - smk_Y   1     12.94 1808.1 438.58
## <none>                1795.2 439.28
## - bld_O   1     25.28 1820.5 439.81
## - ftwd    1     26.37 1821.6 439.92
## - bld_AB  1     27.17 1822.4 440.00
## - age     1     83.52 1878.7 445.51
## - ftln    1    145.40 1940.6 451.38
## - hdln    1    219.02 2014.2 458.12
## - wa      1    371.77 2167.0 471.35
## - gnd_M   1    465.10 2260.3 478.98
## - wt      1    485.44 2280.6 480.60
## 
## Step:  AIC=438.54
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_O + smk_Y
## 
##          Df Sum of Sq    RSS    AIC
## - smk_Y   1     10.76 1818.5 437.62
## - bld_O   1     14.26 1822.0 437.96
## - bld_AB  1     16.84 1824.6 438.22
## <none>                1807.7 438.54
## - ftwd    1     28.30 1836.0 439.35
## - age     1     78.21 1886.0 444.21
## - ftln    1    147.83 1955.6 450.77
## - hdln    1    222.54 2030.3 457.56
## - wa      1    384.62 2192.4 471.46
## - gnd_M   1    457.83 2265.6 477.40
## - wt      1    510.32 2318.1 481.55
## 
## Step:  AIC=437.62
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_O
## 
##          Df Sum of Sq    RSS    AIC
## - bld_O   1     15.21 1833.7 437.12
## - bld_AB  1     16.11 1834.6 437.21
## <none>                1818.5 437.62
## - ftwd    1     27.22 1845.7 438.31
## - age     1     74.33 1892.8 442.87
## - ftln    1    152.42 1970.9 450.18
## - hdln    1    217.90 2036.4 456.10
## - wa      1    388.46 2206.9 470.66
## - gnd_M   1    497.96 2316.5 479.42
## - wt      1    519.44 2337.9 481.09
## 
## Step:  AIC=437.12
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB
## 
##          Df Sum of Sq    RSS    AIC
## - bld_AB  1     11.33 1845.0 436.24
## <none>                1833.7 437.12
## - ftwd    1     31.79 1865.5 438.23
## - age     1     78.38 1912.1 442.70
## - ftln    1    149.34 1983.0 449.29
## - hdln    1    223.76 2057.5 455.96
## - wa      1    384.77 2218.5 469.60
## - wt      1    513.36 2347.1 479.80
## - gnd_M   1    538.83 2372.5 481.75
## 
## Step:  AIC=436.24
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M
## 
##         Df Sum of Sq    RSS    AIC
## <none>               1845.0 436.24
## - ftwd   1     31.09 1876.1 437.26
## - age    1     77.10 1922.1 441.65
## - ftln   1    146.56 1991.6 448.07
## - hdln   1    226.26 2071.3 455.18
## - wa     1    389.40 2234.4 468.90
## - wt     1    520.11 2365.1 479.19
## - gnd_M  1    531.27 2376.3 480.04
## Start:  AIC=452.41
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_B + bld_O + lft_Y + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - hdwd    1      0.41 1881.6 450.44
## - bld_O   1      1.31 1882.5 450.53
## - bld_B   1      1.94 1883.1 450.59
## - lft_Y   1      2.49 1883.7 450.64
## - smk_Y   1      3.33 1884.5 450.72
## - alc_Y   1      3.85 1885.0 450.77
## - bld_AB  1      9.22 1890.4 451.29
## <none>                1881.2 452.41
## - ftwd    1     26.06 1907.2 452.88
## - age     1     36.75 1917.9 453.89
## - ftln    1     62.24 1943.4 456.26
## - hdln    1    221.40 2102.6 470.43
## - wa      1    451.38 2332.6 489.12
## - gnd_M   1    473.75 2354.9 490.84
## - wt      1    490.80 2372.0 492.13
## 
## Step:  AIC=450.44
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B + bld_O + lft_Y + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - bld_O   1      1.20 1882.8 448.56
## - bld_B   1      2.06 1883.7 448.64
## - lft_Y   1      2.41 1884.0 448.67
## - smk_Y   1      3.10 1884.7 448.74
## - alc_Y   1      3.74 1885.3 448.80
## - bld_AB  1      9.14 1890.7 449.32
## <none>                1881.6 450.44
## - ftwd    1     25.65 1907.2 450.88
## - age     1     38.82 1920.4 452.12
## - ftln    1     61.91 1943.5 454.27
## - hdln    1    230.81 2112.4 469.27
## - wa      1    451.73 2333.3 487.18
## - wt      1    518.37 2399.9 492.24
## - gnd_M   1    621.28 2502.9 499.80
## 
## Step:  AIC=448.56
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B + lft_Y + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - bld_B   1      1.08 1883.9 446.66
## - lft_Y   1      2.39 1885.2 446.79
## - smk_Y   1      3.30 1886.1 446.87
## - alc_Y   1      4.01 1886.8 446.94
## - bld_AB  1      7.94 1890.7 447.32
## <none>                1882.8 448.56
## - ftwd    1     25.41 1908.2 448.97
## - age     1     39.32 1922.1 450.28
## - ftln    1     61.50 1944.3 452.34
## - hdln    1    233.88 2116.7 467.64
## - wa      1    452.25 2335.0 485.31
## - wt      1    517.92 2400.7 490.30
## - gnd_M   1    621.64 2504.4 497.91
## 
## Step:  AIC=446.66
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     lft_Y + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - lft_Y   1      2.10 1886.0 444.86
## - smk_Y   1      3.26 1887.1 444.97
## - alc_Y   1      4.44 1888.3 445.09
## - bld_AB  1      6.92 1890.8 445.32
## <none>                1883.9 446.66
## - ftwd    1     25.61 1909.5 447.09
## - age     1     38.25 1922.1 448.28
## - ftln    1     61.70 1945.6 450.46
## - hdln    1    234.83 2118.7 465.81
## - wa      1    463.28 2347.2 484.24
## - wt      1    532.41 2416.3 489.46
## - gnd_M   1    631.74 2515.6 496.72
## 
## Step:  AIC=444.86
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - smk_Y   1      3.30 1889.3 443.18
## - alc_Y   1      4.53 1890.5 443.29
## - bld_AB  1      6.38 1892.3 443.47
## <none>                1886.0 444.86
## - ftwd    1     26.08 1912.0 445.33
## - age     1     38.14 1924.1 446.47
## - ftln    1     61.77 1947.7 448.66
## - hdln    1    233.88 2119.8 463.91
## - wa      1    469.47 2355.4 482.87
## - wt      1    540.46 2426.4 488.22
## - gnd_M   1    629.81 2515.8 494.73
## 
## Step:  AIC=443.18
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - alc_Y   1      3.50 1892.8 441.51
## - bld_AB  1      6.58 1895.8 441.80
## <none>                1889.3 443.18
## - ftwd    1     25.82 1915.1 443.62
## - age     1     36.20 1925.5 444.59
## - ftln    1     63.32 1952.6 447.11
## - hdln    1    231.85 2121.1 462.01
## - wa      1    470.01 2359.3 481.17
## - wt      1    545.80 2435.1 486.86
## - gnd_M   1    694.47 2583.7 497.53
## 
## Step:  AIC=441.51
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB
## 
##          Df Sum of Sq    RSS    AIC
## - bld_AB  1      6.77 1899.5 440.15
## <none>                1892.8 441.51
## - ftwd    1     26.00 1918.8 441.97
## - age     1     36.35 1929.1 442.94
## - ftln    1     64.34 1957.1 445.53
## - hdln    1    232.98 2125.8 460.41
## - wa      1    470.53 2363.3 479.47
## - wt      1    544.29 2437.1 485.01
## - gnd_M   1    691.18 2583.9 495.54
## 
## Step:  AIC=440.15
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M
## 
##         Df Sum of Sq    RSS    AIC
## <none>               1899.5 440.15
## - ftwd   1     22.97 1922.5 440.32
## - age    1     40.45 1940.0 441.95
## - ftln   1     62.37 1961.9 443.97
## - hdln   1    232.93 2132.5 458.97
## - wa     1    473.66 2373.2 478.23
## - wt     1    547.42 2447.0 483.74
## - gnd_M  1    693.02 2592.6 494.14
## Start:  AIC=440.89
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_B + bld_O + lft_Y + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - alc_Y   1      0.05 1764.7 438.90
## - smk_Y   1      0.74 1765.3 438.96
## - lft_Y   1      1.58 1766.2 439.05
## - hdwd    1      6.27 1770.9 439.53
## - bld_O   1      6.82 1771.4 439.58
## - bld_B   1     11.85 1776.5 440.09
## - age     1     12.59 1777.2 440.17
## <none>                1764.6 440.89
## - bld_AB  1     25.96 1790.6 441.52
## - ftwd    1     27.82 1792.4 441.70
## - hdln    1    152.88 1917.5 453.85
## - ftln    1    168.07 1932.7 455.27
## - gnd_M   1    255.93 2020.5 463.27
## - wa      1    572.97 2337.6 489.50
## - wt      1    674.42 2439.0 497.15
## 
## Step:  AIC=438.9
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_B + bld_O + lft_Y + smk_Y
## 
##          Df Sum of Sq    RSS    AIC
## - smk_Y   1      0.81 1765.5 436.98
## - lft_Y   1      1.58 1766.2 437.06
## - hdwd    1      6.34 1771.0 437.54
## - bld_O   1      6.77 1771.4 437.58
## - bld_B   1     11.89 1776.5 438.10
## - age     1     12.74 1777.4 438.19
## <none>                1764.7 438.90
## - bld_AB  1     25.91 1790.6 439.52
## - ftwd    1     28.08 1792.7 439.74
## - hdln    1    153.09 1917.7 451.87
## - ftln    1    168.09 1932.7 453.27
## - gnd_M   1    258.34 2023.0 461.49
## - wa      1    582.08 2346.7 488.21
## - wt      1    684.05 2448.7 495.86
## 
## Step:  AIC=436.98
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_B + bld_O + lft_Y
## 
##          Df Sum of Sq    RSS    AIC
## - lft_Y   1      1.54 1767.0 435.13
## - hdwd    1      5.84 1771.3 435.57
## - bld_O   1      6.82 1772.3 435.67
## - age     1     12.04 1777.5 436.20
## - bld_B   1     12.14 1777.6 436.21
## <none>                1765.5 436.98
## - bld_AB  1     26.23 1791.7 437.63
## - ftwd    1     27.56 1793.0 437.77
## - hdln    1    152.38 1917.8 449.88
## - ftln    1    168.22 1933.7 451.36
## - gnd_M   1    294.34 2059.8 462.73
## - wa      1    581.32 2346.8 486.21
## - wt      1    684.15 2449.6 493.93
## 
## Step:  AIC=435.13
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_B + bld_O
## 
##          Df Sum of Sq    RSS    AIC
## - hdwd    1      5.69 1772.7 433.71
## - bld_O   1      6.95 1774.0 433.84
## - bld_B   1     11.75 1778.8 434.33
## - age     1     11.92 1778.9 434.34
## <none>                1767.0 435.13
## - bld_AB  1     25.60 1792.6 435.72
## - ftwd    1     29.10 1796.1 436.07
## - hdln    1    156.06 1923.0 448.37
## - ftln    1    166.94 1933.9 449.38
## - gnd_M   1    292.84 2059.8 460.74
## - wa      1    587.49 2354.5 484.80
## - wt      1    696.84 2463.8 492.97
## 
## Step:  AIC=433.71
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B + bld_O
## 
##          Df Sum of Sq    RSS    AIC
## - bld_O   1      6.23 1778.9 432.35
## - age     1      7.95 1780.6 432.52
## - bld_B   1     12.66 1785.3 432.99
## <none>                1772.7 433.71
## - ftwd    1     24.83 1797.5 434.22
## - bld_AB  1     26.73 1799.4 434.41
## - ftln    1    162.78 1935.5 447.53
## - hdln    1    171.04 1943.7 448.29
## - gnd_M   1    415.54 2188.2 469.62
## - wa      1    587.29 2360.0 483.22
## - wt      1    735.44 2508.1 494.18
## 
## Step:  AIC=432.35
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B
## 
##          Df Sum of Sq    RSS    AIC
## - bld_B   1      7.04 1786.0 431.06
## - age     1      9.02 1787.9 431.26
## <none>                1778.9 432.35
## - bld_AB  1     20.71 1799.6 432.43
## - ftwd    1     26.13 1805.1 432.97
## - ftln    1    161.49 1940.4 445.99
## - hdln    1    180.38 1959.3 447.73
## - gnd_M   1    414.63 2193.6 468.06
## - wa      1    585.80 2364.7 481.58
## - wt      1    733.25 2512.2 492.47
## 
## Step:  AIC=431.06
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB
## 
##          Df Sum of Sq    RSS    AIC
## - age     1      7.22 1793.2 429.78
## - bld_AB  1     15.13 1801.1 430.57
## <none>                1786.0 431.06
## - ftwd    1     27.02 1813.0 431.76
## - ftln    1    164.50 1950.5 444.92
## - hdln    1    183.98 1969.9 446.70
## - gnd_M   1    411.01 2197.0 466.34
## - wa      1    611.72 2397.7 482.07
## - wt      1    775.30 2561.3 493.95
## 
## Step:  AIC=429.78
## .outcome ~ wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB
## 
##          Df Sum of Sq    RSS    AIC
## - bld_AB  1     13.50 1806.7 429.13
## <none>                1793.2 429.78
## - ftwd    1     26.18 1819.4 430.39
## - ftln    1    161.55 1954.7 443.31
## - hdln    1    179.97 1973.2 445.00
## - gnd_M   1    410.05 2203.2 464.85
## - wa      1   1071.54 2864.7 512.11
## - wt      1   1115.60 2908.8 514.86
## 
## Step:  AIC=429.13
## .outcome ~ wt + wa + hdln + ftln + ftwd + gnd_M
## 
##         Df Sum of Sq    RSS    AIC
## <none>               1806.7 429.13
## - ftwd   1     24.18 1830.9 429.53
## - ftln   1    154.52 1961.2 441.90
## - hdln   1    187.79 1994.5 444.93
## - gnd_M  1    416.19 2222.9 464.45
## - wa     1   1084.97 2891.7 511.79
## - wt     1   1121.63 2928.3 514.06
## Start:  AIC=550.17
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_B + bld_O + lft_Y + smk_Y + alc_Y
## 
##          Df Sum of Sq    RSS    AIC
## - alc_Y   1      1.06 2272.1 548.28
## - smk_Y   1      1.53 2272.5 548.32
## - hdwd    1      1.80 2272.8 548.35
## - lft_Y   1      5.24 2276.2 548.69
## - bld_B   1      6.83 2277.8 548.85
## - bld_O   1      8.94 2279.9 549.06
## - bld_AB  1     19.73 2290.7 550.12
## <none>                2271.0 550.17
## - ftwd    1     43.34 2314.3 552.43
## - age     1     48.73 2319.7 552.95
## - ftln    1    166.97 2438.0 564.13
## - hdln    1    231.27 2502.3 569.99
## - gnd_M   1    505.02 2776.0 593.35
## - wa      1    552.07 2823.1 597.13
## - wt      1    678.38 2949.4 606.98
## 
## Step:  AIC=548.28
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_B + bld_O + lft_Y + smk_Y
## 
##          Df Sum of Sq    RSS    AIC
## - smk_Y   1      1.15 2273.2 546.39
## - hdwd    1      1.74 2273.8 546.45
## - lft_Y   1      5.17 2277.2 546.79
## - bld_B   1      7.50 2279.6 547.02
## - bld_O   1      9.58 2281.6 547.22
## <none>                2272.1 548.28
## - bld_AB  1     20.35 2292.4 548.28
## - ftwd    1     43.47 2315.5 550.54
## - age     1     48.48 2320.5 551.03
## - ftln    1    167.85 2439.9 562.31
## - hdln    1    231.25 2503.3 568.09
## - gnd_M   1    503.96 2776.0 591.35
## - wa      1    555.15 2827.2 595.46
## - wt      1    683.62 2955.7 605.46
## 
## Step:  AIC=546.39
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M + 
##     bld_AB + bld_B + bld_O + lft_Y
## 
##          Df Sum of Sq    RSS    AIC
## - hdwd    1      1.41 2274.6 544.53
## - lft_Y   1      5.08 2278.3 544.89
## - bld_B   1      7.24 2280.4 545.11
## - bld_O   1      9.35 2282.6 545.31
## - bld_AB  1     20.10 2293.3 546.37
## <none>                2273.2 546.39
## - ftwd    1     42.67 2315.9 548.58
## - age     1     47.33 2320.6 549.03
## - ftln    1    169.02 2442.2 560.53
## - hdln    1    230.35 2503.6 566.11
## - gnd_M   1    547.81 2821.0 592.97
## - wa      1    555.20 2828.4 593.56
## - wt      1    688.83 2962.0 603.94
## 
## Step:  AIC=544.53
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B + bld_O + lft_Y
## 
##          Df Sum of Sq    RSS    AIC
## - lft_Y   1      4.78 2279.4 543.00
## - bld_B   1      7.86 2282.5 543.31
## - bld_O   1      8.92 2283.5 543.41
## <none>                2274.6 544.53
## - bld_AB  1     20.64 2295.3 544.56
## - ftwd    1     41.27 2315.9 546.58
## - age     1     47.98 2322.6 547.23
## - ftln    1    167.63 2442.2 558.53
## - hdln    1    245.88 2520.5 565.62
## - wa      1    556.60 2831.2 591.78
## - wt      1    737.25 3011.9 605.70
## - gnd_M   1    737.62 3012.2 605.73
## 
## Step:  AIC=543
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_B + bld_O
## 
##          Df Sum of Sq    RSS    AIC
## - bld_B   1      7.18 2286.6 541.71
## - bld_O   1      9.26 2288.7 541.92
## - bld_AB  1     19.76 2299.2 542.94
## <none>                2279.4 543.00
## - ftwd    1     42.74 2322.1 545.18
## - age     1     47.92 2327.3 545.68
## - ftln    1    166.48 2445.9 556.86
## - hdln    1    248.40 2527.8 564.28
## - wa      1    559.32 2838.7 590.38
## - gnd_M   1    732.95 3012.3 603.73
## - wt      1    743.14 3022.5 604.49
## 
## Step:  AIC=541.71
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB + 
##     bld_O
## 
##          Df Sum of Sq    RSS    AIC
## - bld_O   1      3.93 2290.5 540.10
## - bld_AB  1     13.19 2299.8 541.00
## <none>                2286.6 541.71
## - ftwd    1     43.43 2330.0 543.94
## - age     1     44.86 2331.4 544.08
## - ftln    1    167.44 2454.0 555.61
## - hdln    1    253.62 2540.2 563.38
## - wa      1    576.90 2863.5 590.33
## - gnd_M   1    727.59 3014.2 601.87
## - wt      1    769.28 3055.9 604.96
## 
## Step:  AIC=540.1
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB
## 
##          Df Sum of Sq    RSS    AIC
## - bld_AB  1     10.83 2301.3 539.16
## <none>                2290.5 540.10
## - ftwd    1     44.09 2334.6 542.39
## - age     1     48.28 2338.8 542.79
## - ftln    1    165.56 2456.1 553.80
## - hdln    1    258.23 2548.7 562.13
## - wa      1    573.34 2863.8 588.36
## - gnd_M   1    753.14 3043.6 602.06
## - wt      1    765.51 3056.0 602.97
## 
## Step:  AIC=539.16
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M
## 
##         Df Sum of Sq    RSS    AIC
## <none>               2301.3 539.16
## - ftwd   1     40.90 2342.2 541.12
## - age    1     48.22 2349.6 541.82
## - ftln   1    160.89 2462.2 552.36
## - hdln   1    261.44 2562.8 561.37
## - wa     1    579.73 2881.1 587.71
## - gnd_M  1    755.26 3056.6 601.01
## - wt     1    770.14 3071.5 602.11
# Print the caret train summary of the stepwise linear regression fit.
Mstep
## Linear Regression with Stepwise Selection 
## 
## 225 samples
##  12 predictor
## 
## Recipe steps: impute_median, impute_mode, dummy 
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 180, 179, 181, 180, 180 
## Resampling results:
## 
##   RMSE      Rsquared   MAE    
##   3.353692  0.8602538  2.64515
# Cross-validated metrics (RMSE, Rsquared, MAE) with their fold-to-fold SDs.
Mstep$results
##   parameter     RMSE  Rsquared     MAE    RMSESD RsquaredSD     MAESD
## 1      none 3.353692 0.8602538 2.64515 0.2791423 0.03008863 0.2413835
### (X) plot(Mstep) -- marked as not applicable (presumably because this model has no tuning parameter to plot)
# Coefficient table and fit statistics of the final selected lm model.
summary(Mstep)
## 
## Call:
## lm(formula = .outcome ~ age + wt + wa + hdln + ftln + ftwd + 
##     gnd_M, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.0560 -2.4142  0.0227  1.9226  9.4697 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 110.05529    6.09782  18.048  < 2e-16 ***
## age          -0.04331    0.02031  -2.132 0.034113 *  
## wt            0.41642    0.04887   8.522 2.66e-15 ***
## wa           -0.40274    0.05447  -7.394 3.07e-12 ***
## hdln          2.03558    0.40998   4.965 1.39e-06 ***
## ftln          1.33180    0.34193   3.895 0.000131 ***
## ftwd         -0.95779    0.48772  -1.964 0.050828 .  
## gnd_M         5.54390    0.65694   8.439 4.54e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.257 on 217 degrees of freedom
## Multiple R-squared:  0.8703, Adjusted R-squared:  0.8661 
## F-statistic: 207.9 on 7 and 217 DF,  p-value: < 2.2e-16
ggplot(varImp(Mstep)) # stepAIC variable importance: use for reference only

Mstep$bestTune # no tuning parameter
##   parameter
## 1      none
Mstep$finalModel # lm object
## 
## Call:
## lm(formula = .outcome ~ age + wt + wa + hdln + ftln + ftwd + 
##     gnd_M, data = dat)
## 
## Coefficients:
## (Intercept)          age           wt           wa         hdln         ftln  
##   110.05529     -0.04331      0.41642     -0.40274      2.03558      1.33180  
##        ftwd        gnd_M  
##    -0.95779      5.54390
# Per-fold resampling metrics for the stepwise model.
Mstep$resample
##       RMSE  Rsquared      MAE Resample
## 1 3.654728 0.8169766 2.876370    Fold1
## 2 3.138540 0.8891631 2.410983    Fold2
## 3 3.237786 0.8687751 2.593860    Fold3
## 4 3.085081 0.8834617 2.426642    Fold4
## 5 3.652327 0.8428924 2.917897    Fold5
# Append the stepwise model's predictions on TR and TS to the output tables,
# then preview the first rows of TSOUT.
TROUT <- mutate(TROUT, yhstep = predict(Mstep, newdata = TR))
TSOUT <- mutate(TSOUT, yhstep = predict(Mstep, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 3
##      ht  yhlm yhstep
##   <dbl> <dbl>  <dbl>
## 1  188.  186.   186.
## 2  174.  173.   174.
## 3  176.  176.   176.
## 4  150.  153.   153.
## 5  153.  155.   155.
## 6  149.  156.   156.
# 2 x 2 diagnostic grid for the stepwise model.
# Left column: observed ht against prediction; right column: residual against
# prediction. Top row uses TROUT, bottom row uses TSOUT.
g1 <- ggplot(TROUT, aes(x = yhstep, y = ht)) +
  geom_point()
g2 <- ggplot(TROUT, aes(x = yhstep, y = ht - yhstep)) +
  geom_point()
g3 <- ggplot(TSOUT, aes(x = yhstep, y = ht)) +
  geom_point()
g4 <- ggplot(TSOUT, aes(x = yhstep, y = ht - yhstep)) +
  geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# Metric table for the stepwise model: one row for TR, one for TS.
# metreg() is defined elsewhere in this file; per the printed result it yields
# rmse / mae / rsq columns.
METstep <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhstep),
    metreg(TSOUT$ht, TSOUT$yhstep)
  ),
  data.frame(model = c('lmStepAIC', 'lmStepAIC'), TRTS = c('TR', 'TS'))
)
METstep
## # A tibble: 2 × 5
##    rmse   mae   rsq model     TRTS 
##   <dbl> <dbl> <dbl> <chr>     <chr>
## 1  3.20  2.51 0.870 lmStepAIC TR   
## 2  3.68  2.87 0.847 lmStepAIC TS
# glmnet, elasticnet, lasso, ridge
## enet cannot be used for classification; use glmnet instead
modelLookup('enet')
##   model parameter                     label forReg forClass probModel
## 1  enet  fraction Fraction of Full Solution   TRUE    FALSE     FALSE
## 2  enet    lambda              Weight Decay   TRUE    FALSE     FALSE
modelLookup('glmnet') # recommended
##    model parameter                    label forReg forClass probModel
## 1 glmnet     alpha        Mixing Percentage   TRUE     TRUE      TRUE
## 2 glmnet    lambda Regularization Parameter   TRUE     TRUE      TRUE
## Fit
# Tune glmnet over a 5 x 11 grid of (alpha, lambda) with 5-fold CV.
# The literal 0488 is parsed by R as the number 488.
# NOTE(review): in the resampling table printed for this fit, every alpha = 0
# row has identical metrics regardless of lambda, so this lambda range
# presumably has no effect on the ridge fits -- consider a wider (log-spaced)
# lambda grid if ridge is meant to be compared; TODO confirm.
set.seed(0488)
glmnetGrid <- expand.grid(
  alpha = seq(0, 1, by = 0.25),
  lambda = seq(0.0, 0.1, by = 0.01)
)
trCtrl <- trainControl(method = 'cv', number = 5)
Mglmnet <- train(
  RC,
  data = TR,
  method = 'glmnet',
  trControl = trCtrl,
  tuneGrid = glmnetGrid
)
## Loading required namespace: glmnet
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loaded glmnet 4.1-8
# Print the tuning summary: resampled metrics for every (alpha, lambda) pair.
Mglmnet
## glmnet 
## 
## 225 samples
##  12 predictor
## 
## Recipe steps: impute_median, impute_mode, dummy 
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 180, 179, 181, 180, 180 
## Resampling results across tuning parameters:
## 
##   alpha  lambda  RMSE      Rsquared   MAE     
##   0.00   0.00    3.460189  0.8512458  2.746543
##   0.00   0.01    3.460189  0.8512458  2.746543
##   0.00   0.02    3.460189  0.8512458  2.746543
##   0.00   0.03    3.460189  0.8512458  2.746543
##   0.00   0.04    3.460189  0.8512458  2.746543
##   0.00   0.05    3.460189  0.8512458  2.746543
##   0.00   0.06    3.460189  0.8512458  2.746543
##   0.00   0.07    3.460189  0.8512458  2.746543
##   0.00   0.08    3.460189  0.8512458  2.746543
##   0.00   0.09    3.460189  0.8512458  2.746543
##   0.00   0.10    3.460189  0.8512458  2.746543
##   0.25   0.00    3.394221  0.8565529  2.700429
##   0.25   0.01    3.394221  0.8565529  2.700429
##   0.25   0.02    3.393211  0.8566314  2.699671
##   0.25   0.03    3.391188  0.8567596  2.698172
##   0.25   0.04    3.390069  0.8568164  2.697198
##   0.25   0.05    3.389193  0.8568578  2.696320
##   0.25   0.06    3.388684  0.8568732  2.695581
##   0.25   0.07    3.388364  0.8568767  2.694806
##   0.25   0.08    3.388091  0.8568851  2.693902
##   0.25   0.09    3.387987  0.8568834  2.693015
##   0.25   0.10    3.388103  0.8568669  2.692495
##   0.50   0.00    3.394046  0.8565528  2.700313
##   0.50   0.01    3.394046  0.8565528  2.700313
##   0.50   0.02    3.392076  0.8566942  2.699112
##   0.50   0.03    3.390683  0.8567601  2.698089
##   0.50   0.04    3.389402  0.8568333  2.696896
##   0.50   0.05    3.388462  0.8568905  2.695697
##   0.50   0.06    3.387869  0.8569265  2.694434
##   0.50   0.07    3.387617  0.8569428  2.693101
##   0.50   0.08    3.387736  0.8569348  2.692022
##   0.50   0.09    3.388044  0.8569201  2.691042
##   0.50   0.10    3.388614  0.8568830  2.689748
##   0.75   0.00    3.393800  0.8565747  2.700413
##   0.75   0.01    3.393800  0.8565747  2.700413
##   0.75   0.02    3.391887  0.8566808  2.699143
##   0.75   0.03    3.390053  0.8567944  2.697773
##   0.75   0.04    3.388574  0.8568947  2.696120
##   0.75   0.05    3.387744  0.8569541  2.694455
##   0.75   0.06    3.387309  0.8569966  2.692481
##   0.75   0.07    3.387196  0.8570112  2.690035
##   0.75   0.08    3.387846  0.8569726  2.687965
##   0.75   0.09    3.388913  0.8569066  2.686602
##   0.75   0.10    3.390103  0.8568231  2.685515
##   1.00   0.00    3.394239  0.8565395  2.700872
##   1.00   0.01    3.394203  0.8565468  2.700951
##   1.00   0.02    3.391449  0.8567042  2.699006
##   1.00   0.03    3.389301  0.8568505  2.697081
##   1.00   0.04    3.388099  0.8569392  2.695074
##   1.00   0.05    3.387190  0.8570195  2.692010
##   1.00   0.06    3.387191  0.8570299  2.688708
##   1.00   0.07    3.387900  0.8569923  2.686385
##   1.00   0.08    3.388972  0.8569183  2.684956
##   1.00   0.09    3.390470  0.8568108  2.683302
##   1.00   0.10    3.392453  0.8566759  2.682683
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.05.
# Same tuning grid as a data frame, including the SD of each metric across folds.
Mglmnet$results
##    alpha lambda     RMSE  Rsquared      MAE    RMSESD RsquaredSD     MAESD
## 1   0.00   0.00 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 2   0.00   0.01 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 3   0.00   0.02 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 4   0.00   0.03 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 5   0.00   0.04 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 6   0.00   0.05 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 7   0.00   0.06 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 8   0.00   0.07 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 9   0.00   0.08 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 10  0.00   0.09 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 11  0.00   0.10 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 12  0.25   0.00 3.394221 0.8565529 2.700429 0.3027311 0.03490790 0.2903516
## 13  0.25   0.01 3.394221 0.8565529 2.700429 0.3027311 0.03490790 0.2903516
## 14  0.25   0.02 3.393211 0.8566314 2.699671 0.3019246 0.03488703 0.2898859
## 15  0.25   0.03 3.391188 0.8567596 2.698172 0.2992612 0.03490345 0.2877654
## 16  0.25   0.04 3.390069 0.8568164 2.697198 0.2965459 0.03491090 0.2853095
## 17  0.25   0.05 3.389193 0.8568578 2.696320 0.2939192 0.03492075 0.2830385
## 18  0.25   0.06 3.388684 0.8568732 2.695581 0.2915694 0.03494396 0.2810670
## 19  0.25   0.07 3.388364 0.8568767 2.694806 0.2893999 0.03497481 0.2792454
## 20  0.25   0.08 3.388091 0.8568851 2.693902 0.2870111 0.03496574 0.2773484
## 21  0.25   0.09 3.387987 0.8568834 2.693015 0.2847315 0.03495641 0.2755221
## 22  0.25   0.10 3.388103 0.8568669 2.692495 0.2825898 0.03495238 0.2741339
## 23  0.50   0.00 3.394046 0.8565528 2.700313 0.3019272 0.03480718 0.2882531
## 24  0.50   0.01 3.394046 0.8565528 2.700313 0.3019272 0.03480718 0.2882531
## 25  0.50   0.02 3.392076 0.8566942 2.699112 0.2995276 0.03477110 0.2861194
## 26  0.50   0.03 3.390683 0.8567601 2.698089 0.2950819 0.03470370 0.2812395
## 27  0.50   0.04 3.389402 0.8568333 2.696896 0.2901345 0.03456775 0.2761066
## 28  0.50   0.05 3.388462 0.8568905 2.695697 0.2851261 0.03441035 0.2711052
## 29  0.50   0.06 3.387869 0.8569265 2.694434 0.2802519 0.03426259 0.2663071
## 30  0.50   0.07 3.387617 0.8569428 2.693101 0.2755584 0.03412288 0.2616370
## 31  0.50   0.08 3.387736 0.8569348 2.692022 0.2710417 0.03399097 0.2569720
## 32  0.50   0.09 3.388044 0.8569201 2.691042 0.2668744 0.03387810 0.2533362
## 33  0.50   0.10 3.388614 0.8568830 2.689748 0.2637489 0.03383854 0.2508490
## 34  0.75   0.00 3.393800 0.8565747 2.700413 0.3010477 0.03471719 0.2871362
## 35  0.75   0.01 3.393800 0.8565747 2.700413 0.3010477 0.03471719 0.2871362
## 36  0.75   0.02 3.391887 0.8566808 2.699143 0.2968899 0.03465866 0.2818545
## 37  0.75   0.03 3.390053 0.8567944 2.697773 0.2895669 0.03436406 0.2739675
## 38  0.75   0.04 3.388574 0.8568947 2.696120 0.2822258 0.03407048 0.2660945
## 39  0.75   0.05 3.387744 0.8569541 2.694455 0.2750455 0.03378985 0.2584306
## 40  0.75   0.06 3.387309 0.8569966 2.692481 0.2682482 0.03353275 0.2512769
## 41  0.75   0.07 3.387196 0.8570112 2.690035 0.2630214 0.03339403 0.2457693
## 42  0.75   0.08 3.387846 0.8569726 2.687965 0.2587185 0.03332160 0.2416450
## 43  0.75   0.09 3.388913 0.8569066 2.686602 0.2547306 0.03326634 0.2389601
## 44  0.75   0.10 3.390103 0.8568231 2.685515 0.2514686 0.03324946 0.2379585
## 45  1.00   0.00 3.394239 0.8565395 2.700872 0.3007203 0.03467006 0.2862602
## 46  1.00   0.01 3.394203 0.8565468 2.700951 0.3006824 0.03467415 0.2860891
## 47  1.00   0.02 3.391449 0.8567042 2.699006 0.2932610 0.03442786 0.2770418
## 48  1.00   0.03 3.389301 0.8568505 2.697081 0.2837907 0.03400147 0.2664174
## 49  1.00   0.04 3.388099 0.8569392 2.695074 0.2743029 0.03358437 0.2560177
## 50  1.00   0.05 3.387190 0.8570195 2.692010 0.2660054 0.03325401 0.2472415
## 51  1.00   0.06 3.387191 0.8570299 2.688708 0.2599941 0.03310059 0.2401056
## 52  1.00   0.07 3.387900 0.8569923 2.686385 0.2543718 0.03297360 0.2351519
## 53  1.00   0.08 3.388972 0.8569183 2.684956 0.2496818 0.03289632 0.2327728
## 54  1.00   0.09 3.390470 0.8568108 2.683302 0.2454866 0.03285191 0.2310418
## 55  1.00   0.10 3.392453 0.8566759 2.682683 0.2417135 0.03283938 0.2292401
# Plot the resampled tuning results of the glmnet fit.
ggplot(Mglmnet)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the caret package.
##   Please report the issue at <https://github.com/topepo/caret/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# (X) summary(Mglmnet) -- marked as not to be used for this model
# Variable-importance plot for the glmnet fit.
ggplot(varImp(Mglmnet))

# Selected tuning values (alpha, lambda).
Mglmnet$bestTune
##    alpha lambda
## 50     1   0.05
Mglmnet$resample # CV statistics at the optimal tuning values
##       RMSE  Rsquared      MAE Resample
## 1 3.430340 0.8526686 2.846789    Fold3
## 2 3.760754 0.8049131 2.995033    Fold1
## 3 3.113259 0.8825001 2.465397    Fold4
## 4 3.147883 0.8892450 2.417267    Fold2
## 5 3.483714 0.8557705 2.735561    Fold5
# Coefficient paths of the final glmnet fit, drawn against the L1 norm.
plot(Mglmnet$finalModel)

# Same paths drawn against log(lambda), with variable labels; the dashed
# vertical line marks the selected lambda.
plot(Mglmnet$finalModel, xvar = 'lambda', label = TRUE)
abline(v = log(Mglmnet$bestTune$lambda), lty = 2)

# Coefficients of the final glmnet fit at the selected lambda.
# The element is spelled out as `finalModel`: the previous `$final` only
# resolved through `$` partial matching on the train list, which returns NULL
# as soon as a second element starting with "final" exists.
coef(Mglmnet$finalModel, s = Mglmnet$bestTune$lambda)
## 15 x 1 sparse Matrix of class "dgCMatrix"
##                       s1
## (Intercept) 106.85948761
## age          -0.05046063
## wt            0.37289685
## wa           -0.35791353
## hdln          1.99552027
## hdwd          0.11722409
## ftln          1.33070389
## ftwd         -0.72328287
## gnd_M         5.56962477
## bld_AB        0.66247137
## bld_B         0.20388551
## bld_O         0.28486628
## lft_Y        -0.48428108
## smk_Y         0.04666870
## alc_Y        -0.10878455
# Append the glmnet predictions on TR and TS to the output tables,
# then preview the first rows of TSOUT.
TROUT <- mutate(TROUT, yhglmnet = predict(Mglmnet, newdata = TR))
TSOUT <- mutate(TSOUT, yhglmnet = predict(Mglmnet, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 4
##      ht  yhlm yhstep yhglmnet
##   <dbl> <dbl>  <dbl>    <dbl>
## 1  188.  186.   186.     186.
## 2  174.  173.   174.     173.
## 3  176.  176.   176.     176.
## 4  150.  153.   153.     153.
## 5  153.  155.   155.     155.
## 6  149.  156.   156.     156.
# 2 x 2 diagnostic grid for the glmnet model.
# Left column: observed ht against prediction; right column: residual against
# prediction. Top row uses TROUT, bottom row uses TSOUT.
g1 <- ggplot(TROUT, aes(x = yhglmnet, y = ht)) +
  geom_point()
g2 <- ggplot(TROUT, aes(x = yhglmnet, y = ht - yhglmnet)) +
  geom_point()
g3 <- ggplot(TSOUT, aes(x = yhglmnet, y = ht)) +
  geom_point()
g4 <- ggplot(TSOUT, aes(x = yhglmnet, y = ht - yhglmnet)) +
  geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

METglmnet <-
  metreg(TROUT$ht, TROUT$yhglmnet) %>%
  bind_rows(metreg(TSOUT$ht, TSOUT$yhglmnet)) %>%
  bind_cols(data.frame(model=c('glmnet', 'glmnet'), TRTS=c('TR', 'TS')))
METglmnet
## # A tibble: 2 × 5
##    rmse   mae   rsq model  TRTS 
##   <dbl> <dbl> <dbl> <chr>  <chr>
## 1  3.19  2.50 0.871 glmnet TR   
## 2  3.67  2.88 0.848 glmnet TS
# nnet
## nnet: an MLP with a single hidden layer (adding a hidden layer lets the
## fit become a curve; stacking many hidden layers is deep learning).
## This is "shallow" learning.
## Choices to make: how many nodes go into each hidden layer, and how many
## hidden layers to build (nnet uses exactly one).
## Regularization is always L2 (weight decay); `decay` is a tuning parameter.

modelLookup('nnet')
##   model parameter         label forReg forClass probModel
## 1  nnet      size #Hidden Units   TRUE     TRUE      TRUE
## 2  nnet     decay  Weight Decay   TRUE     TRUE      TRUE
## Fit
set.seed(0488)
# Tuning grid: hidden-layer sizes 5-8 crossed with weight decay 0, 0.01, ..., 0.1.
nnetGrid <- expand.grid(
  size = 5:8,
  decay = seq(0, 0.1, by = 0.01)
)
# maxit, trace and linout are passed through train() to the nnet fit.
Mnnet <- train(
  RC,
  data = TR,
  method = "nnet",
  tuneGrid = nnetGrid,
  trControl = trCtrl,
  linout = TRUE,
  trace = FALSE,
  maxit = 1000
)
## Warning in train_rec(rec = x, dat = data, info = trainInfo, method = models, :
## There were missing values in resampled performance measures.
Mnnet # tuning results
## Neural Network 
## 
## 225 samples
##  12 predictor
## 
## Recipe steps: impute_median, impute_mode, dummy 
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 180, 179, 181, 180, 180 
## Resampling results across tuning parameters:
## 
##   size  decay  RMSE      Rsquared    MAE     
##   5     0.00   8.872907  0.01170586  7.501016
##   5     0.01   8.112337  0.51471773  4.756408
##   5     0.02   6.896826  0.64821109  4.197015
##   5     0.03   7.482631  0.59733018  4.743527
##   5     0.04   7.740027  0.54737824  4.920355
##   5     0.05   7.400454  0.59359248  4.550895
##   5     0.06   5.980705  0.64986862  4.222740
##   5     0.07   5.776194  0.68854233  4.101199
##   5     0.08   6.486424  0.63886078  4.615376
##   5     0.09   5.987036  0.61979537  4.249446
##   5     0.10   8.607704  0.46204008  5.363570
##   6     0.00   7.945246  0.80437199  6.682819
##   6     0.01   8.152304  0.54172115  5.387514
##   6     0.02   8.121740  0.53742958  5.233198
##   6     0.03   6.898333  0.60439118  4.873729
##   6     0.04   7.360487  0.52659728  4.906017
##   6     0.05   6.343929  0.62784944  4.432261
##   6     0.06   7.892998  0.56233010  5.104312
##   6     0.07   7.025030  0.59339757  4.871052
##   6     0.08   6.651826  0.58137995  4.681930
##   6     0.09   5.014524  0.73487251  3.753549
##   6     0.10   5.435373  0.67762809  4.115682
##   7     0.00   8.871490  0.01818756  7.501540
##   7     0.01   8.360488  0.53655648  5.233891
##   7     0.02   6.634937  0.60112584  4.585553
##   7     0.03   8.566726  0.50116464  5.660672
##   7     0.04   7.109152  0.53349759  4.942018
##   7     0.05   6.008680  0.63394413  4.382288
##   7     0.06   6.000947  0.65671739  4.431645
##   7     0.07   5.891262  0.64885323  4.198854
##   7     0.08   6.592146  0.59743888  4.910413
##   7     0.09   7.179581  0.55464780  5.130847
##   7     0.10   6.196570  0.66923278  4.343874
##   8     0.00   7.832634  0.43129377  6.563441
##   8     0.01   6.716660  0.58026585  4.884645
##   8     0.02   6.854825  0.60840526  5.018617
##   8     0.03   7.321154  0.54093398  5.181368
##   8     0.04   7.617038  0.51498336  5.420868
##   8     0.05   7.261541  0.56050640  5.198748
##   8     0.06   7.477960  0.57440108  5.483460
##   8     0.07   6.433188  0.59485748  4.785787
##   8     0.08   6.694810  0.63342945  5.166155
##   8     0.09   7.080552  0.54637588  5.207493
##   8     0.10   6.462711  0.59468691  4.857116
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 6 and decay = 0.09.
Mnnet$results # Resampling results across tuning parameters
##    size decay     RMSE   Rsquared      MAE    RMSESD RsquaredSD     MAESD
## 1     5  0.00 8.872907 0.01170586 7.501016 0.3656130         NA 0.2408544
## 2     5  0.01 8.112337 0.51471773 4.756408 2.7276701 0.16451095 0.7929259
## 3     5  0.02 6.896826 0.64821109 4.197015 2.3086610 0.10941787 0.7252106
## 4     5  0.03 7.482631 0.59733018 4.743527 2.9939777 0.19172230 1.3052219
## 5     5  0.04 7.740027 0.54737824 4.920355 2.5483314 0.18195468 0.8787680
## 6     5  0.05 7.400454 0.59359248 4.550895 3.1384532 0.19060106 1.0082657
## 7     5  0.06 5.980705 0.64986862 4.222740 1.1817115 0.09442975 0.7617456
## 8     5  0.07 5.776194 0.68854233 4.101199 1.7733084 0.14985370 0.8040059
## 9     5  0.08 6.486424 0.63886078 4.615376 1.8259538 0.07728760 0.8346316
## 10    5  0.09 5.987036 0.61979537 4.249446 2.0552027 0.19981419 0.8899431
## 11    5  0.10 8.607704 0.46204008 5.363570 1.5589722 0.19156059 0.6189232
## 12    6  0.00 7.945246 0.80437199 6.682819 2.3426814         NA 2.0334071
## 13    6  0.01 8.152304 0.54172115 5.387514 4.5818737 0.29576760 2.3959106
## 14    6  0.02 8.121740 0.53742958 5.233198 2.5781167 0.22444821 1.0586841
## 15    6  0.03 6.898333 0.60439118 4.873729 1.9338478 0.13215994 1.0126518
## 16    6  0.04 7.360487 0.52659728 4.906017 2.2884006 0.18919810 1.0485234
## 17    6  0.05 6.343929 0.62784944 4.432261 1.1079431 0.10828858 0.5539098
## 18    6  0.06 7.892998 0.56233010 5.104312 2.2532132 0.12941816 1.0822171
## 19    6  0.07 7.025030 0.59339757 4.871052 3.1475050 0.20018732 1.8576586
## 20    6  0.08 6.651826 0.58137995 4.681930 1.7932033 0.21748990 0.8713729
## 21    6  0.09 5.014524 0.73487251 3.753549 0.9250741 0.10219567 0.5722561
## 22    6  0.10 5.435373 0.67762809 4.115682 1.3616172 0.13874112 0.7973156
## 23    7  0.00 8.871490 0.01818756 7.501540 0.3649372         NA 0.2411710
## 24    7  0.01 8.360488 0.53655648 5.233891 3.7879168 0.22882388 1.9461402
## 25    7  0.02 6.634937 0.60112584 4.585553 1.0894783 0.12207406 0.2385578
## 26    7  0.03 8.566726 0.50116464 5.660672 2.9877700 0.18667642 1.4504721
## 27    7  0.04 7.109152 0.53349759 4.942018 2.1916257 0.22156553 1.2876144
## 28    7  0.05 6.008680 0.63394413 4.382288 0.8629085 0.09133978 0.5265050
## 29    7  0.06 6.000947 0.65671739 4.431645 0.8642205 0.09150494 0.4850043
## 30    7  0.07 5.891262 0.64885323 4.198854 0.9303253 0.06756823 0.5673762
## 31    7  0.08 6.592146 0.59743888 4.910413 0.4563401 0.03502457 0.5372902
## 32    7  0.09 7.179581 0.55464780 5.130847 1.1830184 0.14927402 0.7188310
## 33    7  0.10 6.196570 0.66923278 4.343874 2.4364809 0.10318627 1.0074906
## 34    8  0.00 7.832634 0.43129377 6.563441 2.4132500 0.59108759 2.1257462
## 35    8  0.01 6.716660 0.58026585 4.884645 0.9073881 0.08108978 0.5972624
## 36    8  0.02 6.854825 0.60840526 5.018617 1.1468821 0.09157402 0.9578436
## 37    8  0.03 7.321154 0.54093398 5.181368 1.0538250 0.09501276 0.7934303
## 38    8  0.04 7.617038 0.51498336 5.420868 0.6325402 0.08172747 0.5255899
## 39    8  0.05 7.261541 0.56050640 5.198748 1.2454291 0.13008848 0.8557573
## 40    8  0.06 7.477960 0.57440108 5.483460 1.5793805 0.11917529 1.2733875
## 41    8  0.07 6.433188 0.59485748 4.785787 1.8153892 0.15441230 1.1139038
## 42    8  0.08 6.694810 0.63342945 5.166155 0.9109904 0.04770128 0.5376007
## 43    8  0.09 7.080552 0.54637588 5.207493 0.1753208 0.07642876 0.3273956
## 44    8  0.10 6.462711 0.59468691 4.857116 0.7902413 0.08980137 0.6919173
# Visualize Mnnet$results: size vs. RMSE.
ggplot(Mnnet)

# summary(Mnnet) is intentionally not used here (marked "x" by the author).
Mnnet %>%
  varImp() %>%
  ggplot()

# Tuning-parameter combination selected by cross-validation.
Mnnet[["bestTune"]]
##    size decay
## 21    6  0.09
Mnnet$finalModel # nnet object
## a 14-6-1 network with 97 weights
## inputs: age wt wa hdln hdwd ftln ftwd gnd_M bld_AB bld_B bld_O lft_Y smk_Y alc_Y 
## output(s): .outcome 
## options were - linear output units  decay=0.09
# (x) plot(Mnnet$finalModel)
Mnnet$resample
##       RMSE  Rsquared      MAE Resample
## 1 5.986581 0.5941134 4.558619    Fold1
## 2 4.140781 0.7888664 3.358809    Fold4
## 3 5.636683 0.6730540 3.831284    Fold2
## 4 5.385077 0.7634082 3.945868    Fold5
## 5 3.923500 0.8549205 3.073166    Fold3
# Append the neural-network predictions for the training (TR) and test (TS)
# data. They must come from Mnnet: predicting with Mglmnet here only
# duplicates the yhglmnet column and makes METnnet report glmnet's metrics.
# NOTE: the printed output recorded below was produced before this fix
# (yhnnet identical to yhglmnet) and has to be regenerated.
TROUT <- TROUT %>% mutate(yhnnet=predict(Mnnet, newdata = TR))
TSOUT <- TSOUT %>% mutate(yhnnet=predict(Mnnet, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 5
##      ht  yhlm yhstep yhglmnet yhnnet
##   <dbl> <dbl>  <dbl>    <dbl>  <dbl>
## 1  188.  186.   186.     186.   186.
## 2  174.  173.   174.     173.   173.
## 3  176.  176.   176.     176.   176.
## 4  150.  153.   153.     153.   153.
## 5  153.  155.   155.     155.   155.
## 6  149.  156.   156.     156.   156.
# nnet diagnostics: observed vs. predicted (left column) and residual vs.
# predicted (right column); top row uses TROUT, bottom row uses TSOUT.
g1 <- ggplot(TROUT, aes(x = yhnnet, y = ht)) + geom_point()
g2 <- ggplot(TROUT, aes(x = yhnnet, y = ht - yhnnet)) + geom_point()
g3 <- ggplot(TSOUT, aes(x = yhnnet, y = ht)) + geom_point()
g4 <- ggplot(TSOUT, aes(x = yhnnet, y = ht - yhnnet)) + geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# metreg() (defined elsewhere in this file) is evaluated on TR and TS; the
# two result rows are then labelled with the model name and the data split.
METnnet <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhnnet),
    metreg(TSOUT$ht, TSOUT$yhnnet)
  ),
  data.frame(model = rep('nnet', 2), TRTS = c('TR', 'TS'))
)
METnnet
## # A tibble: 2 × 5
##    rmse   mae   rsq model TRTS 
##   <dbl> <dbl> <dbl> <chr> <chr>
## 1  3.19  2.50 0.871 nnet  TR   
## 2  3.67  2.88 0.848 nnet  TS
# svmRadial
## svmRadial: an algorithm that was widely used for CCTV applications before
## deep learning appeared.

modelLookup('svmRadial')
##       model parameter label forReg forClass probModel
## 1 svmRadial     sigma Sigma   TRUE     TRUE      TRUE
## 2 svmRadial         C  Cost   TRUE     TRUE      TRUE
# Use the same seed as the nnet, rpart and ranger fits in this file (0488)
# so that caret draws the same CV folds. The resamples() comparison further
# down is only a matched, fold-by-fold comparison when every model is
# evaluated on identical folds; with seed 100 the fold sizes came out in a
# different order than for the other models.
# NOTE: the printed results recorded below were produced with seed 100 and
# have to be regenerated.
set.seed(0488)
# 5 x 5 tuning grid: sigma and C each take the values 2^-2, ..., 2^2.
svmGrid <- expand.grid(sigma=2^(-2:2), C=2^(-2:2))

MsvmRadial <- 
  train(RC, data=TR,
        method='svmRadial', 
        trControl=trCtrl,
        tuneGrid = svmGrid)
## Loading required namespace: kernlab
## 
## Attaching package: 'kernlab'
## 
## The following object is masked from 'package:scales':
## 
##     alpha
## 
## The following object is masked from 'package:purrr':
## 
##     cross
## 
## The following object is masked from 'package:ggplot2':
## 
##     alpha
MsvmRadial
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 225 samples
##  12 predictor
## 
## Recipe steps: impute_median, impute_mode, dummy 
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 180, 180, 179, 181, 180 
## Resampling results across tuning parameters:
## 
##   sigma  C     RMSE      Rsquared    MAE     
##   0.25   0.25  6.504948  0.57196412  5.122941
##   0.25   0.50  5.925111  0.62083728  4.587084
##   0.25   1.00  5.648077  0.63314034  4.352867
##   0.25   2.00  5.607617  0.62917244  4.307676
##   0.25   4.00  5.617975  0.62626459  4.307778
##   0.50   0.25  7.712777  0.39280592  6.180663
##   0.50   0.50  7.213184  0.44711294  5.683800
##   0.50   1.00  6.791644  0.50280683  5.278865
##   0.50   2.00  6.667258  0.50690232  5.135604
##   0.50   4.00  6.660641  0.50678061  5.128044
##   1.00   0.25  8.468350  0.25717696  7.037233
##   1.00   0.50  8.218209  0.27758938  6.752115
##   1.00   1.00  7.978321  0.31580153  6.507231
##   1.00   2.00  7.889072  0.33362222  6.432228
##   1.00   4.00  7.886509  0.33328022  6.429203
##   2.00   0.25  8.780269  0.14309304  7.385748
##   2.00   0.50  8.689951  0.15554409  7.303059
##   2.00   1.00  8.579399  0.17666516  7.207339
##   2.00   2.00  8.557111  0.19041560  7.193468
##   2.00   4.00  8.556620  0.19114833  7.192470
##   4.00   0.25  8.862189  0.07150516  7.469460
##   4.00   0.50  8.830508  0.07504188  7.454661
##   4.00   1.00  8.784719  0.08927474  7.436753
##   4.00   2.00  8.778399  0.09515654  7.441111
##   4.00   4.00  8.778417  0.09531415  7.440158
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.25 and C = 2.
MsvmRadial$results
##    sigma    C     RMSE   Rsquared      MAE    RMSESD RsquaredSD     MAESD
## 1   0.25 0.25 6.504948 0.57196412 5.122941 0.9368681 0.02903560 0.6385417
## 2   0.25 0.50 5.925111 0.62083728 4.587084 0.8895953 0.03642303 0.6283493
## 3   0.25 1.00 5.648077 0.63314034 4.352867 0.8350354 0.04153702 0.6127182
## 4   0.25 2.00 5.607617 0.62917244 4.307676 0.8108384 0.04200980 0.5979709
## 5   0.25 4.00 5.617975 0.62626459 4.307778 0.8163394 0.04439819 0.5944251
## 6   0.50 0.25 7.712777 0.39280592 6.180663 0.9646214 0.06115396 0.7017110
## 7   0.50 0.50 7.213184 0.44711294 5.683800 0.9781378 0.05037441 0.7179529
## 8   0.50 1.00 6.791644 0.50280683 5.278865 0.9450208 0.04312137 0.6844987
## 9   0.50 2.00 6.667258 0.50690232 5.135604 0.9274403 0.04344485 0.6255326
## 10  0.50 4.00 6.660641 0.50678061 5.128044 0.9279437 0.04367226 0.6310740
## 11  1.00 0.25 8.468350 0.25717696 7.037233 0.9397437 0.07250093 0.6864022
## 12  1.00 0.50 8.218209 0.27758938 6.752115 0.9684299 0.06526089 0.7152336
## 13  1.00 1.00 7.978321 0.31580153 6.507231 0.9619417 0.05709918 0.7349334
## 14  1.00 2.00 7.889072 0.33362222 6.432228 0.9306728 0.04605314 0.7072335
## 15  1.00 4.00 7.886509 0.33328022 6.429203 0.9291967 0.04576473 0.7056114
## 16  2.00 0.25 8.780269 0.14309304 7.385748 0.9117275 0.05215221 0.6903220
## 17  2.00 0.50 8.689951 0.15554409 7.303059 0.9386777 0.05714281 0.7011466
## 18  2.00 1.00 8.579399 0.17666516 7.207339 0.9752442 0.05574224 0.7365204
## 19  2.00 2.00 8.557111 0.19041560 7.193468 0.9665694 0.05625723 0.7331381
## 20  2.00 4.00 8.556620 0.19114833 7.192470 0.9665912 0.05785451 0.7328873
## 21  4.00 0.25 8.862189 0.07150516 7.469460 0.8925240 0.03769194 0.6914814
## 22  4.00 0.50 8.830508 0.07504188 7.454661 0.9028108 0.04105834 0.6995668
## 23  4.00 1.00 8.784719 0.08927474 7.436753 0.9202761 0.04707859 0.7092631
## 24  4.00 2.00 8.778399 0.09515654 7.441111 0.9245715 0.04597558 0.7122406
## 25  4.00 4.00 8.778417 0.09531415 7.440158 0.9250024 0.04616185 0.7126751
# Resampled performance across the (sigma, C) tuning grid.
ggplot(MsvmRadial)

# Variable importance as reported by caret for this model.
MsvmRadial %>%
  varImp() %>%
  ggplot()

# Tuning-parameter combination selected by cross-validation.
MsvmRadial[["bestTune"]]
##   sigma C
## 4  0.25 2
MsvmRadial$finalModel
## Support Vector Machine object of class "ksvm" 
## 
## SV type: eps-svr  (regression) 
##  parameter : epsilon = 0.1  cost C = 2 
## 
## Gaussian Radial Basis kernel function. 
##  Hyperparameter : sigma =  0.25 
## 
## Number of Support Vectors : 187 
## 
## Objective Function Value : -56.8466 
## Training error : 0.013455
# (x) plot(MsvmRadial$finalModel)
MsvmRadial$resample
##       RMSE  Rsquared      MAE Resample
## 1 4.383396 0.6601292 3.367002    Fold4
## 2 6.452131 0.5778985 4.966978    Fold1
## 3 5.638845 0.6232156 4.208097    Fold3
## 4 6.203888 0.6029947 4.617003    Fold5
## 5 5.359824 0.6816243 4.379300    Fold2
# Append the SVM predictions for the training (TR) and test (TS) data.
TROUT <- mutate(TROUT, yhsvmRadial = predict(MsvmRadial, newdata = TR))
TSOUT <- mutate(TSOUT, yhsvmRadial = predict(MsvmRadial, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 6
##      ht  yhlm yhstep yhglmnet yhnnet yhsvmRadial
##   <dbl> <dbl>  <dbl>    <dbl>  <dbl>       <dbl>
## 1  188.  186.   186.     186.   186.        170.
## 2  174.  173.   174.     173.   173.        173.
## 3  176.  176.   176.     176.   176.        169.
## 4  150.  153.   153.     153.   153.        159.
## 5  153.  155.   155.     155.   155.        155.
## 6  149.  156.   156.     156.   156.        157.
# SVM diagnostics: observed vs. predicted (left column) and residual vs.
# predicted (right column); top row uses TROUT, bottom row uses TSOUT.
g1 <- ggplot(TROUT, aes(x = yhsvmRadial, y = ht)) + geom_point()
g2 <- ggplot(TROUT, aes(x = yhsvmRadial, y = ht - yhsvmRadial)) + geom_point()
g3 <- ggplot(TSOUT, aes(x = yhsvmRadial, y = ht)) + geom_point()
g4 <- ggplot(TSOUT, aes(x = yhsvmRadial, y = ht - yhsvmRadial)) + geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# metreg() (defined elsewhere in this file) is evaluated on TR and TS; the
# two result rows are then labelled with the model name and the data split.
METsvmRadial <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhsvmRadial),
    metreg(TSOUT$ht, TSOUT$yhsvmRadial)
  ),
  data.frame(model = rep('svmRadial', 2), TRTS = c('TR', 'TS'))
)
METsvmRadial
## # A tibble: 2 × 5
##    rmse   mae   rsq model     TRTS 
##   <dbl> <dbl> <dbl> <chr>     <chr>
## 1  1.03 0.897 0.988 svmRadial TR   
## 2  5.83 4.69  0.662 svmRadial TS
# rpart
## rpart (regression tree)

modelLookup('rpart')
##   model parameter                label forReg forClass probModel
## 1 rpart        cp Complexity Parameter   TRUE     TRUE      TRUE
modelLookup('rpart2')
##    model parameter          label forReg forClass probModel
## 1 rpart2  maxdepth Max Tree Depth   TRUE     TRUE      TRUE
set.seed(0488)
# Ten complexity-parameter values evenly spaced on [0, 0.2]. `length.out` is
# spelled out in full instead of relying on partial matching of `length`.
rpartGrid <- expand.grid(cp = seq(0, 0.2, length.out = 10))
# Tune the regression tree over the cp grid using the shared CV settings.
Mrpart <-
  train(RC, data = TR,
        method = 'rpart',
        trControl = trCtrl,
        tuneGrid = rpartGrid)
## 
## Attaching package: 'rpart'
## 
## The following object is masked from 'package:dials':
## 
##     prune
Mrpart
## CART 
## 
## 225 samples
##  12 predictor
## 
## Recipe steps: impute_median, impute_mode, dummy 
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 180, 179, 181, 180, 180 
## Resampling results across tuning parameters:
## 
##   cp          RMSE      Rsquared   MAE     
##   0.00000000  4.324843  0.7709030  3.509419
##   0.02222222  4.572731  0.7453769  3.726639
##   0.04444444  4.572731  0.7453769  3.726639
##   0.06666667  4.724824  0.7235329  3.783926
##   0.08888889  5.234300  0.6552439  4.116163
##   0.11111111  5.234300  0.6552439  4.116163
##   0.13333333  5.234300  0.6552439  4.116163
##   0.15555556  5.234300  0.6552439  4.116163
##   0.17777778  5.234300  0.6552439  4.116163
##   0.20000000  5.234300  0.6552439  4.116163
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.
Mrpart$results
##            cp     RMSE  Rsquared      MAE    RMSESD RsquaredSD     MAESD
## 1  0.00000000 4.324843 0.7709030 3.509419 0.4962337 0.05456251 0.4927866
## 2  0.02222222 4.572731 0.7453769 3.726639 0.3471641 0.03234916 0.2672540
## 3  0.04444444 4.572731 0.7453769 3.726639 0.3471641 0.03234916 0.2672540
## 4  0.06666667 4.724824 0.7235329 3.783926 0.3098771 0.04311460 0.3216701
## 5  0.08888889 5.234300 0.6552439 4.116163 0.2493465 0.03769615 0.2569557
## 6  0.11111111 5.234300 0.6552439 4.116163 0.2493465 0.03769615 0.2569557
## 7  0.13333333 5.234300 0.6552439 4.116163 0.2493465 0.03769615 0.2569557
## 8  0.15555556 5.234300 0.6552439 4.116163 0.2493465 0.03769615 0.2569557
## 9  0.17777778 5.234300 0.6552439 4.116163 0.2493465 0.03769615 0.2569557
## 10 0.20000000 5.234300 0.6552439 4.116163 0.2493465 0.03769615 0.2569557
# Resampled performance across the cp tuning grid.
ggplot(Mrpart)

# Variable importance as reported by caret for this model.
Mrpart %>%
  varImp() %>%
  ggplot()

# Complexity parameter selected by cross-validation.
Mrpart[["bestTune"]]
##   cp
## 1  0
Mrpart$finalModel
## n= 225 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 225 17737.35000 165.5658  
##    2) gnd_M< 0.5 106  3194.97400 157.9604  
##      4) ftln< 22.85 36   525.00890 153.1556  
##        8) age>=39 12   108.36670 150.1333 *
##        9) age< 39 24   252.23330 154.6667  
##         18) hdln< 15.9 9    72.52000 152.1667 *
##         19) hdln>=15.9 15    89.71333 156.1667 *
##      5) ftln>=22.85 70  1411.43100 160.4314  
##       10) age>=46.5 13   156.13690 156.4846 *
##       11) age< 46.5 57  1006.60300 161.3316  
##         22) ftln< 24.25 46   671.24430 160.4652  
##           44) ftln< 23.65 25   300.66960 159.4040  
##             88) age>=27.5 11   104.28910 157.9091 *
##             89) age< 27.5 14   152.48360 160.5786 *
##           45) ftln>=23.65 21   308.90290 161.7286  
##             90) wa>=73.65 10   126.58400 160.3600 *
##             91) wa< 73.65 11   146.56180 162.9727 *
##         23) ftln>=24.25 11   156.44730 164.9545 *
##    3) gnd_M>=0.5 119  2949.64600 172.3403  
##      6) ftln< 25.15 54   873.31500 169.3167  
##       12) age>=28.5 19   228.83790 166.6105 *
##       13) age< 28.5 35   429.80290 170.7857  
##         26) wt< 58.7 14   103.45500 168.5500 *
##         27) wt>=58.7 21   209.71810 172.2762  
##           54) hdln< 18.15 13    68.80769 171.2692 *
##           55) hdln>=18.15 8   106.30870 173.9125 *
##      7) ftln>=25.15 65  1172.48200 174.8523  
##       14) wt< 80.05 50   675.51220 173.8660  
##         28) hdln< 18.85 30   274.00800 172.5200  
##           56) wa>=76.75 18   117.85110 171.7778 *
##           57) wa< 76.75 12   131.36670 173.6333 *
##         29) hdln>=18.85 20   265.62550 175.8850  
##           58) age>=25.5 10    88.54100 174.3700 *
##           59) age< 25.5 10   131.18000 177.4000 *
##       15) wt>=80.05 15   286.19600 178.1400 *
# Base-graphics drawing of the fitted tree; text() adds the labels.
plot(Mrpart$finalModel)
text(Mrpart$finalModel)

# The same tree drawn with the rpart.plot package.
library(rpart.plot)
rpart.plot::rpart.plot(Mrpart$finalModel)

# Per-fold CV metrics for the selected cp.
Mrpart$resample
##       RMSE  Rsquared      MAE Resample
## 1 4.638521 0.7161797 3.661524    Fold1
## 2 4.601339 0.7260757 3.904056    Fold3
## 3 4.502987 0.7705117 3.702418    Fold4
## 4 3.449128 0.8517755 2.648823    Fold5
## 5 4.432241 0.7899722 3.630272    Fold2
# Append the regression-tree predictions for the training (TR) and test (TS)
# data.
TROUT <- mutate(TROUT, yhrpart = predict(Mrpart, newdata = TR))
TSOUT <- mutate(TSOUT, yhrpart = predict(Mrpart, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 7
##      ht  yhlm yhstep yhglmnet yhnnet yhsvmRadial yhrpart
##   <dbl> <dbl>  <dbl>    <dbl>  <dbl>       <dbl>   <dbl>
## 1  188.  186.   186.     186.   186.        170.    177.
## 2  174.  173.   174.     173.   173.        173.    174.
## 3  176.  176.   176.     176.   176.        169.    174.
## 4  150.  153.   153.     153.   153.        159.    152.
## 5  153.  155.   155.     155.   155.        155.    152.
## 6  149.  156.   156.     156.   156.        157.    156.
# rpart diagnostics: observed vs. predicted (left column) and residual vs.
# predicted (right column); top row uses TROUT, bottom row uses TSOUT.
g1 <- ggplot(TROUT, aes(x = yhrpart, y = ht)) + geom_point()
g2 <- ggplot(TROUT, aes(x = yhrpart, y = ht - yhrpart)) + geom_point()
g3 <- ggplot(TSOUT, aes(x = yhrpart, y = ht)) + geom_point()
g4 <- ggplot(TSOUT, aes(x = yhrpart, y = ht - yhrpart)) + geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# metreg() (defined elsewhere in this file) is evaluated on TR and TS; the
# two result rows are then labelled with the model name and the data split.
METrpart <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhrpart),
    metreg(TSOUT$ht, TSOUT$yhrpart)
  ),
  data.frame(model = rep('rpart', 2), TRTS = c('TR', 'TS'))
)
METrpart
## # A tibble: 2 × 5
##    rmse   mae   rsq model TRTS 
##   <dbl> <dbl> <dbl> <chr> <chr>
## 1  3.25  2.52 0.866 rpart TR   
## 2  4.51  3.54 0.773 rpart TS
# ranger

modelLookup('ranger')
##    model     parameter                         label forReg forClass probModel
## 1 ranger          mtry #Randomly Selected Predictors   TRUE     TRUE      TRUE
## 2 ranger     splitrule                Splitting Rule   TRUE     TRUE      TRUE
## 3 ranger min.node.size             Minimal Node Size   TRUE     TRUE      TRUE
## Fit
set.seed(0488)
# Tuning grid: even mtry values from 2 up to ncol(TR) - 1, minimum node sizes
# 1 to 3, and the 'extratrees' split rule only.
# NOTE(review): ncol(TR) - 1 presumably equals the number of raw predictors
# (TR = outcome + predictors) -- confirm against where TR is built.
rangerGrid <- expand.grid(
  mtry = seq(2, ncol(TR) - 1, by = 2),
  min.node.size = 1:3,
  splitrule = 'extratrees'
)

# `importance` is passed through train() to the ranger fit.
Mranger <- train(
  RC,
  data = TR,
  method = 'ranger',
  tuneGrid = rangerGrid,
  trControl = trCtrl,
  importance = 'impurity'
)
## Loading required namespace: e1071
## Loading required namespace: ranger
## 
## Attaching package: 'e1071'
## 
## The following object is masked from 'package:tune':
## 
##     tune
## 
## The following object is masked from 'package:rsample':
## 
##     permutations
## 
## The following object is masked from 'package:parsnip':
## 
##     tune
## 
## The following object is masked from 'package:ggplot2':
## 
##     element
Mranger
## Random Forest 
## 
## 225 samples
##  12 predictor
## 
## Recipe steps: impute_median, impute_mode, dummy 
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 180, 179, 181, 180, 180 
## Resampling results across tuning parameters:
## 
##   mtry  min.node.size  RMSE      Rsquared   MAE     
##    2    1              4.168851  0.8005161  3.296839
##    2    2              4.128123  0.8040248  3.271421
##    2    3              4.168968  0.8003318  3.302569
##    4    1              3.965145  0.8064540  3.129232
##    4    2              3.947337  0.8089740  3.108412
##    4    3              3.942206  0.8099285  3.107190
##    6    1              3.920338  0.8089353  3.100145
##    6    2              3.921989  0.8088344  3.087544
##    6    3              3.919172  0.8101216  3.079854
##    8    1              3.930481  0.8074287  3.095187
##    8    2              3.926138  0.8090105  3.094183
##    8    3              3.921633  0.8095582  3.098947
##   10    1              3.935923  0.8076059  3.110366
##   10    2              3.920330  0.8085521  3.099600
##   10    3              3.925316  0.8085535  3.103864
##   12    1              3.925628  0.8082391  3.094780
##   12    2              3.934327  0.8074922  3.114638
##   12    3              3.913992  0.8094680  3.103733
## 
## Tuning parameter 'splitrule' was held constant at a value of extratrees
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were mtry = 12, splitrule = extratrees
##  and min.node.size = 3.
Mranger$results
##    mtry min.node.size  splitrule     RMSE  Rsquared      MAE    RMSESD
## 1     2             1 extratrees 4.168851 0.8005161 3.296839 0.2780157
## 2     2             2 extratrees 4.128123 0.8040248 3.271421 0.2761709
## 3     2             3 extratrees 4.168968 0.8003318 3.302569 0.2934775
## 4     4             1 extratrees 3.965145 0.8064540 3.129232 0.2857537
## 5     4             2 extratrees 3.947337 0.8089740 3.108412 0.3232101
## 6     4             3 extratrees 3.942206 0.8099285 3.107190 0.3012651
## 7     6             1 extratrees 3.920338 0.8089353 3.100145 0.3230954
## 8     6             2 extratrees 3.921989 0.8088344 3.087544 0.3178604
## 9     6             3 extratrees 3.919172 0.8101216 3.079854 0.3278649
## 10    8             1 extratrees 3.930481 0.8074287 3.095187 0.3119245
## 11    8             2 extratrees 3.926138 0.8090105 3.094183 0.3397868
## 12    8             3 extratrees 3.921633 0.8095582 3.098947 0.2988308
## 13   10             1 extratrees 3.935923 0.8076059 3.110366 0.3320812
## 14   10             2 extratrees 3.920330 0.8085521 3.099600 0.3533986
## 15   10             3 extratrees 3.925316 0.8085535 3.103864 0.3468099
## 16   12             1 extratrees 3.925628 0.8082391 3.094780 0.3516549
## 17   12             2 extratrees 3.934327 0.8074922 3.114638 0.3637272
## 18   12             3 extratrees 3.913992 0.8094680 3.103733 0.3754559
##    RsquaredSD     MAESD
## 1  0.04188683 0.2121470
## 2  0.04000509 0.2234718
## 3  0.03997508 0.2151334
## 4  0.03842212 0.2372788
## 5  0.03868059 0.2864089
## 6  0.03951649 0.2402829
## 7  0.04048938 0.2807711
## 8  0.03975012 0.2611653
## 9  0.03892153 0.2671271
## 10 0.03814036 0.2652337
## 11 0.03999964 0.2775484
## 12 0.03631677 0.2419464
## 13 0.03821755 0.3058889
## 14 0.04155547 0.2864441
## 15 0.04032754 0.3072332
## 16 0.04030557 0.2975056
## 17 0.04138779 0.3228948
## 18 0.04196922 0.3294780
# Resampled performance across the (mtry, min.node.size) tuning grid.
ggplot(Mranger)

# Variable importance as reported by caret for this model.
Mranger %>%
  varImp() %>%
  ggplot()

# Tuning-parameter combination selected by cross-validation.
Mranger[["bestTune"]]
##    mtry  splitrule min.node.size
## 18   12 extratrees             3
Mranger$finalModel
## Ranger result
## 
## Call:
##  ranger::ranger(dependent.variable.name = ".outcome", data = x,      mtry = min(param$mtry, ncol(x)), min.node.size = param$min.node.size,      splitrule = as.character(param$splitrule), write.forest = TRUE,      probability = classProbs, ...) 
## 
## Type:                             Regression 
## Number of trees:                  500 
## Sample size:                      225 
## Number of independent variables:  14 
## Mtry:                             12 
## Target node size:                 3 
## Variable importance mode:         impurity 
## Splitrule:                        extratrees 
## Number of random splits:          1 
## OOB prediction error (MSE):       15.29872 
## R squared (OOB):                  0.8067967
Mranger$resample
##       RMSE  Rsquared      MAE Resample
## 1 4.084030 0.7874631 3.296884    Fold3
## 2 4.180057 0.7583122 3.200368    Fold1
## 3 4.001462 0.8289483 3.209543    Fold2
## 4 4.051861 0.8037177 3.292027    Fold4
## 5 3.252549 0.8688986 2.519844    Fold5
# Append the random-forest predictions for the training (TR) and test (TS)
# data.
TROUT <- mutate(TROUT, yhranger = predict(Mranger, newdata = TR))
TSOUT <- mutate(TSOUT, yhranger = predict(Mranger, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 8
##      ht  yhlm yhstep yhglmnet yhnnet yhsvmRadial yhrpart yhranger
##   <dbl> <dbl>  <dbl>    <dbl>  <dbl>       <dbl>   <dbl>    <dbl>
## 1  188.  186.   186.     186.   186.        170.    177.     182.
## 2  174.  173.   174.     173.   173.        173.    174.     173.
## 3  176.  176.   176.     176.   176.        169.    174.     173.
## 4  150.  153.   153.     153.   153.        159.    152.     153.
## 5  153.  155.   155.     155.   155.        155.    152.     155.
## 6  149.  156.   156.     156.   156.        157.    156.     157.
# ranger diagnostics: observed vs. predicted (left column) and residual vs.
# predicted (right column); top row uses TROUT, bottom row uses TSOUT.
g1 <- ggplot(TROUT, aes(x = yhranger, y = ht)) + geom_point()
g2 <- ggplot(TROUT, aes(x = yhranger, y = ht - yhranger)) + geom_point()
g3 <- ggplot(TSOUT, aes(x = yhranger, y = ht)) + geom_point()
g4 <- ggplot(TSOUT, aes(x = yhranger, y = ht - yhranger)) + geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# metreg() (defined elsewhere in this file) is evaluated on TR and TS; the
# two result rows are then labelled with the model name and the data split.
METranger <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhranger),
    metreg(TSOUT$ht, TSOUT$yhranger)
  ),
  data.frame(model = rep('ranger', 2), TRTS = c('TR', 'TS'))
)
METranger
## # A tibble: 2 × 5
##    rmse   mae   rsq model  TRTS 
##   <dbl> <dbl> <dbl> <chr>  <chr>
## 1  1.61  1.24 0.970 ranger TR   
## 2  4.21  3.45 0.803 ranger TS
# Evaluation
## Cross-validation evaluation
# Gather the resampling results of every fitted model under a short label so
# that they can be summarised and plotted side by side.
RESAMP <- resamples(
  list(
    LM = Mlm,
    STEP = Mstep,
    GLMNET = Mglmnet,
    NNET = Mnnet,
    SVM = MsvmRadial,
    RPART = Mrpart,
    RANGER = Mranger
  )
)
summary(RESAMP)
## 
## Call:
## summary.resamples(object = RESAMP)
## 
## Models: LM, STEP, GLMNET, NNET, SVM, RPART, RANGER 
## Number of resamples: 5 
## 
## MAE 
##            Min.  1st Qu.   Median     Mean  3rd Qu.     Max. NA's
## LM     2.374710 2.416799 2.828514 2.703031 2.839293 3.055840    0
## STEP   2.410983 2.426642 2.593860 2.645150 2.876370 2.917897    0
## GLMNET 2.417267 2.465397 2.735561 2.692010 2.846789 2.995033    0
## NNET   3.073166 3.358809 3.831284 3.753549 3.945868 4.558619    0
## SVM    3.367002 4.208097 4.379300 4.307676 4.617003 4.966978    0
## RPART  2.648823 3.630272 3.661524 3.509419 3.702418 3.904056    0
## RANGER 2.519844 3.200368 3.209543 3.103733 3.292027 3.296884    0
## 
## RMSE 
##            Min.  1st Qu.   Median     Mean  3rd Qu.     Max. NA's
## LM     3.064790 3.116350 3.426790 3.398719 3.602199 3.783464    0
## STEP   3.085081 3.138540 3.237786 3.353692 3.652327 3.654728    0
## GLMNET 3.113259 3.147883 3.430340 3.387190 3.483714 3.760754    0
## NNET   3.923500 4.140781 5.385077 5.014524 5.636683 5.986581    0
## SVM    4.383396 5.359824 5.638845 5.607617 6.203888 6.452131    0
## RPART  3.449128 4.432241 4.502987 4.324843 4.601339 4.638521    0
## RANGER 3.252549 4.001462 4.051861 3.913992 4.084030 4.180057    0
## 
## Rsquared 
##             Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## LM     0.8041820 0.8466001 0.8540874 0.8562773 0.8863103 0.8902070    0
## STEP   0.8169766 0.8428924 0.8687751 0.8602538 0.8834617 0.8891631    0
## GLMNET 0.8049131 0.8526686 0.8557705 0.8570195 0.8825001 0.8892450    0
## NNET   0.5941134 0.6730540 0.7634082 0.7348725 0.7888664 0.8549205    0
## SVM    0.5778985 0.6029947 0.6232156 0.6291724 0.6601292 0.6816243    0
## RPART  0.7161797 0.7260757 0.7705117 0.7709030 0.7899722 0.8517755    0
## RANGER 0.7583122 0.7874631 0.8037177 0.8094680 0.8289483 0.8688986    0
bwplot(RESAMP)

splom(RESAMP, metric = 'Rsquared')

# TR, TS evaluation
## Combine the per-model performance results

# Stack the per-model TR/TS metric tables and sort by ascending RMSE.
# This is done in a single assignment: building the unsorted table first and
# immediately overwriting it with the sorted one was dead code.
MET <-
  bind_rows(METlm, METstep, METglmnet, METnnet, METsvmRadial,
            METrpart, METranger) %>%
  arrange(rmse)
# One line per data split (TR / TS) across models: R-squared on top, RMSE
# underneath.
g1 <- MET %>%
  ggplot(aes(x = model, y = rsq, colour = TRTS, shape = TRTS, group = TRTS)) +
  geom_line() +
  geom_point(size = 3)
g2 <- MET %>%
  ggplot(aes(x = model, y = rmse, colour = TRTS, shape = TRTS, group = TRTS)) +
  geom_line() +
  geom_point(size = 3)
grid.arrange(g1, g2, nrow = 2, ncol = 1)

# Run time
# time1 was recorded with Sys.time() at the very top of the script.
time2 <- Sys.time()
difftime(time2, time1)
## Time difference of 2.710014 mins
# Reference
# Lasso-only glmnet fit (alpha fixed at 1) tuned over lambda with 10-fold CV;
# returnResamp = 'all' keeps the per-fold results for every lambda.
set.seed(0488)
lassoGrid <- expand.grid(
  alpha = 1,
  lambda = seq(0, 0.5, by = 0.1)
)
Ctrl <- trainControl(
  method = 'cv',
  number = 10,
  returnResamp = 'all'
)
Mlasso <- train(
  RC,
  data = TR,
  method = 'glmnet',
  tuneGrid = lassoGrid,
  trControl = Ctrl
)
ggplot(Mlasso)

Mlasso %>%
  varImp() %>%
  ggplot()

Mlasso[["bestTune"]]
##   alpha lambda
## 2     1    0.1
Mlasso$results
##   alpha lambda     RMSE  Rsquared      MAE    RMSESD RsquaredSD     MAESD
## 1     1    0.0 3.415495 0.8597192 2.687169 0.3503130 0.05180525 0.2975563
## 2     1    0.1 3.402952 0.8621860 2.683478 0.3254798 0.04080997 0.2972265
## 3     1    0.2 3.432394 0.8609549 2.719084 0.3563010 0.03616555 0.3280094
## 4     1    0.3 3.518366 0.8550552 2.797090 0.4037056 0.03725082 0.3628590
## 5     1    0.4 3.656557 0.8440725 2.917482 0.4583642 0.04077074 0.4084912
## 6     1    0.5 3.775271 0.8344951 3.021244 0.4759694 0.04256465 0.4282060
Mlasso$resample
##    alpha lambda     RMSE  Rsquared      MAE Resample
## 1      1    0.5 3.848152 0.7898975 2.943508   Fold01
## 2      1    0.0 3.227300 0.8517397 2.426577   Fold01
## 3      1    0.1 3.289144 0.8467568 2.495247   Fold01
## 4      1    0.2 3.412459 0.8344061 2.658831   Fold01
## 5      1    0.3 3.551669 0.8204819 2.774818   Fold01
## 6      1    0.4 3.746599 0.7999935 2.894056   Fold01
## 7      1    0.5 3.695747 0.8635646 2.722216   Fold02
## 8      1    0.0 3.149710 0.8939176 2.386667   Fold02
## 9      1    0.1 3.151405 0.8942121 2.277195   Fold02
## 10     1    0.2 3.220155 0.8910643 2.305226   Fold02
## 11     1    0.3 3.341247 0.8858761 2.414358   Fold02
## 12     1    0.4 3.540030 0.8735664 2.596620   Fold02
## 13     1    0.5 3.706860 0.8784555 3.106235   Fold03
## 14     1    0.0 3.324910 0.9074213 2.488514   Fold03
## 15     1    0.1 3.361073 0.9036488 2.595566   Fold03
## 16     1    0.2 3.402613 0.9005594 2.706676   Fold03
## 17     1    0.3 3.475149 0.8959221 2.827461   Fold03
## 18     1    0.4 3.579974 0.8882424 2.969256   Fold03
## 19     1    0.5 3.279660 0.9080098 2.502694   Fold04
## 20     1    0.0 2.856230 0.9174424 2.352743   Fold04
## 21     1    0.1 2.868624 0.9213231 2.375429   Fold04
## 22     1    0.2 2.923230 0.9208866 2.373260   Fold04
## 23     1    0.3 3.016600 0.9188001 2.388707   Fold04
## 24     1    0.4 3.139462 0.9145164 2.435592   Fold04
## 25     1    0.5 4.130185 0.8212706 3.474061   Fold05
## 26     1    0.0 3.480949 0.8711279 2.819359   Fold05
## 27     1    0.1 3.479304 0.8671708 2.855488   Fold05
## 28     1    0.2 3.580723 0.8584645 2.923714   Fold05
## 29     1    0.3 3.739193 0.8490503 3.051323   Fold05
## 30     1    0.4 3.981486 0.8319861 3.327939   Fold05
## 31     1    0.5 3.704816 0.8100739 3.166136   Fold06
## 32     1    0.0 3.497791 0.8297913 2.755565   Fold06
## 33     1    0.1 3.448829 0.8325138 2.793293   Fold06
## 34     1    0.2 3.452784 0.8330293 2.879718   Fold06
## 35     1    0.3 3.508011 0.8283968 2.959599   Fold06
## 36     1    0.4 3.602217 0.8200191 3.050433   Fold06
## 37     1    0.5 3.080467 0.8686406 2.471308   Fold07
## 38     1    0.0 3.305784 0.8672897 2.652783   Fold07
## 39     1    0.1 3.170825 0.8710366 2.508455   Fold07
## 40     1    0.2 3.028466 0.8772582 2.393632   Fold07
## 41     1    0.3 2.997415 0.8767640 2.419937   Fold07
## 42     1    0.4 3.016492 0.8738763 2.445738   Fold07
## 43     1    0.5 4.850809 0.8020190 3.883104   Fold08
## 44     1    0.0 3.838168 0.8666737 3.142259   Fold08
## 45     1    0.1 4.033644 0.8584801 3.272460   Fold08
## 46     1    0.2 4.225246 0.8482324 3.423480   Fold08
## 47     1    0.3 4.442518 0.8333140 3.592975   Fold08
## 48     1    0.4 4.692170 0.8140519 3.778431   Fold08
## 49     1    0.5 3.696029 0.7795224 3.006703   Fold09
## 50     1    0.0 4.103717 0.7325120 3.192956   Fold09
## 51     1    0.1 3.757960 0.7763220 2.952007   Fold09
## 52     1    0.2 3.540020 0.8018698 2.764131   Fold09
## 53     1    0.3 3.513816 0.8037105 2.737517   Fold09
## 54     1    0.4 3.587874 0.7936232 2.816635   Fold09
## 55     1    0.5 3.759989 0.8234974 2.936473   Fold10
## 56     1    0.0 3.370386 0.8592762 2.654268   Fold10
## 57     1    0.1 3.468709 0.8503959 2.709641   Fold10
## 58     1    0.2 3.538238 0.8437786 2.762169   Fold10
## 59     1    0.3 3.598040 0.8382359 2.804205   Fold10
## 60     1    0.4 3.679264 0.8308502 2.860119   Fold10
# Recompute M$results from M$resample: per-lambda fold count, mean RMSE and
# SD of RMSE
Mlasso$resample %>%
  group_by(lambda) %>%
  dplyr::summarise(
    n = n(),
    mnRMSE = mean(RMSE),
    sdRMSE = sd(RMSE)
  ) %>%
  data.frame()
##   lambda  n   mnRMSE    sdRMSE
## 1    0.0 10 3.415495 0.3503130
## 2    0.1 10 3.402952 0.3254798
## 3    0.2 10 3.432394 0.3563010
## 4    0.3 10 3.518366 0.4037056
## 5    0.4 10 3.656557 0.4583642
## 6    0.5 10 3.775271 0.4759694
# With returnResamp = 'final', M$resample stores only the CV results for the
# selected tuning parameters. returnResamp is not passed to trainControl()
# here, so this refit relies on its default value.
set.seed(0488)
Ctrl <- trainControl(method = "cv", number = 10)
M2 <- train(
  RC,
  data = TR,
  method = "glmnet",
  trControl = Ctrl,
  tuneGrid = lassoGrid
)
M2$bestTune
##   alpha lambda
## 2     1    0.1
M2$results
##   alpha lambda     RMSE  Rsquared      MAE    RMSESD RsquaredSD     MAESD
## 1     1    0.0 3.415495 0.8597192 2.687169 0.3503130 0.05180525 0.2975563
## 2     1    0.1 3.402952 0.8621860 2.683478 0.3254798 0.04080997 0.2972265
## 3     1    0.2 3.432394 0.8609549 2.719084 0.3563010 0.03616555 0.3280094
## 4     1    0.3 3.518366 0.8550552 2.797090 0.4037056 0.03725082 0.3628590
## 5     1    0.4 3.656557 0.8440725 2.917482 0.4583642 0.04077074 0.4084912
## 6     1    0.5 3.775271 0.8344951 3.021244 0.4759694 0.04256465 0.4282060
# Compare the M2$results row for the selected lambda with a summary of
# M2$resample. The lambda is taken from M2$bestTune and matched with near()
# rather than `== 0.1`: the grid values come from seq(), so exact float
# equality against a typed literal is fragile, and a hard-coded value goes
# stale if a different lambda is selected.
M2$results %>%
  dplyr::filter(dplyr::near(lambda, M2$bestTune$lambda)) %>%
  dplyr::select(starts_with("RMSE"))
##       RMSE    RMSESD
## 1 3.402952 0.3254798
M2$resample %>%
  dplyr::summarise(n = n(), mnRMSE = mean(RMSE), sdRMSE = sd(RMSE))
##    n   mnRMSE    sdRMSE
## 1 10 3.402952 0.3254798
# Left panel: fold-level RMSE for every lambda (Mlasso$resample), overlaid
# with the RMSE column of M2$results as a line and red squares. The overlay
# layers inherit the x / y mapping from the ggplot() call.
g1 <-
  ggplot(Mlasso$resample, aes(x = factor(lambda), y = RMSE)) +
  geom_point() +
  geom_line(data = M2$results, aes(group = 1)) +
  geom_point(data = M2$results, shape = 15, size = 5, colour = "red")

# Right panel: same fold-level points, with the per-lambda mean computed
# directly from them by stat_summary()
g2 <-
  ggplot(Mlasso$resample, aes(x = factor(lambda), y = RMSE)) +
  geom_point() +
  stat_summary(aes(group = 1), fun = "mean", geom = "line") +
  stat_summary(fun = "mean", geom = "point", shape = 15, size = 5,
               colour = "red")

# NOTE(review): ggarrange() is not a gridExtra function; presumably the
# package providing it is attached earlier in the file -- confirm.
ggarrange(g1, g2, nrow = 1, ncol = 2)