## Record the start time so the run time of the script can be measured
time1 <- Sys.time()
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.1 ──
## ✔ broom 1.0.5 ✔ rsample 1.2.0
## ✔ dials 1.2.1 ✔ tune 1.1.2
## ✔ infer 1.0.6 ✔ workflows 1.1.4
## ✔ modeldata 1.3.0 ✔ workflowsets 1.0.1
## ✔ parsnip 1.2.0 ✔ yardstick 1.3.0
## ✔ recipes 1.0.10
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ✖ yardstick::precision() masks caret::precision()
## ✖ yardstick::recall() masks caret::recall()
## ✖ yardstick::sensitivity() masks caret::sensitivity()
## ✖ yardstick::spec() masks readr::spec()
## ✖ yardstick::specificity() masks caret::specificity()
## ✖ recipes::step() masks stats::step()
## • Learn how to get started at https://www.tidymodels.org/start/
library(skimr)
library(naniar)
##
## Attaching package: 'naniar'
##
## The following object is masked from 'package:skimr':
##
## n_complete
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(ggpubr)
library(scales)
# Import
## Read the CSV file into DF
## NOTE(review): the path is absolute and specific to one machine; the script
## will not find the file elsewhere
DF <- readr::read_csv(
  file = "C:/Users/top15/OneDrive - 동덕여자대학교/대학교/4학년 1학기/비데마/df2015na.csv"
)
## Rows: 300 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): gnd, bld
## dbl (11): age, ht, wt, wa, hdln, hdwd, ftln, ftwd, lft, smk, alc
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Number of rows and columns of the imported data
dim(DF)
## [1] 300 13
## Structure of the imported data: column types and leading values
str(DF)
## spc_tbl_ [300 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ gnd : chr [1:300] "M" "M" "F" "F" ...
## $ age : num [1:300] 42 23 32 30 NA 26 20 24 18 58 ...
## $ ht : num [1:300] 165 188 162 162 160 ...
## $ wt : num [1:300] 79 77.8 59.6 52 58.9 76.3 61.4 63 65.2 58.3 ...
## $ wa : num [1:300] 96.4 76.6 83 65.8 75 83.8 70.1 72.4 73.5 83.1 ...
## $ hdln: num [1:300] 17.9 20.4 17.5 16.4 17.4 19 19.8 18.5 20.1 15.6 ...
## $ hdwd: num [1:300] 8.6 8.3 7.7 6.6 7.7 8.6 7.8 7.9 8 7.6 ...
## $ ftln: num [1:300] 24.5 28.9 23.3 23.7 24.1 25.7 26.5 25.7 25.8 22.1 ...
## $ ftwd: num [1:300] 9.6 10.6 10.4 8.4 9.4 10.7 9.6 10.2 10.3 8.7 ...
## $ bld : chr [1:300] "O" "O" "A" "B" ...
## $ lft : num [1:300] 0 0 0 0 0 0 0 0 0 0 ...
## $ smk : num [1:300] 0 1 0 0 0 0 0 0 0 0 ...
## $ alc : num [1:300] 1 0 0 0 0 0 0 0 0 0 ...
## - attr(*, "spec")=
## .. cols(
## .. gnd = col_character(),
## .. age = col_double(),
## .. ht = col_double(),
## .. wt = col_double(),
## .. wa = col_double(),
## .. hdln = col_double(),
## .. hdwd = col_double(),
## .. ftln = col_double(),
## .. ftwd = col_double(),
## .. bld = col_character(),
## .. lft = col_double(),
## .. smk = col_double(),
## .. alc = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
## Preview the first rows
head(DF)
## # A tibble: 6 × 13
## gnd age ht wt wa hdln hdwd ftln ftwd bld lft smk alc
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 M 42 165 79 96.4 17.9 8.6 24.5 9.6 O 0 0 1
## 2 M 23 188. 77.8 76.6 20.4 8.3 28.9 10.6 O 0 1 0
## 3 F 32 162. 59.6 83 17.5 7.7 23.3 10.4 A 0 0 0
## 4 F 30 162 52 65.8 16.4 6.6 23.7 8.4 B 0 0 0
## 5 F NA 160 58.9 75 17.4 7.7 24.1 9.4 A 0 0 0
## 6 M 26 179 76.3 83.8 19 8.6 25.7 10.7 B 0 0 0
# Variable preparation
## Convert the character columns (gnd, bld) to factors.
## The {0,1}-coded discrete columns (lft, smk, alc) could be used as numbers
## or as factors; here they become N/Y factors.
DF <-
  DF %>%
  mutate(
    across(c(gnd, bld), factor),
    # Pin levels = c(0, 1) so that 0 -> "N" and 1 -> "Y" no matter which codes
    # actually occur. With labels alone the mapping follows the sorted unique
    # values, so a column holding a single code would fail or be mislabelled.
    # Any value other than 0/1 becomes NA.
    across(
      c(lft, smk, alc),
      function(code) factor(code, levels = c(0, 1), labels = c("N", "Y"))
    )
  )
str(DF)
## tibble [300 × 13] (S3: tbl_df/tbl/data.frame)
## $ gnd : Factor w/ 2 levels "F","M": 2 2 1 1 1 2 2 2 2 1 ...
## $ age : num [1:300] 42 23 32 30 NA 26 20 24 18 58 ...
## $ ht : num [1:300] 165 188 162 162 160 ...
## $ wt : num [1:300] 79 77.8 59.6 52 58.9 76.3 61.4 63 65.2 58.3 ...
## $ wa : num [1:300] 96.4 76.6 83 65.8 75 83.8 70.1 72.4 73.5 83.1 ...
## $ hdln: num [1:300] 17.9 20.4 17.5 16.4 17.4 19 19.8 18.5 20.1 15.6 ...
## $ hdwd: num [1:300] 8.6 8.3 7.7 6.6 7.7 8.6 7.8 7.9 8 7.6 ...
## $ ftln: num [1:300] 24.5 28.9 23.3 23.7 24.1 25.7 26.5 25.7 25.8 22.1 ...
## $ ftwd: num [1:300] 9.6 10.6 10.4 8.4 9.4 10.7 9.6 10.2 10.3 8.7 ...
## $ bld : Factor w/ 4 levels "A","AB","B","O": 4 4 1 3 1 3 2 1 4 3 ...
## $ lft : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
## $ smk : Factor w/ 2 levels "N","Y": 1 2 1 1 1 1 1 1 1 1 ...
## $ alc : Factor w/ 2 levels "N","Y": 2 1 1 1 1 1 1 1 1 1 ...
# Missing values
## skimr::skim(data, ...): summary()-like overview that adds missing-value information; can be chained after group_by()
## Missing-value status: check with skimr or naniar
DF %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
300 |
| Number of columns |
13 |
| _______________________ |
|
| Column type frequency: |
|
| factor |
5 |
| numeric |
8 |
| ________________________ |
|
| Group variables |
None |
Variable type: factor
| skim_variable |
n_missing |
complete_rate |
ordered |
n_unique |
top_counts |
| gnd |
0 |
1.00 |
FALSE |
2 |
M: 155, F: 145 |
| bld |
0 |
1.00 |
FALSE |
4 |
B: 110, A: 84, O: 68, AB: 38 |
| lft |
5 |
0.98 |
FALSE |
2 |
N: 283, Y: 12 |
| smk |
2 |
0.99 |
FALSE |
2 |
N: 237, Y: 61 |
| alc |
6 |
0.98 |
FALSE |
2 |
N: 256, Y: 38 |
Variable type: numeric
| skim_variable |
n_missing |
complete_rate |
mean |
sd |
p0 |
p25 |
p50 |
p75 |
p100 |
hist |
| age |
7 |
0.98 |
29.91 |
13.33 |
15.0 |
19.0 |
26.00 |
37.00 |
68.0 |
▇▃▂▂▁ |
| ht |
0 |
1.00 |
165.64 |
9.03 |
144.3 |
158.0 |
165.85 |
172.83 |
190.5 |
▂▇▇▆▁ |
| wt |
3 |
0.99 |
63.77 |
12.38 |
39.9 |
55.6 |
61.80 |
71.20 |
106.1 |
▃▇▅▂▁ |
| wa |
5 |
0.98 |
78.23 |
9.72 |
59.0 |
71.0 |
77.20 |
84.15 |
111.6 |
▅▇▆▂▁ |
| hdln |
0 |
1.00 |
17.57 |
1.09 |
14.9 |
16.8 |
17.50 |
18.30 |
20.8 |
▂▆▇▃▁ |
| hdwd |
1 |
1.00 |
7.80 |
0.55 |
6.4 |
7.4 |
7.80 |
8.20 |
9.2 |
▁▇▇▇▂ |
| ftln |
4 |
0.99 |
24.37 |
1.51 |
19.9 |
23.3 |
24.30 |
25.50 |
28.9 |
▁▅▇▅▁ |
| ftwd |
1 |
1.00 |
9.68 |
0.70 |
8.0 |
9.2 |
9.70 |
10.15 |
12.2 |
▂▇▇▂▁ |
## Same summary, computed separately for each level of gnd
DF %>% group_by(gnd) %>% skim()
Data summary
| Name |
Piped data |
| Number of rows |
300 |
| Number of columns |
13 |
| _______________________ |
|
| Column type frequency: |
|
| factor |
4 |
| numeric |
8 |
| ________________________ |
|
| Group variables |
gnd |
Variable type: factor
| skim_variable |
gnd |
n_missing |
complete_rate |
ordered |
n_unique |
top_counts |
| bld |
F |
0 |
1.00 |
FALSE |
4 |
B: 59, A: 41, O: 27, AB: 18 |
| bld |
M |
0 |
1.00 |
FALSE |
4 |
B: 51, A: 43, O: 41, AB: 20 |
| lft |
F |
1 |
0.99 |
FALSE |
2 |
N: 141, Y: 3 |
| lft |
M |
4 |
0.97 |
FALSE |
2 |
N: 142, Y: 9 |
| smk |
F |
1 |
0.99 |
FALSE |
2 |
N: 133, Y: 11 |
| smk |
M |
1 |
0.99 |
FALSE |
2 |
N: 104, Y: 50 |
| alc |
F |
2 |
0.99 |
FALSE |
2 |
N: 130, Y: 13 |
| alc |
M |
4 |
0.97 |
FALSE |
2 |
N: 126, Y: 25 |
Variable type: numeric
| skim_variable |
gnd |
n_missing |
complete_rate |
mean |
sd |
p0 |
p25 |
p50 |
p75 |
p100 |
hist |
| age |
F |
4 |
0.97 |
32.60 |
14.29 |
16.0 |
19.00 |
31.00 |
45.00 |
68.0 |
▇▅▃▂▂ |
| age |
M |
3 |
0.98 |
27.41 |
11.88 |
15.0 |
19.00 |
23.50 |
33.00 |
66.0 |
▇▃▂▁▁ |
| ht |
F |
0 |
1.00 |
158.27 |
5.76 |
144.3 |
154.20 |
158.00 |
162.10 |
172.0 |
▂▅▇▆▂ |
| ht |
M |
0 |
1.00 |
172.53 |
5.33 |
156.2 |
169.45 |
172.70 |
175.80 |
190.5 |
▁▅▇▃▁ |
| wt |
F |
2 |
0.99 |
57.24 |
10.11 |
39.9 |
49.90 |
56.30 |
61.20 |
98.2 |
▅▇▂▁▁ |
| wt |
M |
1 |
0.99 |
69.84 |
11.15 |
45.6 |
61.65 |
68.55 |
75.88 |
106.1 |
▂▇▆▂▁ |
| wa |
F |
3 |
0.98 |
76.00 |
10.02 |
59.0 |
68.32 |
75.05 |
81.25 |
111.6 |
▆▇▅▁▁ |
| wa |
M |
2 |
0.99 |
80.29 |
8.98 |
63.1 |
73.90 |
80.00 |
86.50 |
109.0 |
▅▇▇▂▁ |
| hdln |
F |
0 |
1.00 |
16.82 |
0.75 |
14.9 |
16.30 |
16.90 |
17.40 |
18.8 |
▂▅▇▅▁ |
| hdln |
M |
0 |
1.00 |
18.28 |
0.87 |
15.9 |
17.80 |
18.20 |
18.80 |
20.8 |
▁▅▇▃▁ |
| hdwd |
F |
0 |
1.00 |
7.41 |
0.41 |
6.4 |
7.10 |
7.40 |
7.70 |
8.4 |
▂▇▇▅▁ |
| hdwd |
M |
1 |
0.99 |
8.17 |
0.40 |
7.2 |
7.90 |
8.20 |
8.50 |
9.2 |
▂▇▇▇▂ |
| ftln |
F |
2 |
0.99 |
23.26 |
1.02 |
19.9 |
22.60 |
23.30 |
23.90 |
25.9 |
▁▃▇▇▁ |
| ftln |
M |
2 |
0.99 |
25.42 |
1.08 |
22.8 |
24.60 |
25.50 |
26.10 |
28.9 |
▂▇▇▂▁ |
| ftwd |
F |
0 |
1.00 |
9.26 |
0.47 |
8.0 |
8.90 |
9.30 |
9.60 |
10.4 |
▁▅▇▅▂ |
| ftwd |
M |
1 |
0.99 |
10.08 |
0.65 |
8.0 |
9.80 |
10.00 |
10.50 |
12.2 |
▁▂▇▃▁ |
### Percentage of complete observations = rows without any NA / n * 100
sum(complete.cases(DF))/nrow(DF)*100
## [1] 89.66667
### Missingness by variable; in the plot, Missing = share of missing cells, Present = share of non-missing cells
naniar::vis_miss(DF)

### Missing count (n_miss) and percentage (pct_miss) for each variable
naniar::miss_var_summary(DF)
## # A tibble: 13 × 3
## variable n_miss pct_miss
## <chr> <int> <num>
## 1 age 7 2.33
## 2 alc 6 2
## 3 wa 5 1.67
## 4 lft 5 1.67
## 5 ftln 4 1.33
## 6 wt 3 1
## 7 smk 2 0.667
## 8 hdwd 1 0.333
## 9 ftwd 1 0.333
## 10 gnd 0 0
## 11 ht 0 0
## 12 hdln 0 0
## 13 bld 0 0
# Quick exploration
## Box plots of every numeric variable, split by each categorical variable
## in turn (gnd, bld, lft, smk, alc).
## One iteration replaces five copy-pasted calls. select(where(is.numeric))
## is the current replacement for the superseded select_if(), and
## dplyr::select() is namespaced so that a later-attached package exporting
## its own select() cannot mask it when this section is re-run.
c("gnd", "bld", "lft", "smk", "alc") %>%
  walk(function(grp) {
    # lattice objects are not auto-printed inside a function: print explicitly
    print(
      featurePlot(
        x = dplyr::select(DF, where(is.numeric)),
        y = DF[[grp]],
        plot = "box",
        scales = list(x = list(relation = "free"), y = list(relation = "free"))
      )
    )
  })

## Continuous ~ continuous
### use = "pairwise.complete.obs" is needed; without it the missing values
### turn the affected correlation coefficients into NA.
### select(where(is.numeric)) replaces the superseded select_if(), and the
### dplyr:: prefix keeps the call working after another select() is attached.
R <- cor(dplyr::select(DF, where(is.numeric)), use = "pairwise.complete.obs")
round(R, 4)
## age ht wt wa hdln hdwd ftln ftwd
## age 1.0000 -0.3257 0.0054 0.3421 -0.1058 0.1819 -0.1508 -0.0759
## ht -0.3257 1.0000 0.5817 0.1617 0.8158 0.6029 0.8391 0.6021
## wt 0.0054 0.5817 1.0000 0.8310 0.5336 0.6578 0.6000 0.5966
## wa 0.3421 0.1617 0.8310 1.0000 0.2288 0.5044 0.2740 0.3764
## hdln -0.1058 0.8158 0.5336 0.2288 1.0000 0.6434 0.8635 0.6505
## hdwd 0.1819 0.6029 0.6578 0.5044 0.6434 1.0000 0.6231 0.6212
## ftln -0.1508 0.8391 0.6000 0.2740 0.8635 0.6231 1.0000 0.7289
## ftwd -0.0759 0.6021 0.5966 0.3764 0.6505 0.6212 0.7289 1.0000
### Correlations of each numeric variable with ht, largest first
sort(R['ht',], decreasing=TRUE)
## ht ftln hdln hdwd ftwd wt wa
## 1.0000000 0.8390865 0.8157977 0.6029119 0.6021319 0.5817457 0.1617029
## age
## -0.3256508
### Mixed correlation plot of R with ellipses in the upper triangle
### NOTE(review): order = 'FPC' presumably orders variables by the first principal component — confirm in the corrplot documentation
corrplot::corrplot.mixed(R, upper = 'ellipse', order='FPC')

library(GGally)
### Tile-style correlation plot of the numeric columns, labelled with the
### coefficients. select(where(is.numeric)) replaces the superseded
### select_if(); the dplyr:: prefix guards against a masked select().
DF %>%
  dplyr::select(where(is.numeric)) %>%
  ggcorr(geom = "tile", label = TRUE)

### ggpairs: scatter-plot matrix together with correlation coefficients
### Lower panels: faint blue points; diagonal: bar-style histograms
ggpairs(
  DF,
  columns = c("ht", "ftln", "hdln", "ftwd", "hdwd", "wt"),
  lower = list(continuous = wrap("points", alpha = 0.05, col = "blue")),
  diag = list(continuous = "barDiag")
)
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 4 rows containing missing values
## Warning: Removing 1 row that contained a missing value
## Removing 1 row that contained a missing value
## Warning: Removed 3 rows containing missing values
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 4 rows containing missing values
## Warning: Removed 5 rows containing missing values
## Removed 5 rows containing missing values
## Warning: Removed 7 rows containing missing values
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removing 1 row that contained a missing value
## Warning: Removing 1 row that contained a missing value
## Warning: Removed 3 rows containing missing values
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values
## Warning: Removed 4 rows containing missing values
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 4 rows containing missing values
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_bin()`).

### ht against wt: 2-D density contours with the points drawn on top,
### coloured and shaped by gnd
ggplot(DF, aes(x = wt, y = ht)) +
  geom_density2d() +
  geom_point(aes(colour = gnd, shape = gnd))
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_density2d()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Split the data / prepare storage for predictions
## Split once into TR:TS = 0.75:0.25
set.seed(0488)
IS <- initial_split(DF, prop = 3 / 4)
TR <- training(IS)
TS <- testing(IS)
### Holders for the predictions; they start with the observed ht only
TROUT <- dplyr::select(TR, ht)
TSOUT <- dplyr::select(TS, ht)
# Preprocessing
## Recipe with ht as the outcome and every other column as a predictor:
## median-impute numeric predictors, mode-impute nominal predictors,
## then dummy-code the nominal predictors
RC <- recipe(ht ~ ., data = TR)
RC <- step_impute_median(RC, all_numeric_predictors())
RC <- step_impute_mode(RC, all_nominal_predictors())
RC <- step_dummy(RC, all_nominal_predictors())
RC
##
## ── Recipe ──────────────────────────────────────────────────────────────────────
##
## ── Inputs
## Number of variables by role
## outcome: 1
## predictor: 12
##
## ── Operations
## • Median imputation for: all_numeric_predictors()
## • Mode imputation for: all_nominal_predictors()
## • Dummy variables from: all_nominal_predictors()
# Resampling plan for tuning
## 5-fold CV (a single repetition)
### Other possibilities: method = 'boot', 'repeatedcv', ...; repeats = 1
### NOTE(review): the original note states that returnResamp = 'final' applies, so M$resample holds only the CV results for the optimal parameter — confirm against the trainControl() defaults
trCtrl <- trainControl(method = 'cv', number = 5)
# lm: linear regression model
## No tuning parameters; the intercept is not tuned
modelLookup('lm')
## model parameter label forReg forClass probModel
## 1 lm intercept intercept TRUE FALSE FALSE
## Fit the linear model on TR through the recipe, using the CV plan in trCtrl
set.seed(0488)
Mlm <- train(
  RC,
  data = TR,
  method = "lm",
  trControl = trCtrl
)
Mlm
## Linear Regression
##
## 225 samples
## 12 predictor
##
## Recipe steps: impute_median, impute_mode, dummy
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 180, 179, 181, 180, 180
## Resampling results:
##
## RMSE Rsquared MAE
## 3.398719 0.8562773 2.703031
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
## Resampling performance table of the fitted model
Mlm$results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 3.398719 0.8562773 2.703031 0.3088128 0.03487682 0.2951742
### plot(Mlm) is marked as not usable here — presumably because lm has no tuning grid to plot; confirm
summary(Mlm)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6418 -2.3208 0.0593 1.7938 9.3590
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 108.55188 6.96967 15.575 < 2e-16 ***
## age -0.04780 0.02252 -2.123 0.034950 *
## wt 0.40611 0.05128 7.920 1.36e-13 ***
## wa -0.39631 0.05547 -7.145 1.46e-11 ***
## hdln 1.95873 0.42356 4.624 6.56e-06 ***
## hdwd 0.30494 0.74670 0.408 0.683406
## ftln 1.36924 0.34847 3.929 0.000116 ***
## ftwd -1.01132 0.50519 -2.002 0.046587 *
## gnd_M 5.42690 0.79414 6.834 8.81e-11 ***
## bld_AB 1.00394 0.74327 1.351 0.178243
## bld_B 0.45131 0.56788 0.795 0.427677
## bld_O 0.59163 0.65087 0.909 0.364403
## lft_Y -0.75251 1.08103 -0.696 0.487133
## smk_Y 0.22907 0.60866 0.376 0.707035
## alc_Y -0.23412 0.74691 -0.313 0.754246
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.289 on 210 degrees of freedom
## Multiple R-squared: 0.872, Adjusted R-squared: 0.8634
## F-statistic: 102.2 on 14 and 210 DF, p-value: < 2.2e-16
## Plot the variable importance of the fitted model
plot(varImp(Mlm))

## Selected tuning parameter value
Mlm$bestTune
## intercept
## 1 TRUE
Mlm$finalModel # lm object
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Coefficients:
## (Intercept) age wt wa hdln hdwd
## 108.5519 -0.0478 0.4061 -0.3963 1.9587 0.3049
## ftln ftwd gnd_M bld_AB bld_B bld_O
## 1.3692 -1.0113 5.4269 1.0039 0.4513 0.5916
## lft_Y smk_Y alc_Y
## -0.7525 0.2291 -0.2341
Mlm$resample # CV statistics for the optimal parameter value
## RMSE Rsquared MAE Resample
## 1 3.783464 0.8041820 3.055840 Fold1
## 2 3.116350 0.8902070 2.374710 Fold2
## 3 3.426790 0.8540874 2.839293 Fold3
## 4 3.064790 0.8863103 2.416799 Fold4
## 5 3.602199 0.8466001 2.828514 Fold5
### Store the predictions
### Each holder is rebuilt from the observed ht, then the lm predictions are
### attached as column yhlm
TROUT <- bind_cols(dplyr::select(TR, ht), yhlm = predict(Mlm, newdata = TR))
TSOUT <- bind_cols(dplyr::select(TS, ht), yhlm = predict(Mlm, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 2
## ht yhlm
## <dbl> <dbl>
## 1 188. 186.
## 2 174. 173.
## 3 176. 176.
## 4 150. 153.
## 5 153. 155.
## 6 149. 156.
### User-defined helper that computes the regression metrics in one call
### y:  observed values
### yh: predicted values
### Returns a named numeric vector with elements rmse, mae and rsq
metreg <- function(y, yh) {
  metric_fns <- list(rmse = rmse_vec, mae = mae_vec, rsq = rsq_vec)
  vapply(metric_fns, function(metric) metric(y, yh), numeric(1))
}
### Metrics of the lm predictions on the test set
metreg(TSOUT$ht, TSOUT$yhlm)
## rmse mae rsq
## 3.7332197 2.9327742 0.8424828
### One row of metrics per data set (TR first, then TS), tagged with the
### model name and the data-set label
METlm <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhlm),
    metreg(TSOUT$ht, TSOUT$yhlm)
  ),
  data.frame(model = c("lm", "lm"), TRTS = c("TR", "TS"))
)
METlm
## # A tibble: 2 × 5
## rmse mae rsq model TRTS
## <dbl> <dbl> <dbl> <chr> <chr>
## 1 3.18 2.49 0.872 lm TR
## 2 3.73 2.93 0.842 lm TS
### Diagnostic scatter plots against the lm prediction:
### g1/g3 show observed ht, g2/g4 show the residual ht - yhlm;
### g1/g2 use the training set, g3/g4 the test set
g1 <- ggplot(TROUT, aes(x = yhlm, y = ht)) + geom_point()
g2 <- ggplot(TROUT, aes(x = yhlm, y = ht - yhlm)) + geom_point()
g3 <- ggplot(TSOUT, aes(x = yhlm, y = ht)) + geom_point()
g4 <- ggplot(TSOUT, aes(x = yhlm, y = ht - yhlm)) + geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# lmStepAIC: variable selection by AIC
## No tuning parameters; the intercept is not tuned
## Not available in parsnip (per the original note)
modelLookup('lmStepAIC')
## model parameter label forReg forClass probModel
## 1 lmStepAIC parameter parameter TRUE FALSE FALSE
## Fit the stepwise-AIC linear model with backward elimination, using the
## same recipe and CV plan as the plain lm fit
## NOTE(review): the full AIC trace is printed for every fit; presumably
## trace = FALSE could be passed alongside direction to silence it — confirm
set.seed(0488)
Mstep <- train(
  RC,
  data = TR,
  method = "lmStepAIC",
  direction = "backward",
  trControl = trCtrl
)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
## Start: AIC=432.27
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_B + bld_O + lft_Y + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - bld_B 1 0.19 1682.3 430.29
## - alc_Y 1 1.06 1683.2 430.39
## - smk_Y 1 4.83 1687.0 430.79
## - bld_AB 1 5.07 1687.2 430.82
## - bld_O 1 8.22 1690.3 431.15
## - lft_Y 1 8.96 1691.1 431.23
## - hdwd 1 11.11 1693.2 431.46
## <none> 1682.1 432.27
## - age 1 32.95 1715.1 433.77
## - ftwd 1 43.46 1725.6 434.87
## - ftln 1 160.88 1843.0 446.72
## - hdln 1 169.68 1851.8 447.57
## - gnd_M 1 384.71 2066.8 467.35
## - wa 1 419.97 2102.1 470.39
## - wt 1 515.33 2197.4 478.38
##
## Step: AIC=430.29
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_O + lft_Y + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - alc_Y 1 0.99 1683.3 428.40
## - smk_Y 1 5.15 1687.5 428.84
## - bld_AB 1 5.41 1687.7 428.87
## - lft_Y 1 8.94 1691.3 429.25
## - bld_O 1 9.33 1691.7 429.29
## - hdwd 1 11.73 1694.0 429.55
## <none> 1682.3 430.29
## - age 1 32.76 1715.1 431.77
## - ftwd 1 43.60 1725.9 432.90
## - ftln 1 163.29 1845.6 444.97
## - hdln 1 169.51 1851.8 445.57
## - gnd_M 1 393.89 2076.2 466.16
## - wa 1 425.41 2107.7 468.87
## - wt 1 518.06 2200.4 476.62
##
## Step: AIC=428.4
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_O + lft_Y + smk_Y
##
## Df Sum of Sq RSS AIC
## - smk_Y 1 4.48 1687.8 426.88
## - bld_AB 1 5.22 1688.5 426.96
## - bld_O 1 8.80 1692.1 427.34
## - lft_Y 1 9.38 1692.7 427.40
## - hdwd 1 11.54 1694.8 427.63
## <none> 1683.3 428.40
## - age 1 33.28 1716.6 429.92
## - ftwd 1 43.09 1726.4 430.95
## - ftln 1 162.50 1845.8 442.99
## - hdln 1 169.78 1853.1 443.70
## - gnd_M 1 396.09 2079.4 464.44
## - wa 1 425.67 2109.0 466.98
## - wt 1 517.09 2200.4 474.62
##
## Step: AIC=426.88
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_O + lft_Y
##
## Df Sum of Sq RSS AIC
## - bld_AB 1 5.27 1693.1 425.44
## - lft_Y 1 8.56 1696.3 425.79
## - bld_O 1 8.86 1696.7 425.82
## - hdwd 1 14.26 1702.1 426.39
## <none> 1687.8 426.88
## - age 1 38.24 1726.0 428.91
## - ftwd 1 46.12 1733.9 429.73
## - ftln 1 159.42 1847.2 441.13
## - hdln 1 177.18 1865.0 442.85
## - gnd_M 1 400.48 2088.3 463.20
## - wa 1 427.11 2114.9 465.49
## - wt 1 513.58 2201.4 472.70
##
## Step: AIC=425.44
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_O + lft_Y
##
## Df Sum of Sq RSS AIC
## - bld_O 1 6.37 1699.4 424.12
## - lft_Y 1 8.99 1702.1 424.39
## - hdwd 1 14.58 1707.6 424.98
## <none> 1693.1 425.44
## - age 1 39.12 1732.2 427.55
## - ftwd 1 44.45 1737.5 428.11
## - ftln 1 155.59 1848.7 439.27
## - hdln 1 181.02 1874.1 441.73
## - gnd_M 1 409.92 2103.0 462.47
## - wa 1 427.21 2120.3 463.94
## - wt 1 510.38 2203.4 470.87
##
## Step: AIC=424.12
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## lft_Y
##
## Df Sum of Sq RSS AIC
## - lft_Y 1 9.82 1709.3 423.15
## - hdwd 1 11.76 1711.2 423.36
## <none> 1699.4 424.12
## - age 1 40.15 1739.6 426.32
## - ftwd 1 44.44 1743.9 426.76
## - ftln 1 153.66 1853.1 437.70
## - hdln 1 188.56 1888.0 441.06
## - wa 1 424.26 2123.7 462.23
## - gnd_M 1 446.44 2145.9 464.10
## - wt 1 507.49 2206.9 469.15
##
## Step: AIC=423.15
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M
##
## Df Sum of Sq RSS AIC
## - hdwd 1 10.34 1719.6 422.24
## <none> 1709.3 423.15
## - age 1 38.41 1747.7 425.16
## - ftwd 1 45.36 1754.6 425.87
## - ftln 1 151.96 1861.2 436.49
## - hdln 1 191.24 1900.5 440.24
## - wa 1 426.19 2135.4 461.23
## - gnd_M 1 445.01 2154.3 462.81
## - wt 1 514.27 2223.5 468.50
##
## Step: AIC=422.24
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M
##
## Df Sum of Sq RSS AIC
## <none> 1719.6 422.24
## - age 1 28.47 1748.1 423.20
## - ftwd 1 38.86 1758.5 424.26
## - ftln 1 145.66 1865.2 434.88
## - hdln 1 205.97 1925.6 440.60
## - wa 1 443.75 2163.3 461.56
## - wt 1 627.63 2347.2 476.25
## - gnd_M 1 674.32 2393.9 479.79
## Start: AIC=446.54
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_B + bld_O + lft_Y + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - hdwd 1 0.27 1834.6 444.56
## - lft_Y 1 0.59 1834.9 444.60
## - smk_Y 1 0.85 1835.1 444.62
## - alc_Y 1 1.68 1836.0 444.70
## - bld_O 1 4.61 1838.9 444.99
## - bld_B 1 5.55 1839.8 445.08
## - bld_AB 1 14.88 1849.2 445.98
## <none> 1834.3 446.54
## - age 1 39.08 1873.4 448.31
## - ftwd 1 43.58 1877.9 448.74
## - ftln 1 137.13 1971.4 457.44
## - hdln 1 169.02 2003.3 460.32
## - wa 1 387.41 2221.7 478.84
## - gnd_M 1 454.98 2289.3 484.20
## - wt 1 545.47 2379.8 491.14
##
## Step: AIC=444.56
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B + bld_O + lft_Y + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - lft_Y 1 0.66 1835.2 442.63
## - smk_Y 1 1.00 1835.5 442.66
## - alc_Y 1 1.62 1836.2 442.72
## - bld_O 1 4.86 1839.4 443.04
## - bld_B 1 5.44 1840.0 443.09
## - bld_AB 1 14.70 1849.3 443.99
## <none> 1834.6 444.56
## - ftwd 1 47.46 1882.0 447.14
## - age 1 49.37 1883.9 447.32
## - ftln 1 143.98 1978.5 456.09
## - hdln 1 177.18 2011.7 459.07
## - wa 1 387.29 2221.8 476.85
## - wt 1 575.29 2409.8 491.39
## - gnd_M 1 617.78 2452.3 494.52
##
## Step: AIC=442.63
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B + bld_O + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - smk_Y 1 0.91 1836.1 440.72
## - alc_Y 1 1.59 1836.8 440.78
## - bld_O 1 5.13 1840.3 441.13
## - bld_B 1 5.31 1840.5 441.15
## - bld_AB 1 14.38 1849.6 442.03
## <none> 1835.2 442.63
## - ftwd 1 47.68 1882.9 445.22
## - age 1 49.31 1884.5 445.37
## - ftln 1 143.63 1978.8 454.12
## - hdln 1 177.45 2012.7 457.15
## - wa 1 387.38 2222.6 474.91
## - wt 1 577.43 2412.7 489.60
## - gnd_M 1 623.16 2458.4 492.96
##
## Step: AIC=440.72
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B + bld_O + alc_Y
##
## Df Sum of Sq RSS AIC
## - alc_Y 1 1.19 1837.3 438.83
## - bld_O 1 4.71 1840.8 439.17
## - bld_B 1 5.13 1841.3 439.22
## - bld_AB 1 13.98 1850.1 440.07
## <none> 1836.1 440.72
## - ftwd 1 46.86 1883.0 443.23
## - age 1 48.55 1884.7 443.39
## - ftln 1 144.49 1980.6 452.28
## - hdln 1 176.65 2012.8 455.16
## - wa 1 387.47 2223.6 472.99
## - wt 1 578.50 2414.6 487.74
## - gnd_M 1 645.97 2482.1 492.68
##
## Step: AIC=438.83
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B + bld_O
##
## Df Sum of Sq RSS AIC
## - bld_O 1 4.86 1842.2 437.31
## - bld_B 1 5.55 1842.9 437.37
## - bld_AB 1 14.74 1852.0 438.26
## <none> 1837.3 438.83
## - ftwd 1 48.45 1885.8 441.49
## - age 1 49.81 1887.1 441.62
## - ftln 1 144.62 1981.9 450.40
## - hdln 1 176.46 2013.8 453.25
## - wa 1 388.30 2225.6 471.15
## - wt 1 582.15 2419.5 486.10
## - gnd_M 1 648.27 2485.6 490.93
##
## Step: AIC=437.31
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B
##
## Df Sum of Sq RSS AIC
## - bld_B 1 2.21 1844.4 435.52
## - bld_AB 1 10.50 1852.7 436.32
## <none> 1842.2 437.31
## - ftwd 1 48.54 1890.7 439.96
## - age 1 53.31 1895.5 440.41
## - ftln 1 140.97 1983.1 448.50
## - hdln 1 183.86 2026.0 452.33
## - wa 1 385.78 2228.0 469.34
## - wt 1 578.31 2420.5 484.18
## - gnd_M 1 651.89 2494.1 489.54
##
## Step: AIC=435.52
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB
##
## Df Sum of Sq RSS AIC
## - bld_AB 1 8.44 1852.8 434.34
## <none> 1844.4 435.52
## - ftwd 1 48.22 1892.6 438.14
## - age 1 51.41 1895.8 438.44
## - ftln 1 140.72 1985.1 446.68
## - hdln 1 186.44 2030.8 450.76
## - wa 1 402.81 2247.2 468.88
## - wt 1 610.70 2455.1 484.72
## - gnd_M 1 666.66 2511.1 488.75
##
## Step: AIC=434.34
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M
##
## Df Sum of Sq RSS AIC
## <none> 1852.8 434.34
## - ftwd 1 44.05 1896.9 436.54
## - age 1 50.96 1903.8 437.19
## - ftln 1 138.17 1991.0 445.21
## - hdln 1 187.01 2039.8 449.55
## - wa 1 406.33 2259.2 467.83
## - wt 1 612.69 2465.5 483.48
## - gnd_M 1 665.42 2518.2 487.26
## Start: AIC=443.91
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_B + bld_O + lft_Y + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - hdwd 1 0.16 1781.8 441.93
## - alc_Y 1 1.33 1783.0 442.05
## - lft_Y 1 12.33 1794.0 443.16
## - bld_B 1 13.42 1795.1 443.27
## - smk_Y 1 15.76 1797.5 443.51
## <none> 1781.7 443.91
## - bld_O 1 23.48 1805.2 444.28
## - ftwd 1 24.66 1806.3 444.40
## - bld_AB 1 28.18 1809.9 444.75
## - age 1 71.46 1853.1 449.03
## - ftln 1 146.51 1928.2 456.22
## - hdln 1 199.30 1981.0 461.11
## - wa 1 372.60 2154.3 476.29
## - gnd_M 1 380.93 2162.6 476.98
## - wt 1 466.39 2248.1 484.00
##
## Step: AIC=441.93
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B + bld_O + lft_Y + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - alc_Y 1 1.27 1783.1 440.06
## - lft_Y 1 12.17 1794.0 441.16
## - bld_B 1 13.89 1795.7 441.34
## - smk_Y 1 15.64 1797.5 441.51
## <none> 1781.8 441.93
## - bld_O 1 23.45 1805.3 442.30
## - ftwd 1 25.09 1806.9 442.46
## - bld_AB 1 28.51 1810.4 442.80
## - age 1 80.41 1862.2 447.92
## - ftln 1 146.40 1928.2 454.22
## - hdln 1 209.57 1991.4 460.06
## - wa 1 372.45 2154.3 474.29
## - gnd_M 1 470.71 2252.6 482.36
## - wt 1 485.59 2267.4 483.55
##
## Step: AIC=440.06
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B + bld_O + lft_Y + smk_Y
##
## Df Sum of Sq RSS AIC
## - lft_Y 1 12.07 1795.2 439.28
## - smk_Y 1 14.46 1797.6 439.52
## - bld_B 1 15.01 1798.1 439.58
## <none> 1783.1 440.06
## - ftwd 1 25.10 1808.2 440.59
## - bld_O 1 25.53 1808.7 440.63
## - bld_AB 1 28.81 1811.9 440.96
## - age 1 80.06 1863.2 446.01
## - ftln 1 147.21 1930.3 452.42
## - hdln 1 210.01 1993.1 458.21
## - wa 1 377.46 2160.6 472.81
## - gnd_M 1 471.48 2254.6 480.52
## - wt 1 492.11 2275.2 482.17
##
## Step: AIC=439.28
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B + bld_O + smk_Y
##
## Df Sum of Sq RSS AIC
## - bld_B 1 12.55 1807.7 438.54
## - smk_Y 1 12.94 1808.1 438.58
## <none> 1795.2 439.28
## - bld_O 1 25.28 1820.5 439.81
## - ftwd 1 26.37 1821.6 439.92
## - bld_AB 1 27.17 1822.4 440.00
## - age 1 83.52 1878.7 445.51
## - ftln 1 145.40 1940.6 451.38
## - hdln 1 219.02 2014.2 458.12
## - wa 1 371.77 2167.0 471.35
## - gnd_M 1 465.10 2260.3 478.98
## - wt 1 485.44 2280.6 480.60
##
## Step: AIC=438.54
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_O + smk_Y
##
## Df Sum of Sq RSS AIC
## - smk_Y 1 10.76 1818.5 437.62
## - bld_O 1 14.26 1822.0 437.96
## - bld_AB 1 16.84 1824.6 438.22
## <none> 1807.7 438.54
## - ftwd 1 28.30 1836.0 439.35
## - age 1 78.21 1886.0 444.21
## - ftln 1 147.83 1955.6 450.77
## - hdln 1 222.54 2030.3 457.56
## - wa 1 384.62 2192.4 471.46
## - gnd_M 1 457.83 2265.6 477.40
## - wt 1 510.32 2318.1 481.55
##
## Step: AIC=437.62
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_O
##
## Df Sum of Sq RSS AIC
## - bld_O 1 15.21 1833.7 437.12
## - bld_AB 1 16.11 1834.6 437.21
## <none> 1818.5 437.62
## - ftwd 1 27.22 1845.7 438.31
## - age 1 74.33 1892.8 442.87
## - ftln 1 152.42 1970.9 450.18
## - hdln 1 217.90 2036.4 456.10
## - wa 1 388.46 2206.9 470.66
## - gnd_M 1 497.96 2316.5 479.42
## - wt 1 519.44 2337.9 481.09
##
## Step: AIC=437.12
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB
##
## Df Sum of Sq RSS AIC
## - bld_AB 1 11.33 1845.0 436.24
## <none> 1833.7 437.12
## - ftwd 1 31.79 1865.5 438.23
## - age 1 78.38 1912.1 442.70
## - ftln 1 149.34 1983.0 449.29
## - hdln 1 223.76 2057.5 455.96
## - wa 1 384.77 2218.5 469.60
## - wt 1 513.36 2347.1 479.80
## - gnd_M 1 538.83 2372.5 481.75
##
## Step: AIC=436.24
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M
##
## Df Sum of Sq RSS AIC
## <none> 1845.0 436.24
## - ftwd 1 31.09 1876.1 437.26
## - age 1 77.10 1922.1 441.65
## - ftln 1 146.56 1991.6 448.07
## - hdln 1 226.26 2071.3 455.18
## - wa 1 389.40 2234.4 468.90
## - wt 1 520.11 2365.1 479.19
## - gnd_M 1 531.27 2376.3 480.04
## Start: AIC=452.41
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_B + bld_O + lft_Y + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - hdwd 1 0.41 1881.6 450.44
## - bld_O 1 1.31 1882.5 450.53
## - bld_B 1 1.94 1883.1 450.59
## - lft_Y 1 2.49 1883.7 450.64
## - smk_Y 1 3.33 1884.5 450.72
## - alc_Y 1 3.85 1885.0 450.77
## - bld_AB 1 9.22 1890.4 451.29
## <none> 1881.2 452.41
## - ftwd 1 26.06 1907.2 452.88
## - age 1 36.75 1917.9 453.89
## - ftln 1 62.24 1943.4 456.26
## - hdln 1 221.40 2102.6 470.43
## - wa 1 451.38 2332.6 489.12
## - gnd_M 1 473.75 2354.9 490.84
## - wt 1 490.80 2372.0 492.13
##
## Step: AIC=450.44
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B + bld_O + lft_Y + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - bld_O 1 1.20 1882.8 448.56
## - bld_B 1 2.06 1883.7 448.64
## - lft_Y 1 2.41 1884.0 448.67
## - smk_Y 1 3.10 1884.7 448.74
## - alc_Y 1 3.74 1885.3 448.80
## - bld_AB 1 9.14 1890.7 449.32
## <none> 1881.6 450.44
## - ftwd 1 25.65 1907.2 450.88
## - age 1 38.82 1920.4 452.12
## - ftln 1 61.91 1943.5 454.27
## - hdln 1 230.81 2112.4 469.27
## - wa 1 451.73 2333.3 487.18
## - wt 1 518.37 2399.9 492.24
## - gnd_M 1 621.28 2502.9 499.80
##
## Step: AIC=448.56
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B + lft_Y + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - bld_B 1 1.08 1883.9 446.66
## - lft_Y 1 2.39 1885.2 446.79
## - smk_Y 1 3.30 1886.1 446.87
## - alc_Y 1 4.01 1886.8 446.94
## - bld_AB 1 7.94 1890.7 447.32
## <none> 1882.8 448.56
## - ftwd 1 25.41 1908.2 448.97
## - age 1 39.32 1922.1 450.28
## - ftln 1 61.50 1944.3 452.34
## - hdln 1 233.88 2116.7 467.64
## - wa 1 452.25 2335.0 485.31
## - wt 1 517.92 2400.7 490.30
## - gnd_M 1 621.64 2504.4 497.91
##
## Step: AIC=446.66
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## lft_Y + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - lft_Y 1 2.10 1886.0 444.86
## - smk_Y 1 3.26 1887.1 444.97
## - alc_Y 1 4.44 1888.3 445.09
## - bld_AB 1 6.92 1890.8 445.32
## <none> 1883.9 446.66
## - ftwd 1 25.61 1909.5 447.09
## - age 1 38.25 1922.1 448.28
## - ftln 1 61.70 1945.6 450.46
## - hdln 1 234.83 2118.7 465.81
## - wa 1 463.28 2347.2 484.24
## - wt 1 532.41 2416.3 489.46
## - gnd_M 1 631.74 2515.6 496.72
##
## Step: AIC=444.86
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - smk_Y 1 3.30 1889.3 443.18
## - alc_Y 1 4.53 1890.5 443.29
## - bld_AB 1 6.38 1892.3 443.47
## <none> 1886.0 444.86
## - ftwd 1 26.08 1912.0 445.33
## - age 1 38.14 1924.1 446.47
## - ftln 1 61.77 1947.7 448.66
## - hdln 1 233.88 2119.8 463.91
## - wa 1 469.47 2355.4 482.87
## - wt 1 540.46 2426.4 488.22
## - gnd_M 1 629.81 2515.8 494.73
##
## Step: AIC=443.18
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## alc_Y
##
## Df Sum of Sq RSS AIC
## - alc_Y 1 3.50 1892.8 441.51
## - bld_AB 1 6.58 1895.8 441.80
## <none> 1889.3 443.18
## - ftwd 1 25.82 1915.1 443.62
## - age 1 36.20 1925.5 444.59
## - ftln 1 63.32 1952.6 447.11
## - hdln 1 231.85 2121.1 462.01
## - wa 1 470.01 2359.3 481.17
## - wt 1 545.80 2435.1 486.86
## - gnd_M 1 694.47 2583.7 497.53
##
## Step: AIC=441.51
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB
##
## Df Sum of Sq RSS AIC
## - bld_AB 1 6.77 1899.5 440.15
## <none> 1892.8 441.51
## - ftwd 1 26.00 1918.8 441.97
## - age 1 36.35 1929.1 442.94
## - ftln 1 64.34 1957.1 445.53
## - hdln 1 232.98 2125.8 460.41
## - wa 1 470.53 2363.3 479.47
## - wt 1 544.29 2437.1 485.01
## - gnd_M 1 691.18 2583.9 495.54
##
## Step: AIC=440.15
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M
##
## Df Sum of Sq RSS AIC
## <none> 1899.5 440.15
## - ftwd 1 22.97 1922.5 440.32
## - age 1 40.45 1940.0 441.95
## - ftln 1 62.37 1961.9 443.97
## - hdln 1 232.93 2132.5 458.97
## - wa 1 473.66 2373.2 478.23
## - wt 1 547.42 2447.0 483.74
## - gnd_M 1 693.02 2592.6 494.14
## Start: AIC=440.89
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_B + bld_O + lft_Y + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - alc_Y 1 0.05 1764.7 438.90
## - smk_Y 1 0.74 1765.3 438.96
## - lft_Y 1 1.58 1766.2 439.05
## - hdwd 1 6.27 1770.9 439.53
## - bld_O 1 6.82 1771.4 439.58
## - bld_B 1 11.85 1776.5 440.09
## - age 1 12.59 1777.2 440.17
## <none> 1764.6 440.89
## - bld_AB 1 25.96 1790.6 441.52
## - ftwd 1 27.82 1792.4 441.70
## - hdln 1 152.88 1917.5 453.85
## - ftln 1 168.07 1932.7 455.27
## - gnd_M 1 255.93 2020.5 463.27
## - wa 1 572.97 2337.6 489.50
## - wt 1 674.42 2439.0 497.15
##
## Step: AIC=438.9
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_B + bld_O + lft_Y + smk_Y
##
## Df Sum of Sq RSS AIC
## - smk_Y 1 0.81 1765.5 436.98
## - lft_Y 1 1.58 1766.2 437.06
## - hdwd 1 6.34 1771.0 437.54
## - bld_O 1 6.77 1771.4 437.58
## - bld_B 1 11.89 1776.5 438.10
## - age 1 12.74 1777.4 438.19
## <none> 1764.7 438.90
## - bld_AB 1 25.91 1790.6 439.52
## - ftwd 1 28.08 1792.7 439.74
## - hdln 1 153.09 1917.7 451.87
## - ftln 1 168.09 1932.7 453.27
## - gnd_M 1 258.34 2023.0 461.49
## - wa 1 582.08 2346.7 488.21
## - wt 1 684.05 2448.7 495.86
##
## Step: AIC=436.98
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_B + bld_O + lft_Y
##
## Df Sum of Sq RSS AIC
## - lft_Y 1 1.54 1767.0 435.13
## - hdwd 1 5.84 1771.3 435.57
## - bld_O 1 6.82 1772.3 435.67
## - age 1 12.04 1777.5 436.20
## - bld_B 1 12.14 1777.6 436.21
## <none> 1765.5 436.98
## - bld_AB 1 26.23 1791.7 437.63
## - ftwd 1 27.56 1793.0 437.77
## - hdln 1 152.38 1917.8 449.88
## - ftln 1 168.22 1933.7 451.36
## - gnd_M 1 294.34 2059.8 462.73
## - wa 1 581.32 2346.8 486.21
## - wt 1 684.15 2449.6 493.93
##
## Step: AIC=435.13
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_B + bld_O
##
## Df Sum of Sq RSS AIC
## - hdwd 1 5.69 1772.7 433.71
## - bld_O 1 6.95 1774.0 433.84
## - bld_B 1 11.75 1778.8 434.33
## - age 1 11.92 1778.9 434.34
## <none> 1767.0 435.13
## - bld_AB 1 25.60 1792.6 435.72
## - ftwd 1 29.10 1796.1 436.07
## - hdln 1 156.06 1923.0 448.37
## - ftln 1 166.94 1933.9 449.38
## - gnd_M 1 292.84 2059.8 460.74
## - wa 1 587.49 2354.5 484.80
## - wt 1 696.84 2463.8 492.97
##
## Step: AIC=433.71
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B + bld_O
##
## Df Sum of Sq RSS AIC
## - bld_O 1 6.23 1778.9 432.35
## - age 1 7.95 1780.6 432.52
## - bld_B 1 12.66 1785.3 432.99
## <none> 1772.7 433.71
## - ftwd 1 24.83 1797.5 434.22
## - bld_AB 1 26.73 1799.4 434.41
## - ftln 1 162.78 1935.5 447.53
## - hdln 1 171.04 1943.7 448.29
## - gnd_M 1 415.54 2188.2 469.62
## - wa 1 587.29 2360.0 483.22
## - wt 1 735.44 2508.1 494.18
##
## Step: AIC=432.35
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B
##
## Df Sum of Sq RSS AIC
## - bld_B 1 7.04 1786.0 431.06
## - age 1 9.02 1787.9 431.26
## <none> 1778.9 432.35
## - bld_AB 1 20.71 1799.6 432.43
## - ftwd 1 26.13 1805.1 432.97
## - ftln 1 161.49 1940.4 445.99
## - hdln 1 180.38 1959.3 447.73
## - gnd_M 1 414.63 2193.6 468.06
## - wa 1 585.80 2364.7 481.58
## - wt 1 733.25 2512.2 492.47
##
## Step: AIC=431.06
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB
##
## Df Sum of Sq RSS AIC
## - age 1 7.22 1793.2 429.78
## - bld_AB 1 15.13 1801.1 430.57
## <none> 1786.0 431.06
## - ftwd 1 27.02 1813.0 431.76
## - ftln 1 164.50 1950.5 444.92
## - hdln 1 183.98 1969.9 446.70
## - gnd_M 1 411.01 2197.0 466.34
## - wa 1 611.72 2397.7 482.07
## - wt 1 775.30 2561.3 493.95
##
## Step: AIC=429.78
## .outcome ~ wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB
##
## Df Sum of Sq RSS AIC
## - bld_AB 1 13.50 1806.7 429.13
## <none> 1793.2 429.78
## - ftwd 1 26.18 1819.4 430.39
## - ftln 1 161.55 1954.7 443.31
## - hdln 1 179.97 1973.2 445.00
## - gnd_M 1 410.05 2203.2 464.85
## - wa 1 1071.54 2864.7 512.11
## - wt 1 1115.60 2908.8 514.86
##
## Step: AIC=429.13
## .outcome ~ wt + wa + hdln + ftln + ftwd + gnd_M
##
## Df Sum of Sq RSS AIC
## <none> 1806.7 429.13
## - ftwd 1 24.18 1830.9 429.53
## - ftln 1 154.52 1961.2 441.90
## - hdln 1 187.79 1994.5 444.93
## - gnd_M 1 416.19 2222.9 464.45
## - wa 1 1084.97 2891.7 511.79
## - wt 1 1121.63 2928.3 514.06
## Start: AIC=550.17
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_B + bld_O + lft_Y + smk_Y + alc_Y
##
## Df Sum of Sq RSS AIC
## - alc_Y 1 1.06 2272.1 548.28
## - smk_Y 1 1.53 2272.5 548.32
## - hdwd 1 1.80 2272.8 548.35
## - lft_Y 1 5.24 2276.2 548.69
## - bld_B 1 6.83 2277.8 548.85
## - bld_O 1 8.94 2279.9 549.06
## - bld_AB 1 19.73 2290.7 550.12
## <none> 2271.0 550.17
## - ftwd 1 43.34 2314.3 552.43
## - age 1 48.73 2319.7 552.95
## - ftln 1 166.97 2438.0 564.13
## - hdln 1 231.27 2502.3 569.99
## - gnd_M 1 505.02 2776.0 593.35
## - wa 1 552.07 2823.1 597.13
## - wt 1 678.38 2949.4 606.98
##
## Step: AIC=548.28
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_B + bld_O + lft_Y + smk_Y
##
## Df Sum of Sq RSS AIC
## - smk_Y 1 1.15 2273.2 546.39
## - hdwd 1 1.74 2273.8 546.45
## - lft_Y 1 5.17 2277.2 546.79
## - bld_B 1 7.50 2279.6 547.02
## - bld_O 1 9.58 2281.6 547.22
## <none> 2272.1 548.28
## - bld_AB 1 20.35 2292.4 548.28
## - ftwd 1 43.47 2315.5 550.54
## - age 1 48.48 2320.5 551.03
## - ftln 1 167.85 2439.9 562.31
## - hdln 1 231.25 2503.3 568.09
## - gnd_M 1 503.96 2776.0 591.35
## - wa 1 555.15 2827.2 595.46
## - wt 1 683.62 2955.7 605.46
##
## Step: AIC=546.39
## .outcome ~ age + wt + wa + hdln + hdwd + ftln + ftwd + gnd_M +
## bld_AB + bld_B + bld_O + lft_Y
##
## Df Sum of Sq RSS AIC
## - hdwd 1 1.41 2274.6 544.53
## - lft_Y 1 5.08 2278.3 544.89
## - bld_B 1 7.24 2280.4 545.11
## - bld_O 1 9.35 2282.6 545.31
## - bld_AB 1 20.10 2293.3 546.37
## <none> 2273.2 546.39
## - ftwd 1 42.67 2315.9 548.58
## - age 1 47.33 2320.6 549.03
## - ftln 1 169.02 2442.2 560.53
## - hdln 1 230.35 2503.6 566.11
## - gnd_M 1 547.81 2821.0 592.97
## - wa 1 555.20 2828.4 593.56
## - wt 1 688.83 2962.0 603.94
##
## Step: AIC=544.53
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B + bld_O + lft_Y
##
## Df Sum of Sq RSS AIC
## - lft_Y 1 4.78 2279.4 543.00
## - bld_B 1 7.86 2282.5 543.31
## - bld_O 1 8.92 2283.5 543.41
## <none> 2274.6 544.53
## - bld_AB 1 20.64 2295.3 544.56
## - ftwd 1 41.27 2315.9 546.58
## - age 1 47.98 2322.6 547.23
## - ftln 1 167.63 2442.2 558.53
## - hdln 1 245.88 2520.5 565.62
## - wa 1 556.60 2831.2 591.78
## - wt 1 737.25 3011.9 605.70
## - gnd_M 1 737.62 3012.2 605.73
##
## Step: AIC=543
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_B + bld_O
##
## Df Sum of Sq RSS AIC
## - bld_B 1 7.18 2286.6 541.71
## - bld_O 1 9.26 2288.7 541.92
## - bld_AB 1 19.76 2299.2 542.94
## <none> 2279.4 543.00
## - ftwd 1 42.74 2322.1 545.18
## - age 1 47.92 2327.3 545.68
## - ftln 1 166.48 2445.9 556.86
## - hdln 1 248.40 2527.8 564.28
## - wa 1 559.32 2838.7 590.38
## - gnd_M 1 732.95 3012.3 603.73
## - wt 1 743.14 3022.5 604.49
##
## Step: AIC=541.71
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB +
## bld_O
##
## Df Sum of Sq RSS AIC
## - bld_O 1 3.93 2290.5 540.10
## - bld_AB 1 13.19 2299.8 541.00
## <none> 2286.6 541.71
## - ftwd 1 43.43 2330.0 543.94
## - age 1 44.86 2331.4 544.08
## - ftln 1 167.44 2454.0 555.61
## - hdln 1 253.62 2540.2 563.38
## - wa 1 576.90 2863.5 590.33
## - gnd_M 1 727.59 3014.2 601.87
## - wt 1 769.28 3055.9 604.96
##
## Step: AIC=540.1
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M + bld_AB
##
## Df Sum of Sq RSS AIC
## - bld_AB 1 10.83 2301.3 539.16
## <none> 2290.5 540.10
## - ftwd 1 44.09 2334.6 542.39
## - age 1 48.28 2338.8 542.79
## - ftln 1 165.56 2456.1 553.80
## - hdln 1 258.23 2548.7 562.13
## - wa 1 573.34 2863.8 588.36
## - gnd_M 1 753.14 3043.6 602.06
## - wt 1 765.51 3056.0 602.97
##
## Step: AIC=539.16
## .outcome ~ age + wt + wa + hdln + ftln + ftwd + gnd_M
##
## Df Sum of Sq RSS AIC
## <none> 2301.3 539.16
## - ftwd 1 40.90 2342.2 541.12
## - age 1 48.22 2349.6 541.82
## - ftln 1 160.89 2462.2 552.36
## - hdln 1 261.44 2562.8 561.37
## - wa 1 579.73 2881.1 587.71
## - gnd_M 1 755.26 3056.6 601.01
## - wt 1 770.14 3071.5 602.11
# Print the caret train summary (resampling setup and CV metrics) for the
# stepwise linear model.
Mstep
## Linear Regression with Stepwise Selection
##
## 225 samples
## 12 predictor
##
## Recipe steps: impute_median, impute_mode, dummy
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 180, 179, 181, 180, 180
## Resampling results:
##
## RMSE Rsquared MAE
## 3.353692 0.8602538 2.64515
# CV metrics (means and standard deviations across folds) for the stepwise model.
Mstep$results
## parameter RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 none 3.353692 0.8602538 2.64515 0.2791423 0.03008863 0.2413835
### (X) plot(Mstep)
# Coefficient table and fit statistics of the final lm kept by the stepwise search.
summary(Mstep)
##
## Call:
## lm(formula = .outcome ~ age + wt + wa + hdln + ftln + ftwd +
## gnd_M, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0560 -2.4142 0.0227 1.9226 9.4697
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 110.05529 6.09782 18.048 < 2e-16 ***
## age -0.04331 0.02031 -2.132 0.034113 *
## wt 0.41642 0.04887 8.522 2.66e-15 ***
## wa -0.40274 0.05447 -7.394 3.07e-12 ***
## hdln 2.03558 0.40998 4.965 1.39e-06 ***
## ftln 1.33180 0.34193 3.895 0.000131 ***
## ftwd -0.95779 0.48772 -1.964 0.050828 .
## gnd_M 5.54390 0.65694 8.439 4.54e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.257 on 217 degrees of freedom
## Multiple R-squared: 0.8703, Adjusted R-squared: 0.8661
## F-statistic: 207.9 on 7 and 217 DF, p-value: < 2.2e-16
# Variable-importance plot for the stepwise model.
ggplot(varImp(Mstep)) # stepAIC variable importance: use for reference only

Mstep$bestTune # no tuning parameter
## parameter
## 1 none
Mstep$finalModel # lm object
##
## Call:
## lm(formula = .outcome ~ age + wt + wa + hdln + ftln + ftwd +
## gnd_M, data = dat)
##
## Coefficients:
## (Intercept) age wt wa hdln ftln
## 110.05529 -0.04331 0.41642 -0.40274 2.03558 1.33180
## ftwd gnd_M
## -0.95779 5.54390
# Per-fold CV metrics of the stepwise model.
Mstep$resample
## RMSE Rsquared MAE Resample
## 1 3.654728 0.8169766 2.876370 Fold1
## 2 3.138540 0.8891631 2.410983 Fold2
## 3 3.237786 0.8687751 2.593860 Fold3
## 4 3.085081 0.8834617 2.426642 Fold4
## 5 3.652327 0.8428924 2.917897 Fold5
# Append the stepwise-model predictions (yhstep) to the training and test
# result tables, then preview the test table.
TROUT <- mutate(TROUT, yhstep = predict(Mstep, newdata = TR))
TSOUT <- mutate(TSOUT, yhstep = predict(Mstep, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 3
## ht yhlm yhstep
## <dbl> <dbl> <dbl>
## 1 188. 186. 186.
## 2 174. 173. 174.
## 3 176. 176. 176.
## 4 150. 153. 153.
## 5 153. 155. 155.
## 6 149. 156. 156.
# Diagnostic scatter plots for the stepwise model, laid out 2 x 2:
#   top row    = training table (TROUT), bottom row = test table (TSOUT)
#   left column = observed ht vs. prediction, right column = residual vs. prediction
g1 <- ggplot(TROUT, aes(x = yhstep, y = ht)) + geom_point()
g2 <- ggplot(TROUT, aes(x = yhstep, y = ht - yhstep)) + geom_point()
g3 <- ggplot(TSOUT, aes(x = yhstep, y = ht)) + geom_point()
g4 <- ggplot(TSOUT, aes(x = yhstep, y = ht - yhstep)) + geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# Regression metrics of the stepwise model: one row for the training set (TR)
# and one for the test set (TS), tagged with the model name and the split.
METstep <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhstep),
    metreg(TSOUT$ht, TSOUT$yhstep)
  ),
  data.frame(model = rep('lmStepAIC', 2), TRTS = c('TR', 'TS'))
)
METstep
## # A tibble: 2 × 5
## rmse mae rsq model TRTS
## <dbl> <dbl> <dbl> <chr> <chr>
## 1 3.20 2.51 0.870 lmStepAIC TR
## 2 3.68 2.87 0.847 lmStepAIC TS
# glmnet, elasticnet, lasso, ridge
## enet cannot be used for classification (forClass is FALSE); use glmnet for that.
modelLookup('enet')
## model parameter label forReg forClass probModel
## 1 enet fraction Fraction of Full Solution TRUE FALSE FALSE
## 2 enet lambda Weight Decay TRUE FALSE FALSE
modelLookup('glmnet') # recommended
## model parameter label forReg forClass probModel
## 1 glmnet alpha Mixing Percentage TRUE TRUE TRUE
## 2 glmnet lambda Regularization Parameter TRUE TRUE TRUE
## Fit
# Tune glmnet with 5-fold CV over alpha (0, 0.25, ..., 1) crossed with
# lambda (0, 0.01, ..., 0.1).
set.seed(488) # same value as the literal 0488; the leading zero has no effect
glmnetGrid <- expand.grid(
  alpha = seq(0, 1, by = 0.25),
  lambda = seq(0.0, 0.1, by = 0.01)
)
# trCtrl is reused by the later model fits in this script.
trCtrl <- trainControl(method = 'cv', number = 5)
Mglmnet <- train(
  RC,
  data = TR,
  method = 'glmnet',
  trControl = trCtrl,
  tuneGrid = glmnetGrid
)
## Loading required namespace: glmnet
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loaded glmnet 4.1-8
# Print the tuning summary (CV metrics per alpha/lambda pair and the selection).
Mglmnet
## glmnet
##
## 225 samples
## 12 predictor
##
## Recipe steps: impute_median, impute_mode, dummy
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 180, 179, 181, 180, 180
## Resampling results across tuning parameters:
##
## alpha lambda RMSE Rsquared MAE
## 0.00 0.00 3.460189 0.8512458 2.746543
## 0.00 0.01 3.460189 0.8512458 2.746543
## 0.00 0.02 3.460189 0.8512458 2.746543
## 0.00 0.03 3.460189 0.8512458 2.746543
## 0.00 0.04 3.460189 0.8512458 2.746543
## 0.00 0.05 3.460189 0.8512458 2.746543
## 0.00 0.06 3.460189 0.8512458 2.746543
## 0.00 0.07 3.460189 0.8512458 2.746543
## 0.00 0.08 3.460189 0.8512458 2.746543
## 0.00 0.09 3.460189 0.8512458 2.746543
## 0.00 0.10 3.460189 0.8512458 2.746543
## 0.25 0.00 3.394221 0.8565529 2.700429
## 0.25 0.01 3.394221 0.8565529 2.700429
## 0.25 0.02 3.393211 0.8566314 2.699671
## 0.25 0.03 3.391188 0.8567596 2.698172
## 0.25 0.04 3.390069 0.8568164 2.697198
## 0.25 0.05 3.389193 0.8568578 2.696320
## 0.25 0.06 3.388684 0.8568732 2.695581
## 0.25 0.07 3.388364 0.8568767 2.694806
## 0.25 0.08 3.388091 0.8568851 2.693902
## 0.25 0.09 3.387987 0.8568834 2.693015
## 0.25 0.10 3.388103 0.8568669 2.692495
## 0.50 0.00 3.394046 0.8565528 2.700313
## 0.50 0.01 3.394046 0.8565528 2.700313
## 0.50 0.02 3.392076 0.8566942 2.699112
## 0.50 0.03 3.390683 0.8567601 2.698089
## 0.50 0.04 3.389402 0.8568333 2.696896
## 0.50 0.05 3.388462 0.8568905 2.695697
## 0.50 0.06 3.387869 0.8569265 2.694434
## 0.50 0.07 3.387617 0.8569428 2.693101
## 0.50 0.08 3.387736 0.8569348 2.692022
## 0.50 0.09 3.388044 0.8569201 2.691042
## 0.50 0.10 3.388614 0.8568830 2.689748
## 0.75 0.00 3.393800 0.8565747 2.700413
## 0.75 0.01 3.393800 0.8565747 2.700413
## 0.75 0.02 3.391887 0.8566808 2.699143
## 0.75 0.03 3.390053 0.8567944 2.697773
## 0.75 0.04 3.388574 0.8568947 2.696120
## 0.75 0.05 3.387744 0.8569541 2.694455
## 0.75 0.06 3.387309 0.8569966 2.692481
## 0.75 0.07 3.387196 0.8570112 2.690035
## 0.75 0.08 3.387846 0.8569726 2.687965
## 0.75 0.09 3.388913 0.8569066 2.686602
## 0.75 0.10 3.390103 0.8568231 2.685515
## 1.00 0.00 3.394239 0.8565395 2.700872
## 1.00 0.01 3.394203 0.8565468 2.700951
## 1.00 0.02 3.391449 0.8567042 2.699006
## 1.00 0.03 3.389301 0.8568505 2.697081
## 1.00 0.04 3.388099 0.8569392 2.695074
## 1.00 0.05 3.387190 0.8570195 2.692010
## 1.00 0.06 3.387191 0.8570299 2.688708
## 1.00 0.07 3.387900 0.8569923 2.686385
## 1.00 0.08 3.388972 0.8569183 2.684956
## 1.00 0.09 3.390470 0.8568108 2.683302
## 1.00 0.10 3.392453 0.8566759 2.682683
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.05.
# Same grid of CV metrics, including their standard deviations across folds.
Mglmnet$results
## alpha lambda RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 0.00 0.00 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 2 0.00 0.01 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 3 0.00 0.02 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 4 0.00 0.03 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 5 0.00 0.04 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 6 0.00 0.05 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 7 0.00 0.06 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 8 0.00 0.07 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 9 0.00 0.08 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 10 0.00 0.09 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 11 0.00 0.10 3.460189 0.8512458 2.746543 0.3236075 0.04171927 0.3289355
## 12 0.25 0.00 3.394221 0.8565529 2.700429 0.3027311 0.03490790 0.2903516
## 13 0.25 0.01 3.394221 0.8565529 2.700429 0.3027311 0.03490790 0.2903516
## 14 0.25 0.02 3.393211 0.8566314 2.699671 0.3019246 0.03488703 0.2898859
## 15 0.25 0.03 3.391188 0.8567596 2.698172 0.2992612 0.03490345 0.2877654
## 16 0.25 0.04 3.390069 0.8568164 2.697198 0.2965459 0.03491090 0.2853095
## 17 0.25 0.05 3.389193 0.8568578 2.696320 0.2939192 0.03492075 0.2830385
## 18 0.25 0.06 3.388684 0.8568732 2.695581 0.2915694 0.03494396 0.2810670
## 19 0.25 0.07 3.388364 0.8568767 2.694806 0.2893999 0.03497481 0.2792454
## 20 0.25 0.08 3.388091 0.8568851 2.693902 0.2870111 0.03496574 0.2773484
## 21 0.25 0.09 3.387987 0.8568834 2.693015 0.2847315 0.03495641 0.2755221
## 22 0.25 0.10 3.388103 0.8568669 2.692495 0.2825898 0.03495238 0.2741339
## 23 0.50 0.00 3.394046 0.8565528 2.700313 0.3019272 0.03480718 0.2882531
## 24 0.50 0.01 3.394046 0.8565528 2.700313 0.3019272 0.03480718 0.2882531
## 25 0.50 0.02 3.392076 0.8566942 2.699112 0.2995276 0.03477110 0.2861194
## 26 0.50 0.03 3.390683 0.8567601 2.698089 0.2950819 0.03470370 0.2812395
## 27 0.50 0.04 3.389402 0.8568333 2.696896 0.2901345 0.03456775 0.2761066
## 28 0.50 0.05 3.388462 0.8568905 2.695697 0.2851261 0.03441035 0.2711052
## 29 0.50 0.06 3.387869 0.8569265 2.694434 0.2802519 0.03426259 0.2663071
## 30 0.50 0.07 3.387617 0.8569428 2.693101 0.2755584 0.03412288 0.2616370
## 31 0.50 0.08 3.387736 0.8569348 2.692022 0.2710417 0.03399097 0.2569720
## 32 0.50 0.09 3.388044 0.8569201 2.691042 0.2668744 0.03387810 0.2533362
## 33 0.50 0.10 3.388614 0.8568830 2.689748 0.2637489 0.03383854 0.2508490
## 34 0.75 0.00 3.393800 0.8565747 2.700413 0.3010477 0.03471719 0.2871362
## 35 0.75 0.01 3.393800 0.8565747 2.700413 0.3010477 0.03471719 0.2871362
## 36 0.75 0.02 3.391887 0.8566808 2.699143 0.2968899 0.03465866 0.2818545
## 37 0.75 0.03 3.390053 0.8567944 2.697773 0.2895669 0.03436406 0.2739675
## 38 0.75 0.04 3.388574 0.8568947 2.696120 0.2822258 0.03407048 0.2660945
## 39 0.75 0.05 3.387744 0.8569541 2.694455 0.2750455 0.03378985 0.2584306
## 40 0.75 0.06 3.387309 0.8569966 2.692481 0.2682482 0.03353275 0.2512769
## 41 0.75 0.07 3.387196 0.8570112 2.690035 0.2630214 0.03339403 0.2457693
## 42 0.75 0.08 3.387846 0.8569726 2.687965 0.2587185 0.03332160 0.2416450
## 43 0.75 0.09 3.388913 0.8569066 2.686602 0.2547306 0.03326634 0.2389601
## 44 0.75 0.10 3.390103 0.8568231 2.685515 0.2514686 0.03324946 0.2379585
## 45 1.00 0.00 3.394239 0.8565395 2.700872 0.3007203 0.03467006 0.2862602
## 46 1.00 0.01 3.394203 0.8565468 2.700951 0.3006824 0.03467415 0.2860891
## 47 1.00 0.02 3.391449 0.8567042 2.699006 0.2932610 0.03442786 0.2770418
## 48 1.00 0.03 3.389301 0.8568505 2.697081 0.2837907 0.03400147 0.2664174
## 49 1.00 0.04 3.388099 0.8569392 2.695074 0.2743029 0.03358437 0.2560177
## 50 1.00 0.05 3.387190 0.8570195 2.692010 0.2660054 0.03325401 0.2472415
## 51 1.00 0.06 3.387191 0.8570299 2.688708 0.2599941 0.03310059 0.2401056
## 52 1.00 0.07 3.387900 0.8569923 2.686385 0.2543718 0.03297360 0.2351519
## 53 1.00 0.08 3.388972 0.8569183 2.684956 0.2496818 0.03289632 0.2327728
## 54 1.00 0.09 3.390470 0.8568108 2.683302 0.2454866 0.03285191 0.2310418
## 55 1.00 0.10 3.392453 0.8566759 2.682683 0.2417135 0.03283938 0.2292401
# Plot the CV tuning results stored in Mglmnet.
ggplot(Mglmnet)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the caret package.
## Please report the issue at <https://github.com/topepo/caret/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# (X) summary(Mglmnet)
# Variable-importance plot for the tuned glmnet model.
ggplot(varImp(Mglmnet))

# alpha/lambda combination selected by CV.
Mglmnet$bestTune
## alpha lambda
## 50 1 0.05
Mglmnet$resample # per-fold CV statistics at the selected tuning values
## RMSE Rsquared MAE Resample
## 1 3.430340 0.8526686 2.846789 Fold3
## 2 3.760754 0.8049131 2.995033 Fold1
## 3 3.113259 0.8825001 2.465397 Fold4
## 4 3.147883 0.8892450 2.417267 Fold2
## 5 3.483714 0.8557705 2.735561 Fold5
# lasso plot: L1 norm vs. coefficients
plot(Mglmnet$finalModel)

# lasso plot: log(lambda) vs. coefficients
plot(Mglmnet$finalModel, xvar = 'lambda', label = TRUE)
# Dashed vertical line at the CV-selected lambda.
abline(v = log(Mglmnet$bestTune$lambda), lty = 2)

# Coefficients of the final glmnet fit at the selected lambda.
# The element is spelled out as `finalModel`: the previous `$final` only
# resolved through `$` partial matching on the train object, which breaks
# silently if another element starting with "final" ever appears.
coef(Mglmnet$finalModel, s = Mglmnet$bestTune$lambda)
## 15 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 106.85948761
## age -0.05046063
## wt 0.37289685
## wa -0.35791353
## hdln 1.99552027
## hdwd 0.11722409
## ftln 1.33070389
## ftwd -0.72328287
## gnd_M 5.56962477
## bld_AB 0.66247137
## bld_B 0.20388551
## bld_O 0.28486628
## lft_Y -0.48428108
## smk_Y 0.04666870
## alc_Y -0.10878455
# Append the glmnet predictions (yhglmnet) to the training and test result
# tables, then preview the test table.
TROUT <- mutate(TROUT, yhglmnet = predict(Mglmnet, newdata = TR))
TSOUT <- mutate(TSOUT, yhglmnet = predict(Mglmnet, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 4
## ht yhlm yhstep yhglmnet
## <dbl> <dbl> <dbl> <dbl>
## 1 188. 186. 186. 186.
## 2 174. 173. 174. 173.
## 3 176. 176. 176. 176.
## 4 150. 153. 153. 153.
## 5 153. 155. 155. 155.
## 6 149. 156. 156. 156.
# Diagnostic scatter plots for the glmnet model, laid out 2 x 2:
#   top row    = training table (TROUT), bottom row = test table (TSOUT)
#   left column = observed ht vs. prediction, right column = residual vs. prediction
g1 <- ggplot(TROUT, aes(x = yhglmnet, y = ht)) + geom_point()
g2 <- ggplot(TROUT, aes(x = yhglmnet, y = ht - yhglmnet)) + geom_point()
g3 <- ggplot(TSOUT, aes(x = yhglmnet, y = ht)) + geom_point()
g4 <- ggplot(TSOUT, aes(x = yhglmnet, y = ht - yhglmnet)) + geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# Regression metrics of the glmnet model: one row for the training set (TR)
# and one for the test set (TS), tagged with the model name and the split.
METglmnet <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhglmnet),
    metreg(TSOUT$ht, TSOUT$yhglmnet)
  ),
  data.frame(model = rep('glmnet', 2), TRTS = c('TR', 'TS'))
)
METglmnet
## # A tibble: 2 × 5
## rmse mae rsq model TRTS
## <dbl> <dbl> <dbl> <chr> <chr>
## 1 3.19 2.50 0.871 glmnet TR
## 2 3.67 2.88 0.848 glmnet TS
# nnet
## nnet: an MLP with a single hidden layer (adding a hidden layer makes the
## fitted function curved; stacking many hidden layers is deep learning).
## Shallow learning.
## Design choices: how many nodes to put in each hidden layer, and how many
## hidden layers to build (always one here).
## Regularization is L2 (weight decay); decay is a tuning parameter.
modelLookup('nnet')
## model parameter label forReg forClass probModel
## 1 nnet size #Hidden Units TRUE TRUE TRUE
## 2 nnet decay Weight Decay TRUE TRUE TRUE
## Fit
# Tune the network over size (5..8) crossed with decay (0, 0.01, ..., 0.1),
# reusing the CV control object trCtrl. linout = TRUE selects linear output
# units (see the finalModel printout: "linear output units").
# NOTE(review): caret reports the recipe steps as impute_median, impute_mode
# and dummy, i.e. no centering/scaling. The unstable CV RMSE and the "missing
# values in resampled performance measures" warning may be caused by that;
# consider adding a normalization step to RC -- TODO confirm.
set.seed(488) # same value as the literal 0488; the leading zero has no effect
nnetGrid <- expand.grid(size = 5:8, decay = seq(0.0, 0.1, by = 0.01))
Mnnet <- train(
  RC,
  data = TR,
  method = 'nnet',
  maxit = 1000,
  trace = FALSE,
  linout = TRUE,
  trControl = trCtrl,
  tuneGrid = nnetGrid
)
## Warning in train_rec(rec = x, dat = data, info = trainInfo, method = models, :
## There were missing values in resampled performance measures.
Mnnet # tuning results
## Neural Network
##
## 225 samples
## 12 predictor
##
## Recipe steps: impute_median, impute_mode, dummy
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 180, 179, 181, 180, 180
## Resampling results across tuning parameters:
##
## size decay RMSE Rsquared MAE
## 5 0.00 8.872907 0.01170586 7.501016
## 5 0.01 8.112337 0.51471773 4.756408
## 5 0.02 6.896826 0.64821109 4.197015
## 5 0.03 7.482631 0.59733018 4.743527
## 5 0.04 7.740027 0.54737824 4.920355
## 5 0.05 7.400454 0.59359248 4.550895
## 5 0.06 5.980705 0.64986862 4.222740
## 5 0.07 5.776194 0.68854233 4.101199
## 5 0.08 6.486424 0.63886078 4.615376
## 5 0.09 5.987036 0.61979537 4.249446
## 5 0.10 8.607704 0.46204008 5.363570
## 6 0.00 7.945246 0.80437199 6.682819
## 6 0.01 8.152304 0.54172115 5.387514
## 6 0.02 8.121740 0.53742958 5.233198
## 6 0.03 6.898333 0.60439118 4.873729
## 6 0.04 7.360487 0.52659728 4.906017
## 6 0.05 6.343929 0.62784944 4.432261
## 6 0.06 7.892998 0.56233010 5.104312
## 6 0.07 7.025030 0.59339757 4.871052
## 6 0.08 6.651826 0.58137995 4.681930
## 6 0.09 5.014524 0.73487251 3.753549
## 6 0.10 5.435373 0.67762809 4.115682
## 7 0.00 8.871490 0.01818756 7.501540
## 7 0.01 8.360488 0.53655648 5.233891
## 7 0.02 6.634937 0.60112584 4.585553
## 7 0.03 8.566726 0.50116464 5.660672
## 7 0.04 7.109152 0.53349759 4.942018
## 7 0.05 6.008680 0.63394413 4.382288
## 7 0.06 6.000947 0.65671739 4.431645
## 7 0.07 5.891262 0.64885323 4.198854
## 7 0.08 6.592146 0.59743888 4.910413
## 7 0.09 7.179581 0.55464780 5.130847
## 7 0.10 6.196570 0.66923278 4.343874
## 8 0.00 7.832634 0.43129377 6.563441
## 8 0.01 6.716660 0.58026585 4.884645
## 8 0.02 6.854825 0.60840526 5.018617
## 8 0.03 7.321154 0.54093398 5.181368
## 8 0.04 7.617038 0.51498336 5.420868
## 8 0.05 7.261541 0.56050640 5.198748
## 8 0.06 7.477960 0.57440108 5.483460
## 8 0.07 6.433188 0.59485748 4.785787
## 8 0.08 6.694810 0.63342945 5.166155
## 8 0.09 7.080552 0.54637588 5.207493
## 8 0.10 6.462711 0.59468691 4.857116
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 6 and decay = 0.09.
# The NA entries in RsquaredSD correspond to the missing-value warning above.
Mnnet$results # Resampling results across tuning parameters
## size decay RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 5 0.00 8.872907 0.01170586 7.501016 0.3656130 NA 0.2408544
## 2 5 0.01 8.112337 0.51471773 4.756408 2.7276701 0.16451095 0.7929259
## 3 5 0.02 6.896826 0.64821109 4.197015 2.3086610 0.10941787 0.7252106
## 4 5 0.03 7.482631 0.59733018 4.743527 2.9939777 0.19172230 1.3052219
## 5 5 0.04 7.740027 0.54737824 4.920355 2.5483314 0.18195468 0.8787680
## 6 5 0.05 7.400454 0.59359248 4.550895 3.1384532 0.19060106 1.0082657
## 7 5 0.06 5.980705 0.64986862 4.222740 1.1817115 0.09442975 0.7617456
## 8 5 0.07 5.776194 0.68854233 4.101199 1.7733084 0.14985370 0.8040059
## 9 5 0.08 6.486424 0.63886078 4.615376 1.8259538 0.07728760 0.8346316
## 10 5 0.09 5.987036 0.61979537 4.249446 2.0552027 0.19981419 0.8899431
## 11 5 0.10 8.607704 0.46204008 5.363570 1.5589722 0.19156059 0.6189232
## 12 6 0.00 7.945246 0.80437199 6.682819 2.3426814 NA 2.0334071
## 13 6 0.01 8.152304 0.54172115 5.387514 4.5818737 0.29576760 2.3959106
## 14 6 0.02 8.121740 0.53742958 5.233198 2.5781167 0.22444821 1.0586841
## 15 6 0.03 6.898333 0.60439118 4.873729 1.9338478 0.13215994 1.0126518
## 16 6 0.04 7.360487 0.52659728 4.906017 2.2884006 0.18919810 1.0485234
## 17 6 0.05 6.343929 0.62784944 4.432261 1.1079431 0.10828858 0.5539098
## 18 6 0.06 7.892998 0.56233010 5.104312 2.2532132 0.12941816 1.0822171
## 19 6 0.07 7.025030 0.59339757 4.871052 3.1475050 0.20018732 1.8576586
## 20 6 0.08 6.651826 0.58137995 4.681930 1.7932033 0.21748990 0.8713729
## 21 6 0.09 5.014524 0.73487251 3.753549 0.9250741 0.10219567 0.5722561
## 22 6 0.10 5.435373 0.67762809 4.115682 1.3616172 0.13874112 0.7973156
## 23 7 0.00 8.871490 0.01818756 7.501540 0.3649372 NA 0.2411710
## 24 7 0.01 8.360488 0.53655648 5.233891 3.7879168 0.22882388 1.9461402
## 25 7 0.02 6.634937 0.60112584 4.585553 1.0894783 0.12207406 0.2385578
## 26 7 0.03 8.566726 0.50116464 5.660672 2.9877700 0.18667642 1.4504721
## 27 7 0.04 7.109152 0.53349759 4.942018 2.1916257 0.22156553 1.2876144
## 28 7 0.05 6.008680 0.63394413 4.382288 0.8629085 0.09133978 0.5265050
## 29 7 0.06 6.000947 0.65671739 4.431645 0.8642205 0.09150494 0.4850043
## 30 7 0.07 5.891262 0.64885323 4.198854 0.9303253 0.06756823 0.5673762
## 31 7 0.08 6.592146 0.59743888 4.910413 0.4563401 0.03502457 0.5372902
## 32 7 0.09 7.179581 0.55464780 5.130847 1.1830184 0.14927402 0.7188310
## 33 7 0.10 6.196570 0.66923278 4.343874 2.4364809 0.10318627 1.0074906
## 34 8 0.00 7.832634 0.43129377 6.563441 2.4132500 0.59108759 2.1257462
## 35 8 0.01 6.716660 0.58026585 4.884645 0.9073881 0.08108978 0.5972624
## 36 8 0.02 6.854825 0.60840526 5.018617 1.1468821 0.09157402 0.9578436
## 37 8 0.03 7.321154 0.54093398 5.181368 1.0538250 0.09501276 0.7934303
## 38 8 0.04 7.617038 0.51498336 5.420868 0.6325402 0.08172747 0.5255899
## 39 8 0.05 7.261541 0.56050640 5.198748 1.2454291 0.13008848 0.8557573
## 40 8 0.06 7.477960 0.57440108 5.483460 1.5793805 0.11917529 1.2733875
## 41 8 0.07 6.433188 0.59485748 4.785787 1.8153892 0.15441230 1.1139038
## 42 8 0.08 6.694810 0.63342945 5.166155 0.9109904 0.04770128 0.5376007
## 43 8 0.09 7.080552 0.54637588 5.207493 0.1753208 0.07642876 0.3273956
## 44 8 0.10 6.462711 0.59468691 4.857116 0.7902413 0.08980137 0.6919173
ggplot(Mnnet) # visualizes Mnnet$results: size vs. RMSE

# (x) summary(Mnnet)
# Variable-importance plot for the tuned network.
ggplot(varImp(Mnnet))

# size/decay combination selected by CV.
Mnnet$bestTune
## size decay
## 21 6 0.09
Mnnet$finalModel # nnet object
## a 14-6-1 network with 97 weights
## inputs: age wt wa hdln hdwd ftln ftwd gnd_M bld_AB bld_B bld_O lft_Y smk_Y alc_Y
## output(s): .outcome
## options were - linear output units decay=0.09
# (x) plot(Mnnet$finalModel)
# Per-fold CV metrics at the selected size/decay.
Mnnet$resample
## RMSE Rsquared MAE Resample
## 1 5.986581 0.5941134 4.558619 Fold1
## 2 4.140781 0.7888664 3.358809 Fold4
## 3 5.636683 0.6730540 3.831284 Fold2
## 4 5.385077 0.7634082 3.945868 Fold5
## 5 3.923500 0.8549205 3.073166 Fold3
# Append the neural-network predictions (yhnnet) to the training and test
# result tables, then preview the test table.
# Fix: predict with Mnnet. The previous code called predict(Mglmnet, ...), so
# the yhnnet column -- and every nnet plot and metric derived from it -- merely
# duplicated the glmnet results. Printed output recorded after this point was
# produced before the fix and has to be regenerated.
TROUT <- TROUT %>% mutate(yhnnet = predict(Mnnet, newdata = TR))
TSOUT <- TSOUT %>% mutate(yhnnet = predict(Mnnet, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 5
## ht yhlm yhstep yhglmnet yhnnet
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 188. 186. 186. 186. 186.
## 2 174. 173. 174. 173. 173.
## 3 176. 176. 176. 176. 176.
## 4 150. 153. 153. 153. 153.
## 5 153. 155. 155. 155. 155.
## 6 149. 156. 156. 156. 156.
# Diagnostic scatter plots for the nnet predictions, laid out 2 x 2:
#   g1 / g3: observed ht against predicted value (train / test)
#   g2 / g4: residual (ht - prediction) against predicted value (train / test)
g1 <- ggplot(TROUT, aes(x = yhnnet, y = ht)) +
  geom_point()
g2 <- ggplot(TROUT, aes(x = yhnnet, y = ht - yhnnet)) +
  geom_point()
g3 <- ggplot(TSOUT, aes(x = yhnnet, y = ht)) +
  geom_point()
g4 <- ggplot(TSOUT, aes(x = yhnnet, y = ht - yhnnet)) +
  geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# Two-row metric table for nnet: row 1 = training set, row 2 = test set.
# metreg() is defined elsewhere in this file; per the recorded output it
# yields the rmse / mae / rsq columns.
METnnet <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhnnet),
    metreg(TSOUT$ht, TSOUT$yhnnet)
  ),
  data.frame(model = rep("nnet", 2), TRTS = c("TR", "TS"))
)
METnnet
## # A tibble: 2 × 5
## rmse mae rsq model TRTS
## <dbl> <dbl> <dbl> <chr> <chr>
## 1 3.19 2.50 0.871 nnet TR
## 2 3.67 2.88 0.848 nnet TS
# svmRadial
## svmRadial: an algorithm that was widely used for CCTV before deep learning appeared.
modelLookup("svmRadial")
## model parameter label forReg forClass probModel
## 1 svmRadial sigma Sigma TRUE TRUE TRUE
## 2 svmRadial C Cost TRUE TRUE TRUE
# NOTE(review): this seed differs from the set.seed(0488) used before the
# rpart and ranger fits, and the recorded fold sizes differ accordingly.
# If trCtrl does not fix the fold indices, the CV folds are not shared across
# models -- confirm before treating resamples() comparisons as paired.
set.seed(100)
# 5 x 5 grid: sigma and C each take the values 2^-2, ..., 2^2.
svmGrid <- expand.grid(sigma = 2^(-2:2), C = 2^(-2:2))
MsvmRadial <- train(
  RC,
  data = TR,
  method = "svmRadial",
  trControl = trCtrl,
  tuneGrid = svmGrid
)
## Loading required namespace: kernlab
##
## Attaching package: 'kernlab'
##
## The following object is masked from 'package:scales':
##
## alpha
##
## The following object is masked from 'package:purrr':
##
## cross
##
## The following object is masked from 'package:ggplot2':
##
## alpha
# Print the caret fit: resampled RMSE / Rsquared / MAE for every sigma-C pair
# and the values selected by smallest RMSE.
MsvmRadial
## Support Vector Machines with Radial Basis Function Kernel
##
## 225 samples
## 12 predictor
##
## Recipe steps: impute_median, impute_mode, dummy
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 180, 180, 179, 181, 180
## Resampling results across tuning parameters:
##
## sigma C RMSE Rsquared MAE
## 0.25 0.25 6.504948 0.57196412 5.122941
## 0.25 0.50 5.925111 0.62083728 4.587084
## 0.25 1.00 5.648077 0.63314034 4.352867
## 0.25 2.00 5.607617 0.62917244 4.307676
## 0.25 4.00 5.617975 0.62626459 4.307778
## 0.50 0.25 7.712777 0.39280592 6.180663
## 0.50 0.50 7.213184 0.44711294 5.683800
## 0.50 1.00 6.791644 0.50280683 5.278865
## 0.50 2.00 6.667258 0.50690232 5.135604
## 0.50 4.00 6.660641 0.50678061 5.128044
## 1.00 0.25 8.468350 0.25717696 7.037233
## 1.00 0.50 8.218209 0.27758938 6.752115
## 1.00 1.00 7.978321 0.31580153 6.507231
## 1.00 2.00 7.889072 0.33362222 6.432228
## 1.00 4.00 7.886509 0.33328022 6.429203
## 2.00 0.25 8.780269 0.14309304 7.385748
## 2.00 0.50 8.689951 0.15554409 7.303059
## 2.00 1.00 8.579399 0.17666516 7.207339
## 2.00 2.00 8.557111 0.19041560 7.193468
## 2.00 4.00 8.556620 0.19114833 7.192470
## 4.00 0.25 8.862189 0.07150516 7.469460
## 4.00 0.50 8.830508 0.07504188 7.454661
## 4.00 1.00 8.784719 0.08927474 7.436753
## 4.00 2.00 8.778399 0.09515654 7.441111
## 4.00 4.00 8.778417 0.09531415 7.440158
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.25 and C = 2.
# Resampling summary per grid point, including the SD columns
# (RMSESD, RsquaredSD, MAESD) that the print method above omits.
MsvmRadial$results
## sigma C RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 0.25 0.25 6.504948 0.57196412 5.122941 0.9368681 0.02903560 0.6385417
## 2 0.25 0.50 5.925111 0.62083728 4.587084 0.8895953 0.03642303 0.6283493
## 3 0.25 1.00 5.648077 0.63314034 4.352867 0.8350354 0.04153702 0.6127182
## 4 0.25 2.00 5.607617 0.62917244 4.307676 0.8108384 0.04200980 0.5979709
## 5 0.25 4.00 5.617975 0.62626459 4.307778 0.8163394 0.04439819 0.5944251
## 6 0.50 0.25 7.712777 0.39280592 6.180663 0.9646214 0.06115396 0.7017110
## 7 0.50 0.50 7.213184 0.44711294 5.683800 0.9781378 0.05037441 0.7179529
## 8 0.50 1.00 6.791644 0.50280683 5.278865 0.9450208 0.04312137 0.6844987
## 9 0.50 2.00 6.667258 0.50690232 5.135604 0.9274403 0.04344485 0.6255326
## 10 0.50 4.00 6.660641 0.50678061 5.128044 0.9279437 0.04367226 0.6310740
## 11 1.00 0.25 8.468350 0.25717696 7.037233 0.9397437 0.07250093 0.6864022
## 12 1.00 0.50 8.218209 0.27758938 6.752115 0.9684299 0.06526089 0.7152336
## 13 1.00 1.00 7.978321 0.31580153 6.507231 0.9619417 0.05709918 0.7349334
## 14 1.00 2.00 7.889072 0.33362222 6.432228 0.9306728 0.04605314 0.7072335
## 15 1.00 4.00 7.886509 0.33328022 6.429203 0.9291967 0.04576473 0.7056114
## 16 2.00 0.25 8.780269 0.14309304 7.385748 0.9117275 0.05215221 0.6903220
## 17 2.00 0.50 8.689951 0.15554409 7.303059 0.9386777 0.05714281 0.7011466
## 18 2.00 1.00 8.579399 0.17666516 7.207339 0.9752442 0.05574224 0.7365204
## 19 2.00 2.00 8.557111 0.19041560 7.193468 0.9665694 0.05625723 0.7331381
## 20 2.00 4.00 8.556620 0.19114833 7.192470 0.9665912 0.05785451 0.7328873
## 21 4.00 0.25 8.862189 0.07150516 7.469460 0.8925240 0.03769194 0.6914814
## 22 4.00 0.50 8.830508 0.07504188 7.454661 0.9028108 0.04105834 0.6995668
## 23 4.00 1.00 8.784719 0.08927474 7.436753 0.9202761 0.04707859 0.7092631
## 24 4.00 2.00 8.778399 0.09515654 7.441111 0.9245715 0.04597558 0.7122406
## 25 4.00 4.00 8.778417 0.09531415 7.440158 0.9250024 0.04616185 0.7126751
# Tuning profile of the SVM fit.
MsvmRadial %>% ggplot()

# Variable-importance plot.
MsvmRadial %>%
  varImp() %>%
  ggplot()

# Selected tuning values.
MsvmRadial[["bestTune"]]
## sigma C
## 4 0.25 2
# Underlying fitted object (a kernlab "ksvm" object, per the output below).
MsvmRadial[["finalModel"]]
## Support Vector Machine object of class "ksvm"
##
## SV type: eps-svr (regression)
## parameter : epsilon = 0.1 cost C = 2
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.25
##
## Number of Support Vectors : 187
##
## Objective Function Value : -56.8466
## Training error : 0.013455
# (x) plot(MsvmRadial$finalModel)
# Fold-level metrics stored on the fit.
MsvmRadial[["resample"]]
## RMSE Rsquared MAE Resample
## 1 4.383396 0.6601292 3.367002 Fold4
## 2 6.452131 0.5778985 4.966978 Fold1
## 3 5.638845 0.6232156 4.208097 Fold3
## 4 6.203888 0.6029947 4.617003 Fold5
## 5 5.359824 0.6816243 4.379300 Fold2
# Append the SVM predictions for the training (TR) and test (TS) sets as
# column `yhsvmRadial`, then preview the test-set table.
TROUT <- mutate(TROUT, yhsvmRadial = predict(MsvmRadial, newdata = TR))
TSOUT <- mutate(TSOUT, yhsvmRadial = predict(MsvmRadial, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 6
## ht yhlm yhstep yhglmnet yhnnet yhsvmRadial
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 188. 186. 186. 186. 186. 170.
## 2 174. 173. 174. 173. 173. 173.
## 3 176. 176. 176. 176. 176. 169.
## 4 150. 153. 153. 153. 153. 159.
## 5 153. 155. 155. 155. 155. 155.
## 6 149. 156. 156. 156. 156. 157.
# Diagnostic scatter plots for the SVM predictions, laid out 2 x 2:
#   g1 / g3: observed ht against predicted value (train / test)
#   g2 / g4: residual (ht - prediction) against predicted value (train / test)
g1 <- ggplot(TROUT, aes(x = yhsvmRadial, y = ht)) +
  geom_point()
g2 <- ggplot(TROUT, aes(x = yhsvmRadial, y = ht - yhsvmRadial)) +
  geom_point()
g3 <- ggplot(TSOUT, aes(x = yhsvmRadial, y = ht)) +
  geom_point()
g4 <- ggplot(TSOUT, aes(x = yhsvmRadial, y = ht - yhsvmRadial)) +
  geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# Two-row metric table for svmRadial: row 1 = training set, row 2 = test set.
# metreg() is defined elsewhere in this file.
METsvmRadial <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhsvmRadial),
    metreg(TSOUT$ht, TSOUT$yhsvmRadial)
  ),
  data.frame(model = rep("svmRadial", 2), TRTS = c("TR", "TS"))
)
METsvmRadial
## # A tibble: 2 × 5
## rmse mae rsq model TRTS
## <dbl> <dbl> <dbl> <chr> <chr>
## 1 1.03 0.897 0.988 svmRadial TR
## 2 5.83 4.69 0.662 svmRadial TS
# rpart
## rpart (regression tree)
modelLookup("rpart")
## model parameter label forReg forClass probModel
## 1 rpart cp Complexity Parameter TRUE TRUE TRUE
modelLookup("rpart2")
## model parameter label forReg forClass probModel
## 1 rpart2 maxdepth Max Tree Depth TRUE TRUE TRUE
set.seed(0488)
# Candidate complexity parameters: 10 evenly spaced values on [0, 0.2].
rpartGrid <- expand.grid(cp = seq(0, 0.2, length.out = 10))
Mrpart <- train(
  RC,
  data = TR,
  method = "rpart",
  trControl = trCtrl,
  tuneGrid = rpartGrid
)
##
## Attaching package: 'rpart'
##
## The following object is masked from 'package:dials':
##
## prune
# Print the caret fit: resampled RMSE / Rsquared / MAE for every cp value and
# the value selected by smallest RMSE.
Mrpart
## CART
##
## 225 samples
## 12 predictor
##
## Recipe steps: impute_median, impute_mode, dummy
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 180, 179, 181, 180, 180
## Resampling results across tuning parameters:
##
## cp RMSE Rsquared MAE
## 0.00000000 4.324843 0.7709030 3.509419
## 0.02222222 4.572731 0.7453769 3.726639
## 0.04444444 4.572731 0.7453769 3.726639
## 0.06666667 4.724824 0.7235329 3.783926
## 0.08888889 5.234300 0.6552439 4.116163
## 0.11111111 5.234300 0.6552439 4.116163
## 0.13333333 5.234300 0.6552439 4.116163
## 0.15555556 5.234300 0.6552439 4.116163
## 0.17777778 5.234300 0.6552439 4.116163
## 0.20000000 5.234300 0.6552439 4.116163
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.
# Resampling summary per cp value, including the SD columns.
Mrpart$results
## cp RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 0.00000000 4.324843 0.7709030 3.509419 0.4962337 0.05456251 0.4927866
## 2 0.02222222 4.572731 0.7453769 3.726639 0.3471641 0.03234916 0.2672540
## 3 0.04444444 4.572731 0.7453769 3.726639 0.3471641 0.03234916 0.2672540
## 4 0.06666667 4.724824 0.7235329 3.783926 0.3098771 0.04311460 0.3216701
## 5 0.08888889 5.234300 0.6552439 4.116163 0.2493465 0.03769615 0.2569557
## 6 0.11111111 5.234300 0.6552439 4.116163 0.2493465 0.03769615 0.2569557
## 7 0.13333333 5.234300 0.6552439 4.116163 0.2493465 0.03769615 0.2569557
## 8 0.15555556 5.234300 0.6552439 4.116163 0.2493465 0.03769615 0.2569557
## 9 0.17777778 5.234300 0.6552439 4.116163 0.2493465 0.03769615 0.2569557
## 10 0.20000000 5.234300 0.6552439 4.116163 0.2493465 0.03769615 0.2569557
# Tuning profile of the tree fit.
Mrpart %>% ggplot()

# Variable-importance plot.
Mrpart %>%
  varImp() %>%
  ggplot()

# Selected complexity parameter.
Mrpart[["bestTune"]]
## cp
## 1 0
# Underlying fitted tree (printed as the node listing below).
Mrpart[["finalModel"]]
## n= 225
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 225 17737.35000 165.5658
## 2) gnd_M< 0.5 106 3194.97400 157.9604
## 4) ftln< 22.85 36 525.00890 153.1556
## 8) age>=39 12 108.36670 150.1333 *
## 9) age< 39 24 252.23330 154.6667
## 18) hdln< 15.9 9 72.52000 152.1667 *
## 19) hdln>=15.9 15 89.71333 156.1667 *
## 5) ftln>=22.85 70 1411.43100 160.4314
## 10) age>=46.5 13 156.13690 156.4846 *
## 11) age< 46.5 57 1006.60300 161.3316
## 22) ftln< 24.25 46 671.24430 160.4652
## 44) ftln< 23.65 25 300.66960 159.4040
## 88) age>=27.5 11 104.28910 157.9091 *
## 89) age< 27.5 14 152.48360 160.5786 *
## 45) ftln>=23.65 21 308.90290 161.7286
## 90) wa>=73.65 10 126.58400 160.3600 *
## 91) wa< 73.65 11 146.56180 162.9727 *
## 23) ftln>=24.25 11 156.44730 164.9545 *
## 3) gnd_M>=0.5 119 2949.64600 172.3403
## 6) ftln< 25.15 54 873.31500 169.3167
## 12) age>=28.5 19 228.83790 166.6105 *
## 13) age< 28.5 35 429.80290 170.7857
## 26) wt< 58.7 14 103.45500 168.5500 *
## 27) wt>=58.7 21 209.71810 172.2762
## 54) hdln< 18.15 13 68.80769 171.2692 *
## 55) hdln>=18.15 8 106.30870 173.9125 *
## 7) ftln>=25.15 65 1172.48200 174.8523
## 14) wt< 80.05 50 675.51220 173.8660
## 28) hdln< 18.85 30 274.00800 172.5200
## 56) wa>=76.75 18 117.85110 171.7778 *
## 57) wa< 76.75 12 131.36670 173.6333 *
## 29) hdln>=18.85 20 265.62550 175.8850
## 58) age>=25.5 10 88.54100 174.3700 *
## 59) age< 25.5 10 131.18000 177.4000 *
## 15) wt>=80.05 15 286.19600 178.1400 *
# Base-graphics rendering of the final tree: draw the skeleton first, then
# add the split labels on top of it (order matters).
plot(Mrpart$finalModel)
text(Mrpart$finalModel)

# Alternative rendering of the same tree with rpart.plot.
library(rpart.plot)
rpart.plot(Mrpart$finalModel)

# Fold-level metrics stored on the fit.
Mrpart[["resample"]]
## RMSE Rsquared MAE Resample
## 1 4.638521 0.7161797 3.661524 Fold1
## 2 4.601339 0.7260757 3.904056 Fold3
## 3 4.502987 0.7705117 3.702418 Fold4
## 4 3.449128 0.8517755 2.648823 Fold5
## 5 4.432241 0.7899722 3.630272 Fold2
# Append the regression-tree predictions for the training (TR) and test (TS)
# sets as column `yhrpart`, then preview the test-set table.
TROUT <- mutate(TROUT, yhrpart = predict(Mrpart, newdata = TR))
TSOUT <- mutate(TSOUT, yhrpart = predict(Mrpart, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 7
## ht yhlm yhstep yhglmnet yhnnet yhsvmRadial yhrpart
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 188. 186. 186. 186. 186. 170. 177.
## 2 174. 173. 174. 173. 173. 173. 174.
## 3 176. 176. 176. 176. 176. 169. 174.
## 4 150. 153. 153. 153. 153. 159. 152.
## 5 153. 155. 155. 155. 155. 155. 152.
## 6 149. 156. 156. 156. 156. 157. 156.
# Diagnostic scatter plots for the rpart predictions, laid out 2 x 2:
#   g1 / g3: observed ht against predicted value (train / test)
#   g2 / g4: residual (ht - prediction) against predicted value (train / test)
g1 <- ggplot(TROUT, aes(x = yhrpart, y = ht)) +
  geom_point()
g2 <- ggplot(TROUT, aes(x = yhrpart, y = ht - yhrpart)) +
  geom_point()
g3 <- ggplot(TSOUT, aes(x = yhrpart, y = ht)) +
  geom_point()
g4 <- ggplot(TSOUT, aes(x = yhrpart, y = ht - yhrpart)) +
  geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# Two-row metric table for rpart: row 1 = training set, row 2 = test set.
# metreg() is defined elsewhere in this file.
METrpart <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhrpart),
    metreg(TSOUT$ht, TSOUT$yhrpart)
  ),
  data.frame(model = rep("rpart", 2), TRTS = c("TR", "TS"))
)
METrpart
## # A tibble: 2 × 5
## rmse mae rsq model TRTS
## <dbl> <dbl> <dbl> <chr> <chr>
## 1 3.25 2.52 0.866 rpart TR
## 2 4.51 3.54 0.773 rpart TS
# ranger
modelLookup("ranger")
## model parameter label forReg forClass probModel
## 1 ranger mtry #Randomly Selected Predictors TRUE TRUE TRUE
## 2 ranger splitrule Splitting Rule TRUE TRUE TRUE
## 3 ranger min.node.size Minimal Node Size TRUE TRUE TRUE
## Fit
set.seed(0488)
# Tuning grid: even mtry values from 2 up to ncol(TR) - 1, node sizes 1 to 3,
# and a single split rule.
rangerGrid <- expand.grid(
  mtry = seq(2, ncol(TR) - 1, by = 2),
  min.node.size = 1:3,
  splitrule = "extratrees"
)
# importance = "impurity" is forwarded to ranger so that varImp() has
# importance scores to plot.
Mranger <- train(
  RC,
  data = TR,
  method = "ranger",
  importance = "impurity",
  trControl = trCtrl,
  tuneGrid = rangerGrid
)
## Loading required namespace: e1071
## Loading required namespace: ranger
##
## Attaching package: 'e1071'
##
## The following object is masked from 'package:tune':
##
## tune
##
## The following object is masked from 'package:rsample':
##
## permutations
##
## The following object is masked from 'package:parsnip':
##
## tune
##
## The following object is masked from 'package:ggplot2':
##
## element
# Print the caret fit: resampled RMSE / Rsquared / MAE for every
# mtry / min.node.size pair and the values selected by smallest RMSE.
Mranger
## Random Forest
##
## 225 samples
## 12 predictor
##
## Recipe steps: impute_median, impute_mode, dummy
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 180, 179, 181, 180, 180
## Resampling results across tuning parameters:
##
## mtry min.node.size RMSE Rsquared MAE
## 2 1 4.168851 0.8005161 3.296839
## 2 2 4.128123 0.8040248 3.271421
## 2 3 4.168968 0.8003318 3.302569
## 4 1 3.965145 0.8064540 3.129232
## 4 2 3.947337 0.8089740 3.108412
## 4 3 3.942206 0.8099285 3.107190
## 6 1 3.920338 0.8089353 3.100145
## 6 2 3.921989 0.8088344 3.087544
## 6 3 3.919172 0.8101216 3.079854
## 8 1 3.930481 0.8074287 3.095187
## 8 2 3.926138 0.8090105 3.094183
## 8 3 3.921633 0.8095582 3.098947
## 10 1 3.935923 0.8076059 3.110366
## 10 2 3.920330 0.8085521 3.099600
## 10 3 3.925316 0.8085535 3.103864
## 12 1 3.925628 0.8082391 3.094780
## 12 2 3.934327 0.8074922 3.114638
## 12 3 3.913992 0.8094680 3.103733
##
## Tuning parameter 'splitrule' was held constant at a value of extratrees
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were mtry = 12, splitrule = extratrees
## and min.node.size = 3.
# Resampling summary per grid point, including the SD columns.
Mranger$results
## mtry min.node.size splitrule RMSE Rsquared MAE RMSESD
## 1 2 1 extratrees 4.168851 0.8005161 3.296839 0.2780157
## 2 2 2 extratrees 4.128123 0.8040248 3.271421 0.2761709
## 3 2 3 extratrees 4.168968 0.8003318 3.302569 0.2934775
## 4 4 1 extratrees 3.965145 0.8064540 3.129232 0.2857537
## 5 4 2 extratrees 3.947337 0.8089740 3.108412 0.3232101
## 6 4 3 extratrees 3.942206 0.8099285 3.107190 0.3012651
## 7 6 1 extratrees 3.920338 0.8089353 3.100145 0.3230954
## 8 6 2 extratrees 3.921989 0.8088344 3.087544 0.3178604
## 9 6 3 extratrees 3.919172 0.8101216 3.079854 0.3278649
## 10 8 1 extratrees 3.930481 0.8074287 3.095187 0.3119245
## 11 8 2 extratrees 3.926138 0.8090105 3.094183 0.3397868
## 12 8 3 extratrees 3.921633 0.8095582 3.098947 0.2988308
## 13 10 1 extratrees 3.935923 0.8076059 3.110366 0.3320812
## 14 10 2 extratrees 3.920330 0.8085521 3.099600 0.3533986
## 15 10 3 extratrees 3.925316 0.8085535 3.103864 0.3468099
## 16 12 1 extratrees 3.925628 0.8082391 3.094780 0.3516549
## 17 12 2 extratrees 3.934327 0.8074922 3.114638 0.3637272
## 18 12 3 extratrees 3.913992 0.8094680 3.103733 0.3754559
## RsquaredSD MAESD
## 1 0.04188683 0.2121470
## 2 0.04000509 0.2234718
## 3 0.03997508 0.2151334
## 4 0.03842212 0.2372788
## 5 0.03868059 0.2864089
## 6 0.03951649 0.2402829
## 7 0.04048938 0.2807711
## 8 0.03975012 0.2611653
## 9 0.03892153 0.2671271
## 10 0.03814036 0.2652337
## 11 0.03999964 0.2775484
## 12 0.03631677 0.2419464
## 13 0.03821755 0.3058889
## 14 0.04155547 0.2864441
## 15 0.04032754 0.3072332
## 16 0.04030557 0.2975056
## 17 0.04138779 0.3228948
## 18 0.04196922 0.3294780
# Tuning profile of the random-forest fit.
Mranger %>% ggplot()

# Variable-importance plot.
Mranger %>%
  varImp() %>%
  ggplot()

# Selected tuning values.
Mranger[["bestTune"]]
## mtry splitrule min.node.size
## 18 12 extratrees 3
# Underlying fitted ranger object.
Mranger[["finalModel"]]
## Ranger result
##
## Call:
## ranger::ranger(dependent.variable.name = ".outcome", data = x, mtry = min(param$mtry, ncol(x)), min.node.size = param$min.node.size, splitrule = as.character(param$splitrule), write.forest = TRUE, probability = classProbs, ...)
##
## Type: Regression
## Number of trees: 500
## Sample size: 225
## Number of independent variables: 14
## Mtry: 12
## Target node size: 3
## Variable importance mode: impurity
## Splitrule: extratrees
## Number of random splits: 1
## OOB prediction error (MSE): 15.29872
## R squared (OOB): 0.8067967
# Fold-level metrics stored on the fit (one row per CV fold).
Mranger$resample
## RMSE Rsquared MAE Resample
## 1 4.084030 0.7874631 3.296884 Fold3
## 2 4.180057 0.7583122 3.200368 Fold1
## 3 4.001462 0.8289483 3.209543 Fold2
## 4 4.051861 0.8037177 3.292027 Fold4
## 5 3.252549 0.8688986 2.519844 Fold5
# Append the random-forest predictions for the training (TR) and test (TS)
# sets as column `yhranger`, then preview the test-set table.
TROUT <- mutate(TROUT, yhranger = predict(Mranger, newdata = TR))
TSOUT <- mutate(TSOUT, yhranger = predict(Mranger, newdata = TS))
head(TSOUT)
## # A tibble: 6 × 8
## ht yhlm yhstep yhglmnet yhnnet yhsvmRadial yhrpart yhranger
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 188. 186. 186. 186. 186. 170. 177. 182.
## 2 174. 173. 174. 173. 173. 173. 174. 173.
## 3 176. 176. 176. 176. 176. 169. 174. 173.
## 4 150. 153. 153. 153. 153. 159. 152. 153.
## 5 153. 155. 155. 155. 155. 155. 152. 155.
## 6 149. 156. 156. 156. 156. 157. 156. 157.
# Diagnostic scatter plots for the ranger predictions, laid out 2 x 2:
#   g1 / g3: observed ht against predicted value (train / test)
#   g2 / g4: residual (ht - prediction) against predicted value (train / test)
g1 <- ggplot(TROUT, aes(x = yhranger, y = ht)) +
  geom_point()
g2 <- ggplot(TROUT, aes(x = yhranger, y = ht - yhranger)) +
  geom_point()
g3 <- ggplot(TSOUT, aes(x = yhranger, y = ht)) +
  geom_point()
g4 <- ggplot(TSOUT, aes(x = yhranger, y = ht - yhranger)) +
  geom_point()
grid.arrange(g1, g2, g3, g4, ncol = 2)

# Two-row metric table for ranger: row 1 = training set, row 2 = test set.
# metreg() is defined elsewhere in this file.
METranger <- bind_cols(
  bind_rows(
    metreg(TROUT$ht, TROUT$yhranger),
    metreg(TSOUT$ht, TSOUT$yhranger)
  ),
  data.frame(model = rep("ranger", 2), TRTS = c("TR", "TS"))
)
METranger
## # A tibble: 2 × 5
## rmse mae rsq model TRTS
## <dbl> <dbl> <dbl> <chr> <chr>
## 1 1.61 1.24 0.970 ranger TR
## 2 4.21 3.45 0.803 ranger TS
# Evaluation
## CV evaluation
# Collect the fold-level CV metrics of all seven fitted models under short
# labels so they can be summarised and plotted side by side.
RESAMP <- resamples(
  list(
    LM     = Mlm,
    STEP   = Mstep,
    GLMNET = Mglmnet,
    NNET   = Mnnet,
    SVM    = MsvmRadial,
    RPART  = Mrpart,
    RANGER = Mranger
  )
)
# Called directly (not piped) because the printed summary echoes the call.
summary(RESAMP)
##
## Call:
## summary.resamples(object = RESAMP)
##
## Models: LM, STEP, GLMNET, NNET, SVM, RPART, RANGER
## Number of resamples: 5
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## LM 2.374710 2.416799 2.828514 2.703031 2.839293 3.055840 0
## STEP 2.410983 2.426642 2.593860 2.645150 2.876370 2.917897 0
## GLMNET 2.417267 2.465397 2.735561 2.692010 2.846789 2.995033 0
## NNET 3.073166 3.358809 3.831284 3.753549 3.945868 4.558619 0
## SVM 3.367002 4.208097 4.379300 4.307676 4.617003 4.966978 0
## RPART 2.648823 3.630272 3.661524 3.509419 3.702418 3.904056 0
## RANGER 2.519844 3.200368 3.209543 3.103733 3.292027 3.296884 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## LM 3.064790 3.116350 3.426790 3.398719 3.602199 3.783464 0
## STEP 3.085081 3.138540 3.237786 3.353692 3.652327 3.654728 0
## GLMNET 3.113259 3.147883 3.430340 3.387190 3.483714 3.760754 0
## NNET 3.923500 4.140781 5.385077 5.014524 5.636683 5.986581 0
## SVM 4.383396 5.359824 5.638845 5.607617 6.203888 6.452131 0
## RPART 3.449128 4.432241 4.502987 4.324843 4.601339 4.638521 0
## RANGER 3.252549 4.001462 4.051861 3.913992 4.084030 4.180057 0
##
## Rsquared
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## LM 0.8041820 0.8466001 0.8540874 0.8562773 0.8863103 0.8902070 0
## STEP 0.8169766 0.8428924 0.8687751 0.8602538 0.8834617 0.8891631 0
## GLMNET 0.8049131 0.8526686 0.8557705 0.8570195 0.8825001 0.8892450 0
## NNET 0.5941134 0.6730540 0.7634082 0.7348725 0.7888664 0.8549205 0
## SVM 0.5778985 0.6029947 0.6232156 0.6291724 0.6601292 0.6816243 0
## RPART 0.7161797 0.7260757 0.7705117 0.7709030 0.7899722 0.8517755 0
## RANGER 0.7583122 0.7874631 0.8037177 0.8094680 0.8289483 0.8688986 0
# Box-and-whisker plot of the CV metrics per model.
bwplot(RESAMP)

# Scatter-plot matrix of the fold-level Rsquared values across models.
splom(RESAMP, metric = "Rsquared")

# TR, TS evaluation
## Combine the per-model performance results
# Stack the train/test metric tables of every model and order the rows by
# RMSE. The original built the same bind_rows() result twice; its first,
# unsorted assignment was immediately overwritten, so it is dropped here.
MET <- bind_rows(
  METlm, METstep, METglmnet, METnnet,
  METsvmRadial, METrpart, METranger
) %>%
  arrange(rmse)
# g1: R-squared per model; g2: RMSE per model. Train (TR) and test (TS) are
# drawn as separate lines, distinguished by both colour and point shape.
g1 <- ggplot(MET, aes(x = model, y = rsq, shape = TRTS, col = TRTS, group = TRTS)) +
  geom_line() +
  geom_point(size = 3)
g2 <- ggplot(MET, aes(x = model, y = rmse, shape = TRTS, col = TRTS, group = TRTS)) +
  geom_line() +
  geom_point(size = 3)
grid.arrange(g1, g2, nrow = 2, ncol = 1)

# Execution time
# time1 was recorded at the very top of the script.
time2 <- Sys.time()
time2 - time1
## Time difference of 2.710014 mins
# Reference
set.seed(0488)
# Grid with alpha fixed at 1 and lambda from 0 to 0.5 in steps of 0.1.
lassoGrid <- expand.grid(alpha = 1, lambda = seq(0.0, 0.5, by = 0.1))
# 10-fold CV; returnResamp = "all" keeps the fold-level metrics for every
# lambda (see the Mlasso$resample listing further down).
Ctrl <- trainControl(method = "cv", number = 10, returnResamp = "all")
Mlasso <- train(
  RC,
  data = TR,
  method = "glmnet",
  trControl = Ctrl,
  tuneGrid = lassoGrid
)
Mlasso %>% ggplot()

Mlasso %>%
  varImp() %>%
  ggplot()

Mlasso[["bestTune"]]
## alpha lambda
## 2 1 0.1
# Resampling summary per lambda (mean and SD of each metric over the folds).
Mlasso$results
## alpha lambda RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.0 3.415495 0.8597192 2.687169 0.3503130 0.05180525 0.2975563
## 2 1 0.1 3.402952 0.8621860 2.683478 0.3254798 0.04080997 0.2972265
## 3 1 0.2 3.432394 0.8609549 2.719084 0.3563010 0.03616555 0.3280094
## 4 1 0.3 3.518366 0.8550552 2.797090 0.4037056 0.03725082 0.3628590
## 5 1 0.4 3.656557 0.8440725 2.917482 0.4583642 0.04077074 0.4084912
## 6 1 0.5 3.775271 0.8344951 3.021244 0.4759694 0.04256465 0.4282060
# Fold-level metrics for every lambda (6 lambdas x 10 folds = 60 rows),
# available because the fit used returnResamp = 'all'.
Mlasso$resample
## alpha lambda RMSE Rsquared MAE Resample
## 1 1 0.5 3.848152 0.7898975 2.943508 Fold01
## 2 1 0.0 3.227300 0.8517397 2.426577 Fold01
## 3 1 0.1 3.289144 0.8467568 2.495247 Fold01
## 4 1 0.2 3.412459 0.8344061 2.658831 Fold01
## 5 1 0.3 3.551669 0.8204819 2.774818 Fold01
## 6 1 0.4 3.746599 0.7999935 2.894056 Fold01
## 7 1 0.5 3.695747 0.8635646 2.722216 Fold02
## 8 1 0.0 3.149710 0.8939176 2.386667 Fold02
## 9 1 0.1 3.151405 0.8942121 2.277195 Fold02
## 10 1 0.2 3.220155 0.8910643 2.305226 Fold02
## 11 1 0.3 3.341247 0.8858761 2.414358 Fold02
## 12 1 0.4 3.540030 0.8735664 2.596620 Fold02
## 13 1 0.5 3.706860 0.8784555 3.106235 Fold03
## 14 1 0.0 3.324910 0.9074213 2.488514 Fold03
## 15 1 0.1 3.361073 0.9036488 2.595566 Fold03
## 16 1 0.2 3.402613 0.9005594 2.706676 Fold03
## 17 1 0.3 3.475149 0.8959221 2.827461 Fold03
## 18 1 0.4 3.579974 0.8882424 2.969256 Fold03
## 19 1 0.5 3.279660 0.9080098 2.502694 Fold04
## 20 1 0.0 2.856230 0.9174424 2.352743 Fold04
## 21 1 0.1 2.868624 0.9213231 2.375429 Fold04
## 22 1 0.2 2.923230 0.9208866 2.373260 Fold04
## 23 1 0.3 3.016600 0.9188001 2.388707 Fold04
## 24 1 0.4 3.139462 0.9145164 2.435592 Fold04
## 25 1 0.5 4.130185 0.8212706 3.474061 Fold05
## 26 1 0.0 3.480949 0.8711279 2.819359 Fold05
## 27 1 0.1 3.479304 0.8671708 2.855488 Fold05
## 28 1 0.2 3.580723 0.8584645 2.923714 Fold05
## 29 1 0.3 3.739193 0.8490503 3.051323 Fold05
## 30 1 0.4 3.981486 0.8319861 3.327939 Fold05
## 31 1 0.5 3.704816 0.8100739 3.166136 Fold06
## 32 1 0.0 3.497791 0.8297913 2.755565 Fold06
## 33 1 0.1 3.448829 0.8325138 2.793293 Fold06
## 34 1 0.2 3.452784 0.8330293 2.879718 Fold06
## 35 1 0.3 3.508011 0.8283968 2.959599 Fold06
## 36 1 0.4 3.602217 0.8200191 3.050433 Fold06
## 37 1 0.5 3.080467 0.8686406 2.471308 Fold07
## 38 1 0.0 3.305784 0.8672897 2.652783 Fold07
## 39 1 0.1 3.170825 0.8710366 2.508455 Fold07
## 40 1 0.2 3.028466 0.8772582 2.393632 Fold07
## 41 1 0.3 2.997415 0.8767640 2.419937 Fold07
## 42 1 0.4 3.016492 0.8738763 2.445738 Fold07
## 43 1 0.5 4.850809 0.8020190 3.883104 Fold08
## 44 1 0.0 3.838168 0.8666737 3.142259 Fold08
## 45 1 0.1 4.033644 0.8584801 3.272460 Fold08
## 46 1 0.2 4.225246 0.8482324 3.423480 Fold08
## 47 1 0.3 4.442518 0.8333140 3.592975 Fold08
## 48 1 0.4 4.692170 0.8140519 3.778431 Fold08
## 49 1 0.5 3.696029 0.7795224 3.006703 Fold09
## 50 1 0.0 4.103717 0.7325120 3.192956 Fold09
## 51 1 0.1 3.757960 0.7763220 2.952007 Fold09
## 52 1 0.2 3.540020 0.8018698 2.764131 Fold09
## 53 1 0.3 3.513816 0.8037105 2.737517 Fold09
## 54 1 0.4 3.587874 0.7936232 2.816635 Fold09
## 55 1 0.5 3.759989 0.8234974 2.936473 Fold10
## 56 1 0.0 3.370386 0.8592762 2.654268 Fold10
## 57 1 0.1 3.468709 0.8503959 2.709641 Fold10
## 58 1 0.2 3.538238 0.8437786 2.762169 Fold10
## 59 1 0.3 3.598040 0.8382359 2.804205 Fold10
## 60 1 0.4 3.679264 0.8308502 2.860119 Fold10
# Computing M$results from M$resample
# Averaging the fold-level RMSE per lambda reproduces the RMSE and RMSESD
# columns of Mlasso$results.
Mlasso$resample %>%
  group_by(lambda) %>%
  dplyr::summarise(
    n = n(),
    mnRMSE = mean(RMSE),
    sdRMSE = sd(RMSE)
  ) %>%
  data.frame()
## lambda n mnRMSE sdRMSE
## 1 0.0 10 3.415495 0.3503130
## 2 0.1 10 3.402952 0.3254798
## 3 0.2 10 3.432394 0.3563010
## 4 0.3 10 3.518366 0.4037056
## 5 0.4 10 3.656557 0.4583642
## 6 0.5 10 3.775271 0.4759694
# With returnResamp='final', M$resample stores only the CV results for the
# best tuning parameters.
# trainControl() is called without returnResamp here, so caret's default
# applies (documented as "final").
set.seed(0488)
Ctrl <- trainControl(method = "cv", number = 10)
M2 <- train(
  RC,
  data = TR,
  method = "glmnet",
  trControl = Ctrl,
  tuneGrid = lassoGrid
)
M2[["bestTune"]]
## alpha lambda
## 2 1 0.1
# Resampling summary per lambda for the second fit.
M2$results
## alpha lambda RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.0 3.415495 0.8597192 2.687169 0.3503130 0.05180525 0.2975563
## 2 1 0.1 3.402952 0.8621860 2.683478 0.3254798 0.04080997 0.2972265
## 3 1 0.2 3.432394 0.8609549 2.719084 0.3563010 0.03616555 0.3280094
## 4 1 0.3 3.518366 0.8550552 2.797090 0.4037056 0.03725082 0.3628590
## 5 1 0.4 3.656557 0.8440725 2.917482 0.4583642 0.04077074 0.4084912
## 6 1 0.5 3.775271 0.8344951 3.021244 0.4759694 0.04256465 0.4282060
# Row of M2$results for the selected lambda, RMSE columns only.
# The original filtered with `lambda==0.1`: an exact floating-point comparison
# against a hard-coded value. It only works while the grid element is bit-for-
# bit equal to 0.1 and 0.1 stays the selected value. Compare with a tolerance
# against the lambda caret actually selected instead.
M2$results %>%
  dplyr::filter(dplyr::near(lambda, M2$bestTune$lambda)) %>%
  dplyr::select(starts_with("RMSE"))
## RMSE RMSESD
## 1 3.402952 0.3254798
# M2$resample holds only the folds for the selected lambda, so its mean and
# SD should match the row above.
M2$resample %>%
  dplyr::summarize(n = n(), mnRMSE = mean(RMSE), sdRMSE = sd(RMSE))
## n mnRMSE sdRMSE
## 1 10 3.402952 0.3254798
# Two ways of overlaying the per-lambda mean RMSE (line + red squares) on the
# fold-level RMSE points from Mlasso$resample.
# g1: means taken from the precomputed M2$results table. The overlay layers
#     inherit the x / y mapping from the base plot.
g1 <- ggplot(Mlasso$resample, aes(x = factor(lambda), y = RMSE)) +
  geom_point() +
  geom_line(data = M2$results, aes(group = 1)) +
  geom_point(data = M2$results, shape = 15, size = 5, color = "red")
# g2: means computed on the fly by stat_summary().
g2 <- ggplot(Mlasso$resample, aes(x = factor(lambda), y = RMSE)) +
  geom_point() +
  stat_summary(fun = "mean", geom = "line", aes(group = 1)) +
  stat_summary(fun = "mean", geom = "point", shape = 15, size = 5, color = "red")
ggarrange(g1, g2, nrow = 1, ncol = 2)
