# Set the knitr chunk option `echo = TRUE` as the document-wide default.
knitr::opts_chunk$set(echo = TRUE)
# install.packages(c('gam','spTimer'))
library(gam)
## Warning: package 'gam' was built under R version 3.5.2
## Loading required package: splines
## Loading required package: foreach
## Loaded gam 1.16
library(spTimer)
## Warning: package 'spTimer' was built under R version 3.5.2
## 
## ## spTimer version: 3.3
library(CARS)
## Warning: package 'CARS' was built under R version 3.5.2

일반화 가법 모형

일반화 가법 모형(GAM)은 비선형 회귀 방법 중 하나로, 과적합되기 쉽고 설명변수들의 효과가 가법적(additive)이어야 한다는 제한점이 있다.

# Preview the first six rows of the built-in iris data set.
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Hold-out split of iris: fix the RNG seed, draw 70% of the row numbers
# without replacement for training, and keep the remaining rows for testing.
set.seed(1)
n_obs <- nrow(iris)
idx <- sample(n_obs, size = n_obs * 0.7)
train <- iris[idx, ]
test <- iris[-idx, ]
# Fit a GAM of Sepal.Width on a smooth term of Sepal.Length (s() is given 10
# as its second argument) using the training rows only.
fit <- gam(Sepal.Width ~ s(Sepal.Length, 10), data = train)

# Compare held-out observations with the model's predictions; the validation
# summary is auto-printed.
observed <- test$Sepal.Width
predicted <- predict(fit, newdata = test)
spT.validation(observed, predicted)
##     MSE    RMSE     MAE    MAPE    BIAS   rBIAS   rMSEP 
##  0.1576  0.3970  0.3024  9.9170 -0.0626 -0.0203  0.7736

모형 비교

* hold-out 기법

# Show the first six rows of iris before splitting it.
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Seeded hold-out split used for model comparison: 70% of the iris rows are
# sampled into `train`; every row not sampled goes to `test`.
set.seed(1)
n_rows <- nrow(iris)
idx <- sample(n_rows, size = n_rows * 0.7)
train <- iris[idx, ]
test <- iris[-idx, ]

RMSE가 가장 작은 모형 찾기

# Hold-out grid search over the second argument of s() for the two smooth
# terms. For every (i, j) pair a GAM is fitted on `train`, scored on `test`
# with spT.validation(), and the scores are stored as one row of `validation`
# (SL = value used for Sepal.Length, PL = value used for Petal.Length).
#
# Rows are collected in a preallocated list and bound once at the end instead
# of growing the data frame with rbind() on every iteration.
df_candidates <- 1:2
results <- vector("list", length(df_candidates)^2)
k <- 0L
for (i in df_candidates) {
  for (j in df_candidates) {
    # The formula is written inline so that `i` and `j` are resolved from the
    # environment in which the loop runs.
    fit <- gam(Sepal.Width ~ s(Sepal.Length, i) + s(Petal.Length, j),
               data = train)
    scores <- spT.validation(test$Sepal.Width, predict(fit, newdata = test))
    k <- k + 1L
    # t() turns the named score vector into a one-row matrix so each metric
    # becomes its own column.
    results[[k]] <- data.frame(SL = i, PL = j, t(scores))
    # Progress message: `i` belongs to the Sepal.Length smooth (Sepal.Width
    # is the response, not a tuned term).
    message("Sepal.Length df=", i, "\nPetal.Length df=", j)
  }
}
validation <- do.call(rbind, results)
## Sepal.Length df=1
## Petal.Length df=1
## Sepal.Length df=1
## Petal.Length df=2
## Sepal.Length df=2
## Petal.Length df=1
## Sepal.Length df=2
## Petal.Length df=2
# Show up to the first six rows of the validation table.
head(validation, n = 6L)
##   SL PL    MSE   RMSE    MAE   MAPE    BIAS   rBIAS  rMSEP
## 1  1  1 0.0937 0.3061 0.2470 8.3489 -0.0182 -0.0059 0.4681
## 2  1  2 0.0805 0.2838 0.2346 7.9069 -0.0163 -0.0053 0.4025
## 3  2  1 0.0990 0.3147 0.2475 8.4058 -0.0091 -0.0029 0.4954
## 4  2  2 0.0822 0.2866 0.2290 7.7896  0.0022  0.0007 0.4111
# Pick the row with the smallest RMSE: order() ranks the rows by RMSE and the
# first index of that ranking is used to subset the table.
best_idx <- order(validation$RMSE)[1]
validation[best_idx, ]
##   SL PL    MSE   RMSE    MAE   MAPE    BIAS   rBIAS  rMSEP
## 2  1  2 0.0805 0.2838 0.2346 7.9069 -0.0163 -0.0053 0.4025
# Refit on the full iris data with the settings of the lowest-RMSE row
# (1 for the Sepal.Length smooth, 2 for the Petal.Length smooth), then print
# the fitted model. The formula is kept inline so the printed call is
# unchanged.
fit <- gam(
  formula = Sepal.Width ~ s(Sepal.Length, 1) + s(Petal.Length, 2),
  data = iris
)
print(fit)
## Call:
## gam(formula = Sepal.Width ~ s(Sepal.Length, 1) + s(Petal.Length, 
##     2), data = iris)
## 
## Degrees of Freedom: 149 total; 146.0001 Residual
## Residual Deviance: 13.43771