gam {mgcv} をためす

library(mgcv)
## This is mgcv 1.7-22. For overview type 'help("mgcv-package")'.
# ?gam
set.seed(2)  ## simulate some data... 
dat <- gamSim(1, n = 400, dist = "normal", scale = 2)
## Gu & Wahba 4 term additive model
# ?gamSim GAMにあてはまるサンプルデータ。
plot(dat)

plot of chunk unnamed-chunk-1

b <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat)
# ?s # Defining smooths in GAM formulae 平滑化関数
summary(b)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## y ~ s(x0) + s(x1) + s(x2) + s(x3)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   7.8333     0.0988    79.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##       edf Ref.df     F p-value    
## s(x0) 2.5   3.12  6.91 0.00013 ***
## s(x1) 2.4   2.98 81.86 < 2e-16 ***
## s(x2) 7.7   8.56 88.16 < 2e-16 ***
## s(x3) 1.0   1.00  4.34 0.03781 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.715   Deviance explained = 72.5%
## GCV score = 4.0505  Scale est. = 3.9027    n = 400
plot(b)

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

points(dat$x0, dat$y - mean(dat$y))

plot of chunk unnamed-chunk-1

plot(b, pages = 1, residuals = TRUE)  ## show partial residuals

plot of chunk unnamed-chunk-1

plot(b, pages = 1, seWithMean = TRUE)  ## `with intercept' CIs

plot of chunk unnamed-chunk-1

## run some basic model checks, including checking smoothing basis
## dimensions...
gam.check(b)

plot of chunk unnamed-chunk-1

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 12 iterations.
## The RMS GCV score gradiant at convergence was 1.74e-07 .
## The Hessian was positive definite.
## The estimated model rank was 37 (maximum possible: 37)
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##          k'   edf k-index p-value
## s(x0) 9.000 2.500   1.045    0.77
## s(x1) 9.000 2.401   1.027    0.64
## s(x2) 9.000 7.698   0.969    0.30
## s(x3) 9.000 1.000   1.030    0.74


b.test <- gam(y ~ s(x2), data = dat)
summary(b.test)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## y ~ s(x2)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    7.833      0.127    61.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##        edf Ref.df    F p-value    
## s(x2) 8.17   8.81 50.6  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.527   Deviance explained = 53.6%
## GCV score = 6.6424  Scale est. = 6.4901    n = 400
plot(b.test, page = 1, residuals = TRUE)
points(dat$x2, dat$y - mean(dat$y))

plot of chunk unnamed-chunk-1