StepAIC減少法のWAIC版StepWAICを書いてみました.実はあまりいい結果が出せてなくて実践投入してません. うまく言ってない理由で考えられるのは下記

WAICの計算はこれを参考にしました.

あと下記は資料作成のためにiterationを少なめにしてますが,WAICを計算するならもっと多めにとったよさそう?

アドバイスなど頂ける方は @piroyoungまで

require( rstan)
## Loading required package: rstan
## Loading required package: Rcpp
## Loading required package: inline
## 
## Attaching package: 'inline'
## 
## The following object is masked from 'package:Rcpp':
## 
##     registerPlugin
## 
## rstan (Version 2.4.0, packaged: 2014-07-21 17:16:17 UTC, GitRev: c995bc8b9d67)
require( dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
iteration <- 1000

疑似データの発生

numData <- 50
numCol <- 5
X <- matrix(rnorm(numData*numCol,0,1),ncol = numCol,nrow = numData)
X <- apply(X,2,cumsum)
# 正解モデル
y <- 5*X[,2] + 7*X[,3] + rnorm(numData,0,1)

WAICを返す関数

waic <- function(stanfit){
  loglik <- extract(stanfit)$loglik
  apply(exp(loglik),2,mean) %>% log() %>% sum() -> lpd
  apply(loglik,2,var) %>% sum() -> p_waic
  waic = -2*(lpd - p_waic)
  return(waic)
}

stancode

stancode <- "
data{
  int numData;
  int numCol;
  matrix[numData,numCol] X;
  vector[numData] y;
}
parameters{
  vector[numCol] a;
  real<lower=0> sigma;
}
transformed parameters{
  vector[numData] yp;
  yp <- X*a;
}
model{
  a ~ normal( 0,1000);
  sigma ~ normal( 0,1);

  y ~ normal( yp,sigma);
}
generated quantities{
  vector[numData] loglik;
  for( i in 1:numData){
    loglik[i] <- normal_log( y[i],yp[i],sigma);
  }
}
"

ここからがstepWAIC

dat.orig <- list( numData=numData,numCol=numCol,X=X,y=as.numeric(y))

fit <- stan(model_code = stancode , data = dat.orig, iter = iteration , chains=1)
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 3.06498 seconds (Warm-up)
## #                3.07247 seconds (Sampling)
## #                6.13745 seconds (Total)
waic.cur <- waic(fit)
param <- 1:numCol
deleted <- c()

while(TRUE){
  waic.new <- list()
  print(param)
  print('====')

  for(j in param){
    print(j)
    dat.cur <- list( numData=numData,numCol=ncol(X[,param[param != j]]),X=X[,param[param != j]],y=as.numeric(y))
    fit <- stan(model_code = stancode , data = dat.orig, iter = iteration , chains=1)
    waic.new[j] <- waic(fit)
    
    cat('waic.cur:')
    cat(waic.cur)
    cat('\n')
    cat(param[param != j])
    print(waic(fit))
    print('==')
  }
  waic.new[deleted] <- Inf
  waic.new <- as.numeric(waic.new)
  if(waic.cur < min(waic.new)){
    break
  }else{
    waic.cur <- min(waic.new)
    param <- param[-which.min(waic.new)]
    deleted <- c(deleted,which.min(waic.new))
  }
  
}
## [1] 1 2 3 4 5
## [1] "===="
## [1] 1
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 3.50475 seconds (Warm-up)
## #                3.38048 seconds (Sampling)
## #                6.88523 seconds (Total)
## 
## waic.cur:149.4
## 2 3 4 5[1] 148.7
## [1] "=="
## [1] 2
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 3.65407 seconds (Warm-up)
## #                4.3298 seconds (Sampling)
## #                7.98387 seconds (Total)
## 
## waic.cur:149.4
## 1 3 4 5[1] 148.4
## [1] "=="
## [1] 3
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 3.75892 seconds (Warm-up)
## #                3.64649 seconds (Sampling)
## #                7.4054 seconds (Total)
## 
## waic.cur:149.4
## 1 2 4 5[1] 149.7
## [1] "=="
## [1] 4
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 2.97446 seconds (Warm-up)
## #                3.41949 seconds (Sampling)
## #                6.39395 seconds (Total)
## 
## waic.cur:149.4
## 1 2 3 5[1] 149.3
## [1] "=="
## [1] 5
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 3.15535 seconds (Warm-up)
## #                3.13382 seconds (Sampling)
## #                6.28918 seconds (Total)
## 
## waic.cur:149.4
## 1 2 3 4[1] 148.2
## [1] "=="
## [1] 1 2 3 4
## [1] "===="
## [1] 1
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 4.10543 seconds (Warm-up)
## #                3.10395 seconds (Sampling)
## #                7.20938 seconds (Total)
## 
## waic.cur:148.2
## 2 3 4[1] 148.5
## [1] "=="
## [1] 2
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 3.50509 seconds (Warm-up)
## #                2.43572 seconds (Sampling)
## #                5.94081 seconds (Total)
## 
## waic.cur:148.2
## 1 3 4[1] 148
## [1] "=="
## [1] 3
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 3.69924 seconds (Warm-up)
## #                3.91605 seconds (Sampling)
## #                7.61529 seconds (Total)
## 
## waic.cur:148.2
## 1 2 4[1] 148.6
## [1] "=="
## [1] 4
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 3.4219 seconds (Warm-up)
## #                3.07921 seconds (Sampling)
## #                6.50111 seconds (Total)
## 
## waic.cur:148.2
## 1 2 3[1] 148.8
## [1] "=="
## [1] 1 3 4
## [1] "===="
## [1] 1
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 3.5113 seconds (Warm-up)
## #                3.01026 seconds (Sampling)
## #                6.52156 seconds (Total)
## 
## waic.cur:148
## 3 4[1] 148.7
## [1] "=="
## [1] 3
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 3.77268 seconds (Warm-up)
## #                3.30345 seconds (Sampling)
## #                7.07613 seconds (Total)
## 
## waic.cur:148
## 1 4[1] 149.2
## [1] "=="
## [1] 4
## 
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
## 
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
## 
## Iteration:   1 / 1000 [  0%]  (Warmup)
## Iteration: 100 / 1000 [ 10%]  (Warmup)
## Iteration: 200 / 1000 [ 20%]  (Warmup)
## Iteration: 300 / 1000 [ 30%]  (Warmup)
## Iteration: 400 / 1000 [ 40%]  (Warmup)
## Iteration: 500 / 1000 [ 50%]  (Warmup)
## Iteration: 501 / 1000 [ 50%]  (Sampling)
## Iteration: 600 / 1000 [ 60%]  (Sampling)
## Iteration: 700 / 1000 [ 70%]  (Sampling)
## Iteration: 800 / 1000 [ 80%]  (Sampling)
## Iteration: 900 / 1000 [ 90%]  (Sampling)
## Iteration: 1000 / 1000 [100%]  (Sampling)
## #  Elapsed Time: 3.57417 seconds (Warm-up)
## #                3.44192 seconds (Sampling)
## #                7.01609 seconds (Total)
## 
## waic.cur:148
## 1 3[1] 149.3
## [1] "=="
print(param)
## [1] 1 3 4