StepAIC減少法のWAIC版StepWAICを書いてみました.実はあまりいい結果が出せてなくて実践投入してません. うまく言ってない理由で考えられるのは下記
WAICの計算はこれを参考にしました.
あと下記は資料作成のためにiterationを少なめにしてますが,WAICを計算するならもっと多めにとったよさそう?
アドバイスなど頂ける方は @piroyoungまで
require( rstan)
## Loading required package: rstan
## Loading required package: Rcpp
## Loading required package: inline
##
## Attaching package: 'inline'
##
## The following object is masked from 'package:Rcpp':
##
## registerPlugin
##
## rstan (Version 2.4.0, packaged: 2014-07-21 17:16:17 UTC, GitRev: c995bc8b9d67)
require( dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
iteration <- 1000
numData <- 50
numCol <- 5
X <- matrix(rnorm(numData*numCol,0,1),ncol = numCol,nrow = numData)
X <- apply(X,2,cumsum)
# 正解モデル
y <- 5*X[,2] + 7*X[,3] + rnorm(numData,0,1)
waic <- function(stanfit){
loglik <- extract(stanfit)$loglik
apply(exp(loglik),2,mean) %>% log() %>% sum() -> lpd
apply(loglik,2,var) %>% sum() -> p_waic
waic = -2*(lpd - p_waic)
return(waic)
}
stancode <- "
data{
int numData;
int numCol;
matrix[numData,numCol] X;
vector[numData] y;
}
parameters{
vector[numCol] a;
real<lower=0> sigma;
}
transformed parameters{
vector[numData] yp;
yp <- X*a;
}
model{
a ~ normal( 0,1000);
sigma ~ normal( 0,1);
y ~ normal( yp,sigma);
}
generated quantities{
vector[numData] loglik;
for( i in 1:numData){
loglik[i] <- normal_log( y[i],yp[i],sigma);
}
}
"
dat.orig <- list( numData=numData,numCol=numCol,X=X,y=as.numeric(y))
fit <- stan(model_code = stancode , data = dat.orig, iter = iteration , chains=1)
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 3.06498 seconds (Warm-up)
## # 3.07247 seconds (Sampling)
## # 6.13745 seconds (Total)
waic.cur <- waic(fit)
param <- 1:numCol
deleted <- c()
while(TRUE){
waic.new <- list()
print(param)
print('====')
for(j in param){
print(j)
dat.cur <- list( numData=numData,numCol=ncol(X[,param[param != j]]),X=X[,param[param != j]],y=as.numeric(y))
fit <- stan(model_code = stancode , data = dat.orig, iter = iteration , chains=1)
waic.new[j] <- waic(fit)
cat('waic.cur:')
cat(waic.cur)
cat('\n')
cat(param[param != j])
print(waic(fit))
print('==')
}
waic.new[deleted] <- Inf
waic.new <- as.numeric(waic.new)
if(waic.cur < min(waic.new)){
break
}else{
waic.cur <- min(waic.new)
param <- param[-which.min(waic.new)]
deleted <- c(deleted,which.min(waic.new))
}
}
## [1] 1 2 3 4 5
## [1] "===="
## [1] 1
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 3.50475 seconds (Warm-up)
## # 3.38048 seconds (Sampling)
## # 6.88523 seconds (Total)
##
## waic.cur:149.4
## 2 3 4 5[1] 148.7
## [1] "=="
## [1] 2
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 3.65407 seconds (Warm-up)
## # 4.3298 seconds (Sampling)
## # 7.98387 seconds (Total)
##
## waic.cur:149.4
## 1 3 4 5[1] 148.4
## [1] "=="
## [1] 3
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 3.75892 seconds (Warm-up)
## # 3.64649 seconds (Sampling)
## # 7.4054 seconds (Total)
##
## waic.cur:149.4
## 1 2 4 5[1] 149.7
## [1] "=="
## [1] 4
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 2.97446 seconds (Warm-up)
## # 3.41949 seconds (Sampling)
## # 6.39395 seconds (Total)
##
## waic.cur:149.4
## 1 2 3 5[1] 149.3
## [1] "=="
## [1] 5
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 3.15535 seconds (Warm-up)
## # 3.13382 seconds (Sampling)
## # 6.28918 seconds (Total)
##
## waic.cur:149.4
## 1 2 3 4[1] 148.2
## [1] "=="
## [1] 1 2 3 4
## [1] "===="
## [1] 1
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 4.10543 seconds (Warm-up)
## # 3.10395 seconds (Sampling)
## # 7.20938 seconds (Total)
##
## waic.cur:148.2
## 2 3 4[1] 148.5
## [1] "=="
## [1] 2
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 3.50509 seconds (Warm-up)
## # 2.43572 seconds (Sampling)
## # 5.94081 seconds (Total)
##
## waic.cur:148.2
## 1 3 4[1] 148
## [1] "=="
## [1] 3
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 3.69924 seconds (Warm-up)
## # 3.91605 seconds (Sampling)
## # 7.61529 seconds (Total)
##
## waic.cur:148.2
## 1 2 4[1] 148.6
## [1] "=="
## [1] 4
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 3.4219 seconds (Warm-up)
## # 3.07921 seconds (Sampling)
## # 6.50111 seconds (Total)
##
## waic.cur:148.2
## 1 2 3[1] 148.8
## [1] "=="
## [1] 1 3 4
## [1] "===="
## [1] 1
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 3.5113 seconds (Warm-up)
## # 3.01026 seconds (Sampling)
## # 6.52156 seconds (Total)
##
## waic.cur:148
## 3 4[1] 148.7
## [1] "=="
## [1] 3
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 3.77268 seconds (Warm-up)
## # 3.30345 seconds (Sampling)
## # 7.07613 seconds (Total)
##
## waic.cur:148
## 1 4[1] 149.2
## [1] "=="
## [1] 4
##
## TRANSLATING MODEL 'stancode' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'stancode' NOW.
##
## SAMPLING FOR MODEL 'stancode' NOW (CHAIN 1).
##
## Iteration: 1 / 1000 [ 0%] (Warmup)
## Iteration: 100 / 1000 [ 10%] (Warmup)
## Iteration: 200 / 1000 [ 20%] (Warmup)
## Iteration: 300 / 1000 [ 30%] (Warmup)
## Iteration: 400 / 1000 [ 40%] (Warmup)
## Iteration: 500 / 1000 [ 50%] (Warmup)
## Iteration: 501 / 1000 [ 50%] (Sampling)
## Iteration: 600 / 1000 [ 60%] (Sampling)
## Iteration: 700 / 1000 [ 70%] (Sampling)
## Iteration: 800 / 1000 [ 80%] (Sampling)
## Iteration: 900 / 1000 [ 90%] (Sampling)
## Iteration: 1000 / 1000 [100%] (Sampling)
## # Elapsed Time: 3.57417 seconds (Warm-up)
## # 3.44192 seconds (Sampling)
## # 7.01609 seconds (Total)
##
## waic.cur:148
## 1 3[1] 149.3
## [1] "=="
print(param)
## [1] 1 3 4