{rstan} で 「状態空間時系列分析入門」 を再現したい。やっていること / 数式はテキストを参照。
リポジトリ: https://github.com/sinhrks/stan-statespace
library(devtools)
# devtools::install_github('hoxo-m/pforeach')
# devtools::install_github('sinhrks/ggfortify')
library(rstan)
library(pforeach)
library(ggplot2)
ggplot2::theme_set(theme_bw(base_family="HiraKakuProN-W3"))
library(ggfortify)
# モデルが収束しているか確認
is.converged <- function(stanfit) {
summarized <- summary(stanfit)
all(summarized$summary[, 'Rhat'] < 1.1)
}
# 値がだいたい近いか確認
is.almost.fitted <- function(result, expected, tolerance = 0.001) {
if (abs(result - expected) > tolerance) {
print(paste('Result is ', result))
return(FALSE)
} else {
return(TRUE)
}
}
ukdrivers <- read.table('../data/UKdriversKSI.txt', skip = 1)
ukdrivers <- ts(ukdrivers[[1]], start = c(1969, 1), frequency = 12)
ukdrivers <- log(ukdrivers)
ukpetrol <- read.table('../data/logUKpetrolprice.txt', skip = 1)
ukpetrol <- ts(ukpetrol, start = start(ukdrivers), frequency = frequency(ukdrivers))
確率的レベルと説明変数のあるローカルレベルモデル。
※初期値/制約を与えてもかなり値がずれる
model_file <- '../models/fig05_04.stan'
cat(paste(readLines(model_file)), sep = '\n')
data {
int<lower=1> n;
vector[n] y;
vector[n] x;
}
parameters {
# 確率的レベル
vector<lower=mean(y)-3*sd(y), upper=mean(y)+3*sd(y)>[n] mu;
# 確定的回帰係数
real<lower=-0.5, upper=0.5> beta;
# レベル撹乱項
real<lower=0> sigma_level;
# 観測撹乱項
real<lower=0> sigma_irreg;
}
transformed parameters {
vector[n] yhat;
for(t in 1:n) {
yhat[t] <- mu[t] + beta * x[t];
}
}
model {
# 式 5.2
mu[1] ~ normal(y[1], sigma_level);
for (t in 2:n) {
mu[t] ~ normal(mu[t-1], sigma_level);
}
for (t in 1:n) {
y[t] ~ normal(yhat[t], sigma_irreg);
}
sigma_level ~ inv_gamma(0.001, 0.001);
sigma_irreg ~ inv_gamma(0.001, 0.001);
beta ~ normal(0, 2);
}
y <- ukdrivers
x <- ukpetrol
standata <- within(list(), {
y <- as.vector(y)
x <- as.vector(x)
n <- length(y)
})
lmresult <- lm(y ~ x, data = data.frame(x = 1:length(y), y = as.numeric(y)))
init <- list(list(mu = rep(mean(y), length(y)), beta = coefficients(lmresult)[[2]],
sigma_level = sd(y) / 2, sigma_irreg = 0.001))
stan_fit <- stan(file = model_file, chains = 0)
##
## TRANSLATING MODEL 'fig05_04' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'fig05_04' NOW.
fit <- pforeach(i = 1:4, .final = sflist2stanfit)({
stan(fit = stan_fit, data = standata,
iter = 4000, chains = 1, seed = i, init = init)
})
stopifnot(is.converged(fit))
yhat <- get_posterior_mean(fit, par = 'yhat')[, 'mean-all chains']
mu <- get_posterior_mean(fit, par = 'mu')[, 'mean-all chains']
beta <- get_posterior_mean(fit, par = 'beta')[, 'mean-all chains']
sigma_irreg <- get_posterior_mean(fit, par = 'sigma_irreg')[, 'mean-all chains']
# stopifnot(is.almost.fitted(mu[[1]], 6.824))
is.almost.fitted(mu[[1]], 6.824)
## [1] "Result is 7.41083812316372"
## [1] FALSE
# stopifnot(is.almost.fitted(beta, -0.26105))
is.almost.fitted(beta, -0.26105)
## [1] "Result is -0.0032011620125695"
## [1] FALSE
# stopifnot(is.almost.fitted(sigma_irreg^2, 0.0116673))
is.almost.fitted(sigma_irreg^2, 0.0116673)
## [1] "Result is 0.00215832121838154"
## [1] FALSE
title <- 'Figure 5.4. Stochastic level and deterministic explanatory variable ‘log petrol price’.'
title <- '図 5.4 確率的レベルと確定的説明変数「対数石油価格」'
p <- autoplot(y)
yhat <- ts(yhat, start = start(y), frequency = frequency(y))
p <- autoplot(yhat, p = p, ts.colour = 'blue')
p + ggtitle(title)
title <- 'Figure 5.5. Irregular for stochastic level model with deterministic explanatory variable ‘log petrol price’.'
title <- paste('図 5.5 確定的説明変数「対数石油価格」のある',
'確率的レベル・モデルの不規則要素', sep = '\n')
autoplot(y - yhat, ts.linetype = 'dashed') + ggtitle(title)