{rstan}「状態空間時系列分析入門」 を再現したい。やっていること / 数式はテキストを参照。

リポジトリ: https://github.com/sinhrks/stan-statespace

必要パッケージのインストール/読み込み

library(devtools)
# devtools::install_github('hoxo-m/pforeach')
# devtools::install_github('sinhrks/ggfortify')
library(rstan)
library(pforeach)
library(ggplot2)
ggplot2::theme_set(theme_bw(base_family="HiraKakuProN-W3"))
library(ggfortify)

関数定義

# モデルが収束しているか確認
is.converged <- function(stanfit) {
  summarized <- summary(stanfit)  
  all(summarized$summary[, 'Rhat'] < 1.1)
}

# 値がだいたい近いか確認
is.almost.fitted <- function(result, expected, tolerance = 0.001) {
  if (abs(result - expected) > tolerance) {
    print(paste('Result is ', result))
    return(FALSE)
  } else {
    return(TRUE)
  }
}

データの読み込み

ukdrivers <- read.table('../data/UKdriversKSI.txt', skip = 1)
ukdrivers <- ts(ukdrivers[[1]], start = c(1969, 1), frequency = 12)
ukdrivers <- log(ukdrivers)

モデル定義

線形回帰モデル。

model_file <- '../models/fig01_01.stan'
cat(paste(readLines(model_file)), sep = '\n')
data {
  int<lower=1> n;
  real y[n];
}
parameters {
  # 係数
  real slope;
  # 切片
  real intercept;
  # 撹乱項
  real<lower=0> sigma;
}
model {
  for (t in 1:n){
    y[t] ~ normal(intercept + slope * t, sigma);
  }
}
y <- ukdrivers

standata <- within(list(), {
  y <- as.vector(y)
  n <- length(y)
})
stan_fit <- stan(file = model_file, chains = 0)
## 
## TRANSLATING MODEL 'fig01_01' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'fig01_01' NOW.
fit <- pforeach(i = 1:4, .final = sflist2stanfit)({
  stan(fit = stan_fit, data = standata,
       iter = 2000, chains = 1, seed = i)
})
stopifnot(is.converged(fit))

slope <- get_posterior_mean(fit, par = 'slope')[, 'mean-all chains']
intercept <- get_posterior_mean(fit, par = 'intercept')[, 'mean-all chains']
title <- paste('Figure 1.1. Scatter plot of the log of the number of UK drivers',
               'KSI against time (in months), including regression line.', sep = '\n')
title <- paste('図 1.1 (月次)時間に対する英国ドライバーの',
               '死傷者数の対数に回帰線を含めた散布図', sep = '\n')

# 原系列
p <- autoplot(y, ts.geom = 'point')

# stan
yhat <- ts(1:length(y) * slope + intercept,
           start = start(y), frequency = frequency(y))
p <- autoplot(yhat, p = p, ts.colour = 'blue')

# 線形回帰 (lm)
df <- data.frame(y = y, x = 1:length(y))
fit.lm <- lm(y ~ x, data = df)
intercept.lm <- coefficients(fit.lm)[[1]]
slope.lm <- coefficients(fit.lm)[[2]]
lm.yhat <- ts(df$x * slope.lm + intercept.lm,
              start = start(y), frequency = frequency(y))
p <- autoplot(lm.yhat, p = p, ts.colour = 'red', ts.linetype = 'dashed')
p + ggtitle(title)

title <- 'Figure 1.2. Log of the number of UK drivers KSI plotted as a time series.'
title <- '図 1.2 英国ドライバーの死傷者数の対数の時系列'
autoplot(y) + ggtitle(title)

title <- paste('Figure 1.3. Residuals of classical linear regression of the ',
               'log of the number of UK drivers KSI on time.', sep = '\n')
title <- paste('図 1.3 時間に対する英国ドライバーの',
               '死傷者数の対数の古典的線形型回帰の残差', sep = '\n')
autoplot(y - yhat, ts.linetype = 'dashed') + ggtitle(title)