このページでは、株式会社インサイト・ファクトリー 2020年度社内研修「マーケティング・ミックス・モデリング」V章「時系列分析の基礎」のRコードを公開しています。

準備

library(tidyverse)

## -- Attaching packages --------------------------------------------------------- tidyverse 1.3.0 --

## √ ggplot2 3.3.0     √ purrr   0.3.3
## √ tibble  3.0.0     √ dplyr   0.8.5
## √ tidyr   1.0.2     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0

## -- Conflicts ------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(assertr)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

library(patchwork)
library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(urca)
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(nlme)

## 
## Attaching package: 'nlme'

## The following object is masked from 'package:forecast':
## 
##     getResponse

## The following object is masked from 'package:dplyr':
## 
##     collapse

# 民間設備投資
# 出典: 田中(2008)
# http://www.cap-shuppan.co.jp/kikan_keiryou_jikeiretsu_toukei.html
dfIP <- read_csv("./IPO1980.csv")

## Parsed with column specification:
## cols(
##   IP = col_double()
## )

# 電力需要
# 出典: 福地・伊藤(2011)
# https://sites.google.com/site/econometricsr/
dfDenryokuOrg <- read_csv("./denryoku.csv")

## Parsed with column specification:
## cols(
##   day = col_character(),
##   week = col_character(),
##   demand = col_double(),
##   temp = col_double()
## )

# AR(1)を生成する関数
#   arima.sim()では, 非定常過程となるときにエラーとなる
#   stats.filter()でも生成できるが、ここでは理解を深めるために自作する
sub_generateAR1.1 <- function(nLength, gPhi, gMu=0, gSigmaSq=1, gInit=0){
  agWhiteNoise = rnorm(nLength, sd = sqrt(gSigmaSq))
  out <- numeric(nLength)
  gPrevY <- gInit
  for (t in 1:nLength){
    out[t] <- gMu + gPhi * gPrevY + agWhiteNoise[t]
    gPrevY <- out[t]
  }
  out
}

code 1: みせかけの相関

# データの作成
set.seed(123)
nNumTrial <- 1000
nNumTime  <- 100
dfIn <- tibble(
  nTrial = as.vector(sapply(1:nNumTrial, function(x) rep(x,nNumTime))),
  nTime  = as.vector(sapply(1:nNumTrial, function(x) 1:nNumTime)),
  nDeltaX = sample(-100:100, nNumTrial*nNumTime, replace = T),
  nDeltaY = sample(-100:100, nNumTrial*nNumTime, replace = T)
) %>%
  group_by(nTrial) %>%
  mutate(
    nX = cumsum(nDeltaX),
    nY = cumsum(nDeltaY)
  ) %>%
  ungroup()
## print(dfIn)
# 時系列描画用のデータ
dfPlot1 <- dfIn %>%
  filter(nTrial %in% 1:11) %>%
  gather(sVar, nValue, c(nDeltaX, nDeltaY, nX, nY))
# 相関係数
dfPlot2 <- dfIn %>%
  group_by(nTrial) %>%
  summarize(
    gCorr1 = cor(nDeltaX, nDeltaY),
    gCorr2 = cor(nX, nY)
  ) %>%
  ungroup() %>%
  mutate(
    sLabel1 = sprintf("r = %0.2f", gCorr1),
    sLabel2 = sprintf("r = %0.2f", gCorr2)
  )

# 例1) ランダムな時系列のプロット
g <- ggplot(
  data = dfPlot1 %>%
    filter(sVar %in% c("nDeltaX", "nDeltaY")) %>%
    mutate(
      fVar = factor(sVar, levels = c("nDeltaX", "nDeltaY"), labels = c("X", "Y"))
    ),
  aes(x = nTime, y = nValue, color = fVar)
)
g <- g + geom_line()
g <- g + geom_text(
  data=dfPlot2 %>% filter(nTrial %in% 1:11),
  aes(label = sLabel1), color=1, x = 50, y = -80
)
g <- g + scale_color_discrete(name = "")
g <- g + labs(x = "Time", y = "Value")
g <- g + theme_bw()
g <- g + facet_wrap(~ nTrial, ncol=4)
print(g)

ggsave(g, file = paste0("./chap5_code1_1.png"), width = 22, height = 9, units = "cm")

# その相関の散布図
g <- ggplot(data=dfPlot2, aes(x = gCorr1))
g <- g + geom_histogram(binwidth = 0.02)
g <- g + scale_x_continuous(limits = c(-1, 1))
g <- g + labs(x = "相関係数")
g <- g + theme_bw()
print(g)

## Warning: Removed 2 rows containing missing values (geom_bar).

ggsave(g, file = paste0("./chap5_code1_2.png"), width = 18, height = 4, units = "cm")

## Warning: Removed 2 rows containing missing values (geom_bar).

# 例2) 自己回帰する時系列のプロット
g <- ggplot(
  data = dfPlot1 %>%
    filter(sVar %in% c("nX", "nY")) %>%
    mutate(
      fVar = factor(sVar, levels = c("nX", "nY"), labels = c("X", "Y"))
    ),
  aes(x = nTime, y = nValue, color = fVar)
)
g <- g + geom_line()
g <- g + geom_text(
  data=dfPlot2 %>% filter(nTrial %in% 1:11),
  aes(label = sLabel2), color=1, x = 50, y = -800
)
g <- g + scale_color_discrete(name = "")
g <- g + labs(x = "Time", y = "Value")
g <- g + theme_bw()
g <- g + facet_wrap(~ nTrial, ncol=4)
print(g)

ggsave(g, file = paste0("./chap5_code1_3.png"), width = 22, height = 9, units = "cm")

# その相関の散布図
g <- ggplot(data=dfPlot2, aes(x = gCorr2))
g <- g + geom_histogram(binwidth = 0.02)
g <- g + scale_x_continuous(limits = c(-1, 1))
g <- g + labs(x = "相関係数")
g <- g + theme_bw()
print(g)

## Warning: Removed 2 rows containing missing values (geom_bar).

ggsave(g, file = paste0("./chap5_code1_4.png"), width = 18, height = 4, units = "cm")

## Warning: Removed 2 rows containing missing values (geom_bar).

code 2: ACF

plot(UKDriverDeaths)

acf(UKDriverDeaths)

code 3: 定常性・トレンド定常・差分定常

set.seed(12345)

# 定常っぽい
dfIn <- tibble(
  t  = 1:100,
  v1 = as.vector(arima.sim(n = 100, list(ar = c(0.8)))),
  v2 = as.vector(arima.sim(n = 100, list(ar = c(0.5, -0.5)))),
  v3 = as.vector(arima.sim(n = 100, list(ma = c(0.9))))
) %>% gather(sVar, gValue, -t)
g <- ggplot(data=dfIn, aes(x = t, y = gValue))
g <- g + geom_line()
g <- g + labs(x = "Time", y = "Value")
g <- g + theme_bw()
g <- g + facet_wrap(~ sVar)
print(g)

ggsave(g, file = paste0("./chap5_code3_1.png"), width = 18, height = 6, units = "cm")

# 非定常っぽい
dfIn <- tibble(
  t  = 1:100,
  v1 = as.vector(arima.sim(n = 100, list(order=c(2,1,0), ar = c(0, 0.9))))[-1]/10,
  v2 = cumsum(rnorm(100)),
  v3 = as.vector(arima.sim(n = 100, list(ar = c(0.2))))
) %>%
  mutate(v3 = if_else(t > 50, v3 * 2.5, v3)) %>%
  gather(sVar, gValue, -t)
g <- ggplot(data=dfIn, aes(x = t, y = gValue))
g <- g + geom_line()
g <- g + labs(x = "Time", y = "Value")
g <- g + theme_bw()
g <- g + facet_wrap(~ sVar)
print(g)

ggsave(g, file = paste0("./chap5_code3_2.png"), width = 18, height = 6, units = "cm")

# トレンド定常っぽい
set.seed(12345)
dfIn <- tibble(
  t  = 1:100,
  v2 = as.vector(arima.sim(n = 100, list(ar = c(0.9)))),
  v1 = t * 0.08 + v2,
  v3 = v1 - lag(v1)
)
g1 <- ggplot(data=dfIn, aes(x = t, y = v1))
g1 <- g1 + geom_line()
g1 <- g1 + geom_abline(slope = 0.08, intercept = 0, color="red")
g1 <- g1 + labs(x = "Time", y = "Y[Time]")
g1 <- g1 + theme_bw()
g2 <- ggplot(data=dfIn, aes(x = t, y = v2))
g2 <- g2 + geom_line()
g2 <- g2 + geom_hline(yintercept = 0, color="red")
g2 <- g2 + labs(x = "Time", y = "Y[Time] - Time * 0.08")
g2 <- g2 + ylim(-10, 10)
g2 <- g2 + theme_bw()
g3 <- ggplot(data=dfIn, aes(x = t, y = v3))
g3 <- g3 + geom_line()
g3 <- g3 + ylim(-10, 10)
g3 <- g3 + labs(x = "Time", y = "Y[Time] - Y[Time-1]")
g3 <- g3 + theme_bw()
g3

## Warning: Removed 1 row(s) containing missing values (geom_path).

g <- g1 + g2 + g3
g

## Warning: Removed 1 row(s) containing missing values (geom_path).

ggsave(g, file = paste0("./chap5_code3_3.png"), width = 22, height = 6, units = "cm")

## Warning: Removed 1 row(s) containing missing values (geom_path).

# 差分定常っぽい
set.seed(1234)
dfIn <- tibble(
  t = 1:100,
  v1 = sub_generateAR1.1(nLength = 100, gMu = 0.5, gPhi = 1),
  v3 = v1 - lag(v1)
)
oReg <- lm(v1 ~ 0 + t, data=dfIn)
gRegCoef <- coef(oReg)[1]
dfIn <- dfIn %>%
  mutate(
    v2 = v1 - t * gRegCoef
  )
g1 <- ggplot(data=dfIn, aes(x = t, y = v1))
g1 <- g1 + geom_line()
g1 <- g1 + labs(x = "Time", y = "Value")
g1 <- g1 + geom_abline(slope = gRegCoef, intercept = 0, color="red")
g1 <- g1 + labs(x = "Time", y = "Y[Time]")
g1 <- g1 + theme_bw()
g2 <- ggplot(data=dfIn, aes(x = t, y = v2))
g2 <- g2 + geom_line()
g2 <- g2 + labs(x = "Time", y = sprintf("Y[Time] - Time *  %0.2f", gRegCoef))
g2 <- g2 + ylim(-14, 14)
g2 <- g2 + geom_hline(yintercept = 0, color="red")
g2 <- g2 + theme_bw()
g3 <- ggplot(data=dfIn, aes(x = t, y = v3))
g3 <- g3 + geom_line()
g3 <- g3 + ylim(-14, 14)
g3 <- g3 + labs(x = "Time", y = "Y[Time] - Y[Time-1]")
g3 <- g3 + theme_bw()
g <- g1 + g2 + g3
g

## Warning: Removed 1 row(s) containing missing values (geom_path).

ggsave(g, file = paste0("./chap5_code3_4.png"), width = 22, height = 6, units = "cm")

## Warning: Removed 1 row(s) containing missing values (geom_path).

code 4: ホワイトノイズ

set.seed(12345)
dfIn <- tibble(
  t = 1:100,
  v1 = runif(n=100, min=-2, max=+2),
  v2 = rnorm(n=100)
) %>%
  gather(sVar, gValue, -t)

g <- ggplot(data=dfIn, aes(x = t, y = gValue))
g <- g + geom_line()
g <- g + labs(x = "Time", y = "Value")
g <- g + theme_bw()
g <- g + facet_wrap(~ sVar)
print(g)

ggsave(g, file = paste0("./chap5_code4.png"), width = 18, height = 10, units = "cm")

code 5: AR(1) の定常性条件

agPhi <- seq(0.9, 1.1, by = 0.1)
nNumTrial <- 10
nNumTime  <- 100
dfIn <- crossing(
  gPhi = agPhi,
  nTrial = 1:nNumTrial,
  nTime  = 1:nNumTime
) %>%
  group_by(gPhi, nTrial) %>%
  mutate(
    gX = sub_generateAR1.1(nNumTime, gPhi[1])
  ) %>%
  ungroup()
lG <- lapply(
  agPhi,
  function(gCurrent){
    g <- ggplot(
       dfIn %>% filter(gPhi == gCurrent),
       aes(x = nTime, y = gX, group = nTrial)
    )
    g <- g + geom_line(alpha=0.5)
    g <- g + labs(
      title = paste0("Y[t] = ", gCurrent, "*Y[t-1] + e"), x = "Time", y = "Y")
    g <- g + theme_bw()
    g
  }
)
g <- wrap_plots(lG)
g

ggsave(g, file = paste0("./chap5_code5.png"), width = 24, height = 10, units = "cm")

code 6: AR1の標本コレログラム

agPhi <- c(seq(0.2, 0.8, by = 0.2), seq(-0.2, -0.8, by = -0.2))
dfIn <- data.frame(
  t    = rep(1:100, length(agPhi)),
  gPhi = unlist(lapply(agPhi, function(x) rep(x, 100))),
  agX = unlist(lapply(
    agPhi,
    function(gPhi){
      as.vector(stats::filter(rnorm(n = 100), gPhi, method = "recursive"))
    }
  ))
)
lG1 <- lapply(
  agPhi,
  function(gCurrent){
    dfPlot <- dfIn %>% filter(gPhi == gCurrent)
    g <- ggplot(data=dfPlot, aes(x = t, y = agX))
    g <- g + geom_line()
    g <- g + labs(title = sprintf("Phi = %0.1f", gCurrent))
    g <- g + theme_bw()
    g
  }
)
lG2 <- lapply(
  agPhi,
  function(gCurrent){
    oACF <- acf(dfIn %>% filter(gPhi == gCurrent) %>% pull(agX), plot = FALSE)
    dfPlot <- tibble(
      k = oACF$lag[,1,1],
      acf = oACF$acf[,1,1]
    )
    g <- ggplot(data=dfPlot, aes(x = k, y = acf))
    g <- g + geom_col()
    g <- g + labs(title = sprintf("Phi = %0.1f", gCurrent))
    g <- g + theme_bw()
    g
  }
)
lTemp <- list(
  lG1[[1]], lG2[[1]],
  lG1[[5]], lG2[[5]],
  lG1[[2]], lG2[[2]],
  lG1[[6]], lG2[[6]],
  lG1[[3]], lG2[[3]],
  lG1[[7]], lG2[[7]],
  lG1[[4]], lG2[[4]],
  lG1[[8]], lG2[[8]]
)
g <- wrap_plots(lTemp, nrow=4)
g

ggsave(g, file = paste0("./chap5_code6.png"), width = 22, height = 14, units = "cm")

code 7: MA1の標本コレログラム

set.seed(12345)
agTheta <- c(seq(0.2, 0.8, by = 0.2), seq(-0.2, -0.8, by = -0.2))
dfIn <- tibble(
  t      = rep(1:100, length(agTheta)),
  gTheta = unlist(lapply(agTheta, function(x) rep(x, 100))),
  gX = unlist(lapply(
    agTheta,
    function(gTheta){
      as.vector(arima.sim(model=list(ma=gTheta), n=100))
    }
  ))
)
## print(acf(dfIn %>% filter(gTheta == 0.8) %>% pull(gX)))
lG1 <- lapply(
  agTheta,
  function(gCurrent){
    dfPlot <- dfIn %>% filter(gTheta == gCurrent)
    g <- ggplot(data=dfPlot, aes(x = t, y = gX))
    g <- g + geom_line()
    g <- g + labs(title = sprintf("Theta = %0.1f", gCurrent))
    g <- g + theme_bw()
    g
  }
)
lG2 <- lapply(
  agTheta,
  function(gCurrent){
    oACF <- acf(dfIn %>% filter(gTheta == gCurrent) %>% pull(gX), plot = FALSE)
    dfPlot <- tibble(
      k   = oACF$lag[,1,1],
      acf = oACF$acf[,1,1]
    )
    g <- ggplot(data=dfPlot, aes(x = k, y = acf))
    g <- g + geom_col()
    g <- g + labs(title = sprintf("Theta = %0.1f", gCurrent))
    g <- g + theme_bw()
    g
  }
)
lTemp <- list(
  lG1[[1]], lG2[[1]],
  lG1[[5]], lG2[[5]],
  lG1[[2]], lG2[[2]],
  lG1[[6]], lG2[[6]],
  lG1[[3]], lG2[[3]],
  lG1[[7]], lG2[[7]],
  lG1[[4]], lG2[[4]],
  lG1[[8]], lG2[[8]]
)
g <- wrap_plots(lTemp, nrow=4)
g

ggsave(g, file = paste0("./chap5_code7.png"), width = 22, height = 14, units = "cm")

code 8: random walk

set.seed(123)
dfIn <- tibble(nTrial = unlist(lapply(1:10, function(x) rep(x, 100)))) %>%
  group_by(nTrial) %>%
  mutate(
    nT = 1:100,
    gRW = cumsum(rnorm(100)),
    gWN = sub_generateAR1.1(100, gPhi = 0.9)
  )
g1 <- ggplot(data=dfIn, aes(x = nT, y = gRW, group = nTrial))
g1 <- g1 + geom_line(alpha=0.3)
g1 <- g1 + scale_y_continuous(limits = c(-15, 15))
g1 <- g1 + theme_bw()
g2 <- ggplot(data=dfIn, aes(x = nT, y = gWN, group = nTrial))
g2 <- g2 + geom_line(alpha=0.3)
g2 <- g2 + scale_y_continuous(limits = c(-15, 15))
g2 <- g2 + theme_bw()
g <- g1 + g2
g

ggsave(g, file = paste0("./chap5_code8.png"), width = 22, height = 10, units = "cm")

code 9: drift つき random walk

set.seed(123)
c <- 0.1
dfIn <- tibble(nTrial = unlist(lapply(1:10, function(x) rep(x, 100)))) %>%
  group_by(nTrial) %>%
  mutate(
    nT = 1:100,
    gRW = nT * c + cumsum(rnorm(100)),
    gWN = nT * c + sub_generateAR1.1(100, gPhi = 0.9)
  )
g1 <- ggplot(data=dfIn, aes(x = nT, y = gRW, group = nTrial))
g1 <- g1 + geom_abline(slope = 0.1, color="red")
g1 <- g1 + geom_line(alpha=0.3)
g1 <- g1 + scale_y_continuous(limits = c(-10, 25))
g1 <- g1 + theme_bw()
g2 <- ggplot(data=dfIn, aes(x = nT, y = gWN, group = nTrial))
g2 <- g2 + geom_abline(slope = 0.1, color="red")
g2 <- g2 + geom_line(alpha=0.3)
g2 <- g2 + scale_y_continuous(limits = c(-10, 25))
g2 <- g2 + theme_bw()
g <- g1 + g2
g

ggsave(g, file = paste0("./chap5_code9.png"), width = 22, height = 10, units = "cm")

code 10: trend つき random walk

set.seed(123)
c <- 0.05
d <- 0.004
dfIn <- tibble(nTrial = unlist(lapply(1:10, function(x) rep(x, 100)))) %>%
  group_by(nTrial) %>%
  mutate(
    nT = 1:100,
    gRW = nT * c + cumsum(nT) * d + cumsum(rnorm(100)),
    gWN = nT * c + cumsum(nT) * d + sub_generateAR1.1(100, 0.9),
    gExp = nT * c + (1/2)*(nT + nT^2)*d
  )
g1 <- ggplot(data=dfIn, aes(x = nT, y = gRW, group = nTrial))
g1 <- g1 + geom_line(alpha=0.3)
g1 <- g1 + geom_line(aes(x = nT, y = gExp), color="red")
g1 <- g1 + scale_y_continuous(limits = c(-10, 40))
g1 <- g1 + theme_bw()
g2 <- ggplot(data=dfIn, aes(x = nT, y = gWN, group = nTrial))
g2 <- g2 + geom_line(alpha=0.3)
g2 <- g2 + geom_line(aes(x = nT, y = gExp), color="red")
g2 <- g2 + scale_y_continuous(limits = c(-10, 40))
g2 <- g2 + theme_bw()
g <- g1 + g2
g

ggsave(g, file = paste0("./chap5_code10.png"), width = 22, height = 10, units = "cm")

code 11: 民間設備投資

dfIP <- dfIP %>%
  mutate(
    nYear = c( unlist(lapply(1980:2004, function(x) rep(x, 4))), 2005, 2005),
    nQ    = c(rep(1:4, 2004-1980 + 1), 1, 2),
    dYQ   = yq(nYear * 100 + nQ),
    LIP   = log(IP)
  ) %>%
  dplyr::select(dYQ, IP, LIP)

# 原系列
g <- ggplot(data=dfIP, aes(x = dYQ, y = IP))
g <- g + geom_line()
g <- g + labs(x = "期間(四半期)", y = "実質民間設備投資(兆円)")
g <- g + theme_bw()
print(g)

# ACF
acf(dfIP$IP)

# season plot
seasonplot(ts(dfIP$IP, frequency = 4))

# 対数系列
g <- ggplot(data=dfIP, aes(x = dYQ, y = LIP))
g <- g + geom_line()
g <- g + labs(x = "期間(四半期)", y = "実質民間設備投資(兆円)(対数)")
g <- g + theme_bw()
print(g)

code 12: 変数変換

# 対数
set.seed(12345)
tsX <- exp(arima.sim(model = list(ar=c(0.9)), sd=0.4, n = 100))
par(mfrow=c(1,2))
plot(tsX)
plot(log(tsX))

# 差分
set.seed(12345)
tsX <- arima.sim(model = list(order=c(1,1,0), ar=c(0.5)), sd=0.4, n = 100)
par(mfrow=c(1,2))
plot(tsX)
plot(diff(tsX))

# 対数差分
set.seed(12345)
tsX <- exp(arima.sim(model = list(ar=c(0.9)), sd=0.1, n = 100)+ (1:100)*0.02)
par(mfrow=c(1,2))
plot(tsX)
plot(diff(log(tsX)))

# 前年同期比
par(mfrow=c(1,2))
plot(UKDriverDeaths)
plot(diff(log(UKDriverDeaths), lag=12))

code 13: 単位根検定

LIP <- log(dfIP$IP)

oURDF1 <- ur.df(LIP, type = "trend", lags=4)
summary(oURDF1)

## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.109590 -0.024968 -0.008654  0.031423  0.105416 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.3201651  0.1243460   2.575   0.0117 *  
## z.lag.1     -0.0818769  0.0326016  -2.511   0.0138 *  
## tt           0.0006361  0.0003163   2.011   0.0473 *  
## z.diff.lag1 -0.0963497  0.0722731  -1.333   0.1859    
## z.diff.lag2 -0.0365374  0.0705355  -0.518   0.6057    
## z.diff.lag3 -0.1393002  0.0696887  -1.999   0.0486 *  
## z.diff.lag4  0.7690449  0.0687855  11.180   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0441 on 90 degrees of freedom
## Multiple R-squared:  0.8901, Adjusted R-squared:  0.8828 
## F-statistic: 121.5 on 6 and 90 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -2.5114 2.6964 3.1949 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.99 -3.43 -3.13
## phi2  6.22  4.75  4.07
## phi3  8.43  6.49  5.47

oURDF2 <- ur.df(LIP, type = "drift", lags=4)
summary(oURDF2)

## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.113547 -0.027109 -0.003352  0.031242  0.101719 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.11514    0.07237   1.591   0.1151    
## z.lag.1     -0.02554    0.01696  -1.507   0.1354    
## z.diff.lag1 -0.14707    0.06885  -2.136   0.0354 *  
## z.diff.lag2 -0.07528    0.06898  -1.091   0.2780    
## z.diff.lag3 -0.17159    0.06894  -2.489   0.0146 *  
## z.diff.lag4  0.75120    0.06934  10.833   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04484 on 91 degrees of freedom
## Multiple R-squared:  0.8852, Adjusted R-squared:  0.8789 
## F-statistic: 140.3 on 5 and 91 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -1.5065 1.957 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1  6.52  4.63  3.81

oURDF3 <- ur.df(LIP, type = "none", lags=4)
summary(oURDF3)

## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.117731 -0.025814 -0.005051  0.032132  0.103038 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## z.lag.1      0.001368   0.001173   1.166   0.2465    
## z.diff.lag1 -0.156547   0.069164  -2.263   0.0260 *  
## z.diff.lag2 -0.075600   0.069551  -1.087   0.2799    
## z.diff.lag3 -0.166829   0.069445  -2.402   0.0183 *  
## z.diff.lag4  0.760465   0.069670  10.915   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04521 on 92 degrees of freedom
## Multiple R-squared:  0.8825, Adjusted R-squared:  0.8761 
## F-statistic: 138.2 on 5 and 92 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: 1.1662 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62

DLIP <- diff(LIP)
ts.plot(DLIP, type = "l")

oURDF1 <- ur.df(DLIP, type = "trend", lags=3)
summary(oURDF1)

## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.118232 -0.027526 -0.004958  0.031374  0.101757 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.868e-03  1.027e-02   0.864  0.39011    
## z.lag.1     -6.573e-01  2.272e-01  -2.893  0.00477 ** 
## tt          -4.645e-05  1.665e-04  -0.279  0.78084    
## z.diff.lag1 -5.030e-01  1.759e-01  -2.859  0.00527 ** 
## z.diff.lag2 -5.833e-01  1.273e-01  -4.583 1.46e-05 ***
## z.diff.lag3 -7.554e-01  7.054e-02 -10.708  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04537 on 91 degrees of freedom
## Multiple R-squared:  0.9659, Adjusted R-squared:  0.964 
## F-statistic: 515.2 on 5 and 91 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -2.8932 2.8028 4.1964 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.99 -3.43 -3.13
## phi2  6.22  4.75  4.07
## phi3  8.43  6.49  5.47

oURDF2 <- ur.df(DLIP, type = "drift", lags=3)
summary(oURDF2)

## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.118073 -0.027069 -0.005819  0.031957  0.102086 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.006369   0.005001   1.274  0.20603    
## z.lag.1     -0.647927   0.223566  -2.898  0.00469 ** 
## z.diff.lag1 -0.510441   0.173022  -2.950  0.00403 ** 
## z.diff.lag2 -0.588340   0.125337  -4.694 9.31e-06 ***
## z.diff.lag3 -0.757703   0.069683 -10.874  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04514 on 92 degrees of freedom
## Multiple R-squared:  0.9658, Adjusted R-squared:  0.9644 
## F-statistic: 650.5 on 4 and 92 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -2.8981 4.2074 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1  6.52  4.63  3.81

oURDF3 <- ur.df(DLIP, type = "none", lags=3)
summary(oURDF3)

## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.111212 -0.020289 -0.000807  0.033546  0.113031 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## z.lag.1     -0.53407    0.20560  -2.598 0.010913 *  
## z.diff.lag1 -0.59580    0.16005  -3.723 0.000338 ***
## z.diff.lag2 -0.64505    0.11755  -5.487  3.5e-07 ***
## z.diff.lag3 -0.78605    0.06625 -11.864  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0453 on 93 degrees of freedom
## Multiple R-squared:  0.9652, Adjusted R-squared:  0.9638 
## F-statistic: 645.8 on 4 and 93 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -2.5976 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62

ndiffs(LIP, test = "adf", type = "trend")

## [1] 1

code 14: ARMAモデルの推定

set.seed(123)
dfIn <- tibble(
  nTime = 1:100,
  gY    = sub_generateAR1.1(100, 0.5, gMu = 2),
  gLagY = lag(gY)
)
# dfIn$gYに時系列がはいっている
plot(as.ts(dfIn$gY))

# forecast::Arima() で AR(1)モデルを当てはめる
library(forecast)
Arima(dfIn$gY, order = c(1, 0, 0), method = "CSS")

## Series: dfIn$gY 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1    mean
##       0.4487  4.1901
## s.e.  0.0858  0.1649
## 
## sigma^2 estimated as 0.8344:  part log likelihood=-132.33

# 参考: lm()で推定した場合
# dfIn$gLagYに前期の値が入っているとして
summary(lm(gY ~ gLagY, data = dfIn))

## 
## Call:
## lm(formula = gY ~ gLagY, data = dfIn)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.37379 -0.58777  0.00698  0.57049  2.08449 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.31016    0.37320   6.190 1.44e-08 ***
## gLagY        0.44866    0.08709   5.151 1.36e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9181 on 97 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2148, Adjusted R-squared:  0.2067 
## F-statistic: 26.54 on 1 and 97 DF,  p-value: 1.358e-06

# ARMA(2, 0)
Arima(dfIn$gY, order = c(2, 0, 0))

## Series: dfIn$gY 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2    mean
##       0.5053  -0.0659  4.1207
## s.e.  0.1020   0.1034  0.1659
## 
## sigma^2 estimated as 0.8989:  log likelihood=-135.17
## AIC=278.34   AICc=278.76   BIC=288.76

# ARMA(1, 1)
Arima(dfIn$gY, order = c(1, 0, 1))

## Series: dfIn$gY 
## ARIMA(1,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1     ma1    mean
##       0.1169  0.4254  4.1293
## s.e.  0.3476  0.3473  0.1498
## 
## sigma^2 estimated as 0.8917:  log likelihood=-134.8
## AIC=277.59   AICc=278.01   BIC=288.01

auto.arima(
  dfIn$gY, d = 0,
  trace=T, seasonal = F, ic ="aic", stepwise=F, approximation = F
)

## 
##  ARIMA(0,0,0) with zero mean     : 576.4883
##  ARIMA(0,0,0) with non-zero mean : 298.7064
##  ARIMA(0,0,1) with zero mean     : 467.3743
##  ARIMA(0,0,1) with non-zero mean : 275.7177
##  ARIMA(0,0,2) with zero mean     : 425.262
##  ARIMA(0,0,2) with non-zero mean : 277.6738
##  ARIMA(0,0,3) with zero mean     : 387.1492
##  ARIMA(0,0,3) with non-zero mean : 275.7871
##  ARIMA(0,0,4) with zero mean     : 366.6385
##  ARIMA(0,0,4) with non-zero mean : 277.7856
##  ARIMA(0,0,5) with zero mean     : 357.8372
##  ARIMA(0,0,5) with non-zero mean : 279.7058
##  ARIMA(1,0,0) with zero mean     : 303.8079
##  ARIMA(1,0,0) with non-zero mean : 276.7485
##  ARIMA(1,0,1) with zero mean     : Inf
##  ARIMA(1,0,1) with non-zero mean : 277.5924
##  ARIMA(1,0,2) with zero mean     : Inf
##  ARIMA(1,0,2) with non-zero mean : 278.726
##  ARIMA(1,0,3) with zero mean     : Inf
##  ARIMA(1,0,3) with non-zero mean : 277.7859
##  ARIMA(1,0,4) with zero mean     : Inf
##  ARIMA(1,0,4) with non-zero mean : Inf
##  ARIMA(2,0,0) with zero mean     : 301.7191
##  ARIMA(2,0,0) with non-zero mean : 278.3427
##  ARIMA(2,0,1) with zero mean     : Inf
##  ARIMA(2,0,1) with non-zero mean : 276.635
##  ARIMA(2,0,2) with zero mean     : Inf
##  ARIMA(2,0,2) with non-zero mean : 278.3583
##  ARIMA(2,0,3) with zero mean     : Inf
##  ARIMA(2,0,3) with non-zero mean : Inf
##  ARIMA(3,0,0) with zero mean     : 294.4183
##  ARIMA(3,0,0) with non-zero mean : 278.9864
##  ARIMA(3,0,1) with zero mean     : Inf
##  ARIMA(3,0,1) with non-zero mean : 278.267
##  ARIMA(3,0,2) with zero mean     : Inf
##  ARIMA(3,0,2) with non-zero mean : Inf
##  ARIMA(4,0,0) with zero mean     : 296.4182
##  ARIMA(4,0,0) with non-zero mean : 278.3183
##  ARIMA(4,0,1) with zero mean     : Inf
##  ARIMA(4,0,1) with non-zero mean : 280.1795
##  ARIMA(5,0,0) with zero mean     : Inf
##  ARIMA(5,0,0) with non-zero mean : 280.2263
## 
## 
## 
##  Best model: ARIMA(0,0,1) with non-zero mean

## Series: dfIn$gY 
## ARIMA(0,0,1) with non-zero mean 
## 
## Coefficients:
##          ma1    mean
##       0.5313  4.1324
## s.e.  0.0925  0.1420
## 
## sigma^2 estimated as 0.8835:  log likelihood=-134.86
## AIC=275.72   AICc=275.97   BIC=283.53

code 15: ARIMAモデルの推定

IP <- ts(dfIP$IP, start=c(1980, 1), frequency = 4)
LIP <- log(IP)

Arima(LIP, order = c(2, 1, 2), method = "ML")

## Series: LIP 
## ARIMA(2,1,2) 
## 
## Coefficients:
##           ar1     ar2     ma1      ma2
##       -0.6612  0.3365  0.4000  -0.4750
## s.e.   0.2055  0.2052  0.1734   0.1542
## 
## sigma^2 estimated as 0.004752:  log likelihood=127.56
## AIC=-245.12   AICc=-244.49   BIC=-232.05

x <- auto.arima(
  LIP, d = 1,
  trace=T, seasonal = F, ic ="aic", stepwise=F, approximation = F
)

## 
##  ARIMA(0,1,0)                              : -129.0637
##  ARIMA(0,1,0)           with drift         : -127.4935
##  ARIMA(0,1,1)                              : -177.0597
##  ARIMA(0,1,1)           with drift         : -180.2615
##  ARIMA(0,1,2)                              : -214.5526
##  ARIMA(0,1,2)           with drift         : -215.1704
##  ARIMA(0,1,3)                              : -185.5557
##  ARIMA(0,1,3)           with drift         : -188.0912
##  ARIMA(0,1,4)                              : -238.8765
##  ARIMA(0,1,4)           with drift         : -238.0338
##  ARIMA(0,1,5)                              : -259.8235
##  ARIMA(0,1,5)           with drift         : -261.9403
##  ARIMA(1,1,0)                              : -202.8732
##  ARIMA(1,1,0)           with drift         : -204.5221
##  ARIMA(1,1,1)                    : Inf
##  ARIMA(1,1,1) with drift         : Inf
##  ARIMA(1,1,2)                              : -213.5177
##  ARIMA(1,1,2)           with drift         : -213.7697
##  ARIMA(1,1,3)                    : Inf
##  ARIMA(1,1,3)           with drift         : Inf
##  ARIMA(1,1,4)                              : Inf
##  ARIMA(1,1,4)           with drift         : Inf
##  ARIMA(2,1,0)                              : -202.1178
##  ARIMA(2,1,0)           with drift         : -203.1421
##  ARIMA(2,1,1)                              : -198.9304
##  ARIMA(2,1,1)           with drift         : -201.2576
##  ARIMA(2,1,2)                    : Inf
##  ARIMA(2,1,2) with drift         : Inf
##  ARIMA(2,1,3)                    : Inf
##  ARIMA(2,1,3) with drift         : Inf
##  ARIMA(3,1,0)                              : -241.633
##  ARIMA(3,1,0)           with drift         : -250.7896
##  ARIMA(3,1,1)                    : Inf
##  ARIMA(3,1,1) with drift         : Inf
##  ARIMA(3,1,2)                              : -294.4893
##  ARIMA(3,1,2)           with drift         : -301.522
##  ARIMA(4,1,0)                    : Inf
##  ARIMA(4,1,0) with drift         : Inf
##  ARIMA(4,1,1)                    : Inf
##  ARIMA(4,1,1) with drift         : Inf
##  ARIMA(5,1,0)                    : Inf
##  ARIMA(5,1,0) with drift         : Inf
## 
## 
## 
##  Best model: ARIMA(3,1,2)           with drift

## Series: LIP 
## ARIMA(3,1,2) with drift 
## 
## Coefficients:
##           ar1      ar2      ar3     ma1     ma2   drift
##       -0.9764  -0.8642  -0.8481  0.6576  0.5076  0.0092
## s.e.   0.0590   0.0884   0.0614  0.1201  0.1349  0.0029
## 
## sigma^2 estimated as 0.00262:  log likelihood=157.76
## AIC=-301.52   AICc=-300.32   BIC=-283.22

# 回帰診断
tsdiag(x)

# 短期的予測
plot(forecast(x, h = 12))

code 16: SARIMAモデルの推定

IP <- ts(dfIP$IP, start=c(1980, 1), frequency = 4)
LIP <- log(IP)

# SARIMA(2, 1, 2, 1, 1, 1)の推定
Arima(
  LIP,
  order = c(2, 1, 2),
  seasonal = list(order = c(1,1,1), period = 4),
  method = "ML"
)

## Series: LIP 
## ARIMA(2,1,2)(1,1,1)[4] 
## 
## Coefficients:
##          ar1      ar2      ma1     ma2    sar1     sma1
##       0.9742  -0.6440  -1.1279  0.9367  0.1518  -0.5818
## s.e.  0.1685   0.1064   0.0915  0.0644  0.2397   0.1980
## 
## sigma^2 estimated as 0.00177:  log likelihood=171.75
## AIC=-329.49   AICc=-328.24   BIC=-311.47

x <- auto.arima(
  LIP, d = 1,
  trace=T, seasonal = T, ic ="aic", stepwise=F, approximation = F
)

## 
##  ARIMA(0,1,0)(0,1,0)[4]                    : -316.2635
##  ARIMA(0,1,0)(0,1,1)[4]                    : -326.3591
##  ARIMA(0,1,0)(0,1,2)[4]                    : -325.2726
##  ARIMA(0,1,0)(1,1,0)[4]                    : -322.6142
##  ARIMA(0,1,0)(1,1,1)[4]                    : -325.2517
##  ARIMA(0,1,0)(1,1,2)[4]                    : -323.2832
##  ARIMA(0,1,0)(2,1,0)[4]                    : -324.1089
##  ARIMA(0,1,0)(2,1,1)[4]                    : -323.2944
##  ARIMA(0,1,0)(2,1,2)[4]                    : -321.2942
##  ARIMA(0,1,1)(0,1,0)[4]                    : -314.569
##  ARIMA(0,1,1)(0,1,1)[4]                    : -324.6076
##  ARIMA(0,1,1)(0,1,2)[4]                    : -323.7809
##  ARIMA(0,1,1)(1,1,0)[4]                    : -320.6965
##  ARIMA(0,1,1)(1,1,1)[4]                    : -323.7305
##  ARIMA(0,1,1)(1,1,2)[4]                    : -321.7913
##  ARIMA(0,1,1)(2,1,0)[4]                    : -322.5703
##  ARIMA(0,1,1)(2,1,1)[4]                    : -321.7989
##  ARIMA(0,1,1)(2,1,2)[4]                    : -319.8015
##  ARIMA(0,1,2)(0,1,0)[4]                    : -317.9835
##  ARIMA(0,1,2)(0,1,1)[4]                    : -324.9519
##  ARIMA(0,1,2)(0,1,2)[4]                    : -324.0557
##  ARIMA(0,1,2)(1,1,0)[4]                    : -321.9168
##  ARIMA(0,1,2)(1,1,1)[4]                    : -324.0905
##  ARIMA(0,1,2)(1,1,2)[4]                    : -322.0983
##  ARIMA(0,1,2)(2,1,0)[4]                    : -323.0307
##  ARIMA(0,1,2)(2,1,1)[4]                    : -322.0945
##  ARIMA(0,1,3)(0,1,0)[4]                    : -316.4917
##  ARIMA(0,1,3)(0,1,1)[4]                    : -323.4725
##  ARIMA(0,1,3)(0,1,2)[4]                    : -322.6869
##  ARIMA(0,1,3)(1,1,0)[4]                    : -320.4206
##  ARIMA(0,1,3)(1,1,1)[4]                    : -322.6156
##  ARIMA(0,1,3)(2,1,0)[4]                    : -321.8897
##  ARIMA(1,1,0)(0,1,0)[4]                    : -314.663
##  ARIMA(1,1,0)(0,1,1)[4]                    : -324.6836
##  ARIMA(1,1,0)(0,1,2)[4]                    : -323.9349
##  ARIMA(1,1,0)(1,1,0)[4]                    : -320.7239
##  ARIMA(1,1,0)(1,1,1)[4]                    : -323.8774
##  ARIMA(1,1,0)(1,1,2)[4]                    : -321.9457
##  ARIMA(1,1,0)(2,1,0)[4]                    : -322.7093
##  ARIMA(1,1,0)(2,1,1)[4]                    : -321.9497
##  ARIMA(1,1,0)(2,1,2)[4]                    : -319.9546
##  ARIMA(1,1,1)(0,1,0)[4]                    : -312.9603
##  ARIMA(1,1,1)(0,1,1)[4]                    : Inf
##  ARIMA(1,1,1)(0,1,2)[4]                    : Inf
##  ARIMA(1,1,1)(1,1,0)[4]                    : -319.1684
##  ARIMA(1,1,1)(1,1,1)[4]                    : -322.6238
##  ARIMA(1,1,1)(1,1,2)[4]                    : -320.6322
##  ARIMA(1,1,1)(2,1,0)[4]                    : -321.1827
##  ARIMA(1,1,1)(2,1,1)[4]                    : Inf
##  ARIMA(1,1,2)(0,1,0)[4]                    : -316.1325
##  ARIMA(1,1,2)(0,1,1)[4]                    : -323.6681
##  ARIMA(1,1,2)(0,1,2)[4]                    : -322.6174
##  ARIMA(1,1,2)(1,1,0)[4]                    : -320.3261
##  ARIMA(1,1,2)(1,1,1)[4]                    : -322.5914
##  ARIMA(1,1,2)(2,1,0)[4]                    : -321.6761
##  ARIMA(1,1,3)(0,1,0)[4]                    : -318.7062
##  ARIMA(1,1,3)(0,1,1)[4]                    : -322.004
##  ARIMA(1,1,3)(1,1,0)[4]                    : -319.2309
##  ARIMA(2,1,0)(0,1,0)[4]                    : -314.7958
##  ARIMA(2,1,0)(0,1,1)[4]                    : -324.9045
##  ARIMA(2,1,0)(0,1,2)[4]                    : -323.7631
##  ARIMA(2,1,0)(1,1,0)[4]                    : -321.4036
##  ARIMA(2,1,0)(1,1,1)[4]                    : -323.8094
##  ARIMA(2,1,0)(1,1,2)[4]                    : -321.8095
##  ARIMA(2,1,0)(2,1,0)[4]                    : -322.5249
##  ARIMA(2,1,0)(2,1,1)[4]                    : -321.8095
##  ARIMA(2,1,1)(0,1,0)[4]                    : -313.1823
##  ARIMA(2,1,1)(0,1,1)[4]                    : -323.2195
##  ARIMA(2,1,1)(0,1,2)[4]                    : -322.0775
##  ARIMA(2,1,1)(1,1,0)[4]                    : -319.652
##  ARIMA(2,1,1)(1,1,1)[4]                    : -322.0835
##  ARIMA(2,1,1)(2,1,0)[4]                    : -320.9829
##  ARIMA(2,1,2)(0,1,0)[4]                    : -316.9733
##  ARIMA(2,1,2)(0,1,1)[4]                    : -331.1188
##  ARIMA(2,1,2)(1,1,0)[4]                    : -327.0455
##  ARIMA(2,1,3)(0,1,0)[4]                    : -318.6657
##  ARIMA(3,1,0)(0,1,0)[4]                    : -314.8657
##  ARIMA(3,1,0)(0,1,1)[4]                    : -323.3274
##  ARIMA(3,1,0)(0,1,2)[4]                    : -322.3289
##  ARIMA(3,1,0)(1,1,0)[4]                    : -319.9772
##  ARIMA(3,1,0)(1,1,1)[4]                    : -322.2921
##  ARIMA(3,1,0)(2,1,0)[4]                    : -321.521
##  ARIMA(3,1,1)(0,1,0)[4]                    : -317.6288
##  ARIMA(3,1,1)(0,1,1)[4]                    : -321.3282
##  ARIMA(3,1,1)(1,1,0)[4]                    : -318.9317
##  ARIMA(3,1,2)(0,1,0)[4]                    : -317.4148
## 
## 
## 
##  Best model: ARIMA(2,1,2)(0,1,1)[4]

## Series: LIP 
## ARIMA(2,1,2)(0,1,1)[4] 
## 
## Coefficients:
##          ar1      ar2      ma1     ma2     sma1
##       1.0031  -0.6347  -1.1523  0.9315  -0.4625
## s.e.  0.1762   0.1055   0.0961  0.0536   0.1053
## 
## sigma^2 estimated as 0.001761:  log likelihood=171.56
## AIC=-331.12   AICc=-330.19   BIC=-315.67

# 回帰診断
tsdiag(x)

# 短期的予測
plot(forecast(x, h = 12))

code 17: 電力需要

dfDenryoku <- dfDenryokuOrg %>%
  mutate(
    dDay = as.Date(day),
    gDemand = demand / 1000,
    gTemper = temp
  )

g1 <- ggplot(data=dfDenryoku, aes(x = dDay, y = gDemand))
g1 <- g1 + geom_line()
g1 <- g1 + labs(x = "日付", y = "電力需要(1000000kWh)")
g1 <- g1 + scale_x_date(date_breaks = "1 months", date_labels = "%m/%d")
g1 <- g1 + theme_bw()

g2 <- ggplot(data=dfDenryoku, aes(x = dDay, y = gTemper))
g2 <- g2 + geom_line()
g2 <- g2 + labs(x = "日付", y = "平均気温(摂氏)")
g2 <- g2 + scale_x_date(date_breaks = "1 months", date_labels = "%m/%d")
g2 <- g2 + theme_bw()

g <- g1 + g2 + plot_layout(ncol = 1)
g

acf(dfDenryoku$gDemand)

acf(dfDenryoku$gTemper)

code 18: 電力需要の定常性

ndiffs(dfDenryokuOrg$demand, test = "adf", type = "trend")

## [1] 0

ndiffs(dfDenryokuOrg$temp, test = "adf", type = "trend")

## [1] 0

code 19: 電力需要の回帰モデル

asHoliday <- c(
  "2003/10/13", "2003/11/03", "2003/11/23", "2004/01/12",
  "2004/02/11"
)
asShogatu <- c(
  paste0("2003/12/", 30:31),
  paste0("2004/01/", 1:3)
)
dfDenryoku <- dfDenryokuOrg %>%
  mutate(
    dDay = as.Date(day),
    nWDay = wday(dDay),
    bSun = if_else(nWDay == 1, 1, 0),
    bMon = if_else(nWDay == 2, 1, 0),
    bSat = if_else(nWDay == 7, 1, 0),
    bHoliday = if_else(dDay %in% as.Date(asHoliday), 1, 0),
    bShogatu = if_else(dDay %in% as.Date(asShogatu), 1, 0),
    gDemand = demand / 1000,
    gTemper = temp
  )

# OLS
oModel1 <- lm(
  gDemand ~ gTemper + bMon + bSat + bSun + bHoliday + bShogatu,
  data=dfDenryoku
)
summary(oModel1)

## 
## Call:
## lm(formula = gDemand ~ gTemper + bMon + bSat + bSun + bHoliday + 
##     bShogatu, data = dfDenryoku)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -116.434  -15.659   -0.808   16.562   83.491 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  994.3924     6.0448 164.504  < 2e-16 ***
## gTemper      -11.5438     0.4639 -24.885  < 2e-16 ***
## bMon         -32.9638     6.9163  -4.766 4.52e-06 ***
## bSat         -71.9321     6.5546 -10.974  < 2e-16 ***
## bSun        -134.4875     6.5975 -20.384  < 2e-16 ***
## bHoliday     -35.9926    12.9406  -2.781  0.00613 ** 
## bShogatu    -236.1801    12.6029 -18.740  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.46 on 145 degrees of freedom
## Multiple R-squared:  0.9052, Adjusted R-squared:  0.9012 
## F-statistic: 230.6 on 6 and 145 DF,  p-value: < 2.2e-16

# OLS残差時系列
dfResidual <- dfDenryoku %>%
  mutate(
    Centered = as.vector(scale(gDemand, center = TRUE, scale = FALSE)),
    Fitted   = as.vector(scale(fitted(oModel1), center = TRUE, scale = FALSE)),
    Residual = as.vector(residuals(oModel1))
  ) %>%
  gather(sVar, gValue, c(Centered, Fitted, Residual))
g <- ggplot(data=dfResidual, aes(x = dDay, y = gValue, color=sVar))
g <- g + geom_line()
g <- g + geom_hline(yintercept = 0)
g <- g + theme_bw()
g

ggsave(g, file = paste0("./chap5_code19_1.png"), width = 22, height = 14, units = "cm")

# OLS残差のACF
acf(residuals(oModel1))

# BGテスト
bgtest(oModel1)

## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  oModel1
## LM test = 17.533, df = 1, p-value = 2.824e-05

# AR(1) error model
mgX <- dfDenryoku %>%
  dplyr::select(gTemper, bMon, bSat, bSun, bHoliday, bShogatu) %>%
  as.matrix(.)
oModel2 <- Arima(
  y     = dfDenryoku$gDemand,
  order = c(1, 0, 0),
  xreg  = mgX
)
oModel2

## Series: dfDenryoku$gDemand 
## Regression with ARIMA(1,0,0) errors 
## 
## Coefficients:
##          ar1  intercept  gTemper      bMon      bSat       bSun  bHoliday
##       0.7827   952.1809  -8.3642  -24.4706  -69.1524  -128.9732  -36.4822
## s.e.  0.0619    14.5001   1.0067    5.1014    4.7706     5.6495    8.5043
##        bShogatu
##       -107.1146
## s.e.    20.4405
## 
## sigma^2 estimated as 572.5:  log likelihood=-694.64
## AIC=1407.28   AICc=1408.55   BIC=1434.5

# ARMA error model
oModel3 <- auto.arima(dfDenryoku$gDemand, d = 0, xreg  = mgX)
oModel3

## Series: dfDenryoku$gDemand 
## Regression with ARIMA(5,0,1) errors 
## 
## Coefficients:
##           ar1     ar2     ar3     ar4      ar5     ma1  intercept  gTemper
##       -0.2618  0.4900  0.3610  0.2796  -0.1552  0.7538   945.8745  -7.4875
## s.e.   0.1565  0.1054  0.0944  0.1024   0.0997  0.1255    15.5885   1.0253
##           bMon      bSat       bSun  bHoliday   bShogatu
##       -30.7042  -68.4723  -135.3686  -30.1991  -169.6477
## s.e.    4.5078    4.0671     4.6051    8.9254    20.6859
## 
## sigma^2 estimated as 512.5:  log likelihood=-683.83
## AIC=1395.65   AICc=1398.72   BIC=1437.99

# 残差時系列
dfResidual <- dfDenryoku %>%
  mutate(
    OLS = as.vector(residuals(oModel1)),
    ARMA_1 = as.vector(residuals(oModel2)),
    ARMA_2 = as.vector(residuals(oModel3))
  ) %>%
  gather(sVar, gValue, c(OLS, ARMA_1, ARMA_2))
g <- ggplot(data=dfResidual, aes(x = dDay, y = gValue, color=sVar))
g <- g + geom_line()
g <- g + geom_hline(yintercept = 0)
g <- g + theme_bw()
g

ggsave(g, file = paste0("./chap5_code19_2.png"), width = 22, height = 14, units = "cm")

# FGLS, AR(1)
oModel4 <- gls(
  gDemand ~ gTemper + bMon + bSat + bSun + bHoliday + bShogatu,
  corr = corARMA(p=1),
  data=dfDenryoku
)
summary(oModel4)

## Generalized least squares fit by REML
##   Model: gDemand ~ gTemper + bMon + bSat + bSun + bHoliday + bShogatu 
##   Data: dfDenryoku 
##        AIC      BIC   logLik
##   1371.166 1397.957 -676.583
## 
## Correlation Structure: AR(1)
##  Formula: ~1 
##  Parameter estimate(s):
##       Phi 
## 0.7961851 
## 
## Coefficients:
##                 Value Std.Error   t-value p-value
## (Intercept)  951.0448 14.253143  66.72527       0
## gTemper       -8.2712  0.939645  -8.80253       0
## bMon         -24.3504  5.162340  -4.71693       0
## bSat         -69.1323  4.856087 -14.23621       0
## bSun        -128.9388  5.757543 -22.39475       0
## bHoliday     -36.5359  8.646249  -4.22564       0
## bShogatu    -104.9998 18.241606  -5.75606       0
## 
##  Correlation: 
##          (Intr) gTempr bMon   bSat   bSun   bHoldy
## gTemper  -0.747                                   
## bMon     -0.100 -0.002                            
## bSat     -0.104  0.008  0.310                     
## bSun     -0.008 -0.152  0.567  0.556              
## bHoliday  0.016 -0.022 -0.244  0.000 -0.071       
## bShogatu -0.015 -0.057  0.175  0.001  0.158 -0.040
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -4.10772640 -0.54330393  0.04209911  0.62534388  1.91621487 
## 
## Residual standard error: 39.40679 
## Degrees of freedom: 152 total; 145 residual

# FGLS, ARMA(5, 1)
oModel5 <- gls(
  gDemand ~ gTemper + bMon + bSat + bSun + bHoliday + bShogatu,
  corr = corARMA(p=5, q=1),
  data = dfDenryoku
)
summary(oModel5)

## Generalized least squares fit by REML
##   Model: gDemand ~ gTemper + bMon + bSat + bSun + bHoliday + bShogatu 
##   Data: dfDenryoku 
##       AIC      BIC    logLik
##   1360.33 1402.004 -666.1648
## 
## Correlation Structure: ARMA(5,1)
##  Formula: ~1 
##  Parameter estimate(s):
##       Phi1       Phi2       Phi3       Phi4       Phi5     Theta1 
## -0.2681756  0.5042322  0.3692546  0.2879284 -0.1367366  0.7624480 
## 
## Coefficients:
##                 Value Std.Error   t-value p-value
## (Intercept)  943.7072 16.192804  58.27942   0e+00
## gTemper       -7.3042  0.905353  -8.06781   0e+00
## bMon         -30.5827  4.398100  -6.95361   0e+00
## bSat         -68.5679  4.067206 -16.85872   0e+00
## bSun        -135.4897  4.556282 -29.73691   0e+00
## bHoliday     -30.6078  8.725392  -3.50790   6e-04
## bShogatu    -170.6390 15.137568 -11.27255   0e+00
## 
##  Correlation: 
##          (Intr) gTempr bMon   bSat   bSun   bHoldy
## gTemper  -0.649                                   
## bMon     -0.062  0.016                            
## bSat     -0.060  0.011 -0.015                     
## bSun      0.034 -0.167  0.394  0.382              
## bHoliday  0.028 -0.046 -0.304  0.043 -0.053       
## bShogatu  0.048 -0.134  0.175 -0.004  0.176 -0.055
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.76668703 -0.69624219 -0.07090714  0.71660764  2.27464780 
## 
## Residual standard error: 37.0372 
## Degrees of freedom: 152 total; 145 residual

confint(oModel5)

##                   2.5 %      97.5 %
## (Intercept)  911.969885  975.444511
## gTemper       -9.078683   -5.529763
## bMon         -39.202780  -21.962542
## bSat         -76.539457  -60.596302
## bSun        -144.419884 -126.559586
## bHoliday     -47.709272  -13.506363
## bShogatu    -200.308038 -140.969863

# 残差時系列
dfResidual <- dfDenryoku %>%
  mutate(
    OLS = as.vector(residuals(oModel1)),
    FGLS_1 = as.vector(residuals(oModel4)),
    FGLS_2 = as.vector(residuals(oModel5))
  ) %>%
  gather(sVar, gValue, c(OLS, FGLS_1, FGLS_2))
g <- ggplot(data=dfResidual, aes(x = dDay, y = gValue, color=sVar))
g <- g + geom_line()
g <- g + geom_hline(yintercept = 0)
g <- g + theme_bw()
g

ggsave(g, file = paste0("./chap5_code19_3.png"), width = 22, height = 14, units = "cm")

# 期待値
dfNew <- crossing(
  gTemper = seq(
    as.integer(min(dfDenryoku$gTemper)),
    as.integer(max(dfDenryoku$gTemper)),
    1
  ),
  nDayType = 0:3
) %>%
  mutate(
    bMon = if_else(nDayType == 1, 1, 0),
    bSat = if_else(nDayType == 2, 1, 0),
    bSun = if_else(nDayType == 3, 1, 0),
    bShogatu = 0,
    bHoliday = 0,
    fDayType = factor(
      nDayType,
      levels=0:3,
      labels=c("火・水・木・金", "月", "土", "日")
    )
  )
dfNew$gPredict <- predict(oModel5, newdata=dfNew)
g <- ggplot(data=dfNew, aes(x = gTemper, y = gPredict, color = fDayType))
g <- g + geom_line()
g <- g + geom_point()
g <- g + labs(
  y="電力需要の期待値(1000000khW)", x = "平均気温(摂氏)",
  caption = "(祝日・正月を除く期待値を示す)"
)
g <- g + theme_bw()
print(g)

ggsave(g, file = paste0("./chap5_code19_4.png"), width = 22, height = 12, units = "cm")

code 20. 調和回帰

library(fpp2)

## Loading required package: fma

## Loading required package: expsmooth

png("./chap5_code20_1.png", width = 24, height = 14, res = 72, units = "cm")
par(mfrow=c(1,2))
plot(gasoline)
seasonplot(gasoline, type = "l", col = "gray")
dev.off()

## png 
##   2

lOut <- lapply(
  1:8, 
  function(nK){
    oModel <- Arima(
      gasoline, 
      order = c(1,1,1), 
      xreg = fourier(gasoline, K=nK), 
    )
    agCoef <- coef(oModel)
    dfCoef <- tibble(
      sVar = names(agCoef), 
      gCoef = agCoef
    )
    out <- as.tibble(
      fourier(gasoline, K=nK)
    ) %>%
      mutate( 
        gTime = time(gasoline), 
        gCycle = cycle(gasoline)
      ) %>%
      gather(sVar, gValue, -c(gTime, gCycle)) %>%
      left_join(dfCoef, by = "sVar") %>%
      group_by(gTime, gCycle) %>%
      summarize(gSeason = sum(gValue * gCoef)) %>%
      ungroup() %>%
      mutate(nK = nK)
    return(out)
  }
)

## Warning: `as.tibble()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

dfPlot <- bind_rows(lOut) %>%
  mutate(sK = paste0("S=", nK))
g <- ggplot(data=dfPlot, aes(x = gCycle, y = gSeason))
g <- g + geom_line()
g <- g + labs(x = "週", y = "季節変動")
g <- g + facet_wrap(~ sK)
g <- g + theme_bw()
print(g)

ggsave(g, file = paste0("./chap5_code20_2.png"), width = 22, height = 10, units = "cm")

Appendix 1. Rの日付クラス

x <- as.Date(c("2019/03/01", "2019/04/01"))
x

## [1] "2019-03-01" "2019-04-01"

str(x)

##  Date[1:2], format: "2019-03-01" "2019-04-01"

x <- as.Date(c("2019-03-01", "2019-04-01"))
x

## [1] "2019-03-01" "2019-04-01"

str(x)

##  Date[1:2], format: "2019-03-01" "2019-04-01"

x <- ymd(c("2019/03/01", "2019/04-01"))
x

## [1] "2019-03-01" "2019-04-01"

str(x)

##  Date[1:2], format: "2019-03-01" "2019-04-01"

ymd(c(20190301, 20190401))

## [1] "2019-03-01" "2019-04-01"

x <- ymd(c("2019/03/01", "2019/04-01"))
year(x) # 年を取り出す

## [1] 2019 2019

month(x) # 月を取り出す

## [1] 3 4

day(x) # 日を取り出す

## [1] 1 1

wday(x) # 曜日を取り出す

## [1] 6 2

x[2] - x[1]  # 差を求める

## Time difference of 31 days

Appendix 2. R の時系列クラス

# データ例
dfIn <- tibble(
  sMonth = c(
    "2018-04", "2018-05", "2018-06", "2018-07", "2018-08", "2018-09",
    "2018-10", "2018-11", "2018-12", "2019-01", "2019-02", "2019-03"
  ),
  gX = c(1, 2, 1, 3, 2, 4, 3, 2, 2, 2, 3, 2)
)

# ts
tsX <- ts(dfIn$gX, start = c(2018, 4), frequency = 12)
print(tsX)

##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2018               1   2   1   3   2   4   3   2   2
## 2019   2   3   2

plot(tsX)

# zoo
library(zoo)
zooX <- zoo(dfIn$gX, as.Date(paste0(dfIn$sMonth, "-01")))
plot(zooX)

# xts
library(xts)

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

xtsX <- xts(dfIn$gX, as.Date(paste0(dfIn$sMonth, "-01")))
plot(xtsX)

plot(as.xts(tsX))

# tsibble
library(tsibble)

## 
## Attaching package: 'tsibble'

## The following object is masked from 'package:zoo':
## 
##     index

## The following objects are masked from 'package:lubridate':
## 
##     interval, new_interval

## The following object is masked from 'package:dplyr':
## 
##     id

tsIn <- dfIn %>%
  mutate( dMonth = as.Date(paste0(sMonth, "-01")) ) %>%
  dplyr::select(dMonth, gX) %>%
  as_tsibble( index = dMonth)
g <- ggplot(data=tsIn, aes(y = gX, x = dMonth))
g <- g + geom_line()
print(g)

# tibbletime
library(tibbletime)

## 
## Attaching package: 'tibbletime'

## The following object is masked from 'package:stats':
## 
##     filter

tsIn <- dfIn %>%
  mutate( dMonth = as.Date(paste0(sMonth, "-01")) ) %>%
  dplyr::select(dMonth, gX) %>%
  as_tbl_time( index = dMonth )

以上

MMM研修 V. 時系列分析の基礎

Shigeru ONO

2020年4月9日

準備

code 1: みせかけの相関

code 2: ACF

code 3: 定常性・トレンド定常・差分定常

code 4: ホワイトノイズ

code 5: AR(1) の定常性条件

code 6: AR1の標本コレログラム

code 7: MA1の標本コレログラム

code 8: random walk

code 9: drift つき random walk

code 10: trend つき random walk

code 11: 民間設備投資

code 12: 変数変換

code 13: 単位根検定

code 14: ARMAモデルの推定

code 15: ARIMAモデルの推定

code 16: SARIMAモデルの推定

code 17: 電力需要

code 18: 電力需要の定常性

code 19: 電力需要の回帰モデル

code 20. 調和回帰

Appendix 1. Rの日付クラス

Appendix 2. R の時系列クラス