このページでは、株式会社インサイト・ファクトリー 2020年度社内研修「マーケティング・ミックス・モデリング」V章「時系列分析の基礎」のRコードを公開しています。

準備

library(tidyverse)
## -- Attaching packages --------------------------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.0     √ purrr   0.3.3
## √ tibble  3.0.0     √ dplyr   0.8.5
## √ tidyr   1.0.2     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0
## -- Conflicts ------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(assertr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(patchwork)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(urca)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(nlme)
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
## 
##     getResponse
## The following object is masked from 'package:dplyr':
## 
##     collapse
# 民間設備投資
# 出典: 田中(2008)
# http://www.cap-shuppan.co.jp/kikan_keiryou_jikeiretsu_toukei.html
dfIP <- read_csv("./IPO1980.csv")
## Parsed with column specification:
## cols(
##   IP = col_double()
## )
# 電力需要
# 出典: 福地・伊藤(2011)
# https://sites.google.com/site/econometricsr/
dfDenryokuOrg <- read_csv("./denryoku.csv")
## Parsed with column specification:
## cols(
##   day = col_character(),
##   week = col_character(),
##   demand = col_double(),
##   temp = col_double()
## )
# AR(1)を生成する関数
#   arima.sim()では, 非定常過程となるときにエラーとなる
#   stats.filter()でも生成できるが、ここでは理解を深めるために自作する
sub_generateAR1.1 <- function(nLength, gPhi, gMu=0, gSigmaSq=1, gInit=0){
  agWhiteNoise = rnorm(nLength, sd = sqrt(gSigmaSq))
  out <- numeric(nLength)
  gPrevY <- gInit
  for (t in 1:nLength){
    out[t] <- gMu + gPhi * gPrevY + agWhiteNoise[t]
    gPrevY <- out[t]
  }
  out
}

code 1: みせかけの相関

# データの作成
set.seed(123)
nNumTrial <- 1000
nNumTime  <- 100
dfIn <- tibble(
  nTrial = as.vector(sapply(1:nNumTrial, function(x) rep(x,nNumTime))),
  nTime  = as.vector(sapply(1:nNumTrial, function(x) 1:nNumTime)),
  nDeltaX = sample(-100:100, nNumTrial*nNumTime, replace = T),
  nDeltaY = sample(-100:100, nNumTrial*nNumTime, replace = T)
) %>%
  group_by(nTrial) %>%
  mutate(
    nX = cumsum(nDeltaX),
    nY = cumsum(nDeltaY)
  ) %>%
  ungroup()
## print(dfIn)
# 時系列描画用のデータ
dfPlot1 <- dfIn %>%
  filter(nTrial %in% 1:11) %>%
  gather(sVar, nValue, c(nDeltaX, nDeltaY, nX, nY))
# 相関係数
dfPlot2 <- dfIn %>%
  group_by(nTrial) %>%
  summarize(
    gCorr1 = cor(nDeltaX, nDeltaY),
    gCorr2 = cor(nX, nY)
  ) %>%
  ungroup() %>%
  mutate(
    sLabel1 = sprintf("r = %0.2f", gCorr1),
    sLabel2 = sprintf("r = %0.2f", gCorr2)
  )

# 例1) ランダムな時系列のプロット
g <- ggplot(
  data = dfPlot1 %>%
    filter(sVar %in% c("nDeltaX", "nDeltaY")) %>%
    mutate(
      fVar = factor(sVar, levels = c("nDeltaX", "nDeltaY"), labels = c("X", "Y"))
    ),
  aes(x = nTime, y = nValue, color = fVar)
)
g <- g + geom_line()
g <- g + geom_text(
  data=dfPlot2 %>% filter(nTrial %in% 1:11),
  aes(label = sLabel1), color=1, x = 50, y = -80
)
g <- g + scale_color_discrete(name = "")
g <- g + labs(x = "Time", y = "Value")
g <- g + theme_bw()
g <- g + facet_wrap(~ nTrial, ncol=4)
print(g)

ggsave(g, file = paste0("./chap5_code1_1.png"), width = 22, height = 9, units = "cm")

# その相関の散布図
g <- ggplot(data=dfPlot2, aes(x = gCorr1))
g <- g + geom_histogram(binwidth = 0.02)
g <- g + scale_x_continuous(limits = c(-1, 1))
g <- g + labs(x = "相関係数")
g <- g + theme_bw()
print(g)
## Warning: Removed 2 rows containing missing values (geom_bar).

ggsave(g, file = paste0("./chap5_code1_2.png"), width = 18, height = 4, units = "cm")
## Warning: Removed 2 rows containing missing values (geom_bar).
# 例2) 自己回帰する時系列のプロット
g <- ggplot(
  data = dfPlot1 %>%
    filter(sVar %in% c("nX", "nY")) %>%
    mutate(
      fVar = factor(sVar, levels = c("nX", "nY"), labels = c("X", "Y"))
    ),
  aes(x = nTime, y = nValue, color = fVar)
)
g <- g + geom_line()
g <- g + geom_text(
  data=dfPlot2 %>% filter(nTrial %in% 1:11),
  aes(label = sLabel2), color=1, x = 50, y = -800
)
g <- g + scale_color_discrete(name = "")
g <- g + labs(x = "Time", y = "Value")
g <- g + theme_bw()
g <- g + facet_wrap(~ nTrial, ncol=4)
print(g)

ggsave(g, file = paste0("./chap5_code1_3.png"), width = 22, height = 9, units = "cm")

# その相関の散布図
g <- ggplot(data=dfPlot2, aes(x = gCorr2))
g <- g + geom_histogram(binwidth = 0.02)
g <- g + scale_x_continuous(limits = c(-1, 1))
g <- g + labs(x = "相関係数")
g <- g + theme_bw()
print(g)
## Warning: Removed 2 rows containing missing values (geom_bar).

ggsave(g, file = paste0("./chap5_code1_4.png"), width = 18, height = 4, units = "cm")
## Warning: Removed 2 rows containing missing values (geom_bar).

code 2: ACF

plot(UKDriverDeaths)