このページでは、株式会社インサイト・ファクトリー 2020年度社内研修「マーケティング・ミックス・モデリング」V章「時系列分析の基礎」のRコードを公開しています。
library(tidyverse)
## -- Attaching packages --------------------------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.0 √ purrr 0.3.3
## √ tibble 3.0.0 √ dplyr 0.8.5
## √ tidyr 1.0.2 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.5.0
## -- Conflicts ------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(assertr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(patchwork)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(urca)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
##
## getResponse
## The following object is masked from 'package:dplyr':
##
## collapse
# 民間設備投資
# 出典: 田中(2008)
# http://www.cap-shuppan.co.jp/kikan_keiryou_jikeiretsu_toukei.html
dfIP <- read_csv("./IPO1980.csv")
## Parsed with column specification:
## cols(
## IP = col_double()
## )
# 電力需要
# 出典: 福地・伊藤(2011)
# https://sites.google.com/site/econometricsr/
dfDenryokuOrg <- read_csv("./denryoku.csv")
## Parsed with column specification:
## cols(
## day = col_character(),
## week = col_character(),
## demand = col_double(),
## temp = col_double()
## )
# AR(1)を生成する関数
# arima.sim()では, 非定常過程となるときにエラーとなる
# stats.filter()でも生成できるが、ここでは理解を深めるために自作する
sub_generateAR1.1 <- function(nLength, gPhi, gMu=0, gSigmaSq=1, gInit=0){
agWhiteNoise = rnorm(nLength, sd = sqrt(gSigmaSq))
out <- numeric(nLength)
gPrevY <- gInit
for (t in 1:nLength){
out[t] <- gMu + gPhi * gPrevY + agWhiteNoise[t]
gPrevY <- out[t]
}
out
}
# データの作成
set.seed(123)
nNumTrial <- 1000
nNumTime <- 100
dfIn <- tibble(
nTrial = as.vector(sapply(1:nNumTrial, function(x) rep(x,nNumTime))),
nTime = as.vector(sapply(1:nNumTrial, function(x) 1:nNumTime)),
nDeltaX = sample(-100:100, nNumTrial*nNumTime, replace = T),
nDeltaY = sample(-100:100, nNumTrial*nNumTime, replace = T)
) %>%
group_by(nTrial) %>%
mutate(
nX = cumsum(nDeltaX),
nY = cumsum(nDeltaY)
) %>%
ungroup()
## print(dfIn)
# 時系列描画用のデータ
dfPlot1 <- dfIn %>%
filter(nTrial %in% 1:11) %>%
gather(sVar, nValue, c(nDeltaX, nDeltaY, nX, nY))
# 相関係数
dfPlot2 <- dfIn %>%
group_by(nTrial) %>%
summarize(
gCorr1 = cor(nDeltaX, nDeltaY),
gCorr2 = cor(nX, nY)
) %>%
ungroup() %>%
mutate(
sLabel1 = sprintf("r = %0.2f", gCorr1),
sLabel2 = sprintf("r = %0.2f", gCorr2)
)
# 例1) ランダムな時系列のプロット
g <- ggplot(
data = dfPlot1 %>%
filter(sVar %in% c("nDeltaX", "nDeltaY")) %>%
mutate(
fVar = factor(sVar, levels = c("nDeltaX", "nDeltaY"), labels = c("X", "Y"))
),
aes(x = nTime, y = nValue, color = fVar)
)
g <- g + geom_line()
g <- g + geom_text(
data=dfPlot2 %>% filter(nTrial %in% 1:11),
aes(label = sLabel1), color=1, x = 50, y = -80
)
g <- g + scale_color_discrete(name = "")
g <- g + labs(x = "Time", y = "Value")
g <- g + theme_bw()
g <- g + facet_wrap(~ nTrial, ncol=4)
print(g)
ggsave(g, file = paste0("./chap5_code1_1.png"), width = 22, height = 9, units = "cm")
# その相関の散布図
g <- ggplot(data=dfPlot2, aes(x = gCorr1))
g <- g + geom_histogram(binwidth = 0.02)
g <- g + scale_x_continuous(limits = c(-1, 1))
g <- g + labs(x = "相関係数")
g <- g + theme_bw()
print(g)
## Warning: Removed 2 rows containing missing values (geom_bar).
ggsave(g, file = paste0("./chap5_code1_2.png"), width = 18, height = 4, units = "cm")
## Warning: Removed 2 rows containing missing values (geom_bar).
# 例2) 自己回帰する時系列のプロット
g <- ggplot(
data = dfPlot1 %>%
filter(sVar %in% c("nX", "nY")) %>%
mutate(
fVar = factor(sVar, levels = c("nX", "nY"), labels = c("X", "Y"))
),
aes(x = nTime, y = nValue, color = fVar)
)
g <- g + geom_line()
g <- g + geom_text(
data=dfPlot2 %>% filter(nTrial %in% 1:11),
aes(label = sLabel2), color=1, x = 50, y = -800
)
g <- g + scale_color_discrete(name = "")
g <- g + labs(x = "Time", y = "Value")
g <- g + theme_bw()
g <- g + facet_wrap(~ nTrial, ncol=4)
print(g)
ggsave(g, file = paste0("./chap5_code1_3.png"), width = 22, height = 9, units = "cm")
# その相関の散布図
g <- ggplot(data=dfPlot2, aes(x = gCorr2))
g <- g + geom_histogram(binwidth = 0.02)
g <- g + scale_x_continuous(limits = c(-1, 1))
g <- g + labs(x = "相関係数")
g <- g + theme_bw()
print(g)
## Warning: Removed 2 rows containing missing values (geom_bar).
ggsave(g, file = paste0("./chap5_code1_4.png"), width = 18, height = 4, units = "cm")
## Warning: Removed 2 rows containing missing values (geom_bar).
plot(UKDriverDeaths)
acf(UKDriverDeaths)
set.seed(12345)
# 定常っぽい
dfIn <- tibble(
t = 1:100,
v1 = as.vector(arima.sim(n = 100, list(ar = c(0.8)))),
v2 = as.vector(arima.sim(n = 100, list(ar = c(0.5, -0.5)))),
v3 = as.vector(arima.sim(n = 100, list(ma = c(0.9))))
) %>% gather(sVar, gValue, -t)
g <- ggplot(data=dfIn, aes(x = t, y = gValue))
g <- g + geom_line()
g <- g + labs(x = "Time", y = "Value")
g <- g + theme_bw()
g <- g + facet_wrap(~ sVar)
print(g)
ggsave(g, file = paste0("./chap5_code3_1.png"), width = 18, height = 6, units = "cm")
# 非定常っぽい
dfIn <- tibble(
t = 1:100,
v1 = as.vector(arima.sim(n = 100, list(order=c(2,1,0), ar = c(0, 0.9))))[-1]/10,
v2 = cumsum(rnorm(100)),
v3 = as.vector(arima.sim(n = 100, list(ar = c(0.2))))
) %>%
mutate(v3 = if_else(t > 50, v3 * 2.5, v3)) %>%
gather(sVar, gValue, -t)
g <- ggplot(data=dfIn, aes(x = t, y = gValue))
g <- g + geom_line()
g <- g + labs(x = "Time", y = "Value")
g <- g + theme_bw()
g <- g + facet_wrap(~ sVar)
print(g)
ggsave(g, file = paste0("./chap5_code3_2.png"), width = 18, height = 6, units = "cm")
# トレンド定常っぽい
set.seed(12345)
dfIn <- tibble(
t = 1:100,
v2 = as.vector(arima.sim(n = 100, list(ar = c(0.9)))),
v1 = t * 0.08 + v2,
v3 = v1 - lag(v1)
)
g1 <- ggplot(data=dfIn, aes(x = t, y = v1))
g1 <- g1 + geom_line()
g1 <- g1 + geom_abline(slope = 0.08, intercept = 0, color="red")
g1 <- g1 + labs(x = "Time", y = "Y[Time]")
g1 <- g1 + theme_bw()
g2 <- ggplot(data=dfIn, aes(x = t, y = v2))
g2 <- g2 + geom_line()
g2 <- g2 + geom_hline(yintercept = 0, color="red")
g2 <- g2 + labs(x = "Time", y = "Y[Time] - Time * 0.08")
g2 <- g2 + ylim(-10, 10)
g2 <- g2 + theme_bw()
g3 <- ggplot(data=dfIn, aes(x = t, y = v3))
g3 <- g3 + geom_line()
g3 <- g3 + ylim(-10, 10)
g3 <- g3 + labs(x = "Time", y = "Y[Time] - Y[Time-1]")
g3 <- g3 + theme_bw()
g3
## Warning: Removed 1 row(s) containing missing values (geom_path).
g <- g1 + g2 + g3
g
## Warning: Removed 1 row(s) containing missing values (geom_path).
ggsave(g, file = paste0("./chap5_code3_3.png"), width = 22, height = 6, units = "cm")
## Warning: Removed 1 row(s) containing missing values (geom_path).
# 差分定常っぽい
set.seed(1234)
dfIn <- tibble(
t = 1:100,
v1 = sub_generateAR1.1(nLength = 100, gMu = 0.5, gPhi = 1),
v3 = v1 - lag(v1)
)
oReg <- lm(v1 ~ 0 + t, data=dfIn)
gRegCoef <- coef(oReg)[1]
dfIn <- dfIn %>%
mutate(
v2 = v1 - t * gRegCoef
)
g1 <- ggplot(data=dfIn, aes(x = t, y = v1))
g1 <- g1 + geom_line()
g1 <- g1 + labs(x = "Time", y = "Value")
g1 <- g1 + geom_abline(slope = gRegCoef, intercept = 0, color="red")
g1 <- g1 + labs(x = "Time", y = "Y[Time]")
g1 <- g1 + theme_bw()
g2 <- ggplot(data=dfIn, aes(x = t, y = v2))
g2 <- g2 + geom_line()
g2 <- g2 + labs(x = "Time", y = sprintf("Y[Time] - Time * %0.2f", gRegCoef))
g2 <- g2 + ylim(-14, 14)
g2 <- g2 + geom_hline(yintercept = 0, color="red")
g2 <- g2 + theme_bw()
g3 <- ggplot(data=dfIn, aes(x = t, y = v3))
g3 <- g3 + geom_line()
g3 <- g3 + ylim(-14, 14)
g3 <- g3 + labs(x = "Time", y = "Y[Time] - Y[Time-1]")
g3 <- g3 + theme_bw()
g <- g1 + g2 + g3
g
## Warning: Removed 1 row(s) containing missing values (geom_path).
ggsave(g, file = paste0("./chap5_code3_4.png"), width = 22, height = 6, units = "cm")
## Warning: Removed 1 row(s) containing missing values (geom_path).
set.seed(12345)
dfIn <- tibble(
t = 1:100,
v1 = runif(n=100, min=-2, max=+2),
v2 = rnorm(n=100)
) %>%
gather(sVar, gValue, -t)
g <- ggplot(data=dfIn, aes(x = t, y = gValue))
g <- g + geom_line()
g <- g + labs(x = "Time", y = "Value")
g <- g + theme_bw()
g <- g + facet_wrap(~ sVar)
print(g)
ggsave(g, file = paste0("./chap5_code4.png"), width = 18, height = 10, units = "cm")
agPhi <- seq(0.9, 1.1, by = 0.1)
nNumTrial <- 10
nNumTime <- 100
dfIn <- crossing(
gPhi = agPhi,
nTrial = 1:nNumTrial,
nTime = 1:nNumTime
) %>%
group_by(gPhi, nTrial) %>%
mutate(
gX = sub_generateAR1.1(nNumTime, gPhi[1])
) %>%
ungroup()
lG <- lapply(
agPhi,
function(gCurrent){
g <- ggplot(
dfIn %>% filter(gPhi == gCurrent),
aes(x = nTime, y = gX, group = nTrial)
)
g <- g + geom_line(alpha=0.5)
g <- g + labs(
title = paste0("Y[t] = ", gCurrent, "*Y[t-1] + e"), x = "Time", y = "Y")
g <- g + theme_bw()
g
}
)
g <- wrap_plots(lG)
g
ggsave(g, file = paste0("./chap5_code5.png"), width = 24, height = 10, units = "cm")
agPhi <- c(seq(0.2, 0.8, by = 0.2), seq(-0.2, -0.8, by = -0.2))
dfIn <- data.frame(
t = rep(1:100, length(agPhi)),
gPhi = unlist(lapply(agPhi, function(x) rep(x, 100))),
agX = unlist(lapply(
agPhi,
function(gPhi){
as.vector(stats::filter(rnorm(n = 100), gPhi, method = "recursive"))
}
))
)
lG1 <- lapply(
agPhi,
function(gCurrent){
dfPlot <- dfIn %>% filter(gPhi == gCurrent)
g <- ggplot(data=dfPlot, aes(x = t, y = agX))
g <- g + geom_line()
g <- g + labs(title = sprintf("Phi = %0.1f", gCurrent))
g <- g + theme_bw()
g
}
)
lG2 <- lapply(
agPhi,
function(gCurrent){
oACF <- acf(dfIn %>% filter(gPhi == gCurrent) %>% pull(agX), plot = FALSE)
dfPlot <- tibble(
k = oACF$lag[,1,1],
acf = oACF$acf[,1,1]
)
g <- ggplot(data=dfPlot, aes(x = k, y = acf))
g <- g + geom_col()
g <- g + labs(title = sprintf("Phi = %0.1f", gCurrent))
g <- g + theme_bw()
g
}
)
lTemp <- list(
lG1[[1]], lG2[[1]],
lG1[[5]], lG2[[5]],
lG1[[2]], lG2[[2]],
lG1[[6]], lG2[[6]],
lG1[[3]], lG2[[3]],
lG1[[7]], lG2[[7]],
lG1[[4]], lG2[[4]],
lG1[[8]], lG2[[8]]
)
g <- wrap_plots(lTemp, nrow=4)
g
ggsave(g, file = paste0("./chap5_code6.png"), width = 22, height = 14, units = "cm")
set.seed(12345)
agTheta <- c(seq(0.2, 0.8, by = 0.2), seq(-0.2, -0.8, by = -0.2))
dfIn <- tibble(
t = rep(1:100, length(agTheta)),
gTheta = unlist(lapply(agTheta, function(x) rep(x, 100))),
gX = unlist(lapply(
agTheta,
function(gTheta){
as.vector(arima.sim(model=list(ma=gTheta), n=100))
}
))
)
## print(acf(dfIn %>% filter(gTheta == 0.8) %>% pull(gX)))
lG1 <- lapply(
agTheta,
function(gCurrent){
dfPlot <- dfIn %>% filter(gTheta == gCurrent)
g <- ggplot(data=dfPlot, aes(x = t, y = gX))
g <- g + geom_line()
g <- g + labs(title = sprintf("Theta = %0.1f", gCurrent))
g <- g + theme_bw()
g
}
)
lG2 <- lapply(
agTheta,
function(gCurrent){
oACF <- acf(dfIn %>% filter(gTheta == gCurrent) %>% pull(gX), plot = FALSE)
dfPlot <- tibble(
k = oACF$lag[,1,1],
acf = oACF$acf[,1,1]
)
g <- ggplot(data=dfPlot, aes(x = k, y = acf))
g <- g + geom_col()
g <- g + labs(title = sprintf("Theta = %0.1f", gCurrent))
g <- g + theme_bw()
g
}
)
lTemp <- list(
lG1[[1]], lG2[[1]],
lG1[[5]], lG2[[5]],
lG1[[2]], lG2[[2]],
lG1[[6]], lG2[[6]],
lG1[[3]], lG2[[3]],
lG1[[7]], lG2[[7]],
lG1[[4]], lG2[[4]],
lG1[[8]], lG2[[8]]
)
g <- wrap_plots(lTemp, nrow=4)
g
ggsave(g, file = paste0("./chap5_code7.png"), width = 22, height = 14, units = "cm")
set.seed(123)
dfIn <- tibble(nTrial = unlist(lapply(1:10, function(x) rep(x, 100)))) %>%
group_by(nTrial) %>%
mutate(
nT = 1:100,
gRW = cumsum(rnorm(100)),
gWN = sub_generateAR1.1(100, gPhi = 0.9)
)
g1 <- ggplot(data=dfIn, aes(x = nT, y = gRW, group = nTrial))
g1 <- g1 + geom_line(alpha=0.3)
g1 <- g1 + scale_y_continuous(limits = c(-15, 15))
g1 <- g1 + theme_bw()
g2 <- ggplot(data=dfIn, aes(x = nT, y = gWN, group = nTrial))
g2 <- g2 + geom_line(alpha=0.3)
g2 <- g2 + scale_y_continuous(limits = c(-15, 15))
g2 <- g2 + theme_bw()
g <- g1 + g2
g
ggsave(g, file = paste0("./chap5_code8.png"), width = 22, height = 10, units = "cm")
set.seed(123)
c <- 0.1
dfIn <- tibble(nTrial = unlist(lapply(1:10, function(x) rep(x, 100)))) %>%
group_by(nTrial) %>%
mutate(
nT = 1:100,
gRW = nT * c + cumsum(rnorm(100)),
gWN = nT * c + sub_generateAR1.1(100, gPhi = 0.9)
)
g1 <- ggplot(data=dfIn, aes(x = nT, y = gRW, group = nTrial))
g1 <- g1 + geom_abline(slope = 0.1, color="red")
g1 <- g1 + geom_line(alpha=0.3)
g1 <- g1 + scale_y_continuous(limits = c(-10, 25))
g1 <- g1 + theme_bw()
g2 <- ggplot(data=dfIn, aes(x = nT, y = gWN, group = nTrial))
g2 <- g2 + geom_abline(slope = 0.1, color="red")
g2 <- g2 + geom_line(alpha=0.3)
g2 <- g2 + scale_y_continuous(limits = c(-10, 25))
g2 <- g2 + theme_bw()
g <- g1 + g2
g
ggsave(g, file = paste0("./chap5_code9.png"), width = 22, height = 10, units = "cm")
set.seed(123)
c <- 0.05
d <- 0.004
dfIn <- tibble(nTrial = unlist(lapply(1:10, function(x) rep(x, 100)))) %>%
group_by(nTrial) %>%
mutate(
nT = 1:100,
gRW = nT * c + cumsum(nT) * d + cumsum(rnorm(100)),
gWN = nT * c + cumsum(nT) * d + sub_generateAR1.1(100, 0.9),
gExp = nT * c + (1/2)*(nT + nT^2)*d
)
g1 <- ggplot(data=dfIn, aes(x = nT, y = gRW, group = nTrial))
g1 <- g1 + geom_line(alpha=0.3)
g1 <- g1 + geom_line(aes(x = nT, y = gExp), color="red")
g1 <- g1 + scale_y_continuous(limits = c(-10, 40))
g1 <- g1 + theme_bw()
g2 <- ggplot(data=dfIn, aes(x = nT, y = gWN, group = nTrial))
g2 <- g2 + geom_line(alpha=0.3)
g2 <- g2 + geom_line(aes(x = nT, y = gExp), color="red")
g2 <- g2 + scale_y_continuous(limits = c(-10, 40))
g2 <- g2 + theme_bw()
g <- g1 + g2
g
ggsave(g, file = paste0("./chap5_code10.png"), width = 22, height = 10, units = "cm")
dfIP <- dfIP %>%
mutate(
nYear = c( unlist(lapply(1980:2004, function(x) rep(x, 4))), 2005, 2005),
nQ = c(rep(1:4, 2004-1980 + 1), 1, 2),
dYQ = yq(nYear * 100 + nQ),
LIP = log(IP)
) %>%
dplyr::select(dYQ, IP, LIP)
# 原系列
g <- ggplot(data=dfIP, aes(x = dYQ, y = IP))
g <- g + geom_line()
g <- g + labs(x = "期間(四半期)", y = "実質民間設備投資(兆円)")
g <- g + theme_bw()
print(g)
# ACF
acf(dfIP$IP)
# season plot
seasonplot(ts(dfIP$IP, frequency = 4))
# 対数系列
g <- ggplot(data=dfIP, aes(x = dYQ, y = LIP))
g <- g + geom_line()
g <- g + labs(x = "期間(四半期)", y = "実質民間設備投資(兆円)(対数)")
g <- g + theme_bw()
print(g)
# 対数
set.seed(12345)
tsX <- exp(arima.sim(model = list(ar=c(0.9)), sd=0.4, n = 100))
par(mfrow=c(1,2))
plot(tsX)
plot(log(tsX))
# 差分
set.seed(12345)
tsX <- arima.sim(model = list(order=c(1,1,0), ar=c(0.5)), sd=0.4, n = 100)
par(mfrow=c(1,2))
plot(tsX)
plot(diff(tsX))
# 対数差分
set.seed(12345)
tsX <- exp(arima.sim(model = list(ar=c(0.9)), sd=0.1, n = 100)+ (1:100)*0.02)
par(mfrow=c(1,2))
plot(tsX)
plot(diff(log(tsX)))
# 前年同期比
par(mfrow=c(1,2))
plot(UKDriverDeaths)
plot(diff(log(UKDriverDeaths), lag=12))
LIP <- log(dfIP$IP)
oURDF1 <- ur.df(LIP, type = "trend", lags=4)
summary(oURDF1)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.109590 -0.024968 -0.008654 0.031423 0.105416
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3201651 0.1243460 2.575 0.0117 *
## z.lag.1 -0.0818769 0.0326016 -2.511 0.0138 *
## tt 0.0006361 0.0003163 2.011 0.0473 *
## z.diff.lag1 -0.0963497 0.0722731 -1.333 0.1859
## z.diff.lag2 -0.0365374 0.0705355 -0.518 0.6057
## z.diff.lag3 -0.1393002 0.0696887 -1.999 0.0486 *
## z.diff.lag4 0.7690449 0.0687855 11.180 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0441 on 90 degrees of freedom
## Multiple R-squared: 0.8901, Adjusted R-squared: 0.8828
## F-statistic: 121.5 on 6 and 90 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -2.5114 2.6964 3.1949
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.99 -3.43 -3.13
## phi2 6.22 4.75 4.07
## phi3 8.43 6.49 5.47
oURDF2 <- ur.df(LIP, type = "drift", lags=4)
summary(oURDF2)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.113547 -0.027109 -0.003352 0.031242 0.101719
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.11514 0.07237 1.591 0.1151
## z.lag.1 -0.02554 0.01696 -1.507 0.1354
## z.diff.lag1 -0.14707 0.06885 -2.136 0.0354 *
## z.diff.lag2 -0.07528 0.06898 -1.091 0.2780
## z.diff.lag3 -0.17159 0.06894 -2.489 0.0146 *
## z.diff.lag4 0.75120 0.06934 10.833 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04484 on 91 degrees of freedom
## Multiple R-squared: 0.8852, Adjusted R-squared: 0.8789
## F-statistic: 140.3 on 5 and 91 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -1.5065 1.957
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1 6.52 4.63 3.81
oURDF3 <- ur.df(LIP, type = "none", lags=4)
summary(oURDF3)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.117731 -0.025814 -0.005051 0.032132 0.103038
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 0.001368 0.001173 1.166 0.2465
## z.diff.lag1 -0.156547 0.069164 -2.263 0.0260 *
## z.diff.lag2 -0.075600 0.069551 -1.087 0.2799
## z.diff.lag3 -0.166829 0.069445 -2.402 0.0183 *
## z.diff.lag4 0.760465 0.069670 10.915 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04521 on 92 degrees of freedom
## Multiple R-squared: 0.8825, Adjusted R-squared: 0.8761
## F-statistic: 138.2 on 5 and 92 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: 1.1662
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
DLIP <- diff(LIP)
ts.plot(DLIP, type = "l")
oURDF1 <- ur.df(DLIP, type = "trend", lags=3)
summary(oURDF1)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.118232 -0.027526 -0.004958 0.031374 0.101757
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.868e-03 1.027e-02 0.864 0.39011
## z.lag.1 -6.573e-01 2.272e-01 -2.893 0.00477 **
## tt -4.645e-05 1.665e-04 -0.279 0.78084
## z.diff.lag1 -5.030e-01 1.759e-01 -2.859 0.00527 **
## z.diff.lag2 -5.833e-01 1.273e-01 -4.583 1.46e-05 ***
## z.diff.lag3 -7.554e-01 7.054e-02 -10.708 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04537 on 91 degrees of freedom
## Multiple R-squared: 0.9659, Adjusted R-squared: 0.964
## F-statistic: 515.2 on 5 and 91 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -2.8932 2.8028 4.1964
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.99 -3.43 -3.13
## phi2 6.22 4.75 4.07
## phi3 8.43 6.49 5.47
oURDF2 <- ur.df(DLIP, type = "drift", lags=3)
summary(oURDF2)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.118073 -0.027069 -0.005819 0.031957 0.102086
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.006369 0.005001 1.274 0.20603
## z.lag.1 -0.647927 0.223566 -2.898 0.00469 **
## z.diff.lag1 -0.510441 0.173022 -2.950 0.00403 **
## z.diff.lag2 -0.588340 0.125337 -4.694 9.31e-06 ***
## z.diff.lag3 -0.757703 0.069683 -10.874 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04514 on 92 degrees of freedom
## Multiple R-squared: 0.9658, Adjusted R-squared: 0.9644
## F-statistic: 650.5 on 4 and 92 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -2.8981 4.2074
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1 6.52 4.63 3.81
oURDF3 <- ur.df(DLIP, type = "none", lags=3)
summary(oURDF3)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.111212 -0.020289 -0.000807 0.033546 0.113031
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 -0.53407 0.20560 -2.598 0.010913 *
## z.diff.lag1 -0.59580 0.16005 -3.723 0.000338 ***
## z.diff.lag2 -0.64505 0.11755 -5.487 3.5e-07 ***
## z.diff.lag3 -0.78605 0.06625 -11.864 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0453 on 93 degrees of freedom
## Multiple R-squared: 0.9652, Adjusted R-squared: 0.9638
## F-statistic: 645.8 on 4 and 93 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -2.5976
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
ndiffs(LIP, test = "adf", type = "trend")
## [1] 1
set.seed(123)
dfIn <- tibble(
nTime = 1:100,
gY = sub_generateAR1.1(100, 0.5, gMu = 2),
gLagY = lag(gY)
)
# dfIn$gYに時系列がはいっている
plot(as.ts(dfIn$gY))
# forecast::Arima() で AR(1)モデルを当てはめる
library(forecast)
Arima(dfIn$gY, order = c(1, 0, 0), method = "CSS")
## Series: dfIn$gY
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.4487 4.1901
## s.e. 0.0858 0.1649
##
## sigma^2 estimated as 0.8344: part log likelihood=-132.33
# 参考: lm()で推定した場合
# dfIn$gLagYに前期の値が入っているとして
summary(lm(gY ~ gLagY, data = dfIn))
##
## Call:
## lm(formula = gY ~ gLagY, data = dfIn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.37379 -0.58777 0.00698 0.57049 2.08449
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.31016 0.37320 6.190 1.44e-08 ***
## gLagY 0.44866 0.08709 5.151 1.36e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9181 on 97 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2148, Adjusted R-squared: 0.2067
## F-statistic: 26.54 on 1 and 97 DF, p-value: 1.358e-06
# ARMA(2, 0)
Arima(dfIn$gY, order = c(2, 0, 0))
## Series: dfIn$gY
## ARIMA(2,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 mean
## 0.5053 -0.0659 4.1207
## s.e. 0.1020 0.1034 0.1659
##
## sigma^2 estimated as 0.8989: log likelihood=-135.17
## AIC=278.34 AICc=278.76 BIC=288.76
# ARMA(1, 1)
Arima(dfIn$gY, order = c(1, 0, 1))
## Series: dfIn$gY
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## 0.1169 0.4254 4.1293
## s.e. 0.3476 0.3473 0.1498
##
## sigma^2 estimated as 0.8917: log likelihood=-134.8
## AIC=277.59 AICc=278.01 BIC=288.01
auto.arima(
dfIn$gY, d = 0,
trace=T, seasonal = F, ic ="aic", stepwise=F, approximation = F
)
##
## ARIMA(0,0,0) with zero mean : 576.4883
## ARIMA(0,0,0) with non-zero mean : 298.7064
## ARIMA(0,0,1) with zero mean : 467.3743
## ARIMA(0,0,1) with non-zero mean : 275.7177
## ARIMA(0,0,2) with zero mean : 425.262
## ARIMA(0,0,2) with non-zero mean : 277.6738
## ARIMA(0,0,3) with zero mean : 387.1492
## ARIMA(0,0,3) with non-zero mean : 275.7871
## ARIMA(0,0,4) with zero mean : 366.6385
## ARIMA(0,0,4) with non-zero mean : 277.7856
## ARIMA(0,0,5) with zero mean : 357.8372
## ARIMA(0,0,5) with non-zero mean : 279.7058
## ARIMA(1,0,0) with zero mean : 303.8079
## ARIMA(1,0,0) with non-zero mean : 276.7485
## ARIMA(1,0,1) with zero mean : Inf
## ARIMA(1,0,1) with non-zero mean : 277.5924
## ARIMA(1,0,2) with zero mean : Inf
## ARIMA(1,0,2) with non-zero mean : 278.726
## ARIMA(1,0,3) with zero mean : Inf
## ARIMA(1,0,3) with non-zero mean : 277.7859
## ARIMA(1,0,4) with zero mean : Inf
## ARIMA(1,0,4) with non-zero mean : Inf
## ARIMA(2,0,0) with zero mean : 301.7191
## ARIMA(2,0,0) with non-zero mean : 278.3427
## ARIMA(2,0,1) with zero mean : Inf
## ARIMA(2,0,1) with non-zero mean : 276.635
## ARIMA(2,0,2) with zero mean : Inf
## ARIMA(2,0,2) with non-zero mean : 278.3583
## ARIMA(2,0,3) with zero mean : Inf
## ARIMA(2,0,3) with non-zero mean : Inf
## ARIMA(3,0,0) with zero mean : 294.4183
## ARIMA(3,0,0) with non-zero mean : 278.9864
## ARIMA(3,0,1) with zero mean : Inf
## ARIMA(3,0,1) with non-zero mean : 278.267
## ARIMA(3,0,2) with zero mean : Inf
## ARIMA(3,0,2) with non-zero mean : Inf
## ARIMA(4,0,0) with zero mean : 296.4182
## ARIMA(4,0,0) with non-zero mean : 278.3183
## ARIMA(4,0,1) with zero mean : Inf
## ARIMA(4,0,1) with non-zero mean : 280.1795
## ARIMA(5,0,0) with zero mean : Inf
## ARIMA(5,0,0) with non-zero mean : 280.2263
##
##
##
## Best model: ARIMA(0,0,1) with non-zero mean
## Series: dfIn$gY
## ARIMA(0,0,1) with non-zero mean
##
## Coefficients:
## ma1 mean
## 0.5313 4.1324
## s.e. 0.0925 0.1420
##
## sigma^2 estimated as 0.8835: log likelihood=-134.86
## AIC=275.72 AICc=275.97 BIC=283.53
IP <- ts(dfIP$IP, start=c(1980, 1), frequency = 4)
LIP <- log(IP)
Arima(LIP, order = c(2, 1, 2), method = "ML")
## Series: LIP
## ARIMA(2,1,2)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## -0.6612 0.3365 0.4000 -0.4750
## s.e. 0.2055 0.2052 0.1734 0.1542
##
## sigma^2 estimated as 0.004752: log likelihood=127.56
## AIC=-245.12 AICc=-244.49 BIC=-232.05
x <- auto.arima(
LIP, d = 1,
trace=T, seasonal = F, ic ="aic", stepwise=F, approximation = F
)
##
## ARIMA(0,1,0) : -129.0637
## ARIMA(0,1,0) with drift : -127.4935
## ARIMA(0,1,1) : -177.0597
## ARIMA(0,1,1) with drift : -180.2615
## ARIMA(0,1,2) : -214.5526
## ARIMA(0,1,2) with drift : -215.1704
## ARIMA(0,1,3) : -185.5557
## ARIMA(0,1,3) with drift : -188.0912
## ARIMA(0,1,4) : -238.8765
## ARIMA(0,1,4) with drift : -238.0338
## ARIMA(0,1,5) : -259.8235
## ARIMA(0,1,5) with drift : -261.9403
## ARIMA(1,1,0) : -202.8732
## ARIMA(1,1,0) with drift : -204.5221
## ARIMA(1,1,1) : Inf
## ARIMA(1,1,1) with drift : Inf
## ARIMA(1,1,2) : -213.5177
## ARIMA(1,1,2) with drift : -213.7697
## ARIMA(1,1,3) : Inf
## ARIMA(1,1,3) with drift : Inf
## ARIMA(1,1,4) : Inf
## ARIMA(1,1,4) with drift : Inf
## ARIMA(2,1,0) : -202.1178
## ARIMA(2,1,0) with drift : -203.1421
## ARIMA(2,1,1) : -198.9304
## ARIMA(2,1,1) with drift : -201.2576
## ARIMA(2,1,2) : Inf
## ARIMA(2,1,2) with drift : Inf
## ARIMA(2,1,3) : Inf
## ARIMA(2,1,3) with drift : Inf
## ARIMA(3,1,0) : -241.633
## ARIMA(3,1,0) with drift : -250.7896
## ARIMA(3,1,1) : Inf
## ARIMA(3,1,1) with drift : Inf
## ARIMA(3,1,2) : -294.4893
## ARIMA(3,1,2) with drift : -301.522
## ARIMA(4,1,0) : Inf
## ARIMA(4,1,0) with drift : Inf
## ARIMA(4,1,1) : Inf
## ARIMA(4,1,1) with drift : Inf
## ARIMA(5,1,0) : Inf
## ARIMA(5,1,0) with drift : Inf
##
##
##
## Best model: ARIMA(3,1,2) with drift
x
## Series: LIP
## ARIMA(3,1,2) with drift
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 drift
## -0.9764 -0.8642 -0.8481 0.6576 0.5076 0.0092
## s.e. 0.0590 0.0884 0.0614 0.1201 0.1349 0.0029
##
## sigma^2 estimated as 0.00262: log likelihood=157.76
## AIC=-301.52 AICc=-300.32 BIC=-283.22
# 回帰診断
tsdiag(x)
# 短期的予測
plot(forecast(x, h = 12))
IP <- ts(dfIP$IP, start=c(1980, 1), frequency = 4)
LIP <- log(IP)
# SARIMA(2, 1, 2, 1, 1, 1)の推定
Arima(
LIP,
order = c(2, 1, 2),
seasonal = list(order = c(1,1,1), period = 4),
method = "ML"
)
## Series: LIP
## ARIMA(2,1,2)(1,1,1)[4]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sar1 sma1
## 0.9742 -0.6440 -1.1279 0.9367 0.1518 -0.5818
## s.e. 0.1685 0.1064 0.0915 0.0644 0.2397 0.1980
##
## sigma^2 estimated as 0.00177: log likelihood=171.75
## AIC=-329.49 AICc=-328.24 BIC=-311.47
x <- auto.arima(
LIP, d = 1,
trace=T, seasonal = T, ic ="aic", stepwise=F, approximation = F
)
##
## ARIMA(0,1,0)(0,1,0)[4] : -316.2635
## ARIMA(0,1,0)(0,1,1)[4] : -326.3591
## ARIMA(0,1,0)(0,1,2)[4] : -325.2726
## ARIMA(0,1,0)(1,1,0)[4] : -322.6142
## ARIMA(0,1,0)(1,1,1)[4] : -325.2517
## ARIMA(0,1,0)(1,1,2)[4] : -323.2832
## ARIMA(0,1,0)(2,1,0)[4] : -324.1089
## ARIMA(0,1,0)(2,1,1)[4] : -323.2944
## ARIMA(0,1,0)(2,1,2)[4] : -321.2942
## ARIMA(0,1,1)(0,1,0)[4] : -314.569
## ARIMA(0,1,1)(0,1,1)[4] : -324.6076
## ARIMA(0,1,1)(0,1,2)[4] : -323.7809
## ARIMA(0,1,1)(1,1,0)[4] : -320.6965
## ARIMA(0,1,1)(1,1,1)[4] : -323.7305
## ARIMA(0,1,1)(1,1,2)[4] : -321.7913
## ARIMA(0,1,1)(2,1,0)[4] : -322.5703
## ARIMA(0,1,1)(2,1,1)[4] : -321.7989
## ARIMA(0,1,1)(2,1,2)[4] : -319.8015
## ARIMA(0,1,2)(0,1,0)[4] : -317.9835
## ARIMA(0,1,2)(0,1,1)[4] : -324.9519
## ARIMA(0,1,2)(0,1,2)[4] : -324.0557
## ARIMA(0,1,2)(1,1,0)[4] : -321.9168
## ARIMA(0,1,2)(1,1,1)[4] : -324.0905
## ARIMA(0,1,2)(1,1,2)[4] : -322.0983
## ARIMA(0,1,2)(2,1,0)[4] : -323.0307
## ARIMA(0,1,2)(2,1,1)[4] : -322.0945
## ARIMA(0,1,3)(0,1,0)[4] : -316.4917
## ARIMA(0,1,3)(0,1,1)[4] : -323.4725
## ARIMA(0,1,3)(0,1,2)[4] : -322.6869
## ARIMA(0,1,3)(1,1,0)[4] : -320.4206
## ARIMA(0,1,3)(1,1,1)[4] : -322.6156
## ARIMA(0,1,3)(2,1,0)[4] : -321.8897
## ARIMA(1,1,0)(0,1,0)[4] : -314.663
## ARIMA(1,1,0)(0,1,1)[4] : -324.6836
## ARIMA(1,1,0)(0,1,2)[4] : -323.9349
## ARIMA(1,1,0)(1,1,0)[4] : -320.7239
## ARIMA(1,1,0)(1,1,1)[4] : -323.8774
## ARIMA(1,1,0)(1,1,2)[4] : -321.9457
## ARIMA(1,1,0)(2,1,0)[4] : -322.7093
## ARIMA(1,1,0)(2,1,1)[4] : -321.9497
## ARIMA(1,1,0)(2,1,2)[4] : -319.9546
## ARIMA(1,1,1)(0,1,0)[4] : -312.9603
## ARIMA(1,1,1)(0,1,1)[4] : Inf
## ARIMA(1,1,1)(0,1,2)[4] : Inf
## ARIMA(1,1,1)(1,1,0)[4] : -319.1684
## ARIMA(1,1,1)(1,1,1)[4] : -322.6238
## ARIMA(1,1,1)(1,1,2)[4] : -320.6322
## ARIMA(1,1,1)(2,1,0)[4] : -321.1827
## ARIMA(1,1,1)(2,1,1)[4] : Inf
## ARIMA(1,1,2)(0,1,0)[4] : -316.1325
## ARIMA(1,1,2)(0,1,1)[4] : -323.6681
## ARIMA(1,1,2)(0,1,2)[4] : -322.6174
## ARIMA(1,1,2)(1,1,0)[4] : -320.3261
## ARIMA(1,1,2)(1,1,1)[4] : -322.5914
## ARIMA(1,1,2)(2,1,0)[4] : -321.6761
## ARIMA(1,1,3)(0,1,0)[4] : -318.7062
## ARIMA(1,1,3)(0,1,1)[4] : -322.004
## ARIMA(1,1,3)(1,1,0)[4] : -319.2309
## ARIMA(2,1,0)(0,1,0)[4] : -314.7958
## ARIMA(2,1,0)(0,1,1)[4] : -324.9045
## ARIMA(2,1,0)(0,1,2)[4] : -323.7631
## ARIMA(2,1,0)(1,1,0)[4] : -321.4036
## ARIMA(2,1,0)(1,1,1)[4] : -323.8094
## ARIMA(2,1,0)(1,1,2)[4] : -321.8095
## ARIMA(2,1,0)(2,1,0)[4] : -322.5249
## ARIMA(2,1,0)(2,1,1)[4] : -321.8095
## ARIMA(2,1,1)(0,1,0)[4] : -313.1823
## ARIMA(2,1,1)(0,1,1)[4] : -323.2195
## ARIMA(2,1,1)(0,1,2)[4] : -322.0775
## ARIMA(2,1,1)(1,1,0)[4] : -319.652
## ARIMA(2,1,1)(1,1,1)[4] : -322.0835
## ARIMA(2,1,1)(2,1,0)[4] : -320.9829
## ARIMA(2,1,2)(0,1,0)[4] : -316.9733
## ARIMA(2,1,2)(0,1,1)[4] : -331.1188
## ARIMA(2,1,2)(1,1,0)[4] : -327.0455
## ARIMA(2,1,3)(0,1,0)[4] : -318.6657
## ARIMA(3,1,0)(0,1,0)[4] : -314.8657
## ARIMA(3,1,0)(0,1,1)[4] : -323.3274
## ARIMA(3,1,0)(0,1,2)[4] : -322.3289
## ARIMA(3,1,0)(1,1,0)[4] : -319.9772
## ARIMA(3,1,0)(1,1,1)[4] : -322.2921
## ARIMA(3,1,0)(2,1,0)[4] : -321.521
## ARIMA(3,1,1)(0,1,0)[4] : -317.6288
## ARIMA(3,1,1)(0,1,1)[4] : -321.3282
## ARIMA(3,1,1)(1,1,0)[4] : -318.9317
## ARIMA(3,1,2)(0,1,0)[4] : -317.4148
##
##
##
## Best model: ARIMA(2,1,2)(0,1,1)[4]
x
## Series: LIP
## ARIMA(2,1,2)(0,1,1)[4]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sma1
## 1.0031 -0.6347 -1.1523 0.9315 -0.4625
## s.e. 0.1762 0.1055 0.0961 0.0536 0.1053
##
## sigma^2 estimated as 0.001761: log likelihood=171.56
## AIC=-331.12 AICc=-330.19 BIC=-315.67
# 回帰診断
tsdiag(x)
# 短期的予測
plot(forecast(x, h = 12))
dfDenryoku <- dfDenryokuOrg %>%
mutate(
dDay = as.Date(day),
gDemand = demand / 1000,
gTemper = temp
)
g1 <- ggplot(data=dfDenryoku, aes(x = dDay, y = gDemand))
g1 <- g1 + geom_line()
g1 <- g1 + labs(x = "日付", y = "電力需要(1000000kWh)")
g1 <- g1 + scale_x_date(date_breaks = "1 months", date_labels = "%m/%d")
g1 <- g1 + theme_bw()
g2 <- ggplot(data=dfDenryoku, aes(x = dDay, y = gTemper))
g2 <- g2 + geom_line()
g2 <- g2 + labs(x = "日付", y = "平均気温(摂氏)")
g2 <- g2 + scale_x_date(date_breaks = "1 months", date_labels = "%m/%d")
g2 <- g2 + theme_bw()
g <- g1 + g2 + plot_layout(ncol = 1)
g
acf(dfDenryoku$gDemand)
acf(dfDenryoku$gTemper)
ndiffs(dfDenryokuOrg$demand, test = "adf", type = "trend")
## [1] 0
ndiffs(dfDenryokuOrg$temp, test = "adf", type = "trend")
## [1] 0
asHoliday <- c(
"2003/10/13", "2003/11/03", "2003/11/23", "2004/01/12",
"2004/02/11"
)
asShogatu <- c(
paste0("2003/12/", 30:31),
paste0("2004/01/", 1:3)
)
dfDenryoku <- dfDenryokuOrg %>%
mutate(
dDay = as.Date(day),
nWDay = wday(dDay),
bSun = if_else(nWDay == 1, 1, 0),
bMon = if_else(nWDay == 2, 1, 0),
bSat = if_else(nWDay == 7, 1, 0),
bHoliday = if_else(dDay %in% as.Date(asHoliday), 1, 0),
bShogatu = if_else(dDay %in% as.Date(asShogatu), 1, 0),
gDemand = demand / 1000,
gTemper = temp
)
# OLS
oModel1 <- lm(
gDemand ~ gTemper + bMon + bSat + bSun + bHoliday + bShogatu,
data=dfDenryoku
)
summary(oModel1)
##
## Call:
## lm(formula = gDemand ~ gTemper + bMon + bSat + bSun + bHoliday +
## bShogatu, data = dfDenryoku)
##
## Residuals:
## Min 1Q Median 3Q Max
## -116.434 -15.659 -0.808 16.562 83.491
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 994.3924 6.0448 164.504 < 2e-16 ***
## gTemper -11.5438 0.4639 -24.885 < 2e-16 ***
## bMon -32.9638 6.9163 -4.766 4.52e-06 ***
## bSat -71.9321 6.5546 -10.974 < 2e-16 ***
## bSun -134.4875 6.5975 -20.384 < 2e-16 ***
## bHoliday -35.9926 12.9406 -2.781 0.00613 **
## bShogatu -236.1801 12.6029 -18.740 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.46 on 145 degrees of freedom
## Multiple R-squared: 0.9052, Adjusted R-squared: 0.9012
## F-statistic: 230.6 on 6 and 145 DF, p-value: < 2.2e-16
# OLS残差時系列
dfResidual <- dfDenryoku %>%
mutate(
Centered = as.vector(scale(gDemand, center = TRUE, scale = FALSE)),
Fitted = as.vector(scale(fitted(oModel1), center = TRUE, scale = FALSE)),
Residual = as.vector(residuals(oModel1))
) %>%
gather(sVar, gValue, c(Centered, Fitted, Residual))
g <- ggplot(data=dfResidual, aes(x = dDay, y = gValue, color=sVar))
g <- g + geom_line()
g <- g + geom_hline(yintercept = 0)
g <- g + theme_bw()
g
ggsave(g, file = paste0("./chap5_code19_1.png"), width = 22, height = 14, units = "cm")
# OLS残差のACF
acf(residuals(oModel1))
# BGテスト
bgtest(oModel1)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: oModel1
## LM test = 17.533, df = 1, p-value = 2.824e-05
# AR(1) error model
mgX <- dfDenryoku %>%
dplyr::select(gTemper, bMon, bSat, bSun, bHoliday, bShogatu) %>%
as.matrix(.)
oModel2 <- Arima(
y = dfDenryoku$gDemand,
order = c(1, 0, 0),
xreg = mgX
)
oModel2
## Series: dfDenryoku$gDemand
## Regression with ARIMA(1,0,0) errors
##
## Coefficients:
## ar1 intercept gTemper bMon bSat bSun bHoliday
## 0.7827 952.1809 -8.3642 -24.4706 -69.1524 -128.9732 -36.4822
## s.e. 0.0619 14.5001 1.0067 5.1014 4.7706 5.6495 8.5043
## bShogatu
## -107.1146
## s.e. 20.4405
##
## sigma^2 estimated as 572.5: log likelihood=-694.64
## AIC=1407.28 AICc=1408.55 BIC=1434.5
# ARMA error model
oModel3 <- auto.arima(dfDenryoku$gDemand, d = 0, xreg = mgX)
oModel3
## Series: dfDenryoku$gDemand
## Regression with ARIMA(5,0,1) errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 intercept gTemper
## -0.2618 0.4900 0.3610 0.2796 -0.1552 0.7538 945.8745 -7.4875
## s.e. 0.1565 0.1054 0.0944 0.1024 0.0997 0.1255 15.5885 1.0253
## bMon bSat bSun bHoliday bShogatu
## -30.7042 -68.4723 -135.3686 -30.1991 -169.6477
## s.e. 4.5078 4.0671 4.6051 8.9254 20.6859
##
## sigma^2 estimated as 512.5: log likelihood=-683.83
## AIC=1395.65 AICc=1398.72 BIC=1437.99
# 残差時系列
dfResidual <- dfDenryoku %>%
mutate(
OLS = as.vector(residuals(oModel1)),
ARMA_1 = as.vector(residuals(oModel2)),
ARMA_2 = as.vector(residuals(oModel3))
) %>%
gather(sVar, gValue, c(OLS, ARMA_1, ARMA_2))
g <- ggplot(data=dfResidual, aes(x = dDay, y = gValue, color=sVar))
g <- g + geom_line()
g <- g + geom_hline(yintercept = 0)
g <- g + theme_bw()
g
ggsave(g, file = paste0("./chap5_code19_2.png"), width = 22, height = 14, units = "cm")
# FGLS, AR(1)
oModel4 <- gls(
gDemand ~ gTemper + bMon + bSat + bSun + bHoliday + bShogatu,
corr = corARMA(p=1),
data=dfDenryoku
)
summary(oModel4)
## Generalized least squares fit by REML
## Model: gDemand ~ gTemper + bMon + bSat + bSun + bHoliday + bShogatu
## Data: dfDenryoku
## AIC BIC logLik
## 1371.166 1397.957 -676.583
##
## Correlation Structure: AR(1)
## Formula: ~1
## Parameter estimate(s):
## Phi
## 0.7961851
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 951.0448 14.253143 66.72527 0
## gTemper -8.2712 0.939645 -8.80253 0
## bMon -24.3504 5.162340 -4.71693 0
## bSat -69.1323 4.856087 -14.23621 0
## bSun -128.9388 5.757543 -22.39475 0
## bHoliday -36.5359 8.646249 -4.22564 0
## bShogatu -104.9998 18.241606 -5.75606 0
##
## Correlation:
## (Intr) gTempr bMon bSat bSun bHoldy
## gTemper -0.747
## bMon -0.100 -0.002
## bSat -0.104 0.008 0.310
## bSun -0.008 -0.152 0.567 0.556
## bHoliday 0.016 -0.022 -0.244 0.000 -0.071
## bShogatu -0.015 -0.057 0.175 0.001 0.158 -0.040
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -4.10772640 -0.54330393 0.04209911 0.62534388 1.91621487
##
## Residual standard error: 39.40679
## Degrees of freedom: 152 total; 145 residual
# FGLS, ARMA(5, 1)
oModel5 <- gls(
gDemand ~ gTemper + bMon + bSat + bSun + bHoliday + bShogatu,
corr = corARMA(p=5, q=1),
data = dfDenryoku
)
summary(oModel5)
## Generalized least squares fit by REML
## Model: gDemand ~ gTemper + bMon + bSat + bSun + bHoliday + bShogatu
## Data: dfDenryoku
## AIC BIC logLik
## 1360.33 1402.004 -666.1648
##
## Correlation Structure: ARMA(5,1)
## Formula: ~1
## Parameter estimate(s):
## Phi1 Phi2 Phi3 Phi4 Phi5 Theta1
## -0.2681756 0.5042322 0.3692546 0.2879284 -0.1367366 0.7624480
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 943.7072 16.192804 58.27942 0e+00
## gTemper -7.3042 0.905353 -8.06781 0e+00
## bMon -30.5827 4.398100 -6.95361 0e+00
## bSat -68.5679 4.067206 -16.85872 0e+00
## bSun -135.4897 4.556282 -29.73691 0e+00
## bHoliday -30.6078 8.725392 -3.50790 6e-04
## bShogatu -170.6390 15.137568 -11.27255 0e+00
##
## Correlation:
## (Intr) gTempr bMon bSat bSun bHoldy
## gTemper -0.649
## bMon -0.062 0.016
## bSat -0.060 0.011 -0.015
## bSun 0.034 -0.167 0.394 0.382
## bHoliday 0.028 -0.046 -0.304 0.043 -0.053
## bShogatu 0.048 -0.134 0.175 -0.004 0.176 -0.055
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.76668703 -0.69624219 -0.07090714 0.71660764 2.27464780
##
## Residual standard error: 37.0372
## Degrees of freedom: 152 total; 145 residual
confint(oModel5)
## 2.5 % 97.5 %
## (Intercept) 911.969885 975.444511
## gTemper -9.078683 -5.529763
## bMon -39.202780 -21.962542
## bSat -76.539457 -60.596302
## bSun -144.419884 -126.559586
## bHoliday -47.709272 -13.506363
## bShogatu -200.308038 -140.969863
# 残差時系列
dfResidual <- dfDenryoku %>%
mutate(
OLS = as.vector(residuals(oModel1)),
FGLS_1 = as.vector(residuals(oModel4)),
FGLS_2 = as.vector(residuals(oModel5))
) %>%
gather(sVar, gValue, c(OLS, FGLS_1, FGLS_2))
g <- ggplot(data=dfResidual, aes(x = dDay, y = gValue, color=sVar))
g <- g + geom_line()
g <- g + geom_hline(yintercept = 0)
g <- g + theme_bw()
g
ggsave(g, file = paste0("./chap5_code19_3.png"), width = 22, height = 14, units = "cm")
# 期待値
dfNew <- crossing(
gTemper = seq(
as.integer(min(dfDenryoku$gTemper)),
as.integer(max(dfDenryoku$gTemper)),
1
),
nDayType = 0:3
) %>%
mutate(
bMon = if_else(nDayType == 1, 1, 0),
bSat = if_else(nDayType == 2, 1, 0),
bSun = if_else(nDayType == 3, 1, 0),
bShogatu = 0,
bHoliday = 0,
fDayType = factor(
nDayType,
levels=0:3,
labels=c("火・水・木・金", "月", "土", "日")
)
)
dfNew$gPredict <- predict(oModel5, newdata=dfNew)
g <- ggplot(data=dfNew, aes(x = gTemper, y = gPredict, color = fDayType))
g <- g + geom_line()
g <- g + geom_point()
g <- g + labs(
y="電力需要の期待値(1000000khW)", x = "平均気温(摂氏)",
caption = "(祝日・正月を除く期待値を示す)"
)
g <- g + theme_bw()
print(g)
ggsave(g, file = paste0("./chap5_code19_4.png"), width = 22, height = 12, units = "cm")
library(fpp2)
## Loading required package: fma
## Loading required package: expsmooth
png("./chap5_code20_1.png", width = 24, height = 14, res = 72, units = "cm")
par(mfrow=c(1,2))
plot(gasoline)
seasonplot(gasoline, type = "l", col = "gray")
dev.off()
## png
## 2
lOut <- lapply(
1:8,
function(nK){
oModel <- Arima(
gasoline,
order = c(1,1,1),
xreg = fourier(gasoline, K=nK),
)
agCoef <- coef(oModel)
dfCoef <- tibble(
sVar = names(agCoef),
gCoef = agCoef
)
out <- as.tibble(
fourier(gasoline, K=nK)
) %>%
mutate(
gTime = time(gasoline),
gCycle = cycle(gasoline)
) %>%
gather(sVar, gValue, -c(gTime, gCycle)) %>%
left_join(dfCoef, by = "sVar") %>%
group_by(gTime, gCycle) %>%
summarize(gSeason = sum(gValue * gCoef)) %>%
ungroup() %>%
mutate(nK = nK)
return(out)
}
)
## Warning: `as.tibble()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
dfPlot <- bind_rows(lOut) %>%
mutate(sK = paste0("S=", nK))
g <- ggplot(data=dfPlot, aes(x = gCycle, y = gSeason))
g <- g + geom_line()
g <- g + labs(x = "週", y = "季節変動")
g <- g + facet_wrap(~ sK)
g <- g + theme_bw()
print(g)
ggsave(g, file = paste0("./chap5_code20_2.png"), width = 22, height = 10, units = "cm")
x <- as.Date(c("2019/03/01", "2019/04/01"))
x
## [1] "2019-03-01" "2019-04-01"
str(x)
## Date[1:2], format: "2019-03-01" "2019-04-01"
x <- as.Date(c("2019-03-01", "2019-04-01"))
x
## [1] "2019-03-01" "2019-04-01"
str(x)
## Date[1:2], format: "2019-03-01" "2019-04-01"
x <- ymd(c("2019/03/01", "2019/04-01"))
x
## [1] "2019-03-01" "2019-04-01"
str(x)
## Date[1:2], format: "2019-03-01" "2019-04-01"
ymd(c(20190301, 20190401))
## [1] "2019-03-01" "2019-04-01"
x <- ymd(c("2019/03/01", "2019/04-01"))
year(x) # 年を取り出す
## [1] 2019 2019
month(x) # 月を取り出す
## [1] 3 4
day(x) # 日を取り出す
## [1] 1 1
wday(x) # 曜日を取り出す
## [1] 6 2
x[2] - x[1] # 差を求める
## Time difference of 31 days
# データ例
dfIn <- tibble(
sMonth = c(
"2018-04", "2018-05", "2018-06", "2018-07", "2018-08", "2018-09",
"2018-10", "2018-11", "2018-12", "2019-01", "2019-02", "2019-03"
),
gX = c(1, 2, 1, 3, 2, 4, 3, 2, 2, 2, 3, 2)
)
# ts
tsX <- ts(dfIn$gX, start = c(2018, 4), frequency = 12)
print(tsX)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2018 1 2 1 3 2 4 3 2 2
## 2019 2 3 2
plot(tsX)
# zoo
library(zoo)
zooX <- zoo(dfIn$gX, as.Date(paste0(dfIn$sMonth, "-01")))
plot(zooX)
# xts
library(xts)
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
xtsX <- xts(dfIn$gX, as.Date(paste0(dfIn$sMonth, "-01")))
plot(xtsX)
plot(as.xts(tsX))
# tsibble
library(tsibble)
##
## Attaching package: 'tsibble'
## The following object is masked from 'package:zoo':
##
## index
## The following objects are masked from 'package:lubridate':
##
## interval, new_interval
## The following object is masked from 'package:dplyr':
##
## id
tsIn <- dfIn %>%
mutate( dMonth = as.Date(paste0(sMonth, "-01")) ) %>%
dplyr::select(dMonth, gX) %>%
as_tsibble( index = dMonth)
g <- ggplot(data=tsIn, aes(y = gX, x = dMonth))
g <- g + geom_line()
print(g)
# tibbletime
library(tibbletime)
##
## Attaching package: 'tibbletime'
## The following object is masked from 'package:stats':
##
## filter
tsIn <- dfIn %>%
mutate( dMonth = as.Date(paste0(sMonth, "-01")) ) %>%
dplyr::select(dMonth, gX) %>%
as_tbl_time( index = dMonth )
以上