knitr::opts_chunk$set(echo = TRUE)Chapter 11
Hands-On-Time-Series-Analysis-with-R
library(TSstudio)Warning: package 'TSstudio' was built under R version 4.2.3
set.seed(12345)
stationary_ts <- arima.sim(model = list(order = c(1,0,0),ar = 0.5 ),n = 500)ts_plot(stationary_ts,
title = "Stationary Time Series",
Ytitle = "Value",
Xtitle = "Index")set.seed(12345)
non_stationary_ts <- arima.sim(model = list(order = c(1,1,0),ar = 0.3 ),n = 500)
ts_plot(non_stationary_ts,
title = "Non-Stationary Time Series",
Ytitle = "Value",
Xtitle = "Index")data(AirPassengers)
AirPassengers Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
1949 112 118 132 129 121 135 148 148 136 119 104 118
1950 115 126 141 135 125 149 170 170 158 133 114 140
1951 145 150 178 163 172 178 199 199 184 162 146 166
1952 171 180 193 181 183 218 230 242 209 191 172 194
1953 196 196 236 235 229 243 264 272 237 211 180 201
1954 204 188 235 227 234 264 302 293 259 229 203 229
1955 242 233 267 269 270 315 364 347 312 274 237 278
1956 284 277 317 313 318 374 413 405 355 306 271 306
1957 315 301 356 348 355 422 465 467 404 347 305 336
1958 340 318 362 348 363 435 491 505 404 359 310 337
1959 360 342 406 396 420 472 548 559 463 407 362 405
1960 417 391 419 461 472 535 622 606 508 461 390 432
data(AirPassengers)
ts_plot(AirPassengers,
title = "Monthly Airline Passenger Numbers 1949-1960",
Ytitle = "Thousands of Passengers",
Xtitle = "Year")data(AirPassengers)
ts_plot(AirPassengers,
title = "Monthly Airline Passenger Numbers 1949-1960",
Ytitle = "Thousands of Passengers",
Xtitle = "Year")ts_plot(diff(AirPassengers, lag = 1),
title = "AirPassengers Series - First Differencing",
Xtitle = "Year",
Ytitle = "Differencing of Thousands of Passengers")ts_plot(diff(diff(AirPassengers, lag = 1), 12),
title = "AirPassengers Series - First and Seasonal Differencing",
Xtitle = "Year",
Ytitle = "Differencing of Thousands of Passengers")ts_plot(diff(log(AirPassengers), lag = 1),
title = "AirPassengers Series - First Differencing with Log Transformation",
Xtitle = "Year",
Ytitle = "Differencing/Log of Thousands of Passengers")set.seed(12345)
library(plotly)Loading required package: ggplot2
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
p1 <- plot_ly()
p2 <- plot_ly()
for(i in 1:20){
rm <- NULL
rw <- arima.sim(model = list(order = c(0, 1, 0)), n = 500)
p1 <- p1 %>% add_lines(x = time(rw), y = as.numeric(rw))
p2 <- p2 %>% add_lines(x = time(diff(rw)), y = as.numeric(diff(rw)))
}p1 %>% layout(title = "Simulate Random Walk",
yaxis = list(title = "Value"),
xaxis = list(title = "Index")) %>%
hide_legend()p2 %>% layout(title = "Simulate Random Walk with First-Order Differencing",
yaxis = list(title = "Value"),
xaxis = list(title = "Index")) %>%
hide_legend()set.seed(12345)
ar2 <- arima.sim(model = list(order = c(2,0,0),
ar = c(0.9, -0.3)),
n = 500) ts_plot(ar2,
title = "Simulate AR(2) Series",
Ytitle = "Value",
Xtitle = "Index")md_ar <- ar(ar2)
md_ar
Call:
ar(x = ar2)
Coefficients:
1 2
0.8851 -0.2900
Order selected 2 sigma^2 estimated as 1.049
par(mfrow=c(1,2))
acf(ar2)
pacf(ar2)
set.seed(12345)
ma2 <- arima.sim(model = list(order = c(0, 0, 2),
ma = c(0.5, -0.3)),
n = 500) ts_plot(ma2,
title = "Simulate MA(2) Series",
Ytitle = "Value",
Xtitle = "Index")md_ma <- arima(ma2, order = c(0,0,2), method = "ML")
md_ma
Call:
arima(x = ma2, order = c(0, 0, 2), method = "ML")
Coefficients:
ma1 ma2 intercept
0.530 -0.3454 0.0875
s.e. 0.041 0.0406 0.0525
sigma^2 estimated as 0.9829: log likelihood = -705.81, aic = 1419.62
par(mfrow=c(1,2))
acf(ma2)
pacf(ma2)
set.seed(12345)
arma <- arima.sim(model = list(order(1,0,2), ar = c(0.7), ma = c(0.5,-0.3)), n = 500)ts_plot(arma,
title = "Simulate ARMA(1,2) Series",
Ytitle = "Value",
Xtitle = "Index")arma_md <- arima(arma, order = c(1,0,2))
arma_md
Call:
arima(x = arma, order = c(1, 0, 2))
Coefficients:
ar1 ma1 ma2 intercept
0.7439 0.4785 -0.3954 0.2853
s.e. 0.0657 0.0878 0.0849 0.1891
sigma^2 estimated as 1.01: log likelihood = -713.18, aic = 1436.36
par(mfrow=c(1,2))
acf(arma)
pacf(arma)
arima(arma, order = c(1, 0, 1), include.mean = FALSE)
Call:
arima(x = arma, order = c(1, 0, 1), include.mean = FALSE)
Coefficients:
ar1 ma1
0.4144 0.8864
s.e. 0.0432 0.0248
sigma^2 estimated as 1.051: log likelihood = -723, aic = 1452
arima(arma, order = c(2, 0, 1), include.mean = FALSE)
Call:
arima(x = arma, order = c(2, 0, 1), include.mean = FALSE)
Coefficients:
ar1 ar2 ma1
0.3136 0.1918 0.9227
s.e. 0.0486 0.0484 0.0183
sigma^2 estimated as 1.019: log likelihood = -715.26, aic = 1438.52
arima(arma, order = c(1, 0, 2), include.mean = FALSE)
Call:
arima(x = arma, order = c(1, 0, 2), include.mean = FALSE)
Coefficients:
ar1 ma1 ma2
0.7602 0.4654 -0.4079
s.e. 0.0626 0.0858 0.0832
sigma^2 estimated as 1.014: log likelihood = -714.27, aic = 1436.54
arima(arma, order = c(2, 0, 2), include.mean = FALSE)
Call:
arima(x = arma, order = c(2, 0, 2), include.mean = FALSE)
Coefficients:
ar1 ar2 ma1 ma2
0.7239 0.0194 0.4997 -0.3783
s.e. 0.2458 0.1257 0.2427 0.2134
sigma^2 estimated as 1.014: log likelihood = -714.26, aic = 1438.51
library(forecast)Warning: package 'forecast' was built under R version 4.2.3
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
best_arma <- arima(arma, order = c(1, 0, 2), include.mean = FALSE)
checkresiduals(best_arma)
Ljung-Box test
data: Residuals from ARIMA(1,0,2) with zero mean
Q* = 5.3129, df = 7, p-value = 0.6218
Model df: 3. Total lags used: 10
ar_fc <- forecast(md_ar, h = 100)plot_forecast(ar_fc,
title = "Forecast AR(2) Model",
Ytitle = "Value",
Xtitle = "Year")data(Coffee_Prices)
robusta_price <- window(Coffee_Prices[,1], start = c(2000, 1))ts_plot(robusta_price,
title = "The Robusta Coffee Monthly Prices",
Ytitle = "Price in USD",
Xtitle = "Year") acf(robusta_price)
robusta_price_d1 <- diff(robusta_price)par(mfrow=c(1,2))
acf(robusta_price_d1)
pacf(robusta_price_d1)
robusta_md <- arima(robusta_price, order = c(1, 1, 0))
robusta_md
Call:
arima(x = robusta_price, order = c(1, 1, 0))
Coefficients:
ar1
0.2780
s.e. 0.0647
sigma^2 estimated as 0.007142: log likelihood = 231.38, aic = -458.76
summary(robusta_md)
Call:
arima(x = robusta_price, order = c(1, 1, 0))
Coefficients:
ar1
0.2780
s.e. 0.0647
sigma^2 estimated as 0.007142: log likelihood = 231.38, aic = -458.76
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set 0.002595604 0.08432096 0.06494772 0.08104715 4.254984 1.001542
ACF1
Training set 0.001526295
checkresiduals(robusta_md)
Ljung-Box test
data: Residuals from ARIMA(1,1,0)
Q* = 26.896, df = 23, p-value = 0.2604
Model df: 1. Total lags used: 24
data(USgas)ts_plot(USgas,
title = "US Monthly Natural Gas consumption",
Ytitle = "Billion Cubic Feet",
Xtitle = "Year")USgas_split <- ts_split(USgas, sample.out = 12)
train <- USgas_split$train
test <- USgas_split$testpar(mfrow=c(1,2))
acf(train, lag.max = 60)
pacf(train, lag.max = 60)
USgas_d12 <- diff(train, 12)
ts_plot(USgas_d12,
title = "US Monthly Natural Gas consumption - First Seasonal Difference",
Ytitle = "Billion Cubic Feet (First Difference)",
Xtitle = "Year")USgas_d12_1 <- diff(diff(USgas_d12, 1))
ts_plot(USgas_d12_1,
title = "US Monthly Natural Gas consumption - First Seasonal and Non-Seasonal Differencing",
Ytitle = "Billion Cubic Feet (Difference)",
Xtitle = "Year")par(mfrow=c(1,2))
acf(USgas_d12_1, lag.max = 60)
pacf(USgas_d12_1, lag.max = 60)
p <- q <- P <- Q <- 0:2arima_grid <- expand.grid(p,q,P,Q)
names(arima_grid) <- c("p", "q", "P", "Q")
arima_grid$d <- 1
arima_grid$D <- 1arima_grid$k <- rowSums(arima_grid)library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
arima_grid <- arima_grid %>% filter(k <= 7)arima_search <- lapply(1:nrow(arima_grid), function(i){
md <- NULL
# md <- arima(train, order = c(arima_grid$p[i], 1, arima_grid$q[i]),
# seasonal = list(order = c(arima_grid$P[i], 1, arima_grid$Q[i])))
# results <- data.frame(p = arima_grid$p[i], d = 1, q = arima_grid$q[i],
# P = arima_grid$P[i], D = 1, Q = arima_grid$Q[i],
# AIC = md$aic)
})
# %>% bind_rows() %>% arrange(AIC)arima_search <- data.frame(p=rep(0,81), d=rep(0,81), q=rep(0,81))
head(arima_search) p d q
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
5 0 0 0
6 0 0 0
USgas_best_md <- arima(train, order = c(1,1,1), seasonal = list(order = c(2,1,1)))
USgas_best_md
Call:
arima(x = train, order = c(1, 1, 1), seasonal = list(order = c(2, 1, 1)))
Coefficients:
ar1 ma1 sar1 sar2 sma1
0.4247 -0.9180 0.0132 -0.2639 -0.7449
s.e. 0.0770 0.0376 0.0894 0.0834 0.0753
sigma^2 estimated as 10160: log likelihood = -1292.96, aic = 2597.91
library(forecast)
USgas_test_fc <- forecast(USgas_best_md, h = 12)accuracy(USgas_test_fc, test) ME RMSE MAE MPE MAPE MASE
Training set 6.081099 97.85701 73.36854 0.1298714 3.517097 0.6371821
Test set 42.211253 104.79281 83.09943 1.4913412 3.314280 0.7216918
ACF1 Theil's U
Training set 0.004565602 NA
Test set -0.049999868 0.3469228
test_forecast(USgas,
forecast.obj = USgas_test_fc,
test = test)final_md <- arima(USgas, order = c(1,1,1), seasonal = list(order = c(2,1,1)))checkresiduals(final_md)
Ljung-Box test
data: Residuals from ARIMA(1,1,1)(2,1,1)[12]
Q* = 30.173, df = 19, p-value = 0.04964
Model df: 5. Total lags used: 24
USgas_fc <- forecast(final_md, h = 12)plot_forecast(USgas_fc,
title = "US Natural Gas Consumption - Forecast",
Ytitle = "Billion Cubic Feet",
Xtitle = "Year")USgas_auto_md1 <- auto.arima(train)
USgas_auto_md1Series: train
ARIMA(2,1,1)(2,1,1)[12]
Coefficients:
ar1 ar2 ma1 sar1 sar2 sma1
0.4301 -0.0372 -0.9098 0.0117 -0.2673 -0.7431
s.e. 0.0794 0.0741 0.0452 0.0887 0.0830 0.0751
sigma^2 = 10446: log likelihood = -1292.83
AIC=2599.67 AICc=2600.22 BIC=2623.2
USgas_auto_md2 <- auto.arima(train,
max.order = 5,
D = 1,
d = 1,
ic = "aic",
stepwise = FALSE,
approximation = FALSE)
USgas_auto_md2Series: train
ARIMA(1,1,1)(2,1,1)[12]
Coefficients:
ar1 ma1 sar1 sar2 sma1
0.4247 -0.9180 0.0132 -0.2639 -0.7449
s.e. 0.0770 0.0376 0.0894 0.0834 0.0753
sigma^2 = 10405: log likelihood = -1292.96
AIC=2597.91 AICc=2598.32 BIC=2618.08
df <- ts_to_prophet(AirPassengers) %>% setNames(c("date", "y"))
df$lag12 <- dplyr::lag(df$y, n = 12)
library(lubridate)
Attaching package: 'lubridate'
The following objects are masked from 'package:base':
date, intersect, setdiff, union
df$month <- factor(month(df$date, label = TRUE), ordered = FALSE)
df$trend <- 1:nrow(df)par <- ts_split(ts.obj = AirPassengers, sample.out = 12)
train <- par$train
test <- par$testtrain_df <- df[1:(nrow(df) - 12), ]
test_df <- df[(nrow(df) - 12 + 1):nrow(df), ]md1 <- tslm(train ~ season + trend + lag12, data = train_df)checkresiduals(md1)
Breusch-Godfrey test for serial correlation of order up to 24
data: Residuals from Linear regression model
LM test = 83.147, df = 24, p-value = 1.903e-08
md2 <- auto.arima(train,
xreg = cbind(model.matrix(~ month,train_df)[,-1],
train_df$trend,
train_df$lag12),
seasonal = TRUE,
stepwise = FALSE,
approximation = FALSE) summary(md2)Series: train
Regression with ARIMA(2,0,0)(2,0,0)[12] errors
Coefficients:
ar1 ar2 sar1 sar2 monthfeb monthmar monthabr monthmay
0.5849 0.3056 -0.4421 -0.2063 -2.7523 0.8231 0.2066 2.8279
s.e. 0.0897 0.0903 0.1050 0.1060 1.8417 2.3248 2.4162 2.5560
monthjun monthjul monthago monthsept monthoct monthnov monthdic
6.6057 11.2337 12.1909 3.8269 0.6350 -2.2723 -0.9918
s.e. 3.2916 4.2324 4.1198 2.9243 2.3405 2.4211 1.9172
0.2726 1.0244
s.e. 0.1367 0.0426
sigma^2 = 72.82: log likelihood = -426.93
AIC=889.86 AICc=896.63 BIC=940.04
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set 0.4117147 8.353629 6.397538 0.2571543 2.549682 0.2100998
ACF1
Training set 0.005966715
checkresiduals(md2)
Ljung-Box test
data: Residuals from Regression with ARIMA(2,0,0)(2,0,0)[12] errors
Q* = 25.816, df = 20, p-value = 0.172
Model df: 4. Total lags used: 24
fc1 <- forecast(md1, newdata = test_df)
fc2 <- forecast(md2, xreg = cbind(model.matrix(~ month,test_df)[,-1],
test_df$trend,
test_df$lag12))x <- fc2
accuracy(fc1, test) ME RMSE MAE MPE MAPE MASE
Training set -1.658222e-15 13.99206 11.47136 -0.1200023 4.472154 0.3541758
Test set 1.079611e+01 20.82782 18.57391 1.9530865 3.828115 0.5734654
ACF1 Theil's U
Training set 0.7502578 NA
Test set 0.1653119 0.4052175
accuracy(fc2, test) ME RMSE MAE MPE MAPE MASE
Training set 0.4117147 8.353629 6.397538 0.2571543 2.549682 0.2100998
Test set -11.2123707 17.928195 13.353910 -2.4898843 2.924174 0.4385521
ACF1 Theil's U
Training set 0.005966715 NA
Test set -0.325044930 0.3923795