knitr::opts_chunk$set(echo = TRUE)
library(TSstudio)
## Warning: package 'TSstudio' was built under R version 4.2.3
set.seed(12345)
stationary_ts <- arima.sim(model = list(order = c(1,0,0),ar = 0.5 ),n = 500)
ts_plot(stationary_ts,
title = "Stationary Time Series",
Ytitle = "Value",
Xtitle = "Index")
set.seed(12345)
non_stationary_ts <- arima.sim(model = list(order = c(1,1,0),ar = 0.3 ),n = 500)
ts_plot(non_stationary_ts,
title = "Non-Stationary Time Series",
Ytitle = "Value",
Xtitle = "Index")
data(AirPassengers)
AirPassengers
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140
## 1951 145 150 178 163 172 178 199 199 184 162 146 166
## 1952 171 180 193 181 183 218 230 242 209 191 172 194
## 1953 196 196 236 235 229 243 264 272 237 211 180 201
## 1954 204 188 235 227 234 264 302 293 259 229 203 229
## 1955 242 233 267 269 270 315 364 347 312 274 237 278
## 1956 284 277 317 313 318 374 413 405 355 306 271 306
## 1957 315 301 356 348 355 422 465 467 404 347 305 336
## 1958 340 318 362 348 363 435 491 505 404 359 310 337
## 1959 360 342 406 396 420 472 548 559 463 407 362 405
## 1960 417 391 419 461 472 535 622 606 508 461 390 432
data(AirPassengers)
ts_plot(AirPassengers,
title = "Monthly Airline Passenger Numbers 1949-1960",
Ytitle = "Thousands of Passengers",
Xtitle = "Year")
ts_plot(diff(AirPassengers, lag = 1),
title = "AirPassengers Series - First Differencing",
Xtitle = "Year",
Ytitle = "Differencing of Thousands of Passengers")
ts_plot(diff(diff(AirPassengers, lag = 1), 12),
title = "AirPassengers Series - First and Seasonal Differencing",
Xtitle = "Year",
Ytitle = "Differencing of Thousands of Passengers")
ts_plot(diff(log(AirPassengers), lag = 1),
title = "AirPassengers Series - First Differencing with Log Transformation",
Xtitle = "Year",
Ytitle = "Differencing/Log of Thousands of Passengers")
set.seed(12345)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
p1 <- plot_ly()
p2 <- plot_ly()
for(i in 1:20){
rw <- NULL
rw <- arima.sim(model = list(order = c(0, 1, 0)), n = 500)
p1 <- p1 %>% add_lines(x = time(rw), y = as.numeric(rw))
p2 <- p2 %>% add_lines(x = time(diff(rw)), y = as.numeric(diff(rw)))
}
p1 %>% layout(title = "Simulate Random Walk",
yaxis = list(title = "Value"),
xaxis = list(title = "Index")) %>%
hide_legend()
p2 %>% layout(title = "Simulate Random Walk with First-Order Differencing",
yaxis = list(title = "Value"),
xaxis = list(title = "Index")) %>%
hide_legend()
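# Sketch: the same point can be seen through the ACF of a single simulated
# random walk -- it decays very slowly, while the ACF of its first difference
# looks like white noise.
set.seed(12345)
rw <- arima.sim(model = list(order = c(0, 1, 0)), n = 500)
par(mfrow = c(1, 2))
acf(rw)
acf(diff(rw))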
set.seed(12345)
ar2 <- arima.sim(model = list(order = c(2,0,0),
ar = c(0.9, -0.3)),
n = 500)
ts_plot(ar2,
title = "Simulate AR(2) Series",
Ytitle = "Value",
Xtitle = "Index")
##
## Call:
## ar(x = ar2)
##
## Coefficients:
## 1 2
## 0.8851 -0.2900
##
## Order selected 2 sigma^2 estimated as 1.049
par(mfrow=c(1,2))
acf(ar2)
pacf(ar2)

set.seed(12345)
ma2 <- arima.sim(model = list(order = c(0, 0, 2),
ma = c(0.5, -0.3)),
n = 500)
ts_plot(ma2,
title = "Simulate MA(2) Series",
Ytitle = "Value",
Xtitle = "Index")
md_ma <- arima(ma2, order = c(0,0,2), method = "ML")
md_ma
##
## Call:
## arima(x = ma2, order = c(0, 0, 2), method = "ML")
##
## Coefficients:
## ma1 ma2 intercept
## 0.530 -0.3454 0.0875
## s.e. 0.041 0.0406 0.0525
##
## sigma^2 estimated as 0.9829: log likelihood = -705.81, aic = 1419.62
par(mfrow=c(1,2))
acf(ma2)
pacf(ma2)

set.seed(12345)
arma <- arima.sim(model = list(order = c(1,0,2), ar = c(0.7), ma = c(0.5, -0.3)), n = 500)
ts_plot(arma,
title = "Simulate ARMA(1,2) Series",
Ytitle = "Value",
Xtitle = "Index")
arma_md <- arima(arma, order = c(1,0,2))
arma_md
##
## Call:
## arima(x = arma, order = c(1, 0, 2))
##
## Coefficients:
## ar1 ma1 ma2 intercept
## 0.7439 0.4785 -0.3954 0.2853
## s.e. 0.0657 0.0878 0.0849 0.1891
##
## sigma^2 estimated as 1.01: log likelihood = -713.18, aic = 1436.36
par(mfrow=c(1,2))
acf(arma)
pacf(arma)

arima(arma, order = c(1, 0, 1), include.mean = FALSE)
##
## Call:
## arima(x = arma, order = c(1, 0, 1), include.mean = FALSE)
##
## Coefficients:
## ar1 ma1
## 0.4144 0.8864
## s.e. 0.0432 0.0248
##
## sigma^2 estimated as 1.051: log likelihood = -723, aic = 1452
arima(arma, order = c(2, 0, 1), include.mean = FALSE)
##
## Call:
## arima(x = arma, order = c(2, 0, 1), include.mean = FALSE)
##
## Coefficients:
## ar1 ar2 ma1
## 0.3136 0.1918 0.9227
## s.e. 0.0486 0.0484 0.0183
##
## sigma^2 estimated as 1.019: log likelihood = -715.26, aic = 1438.52
arima(arma, order = c(1, 0, 2), include.mean = FALSE)
##
## Call:
## arima(x = arma, order = c(1, 0, 2), include.mean = FALSE)
##
## Coefficients:
## ar1 ma1 ma2
## 0.7602 0.4654 -0.4079
## s.e. 0.0626 0.0858 0.0832
##
## sigma^2 estimated as 1.014: log likelihood = -714.27, aic = 1436.54
arima(arma, order = c(2, 0, 2), include.mean = FALSE)
##
## Call:
## arima(x = arma, order = c(2, 0, 2), include.mean = FALSE)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## 0.7239 0.0194 0.4997 -0.3783
## s.e. 0.2458 0.1257 0.2427 0.2134
##
## sigma^2 estimated as 1.014: log likelihood = -714.26, aic = 1438.51
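# Sketch: the AIC values of the candidate models above can also be collected
# programmatically instead of reading them off one by one.
candidate_orders <- list(c(1, 0, 1), c(2, 0, 1), c(1, 0, 2), c(2, 0, 2))
sapply(candidate_orders, function(o) {
  arima(arma, order = o, include.mean = FALSE)$aic
})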
library(forecast)
## Warning: package 'forecast' was built under R version 4.2.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
best_arma <- arima(arma, order = c(1, 0, 2), include.mean = FALSE)
checkresiduals(best_arma)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,2) with zero mean
## Q* = 5.3129, df = 7, p-value = 0.6218
##
## Model df: 3. Total lags used: 10
ar_fc <- forecast(md_ar, h = 100)
plot_forecast(ar_fc,
title = "Forecast AR(2) Model",
Ytitle = "Value",
Xtitle = "Year")
data(Coffee_Prices)
robusta_price <- window(Coffee_Prices[,1], start = c(2000, 1))
ts_plot(robusta_price,
title = "The Robusta Coffee Monthly Prices",
Ytitle = "Price in USD",
Xtitle = "Year")

robusta_price_d1 <- diff(robusta_price)
par(mfrow=c(1,2))
acf(robusta_price_d1)
pacf(robusta_price_d1)
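# Sketch: forecast::ndiffs() estimates the required order of differencing with
# a unit-root test; it should agree with the single difference applied above.
forecast::ndiffs(robusta_price)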

robusta_md <- arima(robusta_price, order = c(1, 1, 0))
robusta_md
##
## Call:
## arima(x = robusta_price, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## 0.2780
## s.e. 0.0647
##
## sigma^2 estimated as 0.007142: log likelihood = 231.38, aic = -458.76
summary(robusta_md)
##
## Call:
## arima(x = robusta_price, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## 0.2780
## s.e. 0.0647
##
## sigma^2 estimated as 0.007142: log likelihood = 231.38, aic = -458.76
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.002595604 0.08432096 0.06494772 0.08104715 4.254984 1.001542
## ACF1
## Training set 0.001526295
checkresiduals(robusta_md)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)
## Q* = 26.896, df = 23, p-value = 0.2604
##
## Model df: 1. Total lags used: 24
ts_plot(USgas,
title = "US Monthly Natural Gas consumption",
Ytitle = "Billion Cubic Feet",
Xtitle = "Year")
USgas_split <- ts_split(USgas, sample.out = 12)
train <- USgas_split$train
test <- USgas_split$test
par(mfrow=c(1,2))
acf(train, lag.max = 60)
pacf(train, lag.max = 60)

USgas_d12 <- diff(train, 12)
ts_plot(USgas_d12,
title = "US Monthly Natural Gas consumption - First Seasonal Difference",
Ytitle = "Billion Cubic Feet (First Difference)",
Xtitle = "Year")
USgas_d12_1 <- diff(diff(USgas_d12, 1))
ts_plot(USgas_d12_1,
title = "US Monthly Natural Gas consumption - First Seasonal and Non-Seasonal Differencing",
Ytitle = "Billion Cubic Feet (Difference)",
Xtitle = "Year")
par(mfrow=c(1,2))
acf(USgas_d12_1, lag.max = 60)
pacf(USgas_d12_1, lag.max = 60)
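# Sketch: the seasonal and non-seasonal differencing orders chosen above can
# be cross-checked with forecast's unit-root heuristics.
forecast::nsdiffs(train)
forecast::ndiffs(diff(train, lag = 12))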

p <- q <- P <- Q <- 0:2
arima_grid <- expand.grid(p, q, P, Q)
names(arima_grid) <- c("p", "q", "P", "Q")
arima_grid$d <- 1
arima_grid$D <- 1
arima_grid$k <- rowSums(arima_grid)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
##     filter, lag
## The following objects are masked from 'package:base':
##
##     intersect, setdiff, setequal, union
arima_grid <- arima_grid %>% filter(k <= 7)
arima_search <- lapply(1:nrow(arima_grid), function(i){
  md <- arima(train, order = c(arima_grid$p[i], 1, arima_grid$q[i]),
              seasonal = list(order = c(arima_grid$P[i], 1, arima_grid$Q[i])))
  data.frame(p = arima_grid$p[i], d = 1, q = arima_grid$q[i],
             P = arima_grid$P[i], D = 1, Q = arima_grid$Q[i],
             AIC = md$aic)
}) %>% bind_rows() %>% arrange(AIC)
head(arima_search)
USgas_best_md <- arima(train, order = c(1,1,1), seasonal = list(order = c(2,1,1)))
USgas_best_md
##
## Call:
## arima(x = train, order = c(1, 1, 1), seasonal = list(order = c(2, 1, 1)))
##
## Coefficients:
## ar1 ma1 sar1 sar2 sma1
## 0.4247 -0.9180 0.0132 -0.2639 -0.7449
## s.e. 0.0770 0.0376 0.0894 0.0834 0.0753
##
## sigma^2 estimated as 10160: log likelihood = -1292.96, aic = 2597.91
library(forecast)
USgas_test_fc <- forecast(USgas_best_md, h = 12)
accuracy(USgas_test_fc, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 6.081099 97.85701 73.36854 0.1298714 3.517097 0.6371821
## Test set 42.211253 104.79281 83.09943 1.4913412 3.314280 0.7216918
## ACF1 Theil's U
## Training set 0.004565602 NA
## Test set -0.049999868 0.3469228
test_forecast(USgas,
forecast.obj = USgas_test_fc,
test = test)
final_md <- arima(USgas, order = c(1,1,1), seasonal = list(order = c(2,1,1)))
checkresiduals(final_md)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(2,1,1)[12]
## Q* = 30.173, df = 19, p-value = 0.04964
##
## Model df: 5. Total lags used: 24
USgas_fc <- forecast(final_md, h = 12)
plot_forecast(USgas_fc,
title = "US Natural Gas Consumption - Forecast",
Ytitle = "Billion Cubic Feet",
Xtitle = "Year")
USgas_auto_md1 <- auto.arima(train)
USgas_auto_md1
## Series: train
## ARIMA(2,1,1)(2,1,1)[12]
##
## Coefficients:
## ar1 ar2 ma1 sar1 sar2 sma1
## 0.4301 -0.0372 -0.9098 0.0117 -0.2673 -0.7431
## s.e. 0.0794 0.0741 0.0452 0.0887 0.0830 0.0751
##
## sigma^2 = 10446: log likelihood = -1292.83
## AIC=2599.67 AICc=2600.22 BIC=2623.2
USgas_auto_md2 <- auto.arima(train,
max.order = 5,
D = 1,
d = 1,
ic = "aic",
stepwise = FALSE,
approximation = FALSE)
USgas_auto_md2
## Series: train
## ARIMA(1,1,1)(2,1,1)[12]
##
## Coefficients:
## ar1 ma1 sar1 sar2 sma1
## 0.4247 -0.9180 0.0132 -0.2639 -0.7449
## s.e. 0.0770 0.0376 0.0894 0.0834 0.0753
##
## sigma^2 = 10405: log likelihood = -1292.96
## AIC=2597.91 AICc=2598.32 BIC=2618.08
df <- ts_to_prophet(AirPassengers) %>% setNames(c("date", "y"))
df$lag12 <- dplyr::lag(df$y, n = 12)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
df$month <- factor(month(df$date, label = TRUE), ordered = FALSE)
df$trend <- 1:nrow(df)
ap_split <- ts_split(ts.obj = AirPassengers, sample.out = 12)
train <- ap_split$train
test <- ap_split$test
train_df <- df[1:(nrow(df) - 12), ]
test_df <- df[(nrow(df) - 12 + 1):nrow(df), ]
md1 <- tslm(train ~ season + trend + lag12, data = train_df)
checkresiduals(md1)

##
## Breusch-Godfrey test for serial correlation of order up to 24
##
## data: Residuals from Linear regression model
## LM test = 83.147, df = 24, p-value = 1.903e-08
md2 <- auto.arima(train,
xreg = cbind(model.matrix(~ month,train_df)[,-1],
train_df$trend,
train_df$lag12),
seasonal = TRUE,
stepwise = FALSE,
approximation = FALSE)
summary(md2)
## Series: train
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 sar1 sar2 monthfeb monthmar monthabr monthmay
## 0.5849 0.3056 -0.4421 -0.2063 -2.7523 0.8231 0.2066 2.8279
## s.e. 0.0897 0.0903 0.1050 0.1060 1.8417 2.3248 2.4162 2.5560
## monthjun monthjul monthago monthsept monthoct monthnov monthdic
## 6.6057 11.2337 12.1909 3.8269 0.6350 -2.2723 -0.9918
## s.e. 3.2916 4.2324 4.1198 2.9243 2.3405 2.4211 1.9172
##
## 0.2726 1.0244
## s.e. 0.1367 0.0426
##
## sigma^2 = 72.82: log likelihood = -426.93
## AIC=889.86 AICc=896.63 BIC=940.04
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4117147 8.353629 6.397538 0.2571543 2.549682 0.2100998
## ACF1
## Training set 0.005966715
checkresiduals(md2)

##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,0,0)(2,0,0)[12] errors
## Q* = 25.816, df = 20, p-value = 0.172
##
## Model df: 4. Total lags used: 24
fc1 <- forecast(md1, newdata = test_df)
fc2 <- forecast(md2, xreg = cbind(model.matrix(~ month,test_df)[,-1],
test_df$trend,
test_df$lag12))
accuracy(fc1, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.658222e-15 13.99206 11.47136 -0.1200023 4.472154 0.3541758
## Test set 1.079611e+01 20.82782 18.57391 1.9530865 3.828115 0.5734654
## ACF1 Theil's U
## Training set 0.7502578 NA
## Test set 0.1653119 0.4052175
accuracy(fc2, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4117147 8.353629 6.397538 0.2571543 2.549682 0.2100998
## Test set -11.2123707 17.928195 13.353910 -2.4898843 2.924174 0.4385521
## ACF1 Theil's U
## Training set 0.005966715 NA
## Test set -0.325044930 0.3923795
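# Sketch: mirroring the USgas example above, the regression-with-ARIMA-errors
# forecast can also be compared visually against the actual series.
test_forecast(AirPassengers,
              forecast.obj = fc2,
              test = test)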