Library
library(forecast)
library(tseries)
library(gridExtra)
library(tidyverse)
library(lmtest)
library(BETS)
library(Metrics)
Generate series
set.seed(123)
sim_data <- arima.sim(n = 1000,
model = list(order = c(4, 1, 0),
ar = c(0.6, -0.3, 0.25, -0.5)),
mean = 0.05)
autoplot(sim_data, main = "Simulated ARIMA(4,1,0)", ylab = "")

sim_train <- sim_data[1:980]
sim_test <- sim_data[-(1:980)]
Model Identification
ACF & PACF
p1 <- ggAcf(sim_train) + ggtitle(label = "")
p2 <- ggPacf(sim_train) + ggtitle(label = "")
grid.arrange(p1, p2, nrow = 1, top = "ACF & PACF of original series")

Stationarity Test
adf.test(sim_train)
Augmented Dickey-Fuller Test
data: sim_train
Dickey-Fuller = -2.3042, Lag order = 9, p-value = 0.4495
alternative hypothesis: stationary
pp.test(sim_train)
Phillips-Perron Unit Root Test
data: sim_train
Dickey-Fuller Z(alpha) = -20.781, Truncation lag parameter = 7, p-value = 0.0606
alternative hypothesis: stationary
kpss.test(sim_train)
KPSS Test for Level Stationarity
data: sim_train
KPSS Level = 10.99, Truncation lag parameter = 7, p-value = 0.01
Data Differencing
sim_train_diff <- diff(sim_train, lag = 1)
autoplot(as.ts(sim_train_diff), ylab="", main = "Differenced Series")

p1 <- ggAcf(sim_train_diff) + ggtitle(label = "")
p2 <- ggPacf(sim_train_diff) + ggtitle(label = "")
grid.arrange(p1, p2, nrow = 1, top = "ACF & PACF of differenced series")

Modelling
Parameter Estimation
m1 <- Arima(y = sim_train, order = c(4, 1, 0), include.drift = TRUE)
summary(m1)
Series: sim_train
ARIMA(4,1,0) with drift
Coefficients:
ar1 ar2 ar3 ar4 drift
0.5937 -0.3323 0.2513 -0.5020 0.0740
s.e. 0.0276 0.0326 0.0326 0.0276 0.0324
sigma^2 = 1.011: log likelihood = -1392.86
AIC=2797.72 AICc=2797.81 BIC=2827.04
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set -0.0006959247 1.002467 0.7929702 -Inf Inf 0.7028372 -0.0151849
coeftest(m1)
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
ar1 0.593682 0.027604 21.5068 < 2.2e-16 ***
ar2 -0.332304 0.032604 -10.1922 < 2.2e-16 ***
ar3 0.251293 0.032558 7.7182 1.18e-14 ***
ar4 -0.501984 0.027623 -18.1725 < 2.2e-16 ***
drift 0.073972 0.032446 2.2798 0.02262 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
t_test(m1)
Residual Diagnostics
checkresiduals(m1)
Ljung-Box test
data: Residuals from ARIMA(4,1,0) with drift
Q* = 3.3169, df = 6, p-value = 0.7681
Model df: 4. Total lags used: 10

shapiro.test(m1$residuals)
Shapiro-Wilk normality test
data: m1$residuals
W = 0.99803, p-value = 0.3127
Forecasting
m1_forecast <- m1 %>% forecast(h = 21)
m1_forecast
forecast_ts <- ts(data.frame(forecast = m1_forecast$mean, actual = sim_test))
autoplot(forecast_ts, ylab = "", main = "Testing Data Forecast")

rmse(actual = sim_test, predicted = m1_forecast$mean)
[1] 1.735212
mape(actual = sim_test, predicted = m1_forecast$mean)
[1] 0.02069097
smape(actual = sim_test, predicted = m1_forecast$mean)
[1] 0.02064002
LS0tCnRpdGxlOiAiQVJJTUEgTW9kZWwgU2ltdWxhdGlvbiIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDoKICAgICAgdG9jX2NvbGxhcHNlZDogdHJ1ZQotLS0KCiMjIExpYnJhcnkKYGBge3J9CmxpYnJhcnkoZm9yZWNhc3QpCmxpYnJhcnkodHNlcmllcykKbGlicmFyeShncmlkRXh0cmEpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGxtdGVzdCkKbGlicmFyeShCRVRTKQpsaWJyYXJ5KE1ldHJpY3MpCmBgYAoKIyMgR2VuZXJhdGUgc2VyaWVzCmBgYHtyfQpzZXQuc2VlZCgxMjMpCnNpbV9kYXRhIDwtIGFyaW1hLnNpbShuID0gMTAwMCwgCiAgICAgICAgICAgICAgICAgICAgIG1vZGVsID0gbGlzdChvcmRlciA9IGMoNCwgMSwgMCksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYXIgPSBjKDAuNiwgLTAuMywgMC4yNSwgLTAuNSkpLAogICAgICAgICAgICAgICAgICAgICBtZWFuID0gMC4wNSkKCmF1dG9wbG90KHNpbV9kYXRhLCBtYWluID0gIlNpbXVsYXRlZCBBUklNQSg0LDEsMCkiLCB5bGFiID0gIiIpCmBgYAoKYGBge3J9CnNpbV90cmFpbiA8LSBzaW1fZGF0YVsxOjk4MF0Kc2ltX3Rlc3QgPC0gc2ltX2RhdGFbLSgxOjk4MCldCmBgYAoKCiMjIE1vZGVsIElkZW50aWZpY2F0aW9uCgojIyMgQUNGICYgUEFDRgoKYGBge3J9CnAxIDwtIGdnQWNmKHNpbV90cmFpbikgKyBnZ3RpdGxlKGxhYmVsID0gIiIpCnAyIDwtIGdnUGFjZihzaW1fdHJhaW4pICsgZ2d0aXRsZShsYWJlbCA9ICIiKQpncmlkLmFycmFuZ2UocDEsIHAyLCBucm93ID0gMSwgdG9wID0gIkFDRiAmIFBBQ0Ygb2Ygb3JpZ2luYWwgc2VyaWVzIikKYGBgCgojIyMgU3RhdGlvbmFyaXR5IFRlc3QKCmBgYHtyfQphZGYudGVzdChzaW1fdHJhaW4pCmBgYAoKYGBge3J9CnBwLnRlc3Qoc2ltX3RyYWluKQpgYGAKCmBgYHtyfQprcHNzLnRlc3Qoc2ltX3RyYWluKQpgYGAKIyMjIERhdGEgRGlmZmVyZW5jaW5nCgpgYGB7cn0Kc2ltX3RyYWluX2RpZmYgPC0gZGlmZihzaW1fdHJhaW4sIGxhZyA9IDEpCmF1dG9wbG90KGFzLnRzKHNpbV90cmFpbl9kaWZmKSwgeWxhYj0iIiwgbWFpbiA9ICJEaWZmZXJlbmNlZCBTZXJpZXMiKQpgYGAKCgpgYGB7cn0KcDEgPC0gZ2dBY2Yoc2ltX3RyYWluX2RpZmYpICsgZ2d0aXRsZShsYWJlbCA9ICIiKQpwMiA8LSBnZ1BhY2Yoc2ltX3RyYWluX2RpZmYpICsgZ2d0aXRsZShsYWJlbCA9ICIiKQpncmlkLmFycmFuZ2UocDEsIHAyLCBucm93ID0gMSwgdG9wID0gIkFDRiAmIFBBQ0Ygb2YgZGlmZmVyZW5jZWQgc2VyaWVzIikKYGBgCgojIyBNb2RlbGxpbmcKCiMjIyBQYXJhbWV0ZXIgRXN0aW1hdGlvbgoKYGBge3J9Cm0xIDwtIEFyaW1hKHkgPSBzaW1fdHJhaW4sIG9yZGVyID0gYyg0LCAxLCAwKSwgaW5jbHVkZS5kcmlmdCA9IFRSVUUpCnN1bW1hcnkobTEpCmBgYAoKYGBge3J9CmNvZWZ0ZXN0KG0xKQpgYGAKCmBgYHtyfQp0X3Rlc3QobTEpCmBgYAoKIyMjIFJlc2lkdWFsIERpYWdub3N0aWNzCgpgYGB7cn0KY2hlY2tyZXNpZHVhbHMobTEpCmBgYAoKYGBge3J9CnNoYXBpcm8udGVzdChtMSRyZXNpZHVhbHMpCmBgYAojIyBGb3JlY2FzdGluZwoKYGBge3J9Cm0xX2ZvcmVjYXN0IDwtIG0xICU+JSBmb3JlY2FzdChoID0gMjEpIAptMV9mb3JlY2FzdApgYGAKCmBgYHtyfQpmb3JlY2FzdF90cyA8LSB0cyhkYXRhLmZyYW1lKGZvcmVjYXN0ID0gbTFfZm9yZWNhc3QkbWVhbiwgYWN0dWFsID0gc2ltX3Rlc3QpKQphdXRvcGxvdChmb3JlY2FzdF90cywgeWxhYiA9ICIiLCBtYWluID0gIlRlc3RpbmcgRGF0YSBGb3JlY2FzdCIpCmBgYAoKYGBge3J9CnJtc2UoYWN0dWFsID0gc2ltX3Rlc3QsIHByZWRpY3RlZCA9IG0xX2ZvcmVjYXN0JG1lYW4pCmBgYAoKYGBge3J9Cm1hcGUoYWN0dWFsID0gc2ltX3Rlc3QsIHByZWRpY3RlZCA9IG0xX2ZvcmVjYXN0JG1lYW4pCmBgYAoKYGBge3J9CnNtYXBlKGFjdHVhbCA9IHNpbV90ZXN0LCBwcmVkaWN0ZWQgPSBtMV9mb3JlY2FzdCRtZWFuKQpgYGAKCg==