Library

library(forecast)
library(tseries)
library(gridExtra)
library(tidyverse)
library(lmtest)
library(BETS)
library(Metrics)

Generate series

set.seed(123)
sim_data <- arima.sim(n = 1000, 
                     model = list(order = c(4, 1, 0), 
                                  ar = c(0.6, -0.3, 0.25, -0.5)),
                     mean = 0.05)

autoplot(sim_data, main = "Simulated ARIMA(4,1,0)", ylab = "")

sim_train <- sim_data[1:980]
sim_test <- sim_data[-(1:980)]

Model Identification

ACF & PACF

p1 <- ggAcf(sim_train) + ggtitle(label = "")
p2 <- ggPacf(sim_train) + ggtitle(label = "")
grid.arrange(p1, p2, nrow = 1, top = "ACF & PACF of original series")

Stationarity Test

adf.test(sim_train)

    Augmented Dickey-Fuller Test

data:  sim_train
Dickey-Fuller = -2.3042, Lag order = 9, p-value = 0.4495
alternative hypothesis: stationary
pp.test(sim_train)

    Phillips-Perron Unit Root Test

data:  sim_train
Dickey-Fuller Z(alpha) = -20.781, Truncation lag parameter = 7, p-value = 0.0606
alternative hypothesis: stationary
kpss.test(sim_train)

    KPSS Test for Level Stationarity

data:  sim_train
KPSS Level = 10.99, Truncation lag parameter = 7, p-value = 0.01

Data Differencing

sim_train_diff <- diff(sim_train, lag = 1)
autoplot(as.ts(sim_train_diff), ylab="", main = "Differenced Series")

p1 <- ggAcf(sim_train_diff) + ggtitle(label = "")
p2 <- ggPacf(sim_train_diff) + ggtitle(label = "")
grid.arrange(p1, p2, nrow = 1, top = "ACF & PACF of differenced series")

Modelling

Parameter Estimation

m1 <- Arima(y = sim_train, order = c(4, 1, 0), include.drift = TRUE)
summary(m1)
Series: sim_train 
ARIMA(4,1,0) with drift 

Coefficients:
         ar1      ar2     ar3      ar4   drift
      0.5937  -0.3323  0.2513  -0.5020  0.0740
s.e.  0.0276   0.0326  0.0326   0.0276  0.0324

sigma^2 = 1.011:  log likelihood = -1392.86
AIC=2797.72   AICc=2797.81   BIC=2827.04

Training set error measures:
                        ME     RMSE       MAE  MPE MAPE      MASE       ACF1
Training set -0.0006959247 1.002467 0.7929702 -Inf  Inf 0.7028372 -0.0151849
coeftest(m1)

z test of coefficients:

       Estimate Std. Error  z value  Pr(>|z|)    
ar1    0.593682   0.027604  21.5068 < 2.2e-16 ***
ar2   -0.332304   0.032604 -10.1922 < 2.2e-16 ***
ar3    0.251293   0.032558   7.7182  1.18e-14 ***
ar4   -0.501984   0.027623 -18.1725 < 2.2e-16 ***
drift  0.073972   0.032446   2.2798   0.02262 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
t_test(m1)

Residual Diagnostics

checkresiduals(m1)

    Ljung-Box test

data:  Residuals from ARIMA(4,1,0) with drift
Q* = 3.3169, df = 6, p-value = 0.7681

Model df: 4.   Total lags used: 10

shapiro.test(m1$residuals)

    Shapiro-Wilk normality test

data:  m1$residuals
W = 0.99803, p-value = 0.3127

Forecasting

m1_forecast <- m1 %>% forecast(h = 21) 
m1_forecast
forecast_ts <- ts(data.frame(forecast = m1_forecast$mean, actual = sim_test))
autoplot(forecast_ts, ylab = "", main = "Testing Data Forecast")

rmse(actual = sim_test, predicted = m1_forecast$mean)
[1] 1.735212
mape(actual = sim_test, predicted = m1_forecast$mean)
[1] 0.02069097
smape(actual = sim_test, predicted = m1_forecast$mean)
[1] 0.02064002
LS0tCnRpdGxlOiAiQVJJTUEgTW9kZWwgU2ltdWxhdGlvbiIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDoKICAgICAgdG9jX2NvbGxhcHNlZDogdHJ1ZQotLS0KCiMjIExpYnJhcnkKYGBge3J9CmxpYnJhcnkoZm9yZWNhc3QpCmxpYnJhcnkodHNlcmllcykKbGlicmFyeShncmlkRXh0cmEpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGxtdGVzdCkKbGlicmFyeShCRVRTKQpsaWJyYXJ5KE1ldHJpY3MpCmBgYAoKIyMgR2VuZXJhdGUgc2VyaWVzCmBgYHtyfQpzZXQuc2VlZCgxMjMpCnNpbV9kYXRhIDwtIGFyaW1hLnNpbShuID0gMTAwMCwgCiAgICAgICAgICAgICAgICAgICAgIG1vZGVsID0gbGlzdChvcmRlciA9IGMoNCwgMSwgMCksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYXIgPSBjKDAuNiwgLTAuMywgMC4yNSwgLTAuNSkpLAogICAgICAgICAgICAgICAgICAgICBtZWFuID0gMC4wNSkKCmF1dG9wbG90KHNpbV9kYXRhLCBtYWluID0gIlNpbXVsYXRlZCBBUklNQSg0LDEsMCkiLCB5bGFiID0gIiIpCmBgYAoKYGBge3J9CnNpbV90cmFpbiA8LSBzaW1fZGF0YVsxOjk4MF0Kc2ltX3Rlc3QgPC0gc2ltX2RhdGFbLSgxOjk4MCldCmBgYAoKCiMjIE1vZGVsIElkZW50aWZpY2F0aW9uCgojIyMgQUNGICYgUEFDRgoKYGBge3J9CnAxIDwtIGdnQWNmKHNpbV90cmFpbikgKyBnZ3RpdGxlKGxhYmVsID0gIiIpCnAyIDwtIGdnUGFjZihzaW1fdHJhaW4pICsgZ2d0aXRsZShsYWJlbCA9ICIiKQpncmlkLmFycmFuZ2UocDEsIHAyLCBucm93ID0gMSwgdG9wID0gIkFDRiAmIFBBQ0Ygb2Ygb3JpZ2luYWwgc2VyaWVzIikKYGBgCgojIyMgU3RhdGlvbmFyaXR5IFRlc3QKCmBgYHtyfQphZGYudGVzdChzaW1fdHJhaW4pCmBgYAoKYGBge3J9CnBwLnRlc3Qoc2ltX3RyYWluKQpgYGAKCmBgYHtyfQprcHNzLnRlc3Qoc2ltX3RyYWluKQpgYGAKIyMjIERhdGEgRGlmZmVyZW5jaW5nCgpgYGB7cn0Kc2ltX3RyYWluX2RpZmYgPC0gZGlmZihzaW1fdHJhaW4sIGxhZyA9IDEpCmF1dG9wbG90KGFzLnRzKHNpbV90cmFpbl9kaWZmKSwgeWxhYj0iIiwgbWFpbiA9ICJEaWZmZXJlbmNlZCBTZXJpZXMiKQpgYGAKCgpgYGB7cn0KcDEgPC0gZ2dBY2Yoc2ltX3RyYWluX2RpZmYpICsgZ2d0aXRsZShsYWJlbCA9ICIiKQpwMiA8LSBnZ1BhY2Yoc2ltX3RyYWluX2RpZmYpICsgZ2d0aXRsZShsYWJlbCA9ICIiKQpncmlkLmFycmFuZ2UocDEsIHAyLCBucm93ID0gMSwgdG9wID0gIkFDRiAmIFBBQ0Ygb2YgZGlmZmVyZW5jZWQgc2VyaWVzIikKYGBgCgojIyBNb2RlbGxpbmcKCiMjIyBQYXJhbWV0ZXIgRXN0aW1hdGlvbgoKYGBge3J9Cm0xIDwtIEFyaW1hKHkgPSBzaW1fdHJhaW4sIG9yZGVyID0gYyg0LCAxLCAwKSwgaW5jbHVkZS5kcmlmdCA9IFRSVUUpCnN1bW1hcnkobTEpCmBgYAoKYGBge3J9CmNvZWZ0ZXN0KG0xKQpgYGAKCmBgYHtyfQp0X3Rlc3QobTEpCmBgYAoKIyMjIFJlc2lkdWFsIERpYWdub3N0aWNzCgpgYGB7cn0KY2hlY2tyZXNpZHVhbHMobTEpCmBgYAoKYGBge3J9CnNoYXBpcm8udGVzdChtMSRyZXNpZHVhbHMpCmBgYAojIyBGb3JlY2FzdGluZwoKYGBge3J9Cm0xX2ZvcmVjYXN0IDwtIG0xICU+JSBmb3JlY2FzdChoID0gMjEpIAptMV9mb3JlY2FzdApgYGAKCmBgYHtyfQpmb3JlY2FzdF90cyA8LSB0cyhkYXRhLmZyYW1lKGZvcmVjYXN0ID0gbTFfZm9yZWNhc3QkbWVhbiwgYWN0dWFsID0gc2ltX3Rlc3QpKQphdXRvcGxvdChmb3JlY2FzdF90cywgeWxhYiA9ICIiLCBtYWluID0gIlRlc3RpbmcgRGF0YSBGb3JlY2FzdCIpCmBgYAoKYGBge3J9CnJtc2UoYWN0dWFsID0gc2ltX3Rlc3QsIHByZWRpY3RlZCA9IG0xX2ZvcmVjYXN0JG1lYW4pCmBgYAoKYGBge3J9Cm1hcGUoYWN0dWFsID0gc2ltX3Rlc3QsIHByZWRpY3RlZCA9IG0xX2ZvcmVjYXN0JG1lYW4pCmBgYAoKYGBge3J9CnNtYXBlKGFjdHVhbCA9IHNpbV90ZXN0LCBwcmVkaWN0ZWQgPSBtMV9mb3JlY2FzdCRtZWFuKQpgYGAKCg==