############################################################
# TOPIK DALAM STATISTIKA 1 - PEMODELAN ARIMA
###########################################################
# =========================================================
# IMPORT LIBRARY
# =========================================================
rm(list=ls())
set.seed(42)

library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
# =========================================================
# GENERATE DATA ARIMA(2,2,2)
# =========================================================
n <- 200

phi1   <- 0.5
phi2   <- -0.7
theta1 <- -0.3
theta2 <- -0.5

sd_e <- sqrt(1.12)

e <- rnorm(n, mean = 0, sd = sd_e)

x <- numeric(n)

for (t in 3:n) {
  x[t] <- phi1 * x[t-1] +
          phi2 * x[t-2] +
          e[t] +
          theta1 * e[t-1] +
          theta2 * e[t-2]
}

# Integrasi orde 1
x_d1 <- cumsum(x)

# Integrasi orde 2
data_sim <- cumsum(x_d1)

# =========================================================
# HAPUS 50 DATA AWAL
# =========================================================
data_trim <- data_sim[51:200]

# =========================================================
# SPLITTING DATA (80% TRAIN, 20% TEST)
# =========================================================
n <- length(data_trim)

train_size <- floor(0.8 * n)

train <- ts(data_trim[1:train_size])

test <- ts(data_trim[(train_size+1):n],
           start = train_size + 1)

# =========================================================
# PLOT DATA TRAIN
# =========================================================
ts.plot(train,
        main = "Data Training")

# =========================================================
# UJI ADF
# =========================================================
cat("ADF Data Asli:\n")
## ADF Data Asli:
print(adf.test(train))
## Warning in adf.test(train): p-value greater than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train
## Dickey-Fuller = 0.82052, Lag order = 4, p-value = 0.99
## alternative hypothesis: stationary
# =========================================================
# DIFFERENCING KE-1
# =========================================================
train_diff1 <- diff(train)

cat("\nADF Differencing 1:\n")
## 
## ADF Differencing 1:
print(adf.test(train_diff1))
## Warning in adf.test(train_diff1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train_diff1
## Dickey-Fuller = -4.1669, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# =========================================================
# DIFFERENCING KE-2
# =========================================================
train_diff2 <- diff(train_diff1)

cat("\nADF Differencing 2:\n")
## 
## ADF Differencing 2:
print(adf.test(train_diff2))
## Warning in adf.test(train_diff2): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train_diff2
## Dickey-Fuller = -8.2592, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# =========================================================
# PLOT SETELAH DIFFERENCING
# =========================================================
ts.plot(train_diff1,
        main = "Data Setelah Differencing 1")

ts.plot(train_diff2,
        main = "Data Setelah Differencing 2")

# =========================================================
# ACF, PACF, DAN EACF
# =========================================================
par(mfrow = c(1,2))

acf(train_diff2,
    main = "ACF")

pacf(train_diff2,
     main = "PACF")

par(mfrow = c(1,1))

eacf(as.matrix(train_diff2))
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x o x o o o o  o  o  o 
## 1 x x x x x o o o o o o  o  o  o 
## 2 o x o o o o o o o o o  o  o  o 
## 3 x x o o o o o o o o o  o  o  o 
## 4 x x o x o o o o o o o  o  o  o 
## 5 x x x x o o o o o o o  o  o  o 
## 6 o x x x o o o o o o o  o  o  o 
## 7 x o x o o o o o o o o  o  o  o
# =========================================================
# PEMBENTUKAN MODEL ARIMA
# =========================================================
model <- forecast::Arima(train,
                         order = c(2,2,2))

model$coef
##        ar1        ar2        ma1        ma2 
##  0.4836872 -0.6429071 -0.3262104 -0.4903318
# =========================================================
# PERSAMAAN MODEL
# =========================================================
cat("\nModel ARIMA(2,2,2):\n")
## 
## Model ARIMA(2,2,2):
cat("∇²Yt = 0.48∇²Yt-1 - 0.64∇²Yt-2 + et - 0.33et-1 - 0.49et-2\n")
## ∇²Yt = 0.48∇²Yt-1 - 0.64∇²Yt-2 + et - 0.33et-1 - 0.49et-2
# =========================================================
# AIC, BIC, DAN AICc
# =========================================================
calc_AICc <- function(model, n) {

  k <- length(model$coef)

  aic <- AIC(model)

  return(aic + (2*k*(k+1))/(n-k-1))
}

n_train <- length(train)

result <- data.frame(
  Model = c("ARIMA(2,2,2)"),
  AIC   = c(AIC(model)),
  BIC   = c(BIC(model)),
  AICc  = c(calc_AICc(model, n_train))
)

print(result)
##          Model      AIC      BIC     AICc
## 1 ARIMA(2,2,2) 338.2998 352.1532 338.6476
# =========================================================
# DIAGNOSTIK MODEL
# =========================================================

# Plot residual
ts.plot(residuals(model),
        main = "Residual")

# ACF residual
acf(residuals(model),
    main = "ACF Residual")

# =========================================================
# UJI LJUNG-BOX
# =========================================================
cat("\nLjung-Box Test:\n")
## 
## Ljung-Box Test:
print(Box.test(residuals(model),
                lag = 20,
                type = "Ljung-Box"))
## 
##  Box-Ljung test
## 
## data:  residuals(model)
## X-squared = 4.1712, df = 20, p-value = 0.9999
# =========================================================
# UJI NORMALITAS SHAPIRO-WILK
# =========================================================
print(shapiro.test(residuals(model)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model)
## W = 0.98913, p-value = 0.4598
# =========================================================
# HISTOGRAM RESIDUAL
# =========================================================
hist(residuals(model),
     main = "Histogram Residual",
     col = "lightblue")

# =========================================================
# QQ-PLOT RESIDUAL
# =========================================================
qqnorm(residuals(model))

qqline(residuals(model),
       col = "red")