Topik Dalam Statistika I (Model ARIMA)

1. Import Library

rm(list = ls())
set.seed(42)

library(tseries)

## Warning: package 'tseries' was built under R version 4.5.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(forecast)

## Warning: package 'forecast' was built under R version 4.5.3

library(TSA)

## Warning: package 'TSA' was built under R version 4.5.3

## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast

## 
## Attaching package: 'TSA'

## The following objects are masked from 'package:stats':
## 
##     acf, arima

## The following object is masked from 'package:utils':
## 
##     tar

2. Generate Data ARIMA(2,2,2)

n <- 200

# Parameter ARIMA
phi1   <- 0.5
phi2   <- -0.7
theta1 <- -0.3
theta2 <- -0.5
sd_e   <- sqrt(1.20)
# Error white noise
e <- rnorm(n, mean = 0, sd = sd_e)

# Inisialisasi data
x <- numeric(n)

# Simulasi proses ARMA
for (t in 3:n) {
  
  x[t] <- phi1 * x[t - 1] +
          phi2 * x[t - 2] +
          e[t] +
          theta1 * e[t - 1] +
          theta2 * e[t - 2]
}

# Integrasi dua kali → ARIMA(2,2,2)
x_d1     <- cumsum(x)
data_sim <- cumsum(x_d1)

3. Menghapus 50 Data Awal

data_trim <- data_sim[51:200]

4. Split Data

#    80% Training dan 20% Testing

n_data    <- length(data_trim)
train_size <- floor(0.8 * n_data)

train <- ts(data_trim[1:train_size])

test <- ts(
  data_trim[(train_size + 1):n_data],
  start = train_size + 1
)

5. Plot Data Training

ts.plot(
  train,
  main = "Data Training",
  ylab = "Nilai",
  xlab = "Waktu",
  col  = "blue"
)

6. UJI ADF

cat("ADF Data Asli:\n")

## ADF Data Asli:

print(adf.test(train))

## Warning in adf.test(train): p-value greater than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  train
## Dickey-Fuller = 0.82052, Lag order = 4, p-value = 0.99
## alternative hypothesis: stationary

# Differencing ke-1
train_diff1 <- diff(train)

cat("\nADF Differencing 1:\n")

## 
## ADF Differencing 1:

print(adf.test(train_diff1))

## Warning in adf.test(train_diff1): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  train_diff1
## Dickey-Fuller = -4.1669, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

# Differencing ke-2
train_diff2 <- diff(train_diff1)

cat("\nADF Differencing 2:\n")

## 
## ADF Differencing 2:

print(adf.test(train_diff2))

## Warning in adf.test(train_diff2): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  train_diff2
## Dickey-Fuller = -8.2592, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

# Plot Setelah Differencing
ts.plot(
  train_diff1,
  main = "Data Setelah Differencing 1",
  ylab = "Nilai",
  xlab = "Waktu",
  col  = "darkgreen"
)

ts.plot(
  train_diff2,
  main = "Data Setelah Differencing 2",
  ylab = "Nilai",
  xlab = "Waktu",
  col  = "red"
)

7. ACF,PACF, DAN EACF

# Plot ACF dan PACF
par(mfrow = c(1, 2))

acf(
  train_diff2,
  main = "ACF Data Differencing 2"
)

pacf(
  train_diff2,
  main = "PACF Data Differencing 2"
)

par(mfrow = c(1, 1))

# EACF
eacf(as.matrix(train_diff2))

## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x o x o o o o  o  o  o 
## 1 x x x x x o o o o o o  o  o  o 
## 2 o x o o o o o o o o o  o  o  o 
## 3 x x o o o o o o o o o  o  o  o 
## 4 x x o x o o o o o o o  o  o  o 
## 5 x x x x o o o o o o o  o  o  o 
## 6 o x x x o o o o o o o  o  o  o 
## 7 x o x o o o o o o o o  o  o  o

8. Kandidat Model

model <- forecast::Arima(
  train,
  order = c(2, 2, 2)
)

# Koefisien Model
model$coef

##        ar1        ar2        ma1        ma2 
##  0.4836847 -0.6429065 -0.3262111 -0.4903322

9.Menghitung AIC, BIC, dan AICc

calc_AICc <- function(model, n) {
  
  k   <- length(model$coef)
  aic <- AIC(model)
  
  return(
    aic + (2 * k * (k + 1)) / (n - k - 1)
  )
}

n_train <- length(train)

result <- data.frame(
  
  Model = "ARIMA(2,2,2)",
  
  AIC  = AIC(model),
  
  BIC  = BIC(model),
  
  AICc = calc_AICc(model, n_train)
)

print(result)

##          Model     AIC      BIC     AICc
## 1 ARIMA(2,2,2) 346.441 360.2944 346.7888

10. Diagnostik Model

ts.plot(
  residuals(model),
  main = "Residual Model ARIMA(2,2,2)",
  ylab = "Residual",
  xlab = "Waktu",
  col  = "purple"
)

# ---------------------------------------------------------
# ACF Residual
# ---------------------------------------------------------

acf(
  residuals(model),
  main = "ACF Residual"
)

# ---------------------------------------------------------
# Uji Ljung-Box
# ---------------------------------------------------------

cat("\nLjung-Box Test:\n")

## 
## Ljung-Box Test:

print(
  Box.test(
    residuals(model),
    lag  = 20,
    type = "Ljung-Box"
  )
)

## 
##  Box-Ljung test
## 
## data:  residuals(model)
## X-squared = 4.1712, df = 20, p-value = 0.9999

# ---------------------------------------------------------
# Uji Normalitas Shapiro-Wilk
# ---------------------------------------------------------

print(
  shapiro.test(
    residuals(model)
  )
)

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model)
## W = 0.98913, p-value = 0.4598

# ---------------------------------------------------------
# Histogram Residual
# ---------------------------------------------------------

hist(
  residuals(model),
  main = "Histogram Residual",
  col  = "lightblue",
  border = "black"
)

# ---------------------------------------------------------
# QQ Plot Residual
# ---------------------------------------------------------

qqnorm(
  residuals(model),
  main = "QQ Plot Residual"
)

qqline(
  residuals(model),
  col = "red",
  lwd = 2
)