title: “Model ARIMA” author: “Micel Fernando” date: “2026-05-07” output: rmarkdown::html_vignette vignette: > % % %

# ── 0. INSTALL & LOAD PACKAGE ──────────────────────────────────
for (pkg in c("forecast", "tseries", "TSA")) {
  if (!requireNamespace(pkg, quietly = TRUE))
    install.packages(pkg, repos = "https://cloud.r-project.org")
}
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
# ================================================================
# 1. SPESIFIKASI MODEL ARIMA(2,2,2)
#    Parameter dideklarasikan SATU PER SATU
#    agar nilai tiap koefisien jelas dan tidak ambigu
# ================================================================
# Koefisien AR — dideklarasi satu per satu
ar1 <- -0.5    # nilai AR(1) : phi_1
ar2 <- -0.7    # nilai AR(2) : phi_2

# Koefisien MA — nilai TRUE (sebelum dibalik)
ma1_true <- 0.3    # nilai MA(1) asli : theta_1
ma2_true <- 0.5    # nilai MA(2) asli : theta_2

# Konvensi R: MA dibalik → nilai yang diinput ke arima.sim
ma1 <- -ma1_true   # = -0.3
ma2 <- -ma2_true   # = -0.5

# Standar deviasi error
sigma <- sqrt(1.20)

cat("================================================================\n")
## ================================================================
cat("  1. SPESIFIKASI MODEL ARIMA(2,2,2)\n")
##   1. SPESIFIKASI MODEL ARIMA(2,2,2)
cat("================================================================\n")
## ================================================================
cat(sprintf("  ar1  (phi_1)   = %4.1f\n", ar1))
##   ar1  (phi_1)   = -0.5
cat(sprintf("  ar2  (phi_2)   = %4.1f\n", ar2))
##   ar2  (phi_2)   = -0.7
cat(sprintf("  ma1  true      = %4.1f   --> input R: ma1 = %.1f\n", ma1_true, ma1))
##   ma1  true      =  0.3   --> input R: ma1 = -0.3
cat(sprintf("  ma2  true      = %4.1f   --> input R: ma2 = %.1f\n", ma2_true, ma2))
##   ma2  true      =  0.5   --> input R: ma2 = -0.5
cat(sprintf("  sigma          = %.4f\n", sigma))
##   sigma          = 1.0954
cat("----------------------------------------------------------------\n")
## ----------------------------------------------------------------
cat("  Persamaan model:\n")
##   Persamaan model:
cat("  (1 - ar1*B - ar2*B^2)(1-B)^2 Xt = (1 + ma1_true*B + ma2_true*B^2) et\n")
##   (1 - ar1*B - ar2*B^2)(1-B)^2 Xt = (1 + ma1_true*B + ma2_true*B^2) et
cat("  (1 + 0.5B + 0.7B^2)(1-B)^2 Xt   = (1 + 0.3B + 0.5B^2) et\n")
##   (1 + 0.5B + 0.7B^2)(1-B)^2 Xt   = (1 + 0.3B + 0.5B^2) et
cat("================================================================\n\n")
## ================================================================

#Model yang digunakan adalah ARIMA(p=2, d=2, q=2). Artinya model ini memiliki 2 komponen AR, membutuhkan differencing 2 kali agar stasioner, dan 2 komponen MA.

# ================================================================
# 2. GENERATE DATA DENGAN MODEL ARIMA(2,2,2)
# ================================================================

# Gabungkan ar1, ar2 dan ma1, ma2 ke vektor
# setelah masing-masing dideklarasi sendiri di atas
ar_vec <- c(ar1, ar2)   # c(-0.5, -0.7)
ma_vec <- c(ma1, ma2)   # c(-0.3, -0.5)  <- sudah dibalik

ts_raw <- arima.sim(
  n     = 200,
  model = list(
    order = c(2, 2, 2),
    ar    = ar_vec,
    ma    = ma_vec
  ),
  sd = sigma
)

cat("================================================================\n")
## ================================================================
cat("  2. GENERATE DATA (n = 200)\n")
##   2. GENERATE DATA (n = 200)
cat("================================================================\n")
## ================================================================
cat(sprintf("  Jumlah observasi : %d\n",   length(ts_raw)))
##   Jumlah observasi : 202
cat(sprintf("  Mean             : %.4f\n", mean(ts_raw)))
##   Mean             : -157.8828
cat(sprintf("  Std Dev          : %.4f\n", sd(ts_raw)))
##   Std Dev          : 94.8550
cat(sprintf("  Min              : %.4f\n", min(ts_raw)))
##   Min              : -367.7581
cat(sprintf("  Max              : %.4f\n", max(ts_raw)))
##   Max              : 0.0000
cat("================================================================\n\n")
## ================================================================
# Plot data mentah
par(mar = c(4, 4, 3, 1))
plot(ts_raw, type = "l", col = "steelblue", lwd = 1.5,
     main = "Data Simulasi ARIMA(2,2,2) — 200 Observasi",
     xlab = "Waktu", ylab = "Nilai")
abline(v = 50, col = "red", lty = 2, lwd = 2)
legend("topleft", legend = "Batas burn-in (obs ke-50)",
       col = "red", lty = 2, bty = "n")

#Data dibangkitkan menggunakan fungsi arima.sim() dengan model ARIMA(2,2,2) yang sudah dispesifikasikan. Penggunaan set.seed(123) memastikan hasil selalu dapat direproduksi.

# ================================================================
# 3. HAPUS 50 DATA PERTAMA (BURN-IN) & SPLIT DATA
# ================================================================
#50 data pertama dibuang karena merupakan periode burn-in — data awal simulasi masih dipengaruhi kondisi awal (initial conditions) sehingga tidak merepresentasikan proses yang sesungguhnya.
# Hapus 50 data pertama
data_ts <- ts(as.numeric(ts_raw)[51:200])
n       <- length(data_ts)       # 150

# Split 80% training / 20% testing
n_train <- floor(0.8 * n)        # 120
n_test  <- n - n_train           # 30

train_data <- ts(data_ts[1:n_train],
                 start = 1, frequency = 1)
test_data  <- ts(data_ts[(n_train + 1):n],
                 start = n_train + 1, frequency = 1)

cat("================================================================\n")
## ================================================================
cat("  3. HAPUS BURN-IN & SPLIT DATA\n")
##   3. HAPUS BURN-IN & SPLIT DATA
cat("================================================================\n")
## ================================================================
cat(sprintf("  Total data generate     : 200 obs\n"))
##   Total data generate     : 200 obs
cat(sprintf("  Dihapus (burn-in)       :  50 obs\n"))
##   Dihapus (burn-in)       :  50 obs
cat(sprintf("  Sisa data bersih        : %d obs\n",   n))
##   Sisa data bersih        : 150 obs
cat(sprintf("  Training (80%%)          : %d obs (obs 1 - %d)\n",
            n_train, n_train))
##   Training (80%)          : 120 obs (obs 1 - 120)
cat(sprintf("  Testing  (20%%)          :  %d obs (obs %d - %d)\n",
            n_test, n_train + 1, n))
##   Testing  (20%)          :  30 obs (obs 121 - 150)
cat("================================================================\n\n")
## ================================================================
# Plot split
par(mar = c(4, 4, 3, 1))
plot(data_ts, type = "l", col = "gray70", lwd = 1,
     main = "Data Bersih: Training vs Testing",
     xlab = "Waktu", ylab = "Nilai")
lines(1:n_train,     as.numeric(train_data), col = "steelblue", lwd = 2)
lines((n_train+1):n, as.numeric(test_data),  col = "firebrick",  lwd = 2)
abline(v = n_train, col = "black", lty = 2, lwd = 1.5)
legend("topleft",
       legend = c(sprintf("Training (n=%d)", n_train),
                  sprintf("Testing  (n=%d)", n_test)),
       col = c("steelblue", "firebrick"), lwd = 2, bty = "n")

#50 data pertama dibuang karena merupakan periode burn-in — data awal simulasi masih dipengaruhi kondisi awal (initial conditions) sehingga tidak merepresentasikan proses yang sesungguhnya.

# ================================================================
# 4. UJI ADF — SAMPAI STASIONER
#    H0: Tidak Stasioner (ada unit root)
#    H1: Stasioner
#    Tolak H0 jika p-value < 0.05
# ================================================================
#Uji Augmented Dickey-Fuller (ADF) digunakan untuk mendeteksi apakah data memiliki unit root (tidak stasioner). Pengujian dilakukan bertahap sampai data stasioner.
cat("================================================================\n")
## ================================================================
cat("  4. UJI ADF (Augmented Dickey-Fuller)\n")
##   4. UJI ADF (Augmented Dickey-Fuller)
cat("================================================================\n")
## ================================================================
cat("  H0: Tidak Stasioner  |  H1: Stasioner\n")
##   H0: Tidak Stasioner  |  H1: Stasioner
cat("  Tolak H0 jika p-value < 0.05\n")
##   Tolak H0 jika p-value < 0.05
cat("----------------------------------------------------------------\n")
## ----------------------------------------------------------------
# Uji level
adf_level <- adf.test(train_data, alternative = "stationary")
## Warning in adf.test(train_data, alternative = "stationary"): p-value greater
## than printed p-value
cat("  --- Data Level (d = 0) ---\n")
##   --- Data Level (d = 0) ---
cat(sprintf("  Statistik ADF : %.4f\n", adf_level$statistic))
##   Statistik ADF : 1.0042
cat(sprintf("  p-value       : %.4f\n", adf_level$p.value))
##   p-value       : 0.9900
cat(sprintf("  Kesimpulan    : %s\n\n",
            ifelse(adf_level$p.value < 0.05,
                   "STASIONER",
                   "Tidak Stasioner --> lakukan differencing")))
##   Kesimpulan    : Tidak Stasioner --> lakukan differencing
# Differencing orde 1
diff1     <- diff(train_data)
adf_diff1 <- adf.test(diff1, alternative = "stationary")
cat("  --- Differencing d = 1 ---\n")
##   --- Differencing d = 1 ---
cat(sprintf("  Statistik ADF : %.4f\n", adf_diff1$statistic))
##   Statistik ADF : -3.1578
cat(sprintf("  p-value       : %.4f\n", adf_diff1$p.value))
##   p-value       : 0.0983
cat(sprintf("  Kesimpulan    : %s\n\n",
            ifelse(adf_diff1$p.value < 0.05,
                   "STASIONER setelah d=1",
                   "Tidak Stasioner --> lakukan differencing d=2")))
##   Kesimpulan    : Tidak Stasioner --> lakukan differencing d=2
# Differencing orde 2
diff2     <- diff(diff1)
adf_diff2 <- adf.test(diff2, alternative = "stationary")
## Warning in adf.test(diff2, alternative = "stationary"): p-value smaller than
## printed p-value
cat("  --- Differencing d = 2 ---\n")
##   --- Differencing d = 2 ---
cat(sprintf("  Statistik ADF : %.4f\n", adf_diff2$statistic))
##   Statistik ADF : -8.3274
cat(sprintf("  p-value       : %.4f\n", adf_diff2$p.value))
##   p-value       : 0.0100
cat(sprintf("  Kesimpulan    : %s\n",
            ifelse(adf_diff2$p.value < 0.05,
                   "STASIONER setelah d=2  --> orde integrasi d = 2",
                   "Belum stasioner, periksa data")))
##   Kesimpulan    : STASIONER setelah d=2  --> orde integrasi d = 2
cat("================================================================\n\n")
## ================================================================
# Plot level vs diff2
par(mfrow = c(1, 2), mar = c(4, 4, 3, 1))
plot(train_data, type = "l", col = "steelblue", lwd = 1.5,
     main = "Data Level (Training)",
     xlab = "Waktu", ylab = "Nilai")
plot(diff2, type = "l", col = "darkgreen", lwd = 1.5,
     main = "Data Setelah Differencing d=2",
     xlab = "Waktu", ylab = "Nilai")
abline(h = 0, col = "red", lty = 2)

par(mfrow = c(1, 1))

#Uji Augmented Dickey-Fuller (ADF) digunakan untuk mendeteksi apakah data memiliki unit root (tidak stasioner). Pengujian dilakukan bertahap sampai data stasioner.Biarpun pada uji pertama sudah stasioner karena d=2 tetap harus di diferrncing 2 kali.

# ================================================================
# 5. ACF, PACF, DAN EACF
# ================================================================

cat("================================================================\n")
## ================================================================
cat("  5. ACF, PACF, DAN EACF\n")
##   5. ACF, PACF, DAN EACF
cat("================================================================\n\n")
## ================================================================
# ── ACF & PACF ──────────────────────────────────────────────────
par(mfrow = c(1, 2), mar = c(4, 4, 3, 1))
acf(diff2, lag.max = 20, col = "steelblue", lwd = 2,
    main = "ACF — Differencing d=2",
    xlab = "Lag", ylab = "ACF")
pacf(diff2, lag.max = 20, col = "firebrick", lwd = 2,
     main = "PACF — Differencing d=2",
     xlab = "Lag", ylab = "PACF")

par(mfrow = c(1, 1))

# Tabel nilai ACF & PACF
acf_v  <- acf(diff2,  lag.max = 12, plot = FALSE)
pacf_v <- pacf(diff2, lag.max = 12, plot = FALSE)
batas  <- qnorm(0.975) / sqrt(length(diff2))

cat(sprintf("  Batas signifikansi : +/- %.4f\n\n", batas))
##   Batas signifikansi : +/- 0.1804
cat(sprintf("  %-5s  %10s  %-6s  %10s  %-6s\n",
            "Lag", "ACF", "Sig?", "PACF", "Sig?"))
##   Lag           ACF  Sig?          PACF  Sig?
cat(sprintf("  %s\n", strrep("-", 46)))
##   ----------------------------------------------
for (i in 1:12) {
  a  <- acf_v$acf[i + 1]
  p  <- pacf_v$acf[i]
  sa <- ifelse(abs(a) > batas, "*", " ")
  sp <- ifelse(abs(p) > batas, "*", " ")
  cat(sprintf("  %-5d  %10.4f  %-6s  %10.4f  %-6s\n", i, a, sa, p, sp))
}
##   1         -0.6643  *          -0.2399  *     
##   2          0.4215  *          -0.7659  *     
##   3          0.2177  *          -0.1120        
##   4         -0.3132  *          -0.3505  *     
##   5         -0.0007             -0.0234        
##   6          0.2371  *          -0.1431        
##   7         -0.1752              0.2116  *     
##   8         -0.1207             -0.1823  *     
##   9          0.2942  *           0.0260        
##   10        -0.1066              0.0099        
##   11        -0.2109  *          -0.0866        
##   12             NA  NA         -0.0351
cat("\n  * = signifikan (melewati batas +/- 1.96/sqrt(n))\n\n")
## 
##   * = signifikan (melewati batas +/- 1.96/sqrt(n))
# ── EACF ────────────────────────────────────────────────────────
cat("  --- EACF ---\n")
##   --- EACF ---
cat("  o = tidak signifikan  |  x = signifikan\n")
##   o = tidak signifikan  |  x = signifikan
cat("  Kandidat model: pojok kiri-atas segitiga 'o'\n\n")
##   Kandidat model: pojok kiri-atas segitiga 'o'
x_eacf <- as.numeric(diff2)
x_eacf <- x_eacf[is.finite(x_eacf) & !is.na(x_eacf)]
cat(sprintf("  Data untuk EACF: %d observasi\n\n", length(x_eacf)))
##   Data untuk EACF: 118 observasi
eacf(x_eacf, ar.max = 5, ma.max = 5)
## AR/MA
##   0 1 2 3 4 5
## 0 x x x x x o
## 1 x x x x x o
## 2 o x o o x o
## 3 x x o o x x
## 4 x x o o o x
## 5 x x o x o o

Ketiga plot ini digunakan untuk mengidentifikasi orde AR (p) dan MA (q) yang tepat dari data yang sudah stasioner (diff2).

#ACF (Autocorrelation Function) #Menunjukkan korelasi data dengan lag-lag sebelumnya. Jika ACF turun tajam (cut off) setelah lag ke-q, mengindikasikan proses MA(q). Spike signifikan di lag 1 dan 2 mengarah ke komponen MA(2).

#PACF (Partial Autocorrelation Function) #Menunjukkan korelasi data dengan lag tertentu setelah menghilangkan pengaruh lag di antaranya. Jika PACF cut off setelah lag ke-p, mengindikasikan proses AR(p). Spike signifikan di lag 1 dan 2 mengarah ke komponen AR(2).

#EACF (Extended ACF) — dari package TSA #Menampilkan tabel simbol ‘o’ (tidak signifikan) dan ‘x’ (signifikan). Kandidat model ARIMA(p,d,q) dibaca dari pojok kiri atas segitiga ‘o’ — titik di mana baris dan kolom pertama membentuk sudut ‘o’.

# ================================================================
# 6. KANDIDAT MODEL
# ================================================================
#Berdasarkan hasil ACF, PACF, dan EACF, dipilih 6 kandidat model. Semua menggunakan d=2 karena data stasioner di differencing orde 2.
cat("\n================================================================\n")
## 
## ================================================================
cat("  6. KANDIDAT MODEL\n")
##   6. KANDIDAT MODEL
cat("================================================================\n")
## ================================================================
cat(sprintf("  %-14s  %s\n", "Model", "Dasar Pemilihan"))
##   Model           Dasar Pemilihan
cat(sprintf("  %s\n", strrep("-", 52)))
##   ----------------------------------------------------
cat(sprintf("  %-14s  %s\n", "ARIMA(2,2,2)", "Model TRUE / EACF pojok (2,2)"))
##   ARIMA(2,2,2)    Model TRUE / EACF pojok (2,2)
cat(sprintf("  %-14s  %s\n", "ARIMA(1,2,2)", "EACF pojok (1,2)"))
##   ARIMA(1,2,2)    EACF pojok (1,2)
cat(sprintf("  %-14s  %s\n", "ARIMA(2,2,1)", "EACF pojok (2,1)"))
##   ARIMA(2,2,1)    EACF pojok (2,1)
cat(sprintf("  %-14s  %s\n", "ARIMA(1,2,1)", "ACF & PACF cut lag-1"))
##   ARIMA(1,2,1)    ACF & PACF cut lag-1
cat(sprintf("  %-14s  %s\n", "ARIMA(0,2,2)", "ACF cut lag-2"))
##   ARIMA(0,2,2)    ACF cut lag-2
cat(sprintf("  %-14s  %s\n", "ARIMA(2,2,0)", "PACF cut lag-2"))
##   ARIMA(2,2,0)    PACF cut lag-2
cat("================================================================\n\n")
## ================================================================
suppressWarnings({
  model1 <- Arima(train_data, order = c(2, 2, 2))
  model2 <- Arima(train_data, order = c(1, 2, 2))
  model3 <- Arima(train_data, order = c(2, 2, 1))
  model4 <- Arima(train_data, order = c(1, 2, 1))
  model5 <- Arima(train_data, order = c(0, 2, 2))
  model6 <- Arima(train_data, order = c(2, 2, 0))
})
cat("  Semua kandidat berhasil di-fit.\n\n")
##   Semua kandidat berhasil di-fit.

#Berdasarkan hasil ACF, PACF, dan EACF, dipilih 6 kandidat model. Semua menggunakan d=2 karena data stasioner di differencing orde 2.

# ================================================================
# 7. MODEL TERBAIK — AIC, BIC, AICc
# ================================================================
#Ketiga kriteria informasi digunakan untuk memilih model terbaik. Semakin kecil nilainya, semakin baik model tersebut dalam menyeimbangkan kecocokan data dan kompleksitas model.
models      <- list(model1, model2, model3, model4, model5, model6)
model_names <- c("ARIMA(2,2,2)", "ARIMA(1,2,2)", "ARIMA(2,2,1)",
                 "ARIMA(1,2,1)", "ARIMA(0,2,2)", "ARIMA(2,2,0)")

tabel <- data.frame(
  Model  = model_names,
  AIC    = sapply(models, function(m) round(m$aic,  4)),
  BIC    = sapply(models, function(m) round(m$bic,  4)),
  AICc   = sapply(models, function(m) round(m$aicc, 4)),
  LogLik = sapply(models, function(m) round(as.numeric(logLik(m)), 4))
)
tabel <- tabel[order(tabel$AIC), ]
rownames(tabel) <- NULL

cat("================================================================\n")
## ================================================================
cat("  7. PERBANDINGAN KRITERIA INFORMASI\n")
##   7. PERBANDINGAN KRITERIA INFORMASI
cat("================================================================\n")
## ================================================================
cat(sprintf("  %-14s %10s %10s %10s %10s\n",
            "Model", "AIC", "BIC", "AICc", "LogLik"))
##   Model                 AIC        BIC       AICc     LogLik
cat(sprintf("  %s\n", strrep("-", 58)))
##   ----------------------------------------------------------
for (i in seq_len(nrow(tabel))) {
  tag <- ifelse(i == 1, "  <-- TERBAIK", "")
  cat(sprintf("  %-14s %10.4f %10.4f %10.4f %10.4f%s\n",
              tabel$Model[i], tabel$AIC[i], tabel$BIC[i],
              tabel$AICc[i], tabel$LogLik[i], tag))
}
##   ARIMA(2,2,2)     359.1823   373.0357   359.7180  -174.5911  <-- TERBAIK
##   ARIMA(2,2,1)     375.1243   386.2070   375.4783  -183.5621
##   ARIMA(2,2,0)     385.5295   393.8416   385.7400  -189.7648
##   ARIMA(1,2,2)     399.4622   410.5449   399.8161  -195.7311
##   ARIMA(0,2,2)     411.8161   420.1282   412.0267  -202.9081
##   ARIMA(1,2,1)     430.9071   439.2191   431.1176  -212.4535
cat(sprintf("\n  Model terpilih : %s\n", tabel$Model[1]))
## 
##   Model terpilih : ARIMA(2,2,2)
cat("================================================================\n\n")
## ================================================================
# Ringkasan model terbaik
cat("  --- Ringkasan ARIMA(2,2,2) ---\n")
##   --- Ringkasan ARIMA(2,2,2) ---
print(summary(model1))
## Series: train_data 
## ARIMA(2,2,2) 
## 
## Coefficients:
##          ar1      ar2      ma1      ma2
##       -0.505  -0.6234  -0.1042  -0.6581
## s.e.   0.084   0.0798   0.0890   0.0908
## 
## sigma^2 = 1.134:  log likelihood = -174.59
## AIC=359.18   AICc=359.72   BIC=373.04
## 
## Training set error measures:
##                       ME    RMSE       MAE        MPE      MAPE      MASE
## Training set -0.08048772 1.03792 0.8258194 0.04079194 0.5569096 0.4595773
##                     ACF1
## Training set -0.04062015
# Koefisien estimasi vs nilai true — satu per satu
cat("\n================================================================\n")
## 
## ================================================================
cat("  KOEFISIEN ESTIMASI vs NILAI TRUE\n")
##   KOEFISIEN ESTIMASI vs NILAI TRUE
cat("  (Konvensi R: koefisien MA dibalik tandanya)\n")
##   (Konvensi R: koefisien MA dibalik tandanya)
cat("================================================================\n")
## ================================================================
est <- coef(model1)

cat(sprintf("  ar1  estimasi = %8.4f  |  nilai true ar1 = %4.1f\n",
            est["ar1"], ar1))
##   ar1  estimasi =  -0.5050  |  nilai true ar1 = -0.5
cat(sprintf("  ar2  estimasi = %8.4f  |  nilai true ar2 = %4.1f\n",
            est["ar2"], ar2))
##   ar2  estimasi =  -0.6234  |  nilai true ar2 = -0.7
cat(sprintf("  ma1  estimasi = %8.4f  |  true MA(1) = %.4f  [= -(%.4f)]\n",
            est["ma1"], -est["ma1"], est["ma1"]))
##   ma1  estimasi =  -0.1042  |  true MA(1) = 0.1042  [= -(-0.1042)]
cat(sprintf("  ma2  estimasi = %8.4f  |  true MA(2) = %.4f  [= -(%.4f)]\n",
            est["ma2"], -est["ma2"], est["ma2"]))
##   ma2  estimasi =  -0.6581  |  true MA(2) = 0.6581  [= -(-0.6581)]
cat("================================================================\n\n")
## ================================================================

#Ketiga kriteria informasi digunakan untuk memilih model terbaik. Semakin kecil nilainya, semakin baik model tersebut dalam menyeimbangkan kecocokan data dan kompleksitas model.

# ================================================================
# 8. DIAGNOSTIK MODEL
# ================================================================
#Diagnostik memastikan bahwa residu model berperilaku seperti white noise — yaitu bebas autokorelasi, berdistribusi normal, dan memiliki varians konstan.
res <- residuals(model1)

cat("================================================================\n")
## ================================================================
cat("  8. DIAGNOSTIK MODEL\n")
##   8. DIAGNOSTIK MODEL
cat("================================================================\n\n")
## ================================================================
# ── 8a. Plot Diagnostik ─────────────────────────────────────────
par(mfrow = c(2, 2), mar = c(4, 4, 3, 1))

plot(res, type = "l", col = "steelblue", lwd = 1.2,
     main = "Residual vs Waktu",
     xlab = "Waktu", ylab = "Residual")
abline(h = 0, col = "red", lty = 2, lwd = 1.5)

hist(res, breaks = 15, col = "lightblue", border = "white",
     main = "Histogram Residual",
     xlab = "Residual", probability = TRUE)
curve(dnorm(x, mean(res), sd(res)), add = TRUE, col = "red", lwd = 2)
legend("topright", legend = "Kurva Normal",
       col = "red", lwd = 2, bty = "n")

acf(res, lag.max = 20, col = "darkgreen", lwd = 2,
    main = "ACF Residual",
    xlab = "Lag", ylab = "ACF")

qqnorm(res, main = "QQ-Plot Residual",
       col = "purple", pch = 16, cex = 0.7)
qqline(res, col = "red", lwd = 2)

par(mfrow = c(1, 1))

# ── 8b. Uji Ljung-Box ───────────────────────────────────────────
cat("  --- Uji Ljung-Box (Independensi Residual) ---\n")
##   --- Uji Ljung-Box (Independensi Residual) ---
cat("  H0: Residual bebas (tidak ada autokorelasi)\n")
##   H0: Residual bebas (tidak ada autokorelasi)
cat("  Gagal tolak H0 jika p-value > 0.05 --> model baik\n\n")
##   Gagal tolak H0 jika p-value > 0.05 --> model baik
cat(sprintf("  %-5s %12s %10s   %s\n",
            "Lag", "Statistik", "p-value", "Keputusan"))
##   Lag      Statistik    p-value   Keputusan
cat(sprintf("  %s\n", strrep("-", 52)))
##   ----------------------------------------------------
for (lg in c(5, 10, 15, 20)) {
  lb  <- Box.test(res, lag = lg, type = "Ljung-Box",
                  fitdf = 4)     # fitdf = p + q = 2 + 2 = 4
  kes <- ifelse(lb$p.value > 0.05,
                "Bebas --> OK",
                "Ada autokorelasi!")
  cat(sprintf("  %-5d %12.4f %10.4f   %s\n",
              lg, lb$statistic, lb$p.value, kes))
}
##   5           2.7756     0.0957   Bebas --> OK
##   10          7.0090     0.3200   Bebas --> OK
##   15         10.5415     0.4824   Bebas --> OK
##   20         12.7822     0.6886   Bebas --> OK
# ── 8c. Uji Jarque-Bera ─────────────────────────────────────────
cat("\n  --- Uji Jarque-Bera (Normalitas Residual) ---\n")
## 
##   --- Uji Jarque-Bera (Normalitas Residual) ---
cat("  H0: Residual berdistribusi Normal\n")
##   H0: Residual berdistribusi Normal
cat("  Gagal tolak H0 jika p-value > 0.05\n\n")
##   Gagal tolak H0 jika p-value > 0.05
jb <- jarque.bera.test(res)
cat(sprintf("  Statistik JB : %.4f\n", jb$statistic))
##   Statistik JB : 0.2943
cat(sprintf("  p-value      : %.4f\n",  jb$p.value))
##   p-value      : 0.8631
cat(sprintf("  Keputusan    : %s\n\n",
            ifelse(jb$p.value > 0.05,
                   "Residual NORMAL --> OK",
                   "Residual TIDAK Normal")))
##   Keputusan    : Residual NORMAL --> OK
skew <- mean((res - mean(res))^3) / sd(res)^3
kurt <- mean((res - mean(res))^4) / sd(res)^4
cat("  --- Statistik Deskriptif Residual ---\n")
##   --- Statistik Deskriptif Residual ---
cat(sprintf("  Mean      : %10.6f   (idealnya ~ 0)\n", mean(res)))
##   Mean      :  -0.080488   (idealnya ~ 0)
cat(sprintf("  Std Dev   : %10.4f\n",                   sd(res)))
##   Std Dev   :     1.0391
cat(sprintf("  Skewness  : %10.4f   (idealnya ~ 0)\n", skew))
##   Skewness  :     0.0963   (idealnya ~ 0)
cat(sprintf("  Kurtosis  : %10.4f   (idealnya ~ 3)\n", kurt))
##   Kurtosis  :     2.8084   (idealnya ~ 3)
# ── 8d. Forecast vs Data Testing ────────────────────────────────
cat("\n  --- Forecast vs Data Testing ---\n")
## 
##   --- Forecast vs Data Testing ---
fc     <- forecast(model1, h = n_test)
pred   <- as.numeric(fc$mean)
aktual <- as.numeric(test_data)

mae  <- mean(abs(pred - aktual))
rmse <- sqrt(mean((pred - aktual)^2))
mape <- mean(abs((pred - aktual) / aktual)) * 100

par(mar = c(4, 4, 3, 1))
plot(fc,
     main = "Forecast vs Aktual — ARIMA(2,2,2)",
     xlab = "Waktu", ylab = "Nilai",
     fcol = "firebrick", flwd = 2,
     shadecols = c("#d6e9f8", "#aed4f1"))
lines((n_train + 1):(n_train + n_test),
      aktual, col = "darkgreen", lwd = 2)
legend("topleft",
       legend = c("Aktual Testing", "Forecast", "CI 80%", "CI 95%"),
       col    = c("darkgreen", "firebrick", "#aed4f1", "#d6e9f8"),
       lwd    = c(2, 2, 6, 6), bty = "n")

cat(sprintf("  MAE   : %.4f\n", mae))
##   MAE   : 11.0290
cat(sprintf("  RMSE  : %.4f\n", rmse))
##   RMSE  : 13.4260
cat(sprintf("  MAPE  : %.4f%%\n\n", mape))
##   MAPE  : 3.3273%
cat("  accuracy() dari package forecast:\n")
##   accuracy() dari package forecast:
print(accuracy(fc, aktual))
##                        ME     RMSE        MAE        MPE      MAPE      MASE
## Training set  -0.08048772  1.03792  0.8258194 0.04079194 0.5569096 0.4595773
## Test set     -11.02900381 13.42604 11.0290038 3.32730260 3.3273026 6.1377576
##                     ACF1
## Training set -0.04062015
## Test set              NA

#Diagnostik memastikan bahwa residu model berperilaku seperti white noise — yaitu bebas autokorelasi, berdistribusi normal, dan memiliki varians konstan.