############################################################
# TOPIK DALAM STATISTIKA 1 - PEMODELAN ARIMA
###########################################################
# Memuat library yang dibutuhkan
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
## Registered S3 methods overwritten by 'forecast':
##   method       from
##   fitted.Arima TSA 
##   plot.Arima   TSA
# Set seed agar hasil generate data bisa direproduksi
set.seed(123)

# ==============================================================================
# 1. Memodelkan ARIMA & 2. Generate Data
# ==============================================================================
n_total <- 200
phi <- c(0.5, -0.7)    # Parameter AR
theta <- c(0.3, 0.5)  # Parameter MA (versi Box-Jenkins)

# Note: Menggunakan R nilai MA dibalik menjadi negatifnya
theta_r <- -theta      

# Standar deviasi (akar varians)
sd_e <- sqrt(1.12)

# Generate data dengan model ARMA(2,2)
data_sim <- arima.sim(n = n_total, 
                      list(ar = phi, ma = theta_r), 
                      sd = sd_e)

# ==============================================================================
# 3. Hapus 50 data pertama, lalu splitting sisa data
# ==============================================================================
# Menghapus 50 observasi pertama (burn-in period)
data_clean <- data_sim[51:n_total]
n_clean <- length(data_clean)

# Splitting data: 80% Data Latih (Train) & 20% Data Uji (Test)
split_point <- floor(0.8 * n_clean)
train_data <- ts(data_clean[1:split_point])
test_data <- ts(data_clean[(split_point + 1):n_clean])

cat("Jumlah data latih:", length(train_data), "\n")
## Jumlah data latih: 120
cat("Jumlah data uji:", length(test_data), "\n\n")
## Jumlah data uji: 30
# ==============================================================================
# 4. Uji ADF (Augmented Dickey-Fuller) untuk cek Stasioneritas
# ==============================================================================
cat("--- Uji Stasioneritas (ADF Test) ---\n")
## --- Uji Stasioneritas (ADF Test) ---
adf_result <- adf.test(train_data)
## Warning in adf.test(train_data): p-value smaller than printed p-value
print(adf_result)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train_data
## Dickey-Fuller = -7.7141, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Jika p-value > 0.05, data tidak stasioner dan butuh differencing.
# (Note: Mengingat koefisien phi (0.5, -0.7) memenuhi syarat stasioner, 
# data seharusnya stasioner tanpa differencing (d=0)).
if(adf_result$p.value > 0.05) {
  cat("Data tidak stasioner. Melakukan differencing ke-1...\n")
  train_data_stat <- diff(train_data)
  print(adf.test(train_data_stat))
} else {
  cat("Data sudah stasioner (p-value <= 0.05).\n")
  train_data_stat <- train_data
}
## Data sudah stasioner (p-value <= 0.05).
# ==============================================================================
# 5. ACF, PACF, dan EACF
# ==============================================================================
# Plot ACF dan PACF
par(mfrow=c(1,2))
acf(train_data_stat, main="Plot ACF Data Latih", lag.max=20)
pacf(train_data_stat, main="Plot PACF Data Latih", lag.max=20)

# Plot EACF
cat("\n--- Tabel EACF ---\n")
## 
## --- Tabel EACF ---
eacf(train_data_stat)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x o x o x o o  x  o  x 
## 1 x x x x x o x o x o o  x  o  x 
## 2 o x o x o o o o o o o  o  o  o 
## 3 x x o x x o o o o o o  o  o  o 
## 4 x x o x o o o o o o o  o  o  o 
## 5 o x o x o o o o o o o  o  o  o 
## 6 x x x o x o o o o o o  o  o  o 
## 7 x x x o x x o o o o o  o  o  o
# ==============================================================================
# 6. Kandidat Model 
# ==============================================================================
# Dari data asli yang merupakan ARMA(2,2), kandidat yang dieksplorasi di sekitar 
# ordo tersebut (misalnya ordo p dan q antara 1 dan 2).
# Catatan: Fungsi Arima() juga otomatis "membalik" nilai MA dari Box-Jenkins
kandidat_1 <- Arima(train_data_stat, order=c(2,0,2))
kandidat_2 <- Arima(train_data_stat, order=c(2,0,1))
kandidat_3 <- Arima(train_data_stat, order=c(1,0,2))
kandidat_4 <- Arima(train_data_stat, order=c(1,0,1))

# ==============================================================================
# 7. Model Terbaik berdasarkan AIC, BIC, dan AICc
# ==============================================================================
# Membuat fungsi untuk mengambil kriteria evaluasi informasi
get_metrics <- function(model, name) {
  data.frame(
    Model = name,
    AIC = model$aic,
    BIC = model$bic,
    AICc = model$aicc
  )
}

# Menggabungkan hasil
metrics_table <- rbind(
  get_metrics(kandidat_1, "ARMA(2,2)"),
  get_metrics(kandidat_2, "ARMA(2,1)"),
  get_metrics(kandidat_3, "ARMA(1,2)"),
  get_metrics(kandidat_4, "ARMA(1,1)")
)

cat("\n--- Perbandingan Kriteria Model ---\n")
## 
## --- Perbandingan Kriteria Model ---
print(metrics_table)
##       Model      AIC      BIC     AICc
## 1 ARMA(2,2) 360.7317 377.4567 361.4751
## 2 ARMA(2,1) 378.1513 392.0888 378.6776
## 3 ARMA(1,2) 408.0464 421.9838 408.5727
## 4 ARMA(1,1) 463.2997 474.4496 463.6475
# Menentukan model terbaik berdasarkan AIC terkecil
best_model_idx <- which.min(metrics_table$AIC)
best_model_name <- metrics_table$Model[best_model_idx]
cat("\nModel terbaik berdasarkan AIC adalah:", best_model_name, "\n")
## 
## Model terbaik berdasarkan AIC adalah: ARMA(2,2)
# Kita asumsikan model ARMA(2,2) adalah yang terbaik untuk diagnostik
best_model <- kandidat_1 

# ==============================================================================
# 8. Diagnostik Model (Analisis Residual)
# ==============================================================================
cat("\n--- Diagnostik Model Terbaik ---\n")
## 
## --- Diagnostik Model Terbaik ---
# 1. Plot Residuals (Fungsi bawaan forecast)
checkresiduals(best_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,2) with non-zero mean
## Q* = 7.4043, df = 6, p-value = 0.2851
## 
## Model df: 4.   Total lags used: 10
# 2. Uji Autokorelasi Residual (Ljung-Box Test)
# H0: Tidak ada autokorelasi pada residual (Residual independen)
# H1: Ada autokorelasi
ljung_box <- Box.test(best_model$residuals, type="Ljung-Box", lag=10, fitdf=4) # fitdf = p+q = 2+2=4
cat("\nUji Ljung-Box untuk Autokorelasi:\n")
## 
## Uji Ljung-Box untuk Autokorelasi:
print(ljung_box)
## 
##  Box-Ljung test
## 
## data:  best_model$residuals
## X-squared = 7.4043, df = 6, p-value = 0.2851
# 3. Uji Normalitas Residual (Shapiro-Wilk Test)
# H0: Residual berdistribusi normal
# H1: Residual tidak berdistribusi normal
shapiro_test <- shapiro.test(best_model$residuals)
cat("\nUji Shapiro-Wilk untuk Normalitas:\n")
## 
## Uji Shapiro-Wilk untuk Normalitas:
print(shapiro_test)
## 
##  Shapiro-Wilk normality test
## 
## data:  best_model$residuals
## W = 0.97921, p-value = 0.06017