############################################################
# TOPIK DALAM STATISTIKA 1 - PEMODELAN ARIMA
###########################################################
# Memuat library yang dibutuhkan
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.Arima TSA
## plot.Arima TSA
# Set seed agar hasil generate data bisa direproduksi
set.seed(123)
# ==============================================================================
# 1. Memodelkan ARIMA & 2. Generate Data
# ==============================================================================
n_total <- 200
phi <- c(0.5, -0.7) # Parameter AR
theta <- c(0.3, 0.5) # Parameter MA (versi Box-Jenkins)
# Note: Menggunakan R nilai MA dibalik menjadi negatifnya
theta_r <- -theta
# Standar deviasi (akar varians)
sd_e <- sqrt(1.12)
# Generate data dengan model ARMA(2,2)
data_sim <- arima.sim(n = n_total,
list(ar = phi, ma = theta_r),
sd = sd_e)
# ==============================================================================
# 3. Hapus 50 data pertama, lalu splitting sisa data
# ==============================================================================
# Menghapus 50 observasi pertama (burn-in period)
data_clean <- data_sim[51:n_total]
n_clean <- length(data_clean)
# Splitting data: 80% Data Latih (Train) & 20% Data Uji (Test)
split_point <- floor(0.8 * n_clean)
train_data <- ts(data_clean[1:split_point])
test_data <- ts(data_clean[(split_point + 1):n_clean])
cat("Jumlah data latih:", length(train_data), "\n")
## Jumlah data latih: 120
cat("Jumlah data uji:", length(test_data), "\n\n")
## Jumlah data uji: 30
# ==============================================================================
# 4. Uji ADF (Augmented Dickey-Fuller) untuk cek Stasioneritas
# ==============================================================================
cat("--- Uji Stasioneritas (ADF Test) ---\n")
## --- Uji Stasioneritas (ADF Test) ---
adf_result <- adf.test(train_data)
## Warning in adf.test(train_data): p-value smaller than printed p-value
print(adf_result)
##
## Augmented Dickey-Fuller Test
##
## data: train_data
## Dickey-Fuller = -7.7141, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Jika p-value > 0.05, data tidak stasioner dan butuh differencing.
# (Note: Mengingat koefisien phi (0.5, -0.7) memenuhi syarat stasioner,
# data seharusnya stasioner tanpa differencing (d=0)).
if(adf_result$p.value > 0.05) {
cat("Data tidak stasioner. Melakukan differencing ke-1...\n")
train_data_stat <- diff(train_data)
print(adf.test(train_data_stat))
} else {
cat("Data sudah stasioner (p-value <= 0.05).\n")
train_data_stat <- train_data
}
## Data sudah stasioner (p-value <= 0.05).
# ==============================================================================
# 5. ACF, PACF, dan EACF
# ==============================================================================
# Plot ACF dan PACF
par(mfrow=c(1,2))
acf(train_data_stat, main="Plot ACF Data Latih", lag.max=20)
pacf(train_data_stat, main="Plot PACF Data Latih", lag.max=20)

# Plot EACF
cat("\n--- Tabel EACF ---\n")
##
## --- Tabel EACF ---
eacf(train_data_stat)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x o x o x o o x o x
## 1 x x x x x o x o x o o x o x
## 2 o x o x o o o o o o o o o o
## 3 x x o x x o o o o o o o o o
## 4 x x o x o o o o o o o o o o
## 5 o x o x o o o o o o o o o o
## 6 x x x o x o o o o o o o o o
## 7 x x x o x x o o o o o o o o
# ==============================================================================
# 6. Kandidat Model
# ==============================================================================
# Dari data asli yang merupakan ARMA(2,2), kandidat yang dieksplorasi di sekitar
# ordo tersebut (misalnya ordo p dan q antara 1 dan 2).
# Catatan: Fungsi Arima() juga otomatis "membalik" nilai MA dari Box-Jenkins
kandidat_1 <- Arima(train_data_stat, order=c(2,0,2))
kandidat_2 <- Arima(train_data_stat, order=c(2,0,1))
kandidat_3 <- Arima(train_data_stat, order=c(1,0,2))
kandidat_4 <- Arima(train_data_stat, order=c(1,0,1))
# ==============================================================================
# 7. Model Terbaik berdasarkan AIC, BIC, dan AICc
# ==============================================================================
# Membuat fungsi untuk mengambil kriteria evaluasi informasi
get_metrics <- function(model, name) {
data.frame(
Model = name,
AIC = model$aic,
BIC = model$bic,
AICc = model$aicc
)
}
# Menggabungkan hasil
metrics_table <- rbind(
get_metrics(kandidat_1, "ARMA(2,2)"),
get_metrics(kandidat_2, "ARMA(2,1)"),
get_metrics(kandidat_3, "ARMA(1,2)"),
get_metrics(kandidat_4, "ARMA(1,1)")
)
cat("\n--- Perbandingan Kriteria Model ---\n")
##
## --- Perbandingan Kriteria Model ---
print(metrics_table)
## Model AIC BIC AICc
## 1 ARMA(2,2) 360.7317 377.4567 361.4751
## 2 ARMA(2,1) 378.1513 392.0888 378.6776
## 3 ARMA(1,2) 408.0464 421.9838 408.5727
## 4 ARMA(1,1) 463.2997 474.4496 463.6475
# Menentukan model terbaik berdasarkan AIC terkecil
best_model_idx <- which.min(metrics_table$AIC)
best_model_name <- metrics_table$Model[best_model_idx]
cat("\nModel terbaik berdasarkan AIC adalah:", best_model_name, "\n")
##
## Model terbaik berdasarkan AIC adalah: ARMA(2,2)
# Kita asumsikan model ARMA(2,2) adalah yang terbaik untuk diagnostik
best_model <- kandidat_1
# ==============================================================================
# 8. Diagnostik Model (Analisis Residual)
# ==============================================================================
cat("\n--- Diagnostik Model Terbaik ---\n")
##
## --- Diagnostik Model Terbaik ---
# 1. Plot Residuals (Fungsi bawaan forecast)
checkresiduals(best_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,2) with non-zero mean
## Q* = 7.4043, df = 6, p-value = 0.2851
##
## Model df: 4. Total lags used: 10
# 2. Uji Autokorelasi Residual (Ljung-Box Test)
# H0: Tidak ada autokorelasi pada residual (Residual independen)
# H1: Ada autokorelasi
ljung_box <- Box.test(best_model$residuals, type="Ljung-Box", lag=10, fitdf=4) # fitdf = p+q = 2+2=4
cat("\nUji Ljung-Box untuk Autokorelasi:\n")
##
## Uji Ljung-Box untuk Autokorelasi:
print(ljung_box)
##
## Box-Ljung test
##
## data: best_model$residuals
## X-squared = 7.4043, df = 6, p-value = 0.2851
# 3. Uji Normalitas Residual (Shapiro-Wilk Test)
# H0: Residual berdistribusi normal
# H1: Residual tidak berdistribusi normal
shapiro_test <- shapiro.test(best_model$residuals)
cat("\nUji Shapiro-Wilk untuk Normalitas:\n")
##
## Uji Shapiro-Wilk untuk Normalitas:
print(shapiro_test)
##
## Shapiro-Wilk normality test
##
## data: best_model$residuals
## W = 0.97921, p-value = 0.06017