#Pendaluhuan Model ARIMA digunakan untuk memodelkan data deret waktu. Model yang digunakan adalah ARIMA(2,2,2) dengan parameter:
φ1 = 0.5, φ2 = 0.7 θ1 = -0.3, θ2 = -0.5 error ~ N(0, 1.08)
Catatan: Dalam R, parameter MA dibalik → θ menjadi (0.3, 0.5).
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
set.seed(123)
# SIMULASI DATA ARIMA
n <- 200
# noise
e <- rnorm(n, mean = 0, sd = sqrt(1.08))
# inisialisasi
y <- numeric(n)
# generate ARMA(2,2)
for(i in 3:n){
y[i] <- 0.5*y[i-1] +
0.7*y[i-2] +
e[i] +
0.3*e[i-1] +
0.5*e[i-2]
}
# integrasi d = 2
data_sim <- diffinv(y, differences = 2)
# ambil 200 data pertama
data_sim <- data_sim[1:n]
# ubah menjadi time series
data_sim <- ts(data_sim)
# plot data awal
plot(data_sim,
main = "Simulasi Time Series (200 Data)",
col = "red",
lwd = 2)
# prepocessing
# buang 50 data pertama
data_clean <- data_sim[-c(1:50)]
# cek panjang data
length(data_clean)
## [1] 150
# hasil = 150
# SPLITTING DATA
# 120 data train
train <- data_clean[1:120]
# 30 data test
test <- data_clean[121:150]
# cek jumlah data
length(train) # 120
## [1] 120
length(test) # 30
## [1] 30
# VISUALISASI TRAIN & TEST
plot(train,
main = "Data Train (120 Data)",
col = "blue",
lwd = 2)
plot(test,
main = "Data Test (30 Data)",
col = "darkgreen",
lwd = 2)
# uji stasioneritas
# Uji ADF data train awal
adf.test(train)
## Warning in adf.test(train): p-value greater than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: train
## Dickey-Fuller = 248019766, Lag order = 4, p-value = 0.99
## alternative hypothesis: stationary
# DIFFERENCING KE-1
train_d1 <- diff(train, differences = 1)
# Uji ADF setelah differencing 1
adf.test(train_d1)
## Warning in adf.test(train_d1): p-value greater than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: train_d1
## Dickey-Fuller = 273464608, Lag order = 4, p-value = 0.99
## alternative hypothesis: stationary
# DIFFERENCING KE-2
train_d2 <- diff(train_d1, differences = 1)
# atau bisa juga:
# train_d2 <- diff(train, differences = 2)
# Uji ADF setelah differencing 2
adf.test(train_d2)
## Warning in adf.test(train_d2): p-value greater than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: train_d2
## Dickey-Fuller = 5.7562, Lag order = 4, p-value = 0.99
## alternative hypothesis: stationary
# DATA SUDAH STASIONER
ts_stationary <- train_d2
# Plot hasil differencing kedua
plot(ts_stationary,
main = "Data Train Setelah Differencing Orde 2",
col = "blue",
lwd = 2)
# identifikasi model arima
library(forecast)
# Differencing orde 2
ts_stationary <- diff(train, differences = 2)
# Bersihkan data
ts_stationary <- as.numeric(ts_stationary)
ts_stationary <- na.omit(ts_stationary)
# Cek panjang data
length(ts_stationary)
## [1] 118
# PLOT ACF DAN PACF
par(mfrow = c(1,2))
acf(ts_stationary,
lag.max = 20,
main = "ACF Data Stasioner")
pacf(ts_stationary,
lag.max = 20,
main = "PACF Data Stasioner")
# IDENTIFIKASI OTOMATIS MODEL
auto_model <- auto.arima(train)
# Ringkasan model terbaik
summary(auto_model)
## Series: train
## ARIMA(0,2,0)
##
## sigma^2 = 1.09e+16: log likelihood = -2346.15
## AIC=4694.31 AICc=4694.34 BIC=4697.08
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 39230109 103524038 39230110 1.18523 1.186801 0.1087823 0.8889062
#estimasi kandidat model
library(forecast)
# Kandidat model ARIMA
model1 <- Arima(train,
order = c(2,2,2),
method = "CSS")
model2 <- Arima(train,
order = c(1,2,2),
method = "CSS")
model3 <- Arima(train,
order = c(2,2,1),
method = "CSS")
model4 <- Arima(train,
order = c(1,2,1),
method = "CSS")
# RINGKASAN MODEL
summary(model1)
## Series: train
## ARIMA(2,2,2)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## 0.626 0.5585 1.2106 0.9488
## s.e. 0.000 0.0000 0.0099 0.0098
##
## sigma^2 = 10.57: log likelihood = -305.54
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set 0.004470634 3.168981 2.612091 9.479965e-06 0.0001990815
## MASE ACF1
## Training set 7.243145e-09 -0.6436178
summary(model2)
## Series: train
## ARIMA(1,2,2)
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ma1 ma2
## 1.1232 1.1998 1.0179
## s.e. 0.0000 NaN 0.0048
##
## sigma^2 = 155.3: log likelihood = -464.11
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.2948608 12.20115 9.551007 4.199349e-06 0.0003390283 2.648427e-08
## ACF1
## Training set -0.5794744
summary(model3)
## Series: train
## ARIMA(2,2,1)
##
## Coefficients:
## ar1 ar2 ma1
## 0.6277 0.5565 0.9743
## s.e. 0.0000 0.0000 0.0073
##
## sigma^2 = 13.36: log likelihood = -319.86
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0279187 3.577812 2.910442 1.434634e-05 0.0004519478 8.070451e-09
## ACF1
## Training set -0.9438186
summary(model4)
## Series: train
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## 1.1232 0.9696
## s.e. 0.0000 0.0093
##
## sigma^2 = 41: log likelihood = -386.03
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01261139 6.29564 5.06807 6.456682e-06 0.0007024837 1.40534e-08
## ACF1
## Training set -0.9733089
# PERBANDINGAN AIC
aic1 <- model1$aic
aic2 <- model2$aic
aic3 <- model3$aic
aic4 <- model4$aic
aic_values <- data.frame(
Model = c("ARIMA(2,2,2)",
"ARIMA(1,2,2)",
"ARIMA(2,2,1)",
"ARIMA(1,2,1)"),
AIC = c(aic1, aic2, aic3, aic4)
)
print(aic_values)
## Model AIC
## 1 ARIMA(2,2,2) NA
## 2 ARIMA(1,2,2) NA
## 3 ARIMA(2,2,1) NA
## 4 ARIMA(1,2,1) NA
# PERBANDINGAN BIC
n <- length(train)
bic1 <- as.numeric(AIC(model1, k = log(n)))
bic2 <- as.numeric(AIC(model2, k = log(n)))
bic3 <- as.numeric(AIC(model3, k = log(n)))
bic4 <- as.numeric(AIC(model4, k = log(n)))
# gabungkan hasil
bic_values <- cbind(
Model = c("ARIMA(2,2,2)",
"ARIMA(1,2,2)",
"ARIMA(2,2,1)",
"ARIMA(1,2,1)"),
BIC = c(bic1,
bic2,
bic3,
bic4)
)
print(bic_values)
## Model
## [1,] "ARIMA(2,2,2)"
## [2,] "ARIMA(1,2,2)"
## [3,] "ARIMA(2,2,1)"
## [4,] "ARIMA(1,2,1)"
# SELEKSI MODEL MENGGUNAKAN AICc
# jumlah residual dan parameter
n1 <- length(na.omit(model1$residuals))
k1 <- length(model1$coef)
n2 <- length(na.omit(model2$residuals))
k2 <- length(model2$coef)
n3 <- length(na.omit(model3$residuals))
k3 <- length(model3$coef)
n4 <- length(na.omit(model4$residuals))
k4 <- length(model4$coef)
# HITUNG AICc
aicc1 <- as.numeric(
AIC(model1) + (2*k1*(k1+1))/(n1-k1-1)
)
aicc2 <- as.numeric(
AIC(model2) + (2*k2*(k2+1))/(n2-k2-1)
)
aicc3 <- as.numeric(
AIC(model3) + (2*k3*(k3+1))/(n3-k3-1)
)
aicc4 <- as.numeric(
AIC(model4) + (2*k4*(k4+1))/(n4-k4-1)
)
# TABEL AICc
model_names <- c("ARIMA(2,2,2)",
"ARIMA(1,2,2)",
"ARIMA(2,2,1)",
"ARIMA(1,2,1)")
aicc_result <- c(aicc1,
aicc2,
aicc3,
aicc4)
# ubah ke numeric
aicc_result <- as.numeric(aicc_result)
# buat tabel
aicc_values <- cbind(
Model = model_names,
AICc = round(aicc_result, 4)
)
print(aicc_values)
## Model
## [1,] "ARIMA(2,2,2)"
## [2,] "ARIMA(1,2,2)"
## [3,] "ARIMA(2,2,1)"
## [4,] "ARIMA(1,2,1)"
print(aicc_values)
## Model
## [1,] "ARIMA(2,2,2)"
## [2,] "ARIMA(1,2,2)"
## [3,] "ARIMA(2,2,1)"
## [4,] "ARIMA(1,2,1)"
# MATRIKS AICc
aicc_result <- c(
as.numeric(aicc1),
as.numeric(aicc2),
as.numeric(aicc3),
as.numeric(aicc4)
)
model_names <- c(
"ARIMA(2,2,2)",
"ARIMA(1,2,2)",
"ARIMA(2,2,1)",
"ARIMA(1,2,1)"
)
# buat matriks
aicc_matrix <- matrix(
aicc_result,
nrow = 4,
ncol = 1
)
# tambahkan nama baris & kolom
rownames(aicc_matrix) <- model_names
colnames(aicc_matrix) <- "AICc"
print(aicc_matrix)
## AICc
## ARIMA(2,2,2) NA
## ARIMA(1,2,2) NA
## ARIMA(2,2,1) NA
## ARIMA(1,2,1) NA
# MODEL TERBAIK
best_index <- which.min(aicc_matrix[,1])
best_model <- rownames(aicc_matrix)[best_index]
best_aicc <- aicc_matrix[best_index,1]
cat("Model terbaik :", best_model, "\n")
## Model terbaik :
cat("Nilai AICc :", best_aicc)
## Nilai AICc :
# VALIDASI DENGAN AUTO ARIMA
# pencarian model otomatis
auto_model <- auto.arima(train)
# ringkasan model
summary(auto_model)
## Series: train
## ARIMA(0,2,0)
##
## sigma^2 = 1.09e+16: log likelihood = -2346.15
## AIC=4694.31 AICc=4694.34 BIC=4697.08
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 39230109 103524038 39230110 1.18523 1.186801 0.1087823 0.8889062
# nilai AIC, BIC, dan AICc
cat("AIC :", auto_model$aic, "\n")
## AIC : 4694.307
cat("BIC :", BIC(auto_model), "\n")
## BIC : 4697.078
cat("AICc :", auto_model$aicc, "\n")
## AICc : 4694.341
# DIAGNOSTIK MODEL TERBAIK
# misalkan model terbaik adalah model1
best_arima <- model1
# CHECK RESIDUAL
checkresiduals(best_arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,2,2)
## Q* = 329.26, df = 6, p-value < 2.2e-16
##
## Model df: 4. Total lags used: 10
# UJI LJUNG-BOX
Box.test(
residuals(best_arima),
lag = 20,
type = "Ljung-Box"
)
##
## Box-Ljung test
##
## data: residuals(best_arima)
## X-squared = 500.66, df = 20, p-value < 2.2e-16
# MODEL TERBAIK
best_model <- model1
summary(best_model)
## Series: train
## ARIMA(2,2,2)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## 0.626 0.5585 1.2106 0.9488
## s.e. 0.000 0.0000 0.0099 0.0098
##
## sigma^2 = 10.57: log likelihood = -305.54
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set 0.004470634 3.168981 2.612091 9.479965e-06 0.0001990815
## MASE ACF1
## Training set 7.243145e-09 -0.6436178
# FORECASTING
library(forecast)
forecast_result <- forecast(
best_model,
h = 20
)
# tampilkan hasil forecast
print(forecast_result)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 121 48202593292 48202593288 48202593296 48202593286 48202593299
## 122 54141753844 54141753828 54141753861 54141753819 54141753870
## 123 60812692968 60812692925 60812693010 60812692903 60812693032
## 124 68305574898 68305574814 68305574983 68305574769 68305575027
## 125 76721673230 76721673083 76721673377 76721673005 76721673455
## 126 86174739724 86174739490 86174739958 86174739366 86174740082
## 127 96792541776 96792541426 96792542126 96792541241 96792542311
## 128 108718589316 108718588816 108718589816 108718588552 108718590081
## 129 122114074488 122114073798 122114075177 122114073433 122114075542
## 130 137160050311 137160049387 137160051235 137160048898 137160051724
## 131 154059877796 154059876586 154059879006 154059875945 154059879647
## 132 173041974562 173041973006 173041976118 173041972182 173041976941
## 133 194362902124 194362900155 194362904093 194362899112 194362905136
## 134 218310833573 218310831114 218310836033 218310829812 218310837334
## 135 245209448514 245209445478 245209451550 245209443870 245209453158
## 136 275422307911 275422304199 275422311623 275422302234 275422313588
## 137 309357767969 309357763470 309357772468 309357761089 309357774850
## 138 347474499464 347474494053 347474504876 347474491188 347474507741
## 139 390287687126 390287680659 390287693592 390287677236 390287697015
## 140 438375992860 438375985179 438376000541 438375981113 438376004608
# plot forecast
plot(
forecast_result,
main = "Forecasting ARIMA(2,2,2)",
col = "blue",
lwd = 2
)
# EVALUASI AKURASI MODEL
# gunakan model terbaik
best_model <- model1
# horizon forecast sesuai panjang data test
h <- length(test)
# forecasting data test
forecast_test <- forecast(
best_model,
h = h
)
# hasil prediksi
pred <- as.numeric(forecast_test$mean)
# METRIK AKURASI
rmse <- sqrt(mean((test - pred)^2))
mae <- mean(abs(test - pred))
mape <- mean(abs((test - pred)/test)) * 100
# TABEL AKURASI
accuracy_table <- data.frame(
RMSE = rmse,
MAE = mae,
MAPE = mape
)
print(accuracy_table)
## RMSE MAE MAPE
## 1 19372.68 11599.41 1.689681e-06
# VISUALISASI AKTUAL VS PREDIKSI
# gunakan hasil forecast sebelumnya
pred <- forecast_test$mean
# ubah ke numeric
pred <- as.numeric(pred)
# samakan panjang data
min_len <- min(length(test), length(pred))
test_plot <- as.numeric(test[1:min_len])
pred_plot <- pred[1:min_len]
# PLOT AKTUAL VS PREDIKSI
plot(
test_plot,
type = "l",
col = "black",
lwd = 2,
ylim = range(c(test_plot, pred_plot)),
main = "Aktual vs Prediksi",
xlab = "Periode",
ylab = "Nilai"
)
# garis prediksi
lines(
pred_plot,
col = "red",
lwd = 2,
lty = 2
)
# legenda
legend(
"topleft",
legend = c("Aktual", "Prediksi"),
col = c("black", "red"),
lty = c(1,2),
lwd = 2
)
# AKURASI MODEL
accuracy_result <- accuracy(
pred_plot,
test_plot
)
print(accuracy_result)
## ME RMSE MAE MPE MAPE
## Test set 11599.41 19372.68 11599.41 1.689681e-06 1.689681e-06