Analisis ini bertujuan untuk melakukan analisis ARIMA Seasonal terhadap suatu data
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(forecast)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggplot2)
library(readxl)
#1. Preprocessing
data_raw <- read_excel("Data_Asistensi_8 copy.xlsx")
data_ts <- ts(data_raw$Nilai, start = c(1995, 1), frequency = 12)
# -Splitting Data 80:20
n <- length(data_ts)
train_size <- floor(0.8*n)
y_train <- window(data_ts,end=c(1995+(train_size-1)%/%12,(train_size-1)%%12+1))
y_test <- window(data_ts,start=c(1995+train_size%/%12,train_size%%12+1))
#2. Visualisasi Awal Time Series
plot(data_ts, main="Seasonal Time Series Plot",ylab="Nilai",xlab="Tahun",col="blue")
#3. Uji Stasioneritas Terhadap Varians dan Mean
##. Uji Stasioneritas Terhadap Varians
lambda <- BoxCox.lambda(y_train)
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
print(lambda)
## [1] 0.9565522
##. Uji Stasioneritas Terhadap Mean
## H0: Data tidak stasioner
## H1: Data stasioner
adf.test(y_train)
## Warning in adf.test(y_train): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: y_train
## Dickey-Fuller = -7.6435, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
#Karena p-value<0.05 (alpha 5%), maka kita Tolak H0
##. Uji Stasioneritas Seasonal
##. Cek apakah butuh Seasonal Differencing (D)
cat("Jumlah Seasonal Difference yang disarankan:",nsdiffs(y_train))
## Jumlah Seasonal Difference yang disarankan: 1
##. Cek plot ACF untuk melihat pola musiman (seasonal spikes)
Acf(y_train, lag.max = 48, main="ACF Data Training (Original)")
##. Jika nsdiffs > 0, lakukan seasonal differencing
y_train_sdiff <- diff(y_train, lag = 12)
#H0: Data tidak stasioner
#H1: Data stasioner
print(adf.test(na.omit(y_train_sdiff)))
## Warning in adf.test(na.omit(y_train_sdiff)): p-value smaller than printed
## p-value
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(y_train_sdiff)
## Dickey-Fuller = -5.262, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
#Karena p-value<0.05 (alpha 5%), maka kita Tolak H0
#4. Identifikasi Model
## Lihat lag ACF PACF di kelipatan 12 (12, 24, 36)
par(mfrow=c(1,2))
Acf(y_train_sdiff, lag.max = 36, main="ACF (Seasonal Diff)")
Pacf(y_train_sdiff, lag.max = 36, main="PACF (Seasonal Diff)")
par(mfrow=c(1,1))
#5. Estimasi Beberapa Model Kandidat
m1 <- Arima(y_train, order = c(1, 0, 0), seasonal = c(0, 1, 1))
m2 <- Arima(y_train, order = c(1, 0, 1), seasonal = c(0, 1, 1))
m3 <- Arima(y_train, order = c(2, 0, 0), seasonal = c(1, 1, 1))
# Uji Signifikansi Parameter
print(coeftest(m1))
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.729111 0.042208 17.274 < 2.2e-16 ***
## sma1 -0.570380 0.051842 -11.002 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Perbandingan AIC
cat("\nPerbandingan AIC:\n")
##
## Perbandingan AIC:
print(data.frame(Model = c("SARIMA(1,0,0)(0,1,1)", "SARIMA(1,0,1)(0,1,1)", "SARIMA(2,0,0)(1,1,1)"),
AIC = c(m1$aic, m2$aic, m3$aic)))
## Model AIC
## 1 SARIMA(1,0,0)(0,1,1) 989.8772
## 2 SARIMA(1,0,1)(0,1,1) 991.8602
## 3 SARIMA(2,0,0)(1,1,1) 993.2242
best_model <- m1 #Pilih berdasarkan AIC terkecil dan signifikansi parameter
#6. Diagnostik Residual
#H0: Residual bersifat White Noise
#H1: Residual tidak bersifat White Noise
checkresiduals(best_model)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0)(0,1,1)[12]
## Q* = 15.229, df = 22, p-value = 0.8522
##
## Model df: 2. Total lags used: 24
#Gagal Tolak H0
#7. Forecasting & Evaluasi
fc<-forecast(best_model,h=length(y_test))
print(accuracy(fc,y_test))
## ME RMSE MAE MPE MAPE MASE
## Training set -0.006813099 1.394151 1.079234 -23.31077 136.7796 0.5642715
## Test set -1.172097764 2.586996 1.969073 104.54686 195.3915 1.0295190
## ACF1 Theil's U
## Training set -0.007821212 NA
## Test set 0.793596288 0.4719869
#8. Plot Data Forecast vs Aktual
autoplot(fc)+autolayer(y_test, series="Actual Data", color = "red") +
labs(title = "SARIMA Final Forecast",x="Tahun",y="Nilai")+theme_minimal()
#9. Forecasting 2 Tahun ke Depan (2025 - 2026)
final_model<-Arima(data_ts, order=c(1, 0, 0),seasonal=c(0, 1, 1))
future_fc <- forecast(final_model, h = 24)
# Visualisasi Forecast Masa Depan
autoplot(future_fc)+labs(title = "Peramalan SARIMA 2 Tahun ke Depan",
subtitle="Proyeksi Periode: 2025 - 2026",x = "Tahun",y="Nilai")+
geom_vline(xintercept=2025,linetype="dashed",color="darkgreen",size=1)+
annotate("text", x = 2025.5, y = max(data_ts), label = "Mulai Forecast",color="darkgreen") +theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.