Pendahuluan

Analisis ini bertujuan untuk melakukan analisis ARIMA Seasonal terhadap suatu data

knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(forecast)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ggplot2)
library(readxl)

#1. Preprocessing
data_raw <- read_excel("Data_Asistensi_8 copy.xlsx")
data_ts  <- ts(data_raw$Nilai, start = c(1995, 1), frequency = 12)

# -Splitting Data 80:20
n <- length(data_ts)
train_size <- floor(0.8*n)

y_train <- window(data_ts,end=c(1995+(train_size-1)%/%12,(train_size-1)%%12+1))
y_test  <- window(data_ts,start=c(1995+train_size%/%12,train_size%%12+1))

#2. Visualisasi Awal Time Series
plot(data_ts, main="Seasonal Time Series Plot",ylab="Nilai",xlab="Tahun",col="blue")

#3. Uji Stasioneritas Terhadap Varians dan Mean
##. Uji Stasioneritas Terhadap Varians
lambda <- BoxCox.lambda(y_train)
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
print(lambda)
## [1] 0.9565522
##. Uji Stasioneritas Terhadap Mean
##  H0: Data tidak stasioner
##  H1: Data stasioner
adf.test(y_train)
## Warning in adf.test(y_train): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y_train
## Dickey-Fuller = -7.6435, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
#Karena p-value<0.05 (alpha 5%), maka kita Tolak H0

##. Uji Stasioneritas Seasonal
##. Cek apakah butuh Seasonal Differencing (D)
cat("Jumlah Seasonal Difference yang disarankan:",nsdiffs(y_train))
## Jumlah Seasonal Difference yang disarankan: 1
##. Cek plot ACF untuk melihat pola musiman (seasonal spikes)
Acf(y_train, lag.max = 48, main="ACF Data Training (Original)") 

##. Jika nsdiffs > 0, lakukan seasonal differencing
y_train_sdiff <- diff(y_train, lag = 12)

#H0: Data tidak stasioner
#H1: Data stasioner
print(adf.test(na.omit(y_train_sdiff)))
## Warning in adf.test(na.omit(y_train_sdiff)): p-value smaller than printed
## p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(y_train_sdiff)
## Dickey-Fuller = -5.262, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
#Karena p-value<0.05 (alpha 5%), maka kita Tolak H0

#4. Identifikasi Model
##  Lihat lag ACF PACF di kelipatan 12 (12, 24, 36)
par(mfrow=c(1,2))
Acf(y_train_sdiff, lag.max = 36, main="ACF (Seasonal Diff)")
Pacf(y_train_sdiff, lag.max = 36, main="PACF (Seasonal Diff)")

par(mfrow=c(1,1))

#5. Estimasi Beberapa Model Kandidat
m1 <- Arima(y_train, order = c(1, 0, 0), seasonal = c(0, 1, 1))
m2 <- Arima(y_train, order = c(1, 0, 1), seasonal = c(0, 1, 1))
m3 <- Arima(y_train, order = c(2, 0, 0), seasonal = c(1, 1, 1))

# Uji Signifikansi Parameter
print(coeftest(m1))
## 
## z test of coefficients:
## 
##       Estimate Std. Error z value  Pr(>|z|)    
## ar1   0.729111   0.042208  17.274 < 2.2e-16 ***
## sma1 -0.570380   0.051842 -11.002 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Perbandingan AIC
cat("\nPerbandingan AIC:\n")
## 
## Perbandingan AIC:
print(data.frame(Model = c("SARIMA(1,0,0)(0,1,1)", "SARIMA(1,0,1)(0,1,1)", "SARIMA(2,0,0)(1,1,1)"),
                 AIC = c(m1$aic, m2$aic, m3$aic)))
##                  Model      AIC
## 1 SARIMA(1,0,0)(0,1,1) 989.8772
## 2 SARIMA(1,0,1)(0,1,1) 991.8602
## 3 SARIMA(2,0,0)(1,1,1) 993.2242
best_model <- m1 #Pilih berdasarkan AIC terkecil dan signifikansi parameter

#6. Diagnostik Residual
#H0: Residual bersifat White Noise
#H1: Residual tidak bersifat White Noise
checkresiduals(best_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,0)(0,1,1)[12]
## Q* = 15.229, df = 22, p-value = 0.8522
## 
## Model df: 2.   Total lags used: 24
#Gagal Tolak H0

#7. Forecasting & Evaluasi 
fc<-forecast(best_model,h=length(y_test))
print(accuracy(fc,y_test))
##                        ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.006813099 1.394151 1.079234 -23.31077 136.7796 0.5642715
## Test set     -1.172097764 2.586996 1.969073 104.54686 195.3915 1.0295190
##                      ACF1 Theil's U
## Training set -0.007821212        NA
## Test set      0.793596288 0.4719869
#8. Plot Data Forecast vs Aktual
autoplot(fc)+autolayer(y_test, series="Actual Data", color = "red") +
  labs(title = "SARIMA Final Forecast",x="Tahun",y="Nilai")+theme_minimal()

#9. Forecasting 2 Tahun ke Depan (2025 - 2026)
final_model<-Arima(data_ts, order=c(1, 0, 0),seasonal=c(0, 1, 1))
future_fc <- forecast(final_model, h = 24)

# Visualisasi Forecast Masa Depan
autoplot(future_fc)+labs(title = "Peramalan SARIMA 2 Tahun ke Depan",
                         subtitle="Proyeksi Periode: 2025 - 2026",x = "Tahun",y="Nilai")+
  geom_vline(xintercept=2025,linetype="dashed",color="darkgreen",size=1)+
  annotate("text", x = 2025.5, y = max(data_ts), label = "Mulai Forecast",color="darkgreen") +theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.