# Input data
penjualan <- c(120,135,150,160,170,180,200,210,190,220,240,260,
               130,145,160,175,185,200,220,230,210,240,260,280,
               140,155,170,180,190,210,230,240,220,250,270,300,
               150,165,180,195,205,225,250,260,240,270,290,320,
               160,175,190,205,220,240,260,275,255,290,310,340)

# Ubah ke time series
ts_data <- ts(penjualan, start=c(2018,1), frequency=12)

# Plot data
plot(ts_data, main="Penjualan Bulanan 2018-2022",
     ylab="Penjualan", xlab="Tahun")

# Decomposition (trend & musiman)
dekomposisi <- decompose(ts_data)
plot(dekomposisi)

Uji Stasioneritas

library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
adf.test(ts_data)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -3.3542, Lag order = 3, p-value = 0.07161
## alternative hypothesis: stationary

p-value = 0,07161 > 0,05 artinya data tidak stasioner.

Differencing

diff1 <- diff(ts_data, differences=1)
adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1
## Dickey-Fuller = -4.1858, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
diff_final <- diff(diff1, lag=12)
adf.test(diff_final)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_final
## Dickey-Fuller = -3.1571, Lag order = 3, p-value = 0.113
## alternative hypothesis: stationary

p-value = 0,01 < 0,05 artinya data sudah stasioner. Oleh karena itu, proses differencing dihentikan pada orde pertama (d = 1).

Mengidentifikasi Model (Plot ACF & PACF)

library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
# Plot ACF
acf(diff1, main="ACF")

# Plot PACF
pacf(diff1, main="PACF")

Berdasarkan plot ACF, terdapat spike signifikan pada lag 1 dan lag 12 yang menunjukkan adanya komponen MA dan pola musiman. Sementara itu, plot PACF menunjukkan spike pada lag 1 dan 2 yang mengindikasikan adanya komponen AR. Oleh karena itu, kandidat model yang dipertimbangkan adalah ARIMA(1,1,1), ARIMA(2,1,1), serta model musiman SARIMA(1,1,1)(1,0,1)[12] dan SARIMA(2,1,1)(1,0,1)[12].

Estimasi Model ARIMA

library(forecast)

model_arima1 <- Arima(ts_data, order=c(1,1,1))
model_arima2 <- Arima(ts_data, order=c(2,1,1))

summary(model_arima1)
## Series: ts_data 
## ARIMA(1,1,1) 
## 
## Coefficients:
##          ar1      ma1
##       0.5784  -0.9043
## s.e.  0.1366   0.0675
## 
## sigma^2 = 1610:  log likelihood = -300.84
## AIC=607.68   AICc=608.12   BIC=613.91
## 
## Training set error measures:
##                   ME     RMSE      MAE      MPE     MAPE     MASE         ACF1
## Training set 9.15566 39.10743 26.84525 1.473338 13.37917 1.881127 -0.007709712
summary(model_arima2)
## Series: ts_data 
## ARIMA(2,1,1) 
## 
## Coefficients:
##          ar1      ar2      ma1
##       0.6086  -0.0855  -0.8949
## s.e.  0.1452   0.1380   0.0721
## 
## sigma^2 = 1626:  log likelihood = -300.65
## AIC=609.3   AICc=610.04   BIC=617.61
## 
## Training set error measures:
##                    ME     RMSE      MAE      MPE     MAPE     MASE        ACF1
## Training set 9.358504 38.95469 26.30155 1.607976 13.03462 1.843028 -0.06100009

Berdasarkan hasil estimasi model ARIMA, diperoleh bahwa model ARIMA(1,1,1) memiliki nilai AIC (607.68) dan BIC (613.91) yang lebih kecil dibandingkan ARIMA(2,1,1). Oleh karena itu, model ARIMA(1,1,1) dipilih sebagai model terbaik untuk pendekatan non-musiman.

Estimasi Model SARIMA

library(forecast)

model_sarima1 <- Arima(ts_data,
                       order=c(1,1,1),
                       seasonal=c(0,1,1))

model_sarima2 <- Arima(ts_data,
                       order=c(2,1,1),
                       seasonal=c(0,1,1))

summary(model_sarima1)
## Series: ts_data 
## ARIMA(1,1,1)(0,1,1)[12] 
## 
## Coefficients:
##           ar1      ma1    sma1
##       -0.0408  -0.1318  0.1949
## s.e.   0.6212   0.6093  0.1384
## 
## sigma^2 = 13.51:  log likelihood = -126.55
## AIC=261.1   AICc=262.05   BIC=268.5
## 
## Training set error measures:
##                     ME     RMSE     MAE         MPE      MAPE      MASE
## Training set 0.1644189 3.147447 1.73685 -0.05359323 0.8730661 0.1217063
##                      ACF1
## Training set -0.006879982
summary(model_sarima2)
## Series: ts_data 
## ARIMA(2,1,1)(0,1,1)[12] 
## 
## Coefficients:
##           ar1      ar2     ma1    sma1
##       -0.4819  -0.0997  0.3072  0.1973
## s.e.   1.2812   0.2418  1.2799  0.1385
## 
## sigma^2 = 13.79:  log likelihood = -126.51
## AIC=263.02   AICc=264.48   BIC=272.27
## 
## Training set error measures:
##                     ME     RMSE      MAE         MPE      MAPE      MASE
## Training set 0.1666443 3.144215 1.749437 -0.05339637 0.8790533 0.1225883
##                      ACF1
## Training set -0.003612474

Menentukan Model Terbaik berdasarkan AIC/BIC

AIC(model_arima1, model_arima2,
    model_sarima1, model_sarima2)
## Warning in AIC.default(model_arima1, model_arima2, model_sarima1,
## model_sarima2): models are not all fitted to the same number of observations
##               df      AIC
## model_arima1   3 607.6804
## model_arima2   4 609.3021
## model_sarima1  4 261.0961
## model_sarima2  5 263.0152
BIC(model_arima1, model_arima2,
    model_sarima1, model_sarima2)
## Warning in BIC.default(model_arima1, model_arima2, model_sarima1,
## model_sarima2): models are not all fitted to the same number of observations
##               df      BIC
## model_arima1   3 613.9130
## model_arima2   4 617.6123
## model_sarima1  4 268.4967
## model_sarima2  5 272.2659

Berdasarkan nilai AIC dan BIC model terbaik adalah SARIMA(1,1,1)(0,0,1)[12] memiliki nilai AIC terkecil sebesar 261.0961 dan BIC terkecil sebesar 268.4967.

Uji Diagnostik Model

checkresiduals(model_sarima1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1)(0,1,1)[12]
## Q* = 9.1664, df = 9, p-value = 0.4221
## 
## Model df: 3.   Total lags used: 12
# Uji Ljung-Box
Box.test(residuals(model_sarima1),
         lag=20,
         type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(model_sarima1)
## X-squared = 20.398, df = 20, p-value = 0.4333

Hasil uji diagnostik residual menggunakan uji Ljung-Box menunjukkan bahwa p-value > 0.05, sehingga residual bersifat white noise. Hal ini menunjukkan bahwa model SARIMA(1,1,1)(0,1,1)[12] telah memenuhi asumsi diagnostik dan layak digunakan untuk forecasting.

Forecasting

library(forecast)

forecast_final <- forecast(model_sarima1, h=12)

# Lihat hasil angka
forecast_final
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 2023       178.2786 173.5684 182.9889 171.0749 185.4823
## Feb 2023       193.2819 187.1686 199.3953 183.9324 202.6315
## Mar 2023       208.2818 201.0141 215.5495 197.1669 219.3967
## Apr 2023       223.0480 214.7864 231.3097 210.4130 235.6831
## May 2023       239.0224 229.8742 248.1707 225.0314 253.0135
## Jun 2023       259.0524 249.0962 269.0087 243.8257 274.2792
## Jul 2023       277.8882 267.1848 288.5915 261.5187 294.2576
## Aug 2023       293.8626 282.4609 305.2642 276.4252 311.2999
## Sep 2023       273.8626 261.8030 285.9222 255.4190 292.3061
## Oct 2023       309.8370 297.1535 322.5204 290.4393 329.2346
## Nov 2023       329.8370 316.5589 343.1150 309.5300 350.1440
## Dec 2023       359.9109 346.0638 373.7580 338.7336 381.0882
#plot
plot(forecast_final)

Kesimpulan

Berdasarkan model SARIMA(1,1,1)(0,1,1)[12], hasil forecasting menunjukkan bahwa penjualan pada tahun 2023 cenderung meningkat secara bertahap.

Terdapat penurunan sementara pada bulan September, yang mengindikasikan adanya pola musiman, sebelum kembali meningkat hingga mencapai puncak pada bulan Desember dengan penjualan sebesar 359.9109.