TUGAS SARIMA

Packages

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
library(forecast)
library(TSA)
library(aTSA)
library(car)
library(lmtest)

Data

set.seed(754016)
periods <- 156
trend <- seq(0, 40, length.out = periods) 
seasonal_effect <- 5 * sin(seq(0, 2 * pi * (periods / 12), length.out = periods)) 
noise <- rnorm(periods, mean = 0, sd = 1) 
time_series_data <- abs(trend + seasonal_effect + noise)
df <- data.frame(Period = rep(1:13,each=12), Value = time_series_data)
ts.df<-ts(df$Value)

Eksplorasi

plot(ts.df)
points(ts.df)

seasonplot(ts.df,12,main="Seasonal Plot", ylab="Period",xlab="Season",year.labels = TRUE,
            col=rainbow(18))

ggplot(df,aes(y=Value,x=Period,group=Period))+
  geom_boxplot()

Hasil yang didapatkan dari eksplorasi boxplot bahwa data cenderung homogen dari tahun ke tahun.

Uji Homogenitas

fligner.test(Value ~ Period, data=df)
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Value by Period
## Fligner-Killeen:med chi-squared = 6.7908, df = 12, p-value = 0.8711

Berdasarkan hasil uji Fligner-Kileen didapatkan p-value sebesar 0.8711 yang lebih besar dari \(\alpha\)=0.05. Hal tersebut membuktikan bahwa tidak terdapat cukup bukti bahwa ragam data tidak stasioner.

Pembagian Data

train.df<-df[1:120,]
test.df<-df[121:156,]
train.ts<-ts(train.df$Value)
test.ts<-window(ts.df,start=120,end=(157-0.01))
## Warning in window.default(x, ...): 'end' value not changed
ggplot() + 
  geom_line(data = df, aes(x = 1:156, y = Value, col = "Data Latih")) +
  geom_line(data = test.df, aes(x = 121:156, y = Value, col = "Data Uji")) +
  labs(x = "Periods", y = "Value", color = "Legend") +
  scale_colour_manual(name="Keterangan:", breaks = c("Data Latih", "Data Uji"),
                      values = c("blue", "red"))

Non-Seasonal ARIMA

Kestasioneran Data

acf0 <- acf(train.ts,main="ACF",lag.max=48,xaxt="n", col="blue")
axis(1, at=0:48, labels=0:48)

acf0$lag <- acf0$lag 
acf0.1 <- as.data.frame(cbind(acf0$acf,acf0$lag))
acf0.2 <- acf0.1[which(acf0.1$V2%%12==0),]
barplot(height = acf0.2$V1, 
names.arg=acf0.2$V2, ylab="ACF", xlab="Lag")

Berdasarkan plot deret sebelumnya diketahui bahwa perilaku deret berulang setiap tahun, atau dikatakan bahwa deret memiliki periode musiman bulanan, sehingga s=12 . Perhatikan nilai fungsi autokorelasi pada lag-lag musiman (lag 12, 24, 36,…) dalam plot ACF contoh di atas. Tampak bahwa nilai autokorelasi pada lag-lag tersebut memiliki hubungan yang kuat. Bagaimanapun juga, plot ACF contoh meluruh secara perlahan dan membentuk pola gelombang kosinus teredam, yang menandakan ketidakstasioneran (plot deret juga menunjukkan adanya trend naik dalam deret).

tseries::adf.test(train.ts)
## Warning in tseries::adf.test(train.ts): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train.ts
## Dickey-Fuller = -9.8885, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

Didapatkan dari uji formal Dickey-Fuller nilai dari p-value adalah 0.01 yang berarti tolak H0 pada taraf 0.05. Hal tersebut menandakan bahwa data bersifat stasioner. Namun karena plot ACF menunjukkan grafik yang turun secara perlahan, proses differencing akan tetap dilakukan.

Differencing

d1 <- diff(train.ts)
ts.plot(d1, type="l", ylab="d1 Xt", col="blue")

acf1 <- acf(d1,lag.max=48,xaxt="n", main="ACF d1", col="blue")
axis(1, at=0:48, labels=0:48)

acf2 <- acf1$lag <- acf1$lag 
acf1.1 <- as.data.frame(cbind(acf1$acf,acf1$lag))
acf1.2 <- acf1.1[which(acf1.1$V2%%12==0),]
barplot(height = acf1.2$V1, names.arg=acf1.2$V2, ylab="ACF", xlab="Lag")

Dapat diketahui setelh proses differencing dilakukan, terdapat komponen seasonal pada data berdasarkan plot ACF. Hal tersebut ditandakan dengan ACF yang signifikan pada lag-lag tertentu.

Seasonal ARIMA

D1 <- diff(train.ts,12)
ts.plot(D1, type="l", ylab="D1 Xt", col="blue")

acf2<-acf(D1,lag.max=48,xaxt="n", main="ACF D1", col="blue")

acf2$lag <- acf2$lag
acf2.1 <- as.data.frame(cbind(acf2$acf,acf2$lag))
acf2.2 <- acf2.1[which(acf2.1$V2%%12==0),]
barplot(height = acf2.2$V1, names.arg=acf2.2$V2, ylab="ACF", xlab="Lag")

Non-seasonal differencing D = 12 berhasil mengatasi ketidakstasioneran dalam rataan untuk komponen seasonalnya (namun tidak untuk komponen non-seasonalnya). Untuk menghilangkan kecenderungan musiman dilakukan pembedaan musiman terhadap deret hasil pembedaan pertama.

d1D1 <- diff(D1)
ts.plot(d1D1, type="l", ylab="d1 D1 Xt", col="blue")

Identifikasi Model

acf3 <- acf(d1D1,lag.max=48,xaxt="n", main="ACF d1D1", col="blue")
axis(1, at=0:48, labels=0:48)

acf3$lag <- acf3$lag 
acf3.1 <- as.data.frame(cbind(acf3$acf,acf3$lag))
acf3.2 <- acf3.1[which(acf3.1$V2%%12==0),]
barplot(height = acf3.2$V1, names.arg=acf3.2$V2, ylab="ACF", 
xlab="Lag")
abline(a=0.1,b=0)
abline(a=-0.1,b=0)

pacf3 <- pacf(d1D1,lag.max=48,xaxt="n", main="PACF d1D1", col="blue")
axis(1, at=0:48, labels=0:48)

pacf3$lag <- pacf3$lag
pacf3.1 <- as.data.frame(cbind(pacf3$acf,pacf3$lag))
pacf3.2 <- pacf3.1[which(pacf3.1$V2%%12==0),]
barplot(height = pacf3.2$V1, names.arg=pacf3.2$V2, ylab="PACF", xlab="Lag")
abline(a=0.1,b=0)
abline(a=-0.1,b=0)

Didapatkan dari setiap identifikasi yang sudah dilakukan, akan diambil komponen seasonal P=1 sedangkan seasonal MA tails-off. Model-model yang terbentuk adalah ARIMA(0,1,1)X(1,1,0)[12], ARIMA(2,1,0)X(1,1,0)[12], ARIMA(2,1,1)X(1,1,0)[12].

TSA::eacf(d1D1)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o o o o o o o o o x  x  x  o 
## 1 x o o o o o o o o o o  x  x  o 
## 2 x x x o o o o o o o o  x  o  o 
## 3 x o x o o o o o o o o  x  o  o 
## 4 x x x o o o o o o o o  x  o  o 
## 5 x x o o x o o o o o o  x  x  o 
## 6 x x x o o o o o o o o  x  x  o 
## 7 x x o o o o o x o o o  x  x  o

Berdasarkan EACF didapatkan komponen ARIMA(1,1,2) dan ARIMA(2,1,3) dan komponen seasonal (1,1,0)[12].

Model tentatif yang didapatkan adalah: -ARIMA(0,1,1)X(1,1,0)[12] -ARIMA(2,1,0)X(1,1,0)[12] -ARIMA(2,1,1)X(1,1,0)[12] -ARIMA(1,1,2)X(1,1,0)[12] -ARIMA(2,1,3)X(1,1,0)[12]

Pendugaan Parameter

#ARIMA(0,1,1)X(1,1,0)[12]
tmodel1 <- Arima(train.ts,order=c(0,1,1),seasonal=c(1,1,0))
summary(tmodel1)
## Series: train.ts 
## ARIMA(0,1,1) 
## 
## Coefficients:
##          ma1
##       0.1876
## s.e.  0.0788
## 
## sigma^2 = 5.808:  log likelihood = -273.05
## AIC=550.1   AICc=550.2   BIC=555.66
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.2085014 2.389902 1.908305 -89.68811 113.0767 0.9671632
##                    ACF1
## Training set 0.02723599
lmtest::coeftest(tmodel1)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)  
## ma1 0.187558   0.078837  2.3791  0.01736 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#ARIMA(2,1,0)X(1,1,0)[12]
tmodel2 <- Arima(train.ts,order=c(2,1,0),seasonal=c(1,1,0))
summary(tmodel2)
## Series: train.ts 
## ARIMA(2,1,0) 
## 
## Coefficients:
##          ar1     ar2
##       0.2099  0.1945
## s.e.  0.0914  0.0914
## 
## sigma^2 = 5.525:  log likelihood = -269.63
## AIC=545.25   AICc=545.46   BIC=553.59
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.1546013 2.321067 1.846775 -82.61285 111.7243 0.9359788
##                     ACF1
## Training set 0.003865816
lmtest::coeftest(tmodel2)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)  
## ar1 0.209888   0.091437  2.2955  0.02171 *
## ar2 0.194457   0.091410  2.1273  0.03340 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#ARIMA(2,1,1)X(1,1,0)[12]
tmodel3 <- Arima(train.ts,order=c(2,1,1),seasonal=c(1,1,0))
summary(tmodel3)
## Series: train.ts 
## ARIMA(2,1,1) 
## 
## Coefficients:
##          ar1     ar2     ma1
##       0.1505  0.2114  0.0613
## s.e.  0.2182  0.1035  0.2068
## 
## sigma^2 = 5.569:  log likelihood = -269.58
## AIC=547.16   AICc=547.51   BIC=558.28
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.1557348 2.320171 1.845324 -83.95084 113.1585 0.9352432
##                      ACF1
## Training set -0.003279174
lmtest::coeftest(tmodel3)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)  
## ar1 0.150510   0.218246  0.6896  0.49042  
## ar2 0.211360   0.103545  2.0412  0.04123 *
## ma1 0.061298   0.206824  0.2964  0.76694  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#ARIMA(1,1,2)X(1,1,0)[12]
tmodel4 <- Arima(train.ts,order=c(1,1,2),seasonal=c(1,1,0))
summary(tmodel4)
## Series: train.ts 
## ARIMA(1,1,2) 
## 
## Coefficients:
##          ar1      ma1     ma2
##       0.3334  -0.1902  0.4664
## s.e.  0.1587   0.1346  0.0969
## 
## sigma^2 = 5.078:  log likelihood = -264.31
## AIC=536.63   AICc=536.98   BIC=547.75
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.1398002 2.215508 1.754849 -111.0612 142.5237 0.8893886
##                    ACF1
## Training set 0.02898212
lmtest::coeftest(tmodel4)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ar1  0.333364   0.158662  2.1011   0.03563 *  
## ma1 -0.190186   0.134560 -1.4134   0.15754    
## ma2  0.466370   0.096877  4.8140 1.479e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#ARIMA(2,1,3)X(1,1,0)[12]
tmodel5 <- Arima(train.ts,order=c(2,1,3),seasonal=c(1,1,0))
summary(tmodel5)
## Series: train.ts 
## ARIMA(2,1,3) 
## 
## Coefficients:
##           ar1      ar2     ma1     ma2     ma3
##       -0.3837  -0.3522  0.5756  0.8084  0.4025
## s.e.   0.1908   0.2071  0.1836  0.1608  0.1193
## 
## sigma^2 = 4.868:  log likelihood = -260.97
## AIC=533.95   AICc=534.7   BIC=550.62
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.161631 2.150562 1.682987 -92.11347 119.0297 0.8529678
##                     ACF1
## Training set 0.006697315
lmtest::coeftest(tmodel4)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ar1  0.333364   0.158662  2.1011   0.03563 *  
## ma1 -0.190186   0.134560 -1.4134   0.15754    
## ma2  0.466370   0.096877  4.8140 1.479e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.auto.arima <- auto.arima(train.ts)
summary(model.auto.arima)
## Series: train.ts 
## ARIMA(1,1,4) 
## 
## Coefficients:
##          ar1      ma1     ma2      ma3      ma4
##       0.5093  -0.4256  0.1845  -0.1252  -0.3656
## s.e.  0.1579   0.1658  0.0944   0.1038   0.0859
## 
## sigma^2 = 4.445:  log likelihood = -255.55
## AIC=523.11   AICc=523.86   BIC=539.78
## 
## Training set error measures:
##                     ME     RMSE     MAE       MPE     MAPE      MASE
## Training set 0.4228965 2.054917 1.60796 -115.6176 140.1406 0.8149427
##                     ACF1
## Training set -0.03891079
lmtest::coeftest(model.auto.arima)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ar1  0.509289   0.157902  3.2253  0.001258 ** 
## ma1 -0.425627   0.165827 -2.5667  0.010267 *  
## ma2  0.184508   0.094367  1.9552  0.050559 .  
## ma3 -0.125236   0.103762 -1.2069  0.227453    
## ma4 -0.365606   0.085854 -4.2585 2.058e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Dari seluruh model didapatkan model yang terbaik adalah ARIMA(2,1,0)X(1,1,0)[12] tetapi MAPE model masih buruk. Dibutuhkan analisis berbeda untuk mendapatkan prediksi yang lebih baik.

Overfitting

#ARIMA(2,1,1)X(1,1,0)[12]
tmodel1.ofq <- Arima(train.ts,order=c(2,1,1),seasonal=c(1,1,0))
summary(tmodel1.ofq)
## Series: train.ts 
## ARIMA(2,1,1) 
## 
## Coefficients:
##          ar1     ar2     ma1
##       0.1505  0.2114  0.0613
## s.e.  0.2182  0.1035  0.2068
## 
## sigma^2 = 5.569:  log likelihood = -269.58
## AIC=547.16   AICc=547.51   BIC=558.28
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.1557348 2.320171 1.845324 -83.95084 113.1585 0.9352432
##                      ACF1
## Training set -0.003279174
lmtest::coeftest(tmodel1.ofq)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)  
## ar1 0.150510   0.218246  0.6896  0.49042  
## ar2 0.211360   0.103545  2.0412  0.04123 *
## ma1 0.061298   0.206824  0.2964  0.76694  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tmodel2.ofq <- Arima(train.ts,order=c(3,1,0),seasonal=c(1,1,0))
summary(tmodel2.ofq)
## Series: train.ts 
## ARIMA(3,1,0) 
## 
## Coefficients:
##          ar1     ar2      ar3
##       0.2225  0.2082  -0.0660
## s.e.  0.0929  0.0932   0.0926
## 
## sigma^2 = 5.549:  log likelihood = -269.37
## AIC=546.75   AICc=547.1   BIC=557.86
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.1627664 2.316015 1.835455 -89.96207 119.3511 0.9302416
##                     ACF1
## Training set -0.03936615
lmtest::coeftest(tmodel2.ofq)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)  
## ar1  0.222517   0.092927  2.3945  0.01664 *
## ar2  0.208188   0.093204  2.2337  0.02550 *
## ar3 -0.065960   0.092638 -0.7120  0.47646  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tmodel3.ofq <- Arima(train.ts,order=c(2,1,0),seasonal=c(2,1,0))
summary(tmodel3.ofq)
## Series: train.ts 
## ARIMA(2,1,0) 
## 
## Coefficients:
##          ar1     ar2
##       0.2099  0.1945
## s.e.  0.0914  0.0914
## 
## sigma^2 = 5.525:  log likelihood = -269.63
## AIC=545.25   AICc=545.46   BIC=553.59
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.1546013 2.321067 1.846775 -82.61285 111.7243 0.9359788
##                     ACF1
## Training set 0.003865816
lmtest::coeftest(tmodel3.ofq)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)  
## ar1 0.209888   0.091437  2.2955  0.02171 *
## ar2 0.194457   0.091410  2.1273  0.03340 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tmodel4.ofq <- Arima(train.ts,order=c(2,1,0),seasonal=c(1,1,1))
summary(tmodel4.ofq)
## Series: train.ts 
## ARIMA(2,1,0) 
## 
## Coefficients:
##          ar1     ar2
##       0.2099  0.1945
## s.e.  0.0914  0.0914
## 
## sigma^2 = 5.525:  log likelihood = -269.63
## AIC=545.25   AICc=545.46   BIC=553.59
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.1546013 2.321067 1.846775 -82.61285 111.7243 0.9359788
##                     ACF1
## Training set 0.003865816
lmtest::coeftest(tmodel4.ofq)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)  
## ar1 0.209888   0.091437  2.2955  0.02171 *
## ar2 0.194457   0.091410  2.1273  0.03340 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Didapatkan hasil dari overfitting komponen non-seasonal memiliki parameter yang tidak signifikan dan overfitting dari komponen seasonal tidak merubah nilai dari AIC atau signifikansi model.

Peramalan

ramalan_sarima = forecast::forecast(tmodel2, 36)
ramalan_sarima
##     Point Forecast     Lo 80    Hi 80        Lo 95    Hi 95
## 121       30.36249 27.350030 33.37495  25.75532934 34.96965
## 122       31.17110 26.442563 35.89964  23.93942666 38.40277
## 123       31.53461 25.100563 37.96866  21.69458400 41.37464
## 124       31.76815 23.837274 39.69902  19.63892304 43.89737
## 125       31.88785 22.599792 41.17591  17.68299134 46.09271
## 126       31.95839 21.443586 42.47319  15.87738555 48.03939
## 127       31.99647 20.358978 43.63396  14.19846166 49.79448
## 128       32.01818 19.345071 44.69129  12.63633284 51.40003
## 129       32.03014 18.393334 45.66695  11.17444501 52.88584
## 130       32.03687 17.496761 46.57699   9.79968994 54.27406
## 131       32.04061 16.648322 47.43290   8.50013604 55.58109
## 132       32.04271 15.842060 48.24335   7.26595523 56.81946
## 133       32.04387 15.072823 49.01492   6.08889238 57.99885
## 134       32.04453 14.336288 49.75276   4.96211312 59.12694
## 135       32.04489 13.628804 50.46097   3.87991789 60.20986
## 136       32.04509 12.947300 51.14288   2.83753946 61.25264
## 137       32.04521 12.289173 51.80124   1.83096097 62.25945
## 138       32.04527 11.652208 52.43833   0.85677429 63.23376
## 139       32.04530 11.034509 53.05610  -0.08793448 64.17854
## 140       32.04532 10.434437 53.65621  -1.00567516 65.09632
## 141       32.04533  9.850572 54.24010  -1.89862523 65.98929
## 142       32.04534  9.281672 54.80901  -2.76868676 66.85937
## 143       32.04534  8.726643 55.36405  -3.61753192 67.70822
## 144       32.04535  8.184519 55.90617  -4.44663957 68.53733
## 145       32.04535  7.654441 56.43625  -5.25732487 69.34802
## 146       32.04535  7.135640 56.95506  -6.05076338 70.14146
## 147       32.04535  6.627425 57.46327  -6.82801090 70.91871
## 148       32.04535  6.129174 57.96152  -7.59001977 71.68072
## 149       32.04535  5.640323 58.45037  -8.33765257 72.42835
## 150       32.04535  5.160360 58.93034  -9.07169344 73.16239
## 151       32.04535  4.688816 59.40188  -9.79285776 73.88355
## 152       32.04535  4.225263 59.86543 -10.50180029 74.59250
## 153       32.04535  3.769309 60.32139 -11.19912211 75.28982
## 154       32.04535  3.320591 60.77011 -11.88537660 75.97607
## 155       32.04535  2.878776 61.21192 -12.56107454 76.65177
## 156       32.04535  2.443555 61.64714 -13.22668863 77.31739
autoplot(ramalan_sarima, col="blue")

accuracy(ts(ramalan_sarima),test.ts)
##                     ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 0.1546013 2.321067 1.846775 -82.612850 111.7243 0.9359788
## Test set     3.7121541 5.520941 4.712443   9.143864  12.7020 2.3883503
##                     ACF1 Theil's U
## Training set 0.003865816        NA
## Test set     0.838867668  2.102735

Didapatkan MAPE dari test.ts lebih baik dibandingkan dengna MAPE training yaitu sebesar 12.7020%. Kemungkinan ada model lain yang lebih baik dibandingkan dengan ARIMA(2,1,0)X(1,1,0)[12] atau terdapat kesalahan dalam pemrograman atau hal lainnya.