library("tseries")
library("forecast")
library("TTR")
library("TSA")
library("graphics")
library("astsa")
library("car")
library("portes")
library("MASS")
library("readxl")

Data Simulated Seasonal

Input data dari file excel

simulasi<- read_excel("latihan Praktikum 8.xlsx", 
    sheet = "Sheet4")
head(simulasi)
## # A tibble: 6 x 1
##   `Simulated Seasonal TS`
##                     <dbl>
## 1                    97.3
## 2                    99.4
## 3                   101. 
## 4                    95.7
## 5                   103. 
## 6                   102.

Data terdiri dari 150 observasi. Definisikan data menjadi time series dengan musim berupa per empat periode setiap tahun.

simulasi<-simulasi[1:148,]
season<-rep(1:4,times=37)
year<-rep(1:37,each=4)
simulasi$season<- season
simulasi$year<- year
sim.ts <- ts(simulasi, frequency = 4, start = c(1,1), end = c(37,4))
head(sim.ts)
##      Simulated Seasonal TS season year
## 1 Q1                97.323      1    1
## 1 Q2                99.386      2    1
## 1 Q3               100.581      3    1
## 1 Q4                95.671      4    1
## 2 Q1               102.601      1    2
## 2 Q2               101.751      2    2
plot.ts(simulasi$`Simulated Seasonal TS`)

seasonplot(sim.ts[,1],4,main="Simulated Seasonal", ylab="Simulated Seasonal",year.labels = TRUE, col=rainbow(18))

Berikut monthplot dan boxplot dari data simulasi. Terlihat pola asumsi musimaan dengan ragam dan ukuran pemusatan data yang sama. Sehingga ada indikasi deret waktu musiman.

monthplot(sim.ts[,1],ylab="Simulated Seasonal")

boxplot(sim.ts ~ cycle(sim.ts), xlab = "Month", ylab = "Simulated Seasonal", main = "Simulated Seasonal - Boxplot")

Uji kehomogenan menyatakan p-value lebih besar dari 5% artinya data terjadi kehomogenan ragam, maka tidak perlu transformasi data.

fligner.test(sim.ts[,1]~ year, data=simulasi)
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  sim.ts[, 1] by year
## Fligner-Killeen:med chi-squared = 31.816, df = 36, p-value = 0.6679

Pembagian Training dan testing

Selanjutnya, data deret waktu dibagi menjadi training dan testing. Testing didefinisikan 1 musim terkahir dari data (4 periode). Selainnya akan digunakan menjadi training.

sim.ts<-sim.ts[,1]
tr.simts<- subset(sim.ts,start=1,end=144)
ts.simts <- subset(sim.ts,start=145,end=148)

Identifikasi Kestasioneran dan Pembedaan

Identifikasi kestasioneran non musiman dan musiman menurut plot ACF. Plot ACF non musiman menyatakan data tidak stasioner karena fungsi ACF turun perlahan, begitu pula kondisi yang terjadi pada musiman.

acf0<-acf(sim.ts,main="ACF",lag.max=60,xaxt="n")
axis(1, at=0:60/4, labels=0:60)

acf0$lag=acf0$lag*4
acf0.1 <- as.data.frame(cbind(acf0$acf,acf0$lag))
acf0.2 <- acf0.1[which(acf0.1$V2%%4==0),]
barplot(height = acf0.2$V1, names.arg=acf0.2$V2, ylab="ACF", xlab="Lag")

Data non musiman dilakukan pembedaan d=1 diperoleh plot data yang stasioner terhadap rataan kemudian dikuatkan dengan uji adf (tolak hipotesis data tidak stasioner).

diff1.simts=diff(tr.simts)
plot(diff1.simts, main = "Time series plot of simulated seasonal d=1")

adf.test(diff1.simts)
## Warning in adf.test(diff1.simts): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1.simts
## Dickey-Fuller = -4.6946, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

Periksa kembali plot acf, data non musiman telah stasioner. Namun plot acf musiman masih tidak stasioner.

acf0<-acf(diff1.simts,main="ACF",lag.max=60,xaxt="n")
axis(1, at=0:60/4, labels=0:60)

acf0$lag=acf0$lag*4
acf0.1 <- as.data.frame(cbind(acf0$acf,acf0$lag))
acf0.2 <- acf0.1[which(acf0.1$V2%%4==0),]
barplot(height = acf0.2$V1, names.arg=acf0.2$V2, ylab="ACF", xlab="Lag")

Data musiman dilakukan pembedaan D=1 diperoleh deret musiman yang stasioner berdasarkan plot ACF. Namun deret non musiman tidak stasioner berdasarkan uji adf (tidak tolak hipotesis data tidak stasioner).

diff4.simts <- diff(tr.simts,4)
plot(diff4.simts, main = "Time series plot of simulated seasonal D=4")

acf2<- acf(diff4.simts,lag.max=48,xaxt="n", main="ACF d1D1")
axis(1, at=0:48/4, labels=0:48)

acf2$lag <- acf2$lag * 4
acf2.1 <- as.data.frame(cbind(acf2$acf,acf2$lag))
acf2.2 <- acf2.1[which(acf2.1$V2%%4==0),]
barplot(height=acf2.2$V1,names.arg=acf2.2$V2,ylab="ACF", xlab="Lag")

adf.test(diff4.simts)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff4.simts
## Dickey-Fuller = -3.4037, Lag order = 5, p-value = 0.05685
## alternative hypothesis: stationary

Data musiman pembedaan 1 dilakukan pembedaan d=1. Sehingga terjadi kestasioneran pada series musiman dan non musiman.

diff14.simts <- diff(diff4.simts ,1)
plot(diff14.simts, main = "Time series plot of simulated seasonal d=1 D=4")

Identifikasi Model

acf2(diff14.simts,48)

##       [,1]  [,2] [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12]
## ACF  -0.56  0.06 0.30 -0.53  0.25 -0.02 -0.06  0.10  0.03 -0.05  0.01  0.04
## PACF -0.56 -0.37 0.22 -0.35 -0.37 -0.28  0.05 -0.17 -0.09 -0.06  0.04  0.03
##      [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF  -0.09 -0.02  0.08 -0.08  0.03  0.11 -0.11  0.08 -0.06  0.03 -0.04  0.02
## PACF  0.07 -0.15 -0.05 -0.02 -0.08 -0.07 -0.02  0.04 -0.13  0.09  0.04  0.09
##      [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF   0.02 -0.09  0.15  -0.1 -0.01  0.11 -0.18  0.08  0.06 -0.12  0.14 -0.04
## PACF  0.00  0.01  0.08   0.1 -0.06  0.01 -0.06 -0.07 -0.04 -0.01  0.03  0.02
##      [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF  -0.07  0.06 -0.05  0.04  0.03 -0.03  0.01 -0.03 -0.03  0.08 -0.11  0.10
## PACF -0.02 -0.08 -0.01  0.08  0.14 -0.04  0.00  0.00 -0.10  0.00 -0.13 -0.09
eacf(diff14.simts)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o x x x o o o o o o  o  o  o 
## 1 x o x x x o o o o o o  o  o  o 
## 2 x o x x o o o o o o o  o  o  o 
## 3 x o x x o o x o o o o  o  o  o 
## 4 x x o x o o x o o o o  o  o  o 
## 5 x x o x x o o o o o o  o  o  o 
## 6 x x x x o o x o o o o  o  o  o 
## 7 x x x x o o x o o o o  o  o  o
#non musiman
#ARIMA(0,1,1);ARIMA(6,1,0);ARIMA(0,1,5)
#musiman
#ARIMA(0,1,1)4;ARIMA(2,1,0)4
#Model tentatif
#ARIMA(0,1,1)xARIMA(0,1,1)4
#ARIMA(6,1,0)xARIMA(0,1,1)4
#ARIMA(0,1,5)xARIMA(0,1,1)4
#ARIMA(0,1,1)xARIMA(2,1,0)4
#ARIMA(6,1,0)xARIMA(2,1,0)4
#ARIMA(0,1,5)xARIMA(2,1,0)4
model1 <- Arima(tr.simts,order=c(0,1,1),seasonal=c(0,1,1))
model2 <- Arima(tr.simts,order=c(6,1,0),seasonal=c(0,1,1))
model3 <- Arima(tr.simts,order=c(0,1,5),seasonal=c(0,1,1))
model4 <- Arima(tr.simts,order=c(0,1,1),seasonal=c(2,1,0))
model5 <- Arima(tr.simts,order=c(6,1,0),seasonal=c(2,1,0))
model6 <- Arima(tr.simts,order=c(0,1,5),seasonal=c(2,1,0))
summary(model1)
## Series: tr.simts 
## ARIMA(0,1,1)(0,1,1)[4] 
## 
## Coefficients:
##           ma1     sma1
##       -0.7538  -0.6134
## s.e.   0.0522   0.0626
## 
## sigma^2 estimated as 0.9245:  log likelihood=-192.33
## AIC=390.66   AICc=390.83   BIC=399.46
## 
## Training set error measures:
##                       ME     RMSE       MAE         MPE      MAPE      MASE
## Training set 0.004204648 0.937827 0.7369922 0.009181691 0.4670374 0.1722105
##                    ACF1
## Training set -0.1047938
summary(model2)
## Series: tr.simts 
## ARIMA(6,1,0)(0,1,1)[4] 
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5      ar6     sma1
##       -0.8672  -0.4709  -0.2229  -0.5040  -0.4354  -0.1941  -0.3927
## s.e.   0.0830   0.1060   0.1064   0.1286   0.1293   0.0907   0.1221
## 
## sigma^2 estimated as 0.8756:  log likelihood=-186.12
## AIC=388.25   AICc=389.35   BIC=411.72
## 
## Training set error measures:
##                        ME      RMSE      MAE         MPE      MAPE      MASE
## Training set -0.002531851 0.8959229 0.693286 0.002289909 0.4382708 0.1619978
##                    ACF1
## Training set 0.01005648
summary(model3)
## Series: tr.simts 
## ARIMA(0,1,5)(0,1,1)[4] 
## 
## Coefficients:
##           ma1     ma2      ma3      ma4     ma5     sma1
##       -0.8769  0.2349  -0.0119  -0.3993  0.2663  -0.3832
## s.e.   0.0841  0.1121   0.1152   0.2004  0.1391   0.1781
## 
## sigma^2 estimated as 0.8788:  log likelihood=-186.91
## AIC=387.83   AICc=388.68   BIC=408.37
## 
## Training set error measures:
##                         ME      RMSE       MAE         MPE      MAPE      MASE
## Training set -0.0003916168 0.9009396 0.6954765 0.005049586 0.4381382 0.1625097
##                    ACF1
## Training set 0.02105412
summary(model4)
## Series: tr.simts 
## ARIMA(0,1,1)(2,1,0)[4] 
## 
## Coefficients:
##           ma1     sar1     sar2
##       -0.7469  -0.7062  -0.2203
## s.e.   0.0569   0.0877   0.0881
## 
## sigma^2 estimated as 0.9017:  log likelihood=-190.14
## AIC=388.28   AICc=388.58   BIC=400.02
## 
## Training set error measures:
##                        ME      RMSE       MAE         MPE      MAPE      MASE
## Training set -0.003670576 0.9228317 0.7363059 0.003761538 0.4634708 0.1720502
##                    ACF1
## Training set -0.1235067
summary(model5)
## Series: tr.simts 
## ARIMA(6,1,0)(2,1,0)[4] 
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5      ar6     sar1     sar2
##       -0.8792  -0.4920  -0.2667  -0.3138  -0.2578  -0.1397  -0.6144  -0.1539
## s.e.   0.0855   0.1145   0.1251   0.3293   0.3035   0.1160   0.3618   0.1660
## 
## sigma^2 estimated as 0.8834:  log likelihood=-186.2
## AIC=390.39   AICc=391.79   BIC=416.8
## 
## Training set error measures:
##                        ME      RMSE       MAE         MPE      MAPE      MASE
## Training set -0.003414938 0.8964725 0.7005247 0.002025855 0.4423934 0.1636893
##                    ACF1
## Training set 0.02215841
summary(model6)
## Series: tr.simts 
## ARIMA(0,1,5)(2,1,0)[4] 
## 
## Coefficients:
##           ma1     ma2      ma3      ma4     ma5     sar1     sar2
##       -0.8756  0.2553  -0.0447  -0.3698  0.2395  -0.4223  -0.0442
## s.e.   0.0845  0.1267   0.1352   0.3218  0.2247   0.2915   0.1862
## 
## sigma^2 estimated as 0.8756:  log likelihood=-186.15
## AIC=388.3   AICc=389.41   BIC=411.77
## 
## Training set error measures:
##                         ME      RMSE       MAE         MPE      MAPE      MASE
## Training set -0.0005455563 0.8959119 0.7032663 0.004935974 0.4430293 0.1643299
##                     ACF1
## Training set 0.008652251

Pengujian Parameter Model

Model yang semua dugaan parameternya siginfikan adalah model1, model2, dan model4. Dengan model2 adalah model terbaik karena memiliki nilai AIC paling kecil.

printstatarima <- function (x, digits = 4,se=TRUE){
  if (length(x$coef) > 0) {
    cat("\nCoefficients:\n")
    coef <- round(x$coef, digits = digits)
    if (se && nrow(x$var.coef)) {
      ses <- rep(0, length(coef))
      ses[x$mask] <- round(sqrt(diag(x$var.coef)), digits = digits)
      coef <- matrix(coef, 1, dimnames = list(NULL, names(coef)))
      coef <- rbind(coef, s.e. = ses)
      statt <- coef[1,]/ses
      pval  <- 2*pt(abs(statt), df=length(x$residuals)-1, lower.tail = FALSE)
      coef <- rbind(coef, t=round(statt,digits=digits),sign.=round(pval,digits=digits))
      coef <- t(coef)
    }
    print.default(coef, print.gap = 2)
  }
}

printstatarima(model1)
## 
## Coefficients:
##                  s.e.         t  sign.
## ma1   -0.7538  0.0522  -14.4406      0
## sma1  -0.6134  0.0626   -9.7987      0
printstatarima(model2)
## 
## Coefficients:
##                  s.e.         t   sign.
## ar1   -0.8672  0.0830  -10.4482  0.0000
## ar2   -0.4709  0.1060   -4.4425  0.0000
## ar3   -0.2229  0.1064   -2.0949  0.0379
## ar4   -0.5040  0.1286   -3.9191  0.0001
## ar5   -0.4354  0.1293   -3.3674  0.0010
## ar6   -0.1941  0.0907   -2.1400  0.0340
## sma1  -0.3927  0.1221   -3.2162  0.0016
printstatarima(model3)
## 
## Coefficients:
##                  s.e.         t   sign.
## ma1   -0.8769  0.0841  -10.4269  0.0000
## ma2    0.2349  0.1121    2.0955  0.0379
## ma3   -0.0119  0.1152   -0.1033  0.9179
## ma4   -0.3993  0.2004   -1.9925  0.0482
## ma5    0.2663  0.1391    1.9145  0.0576
## sma1  -0.3832  0.1781   -2.1516  0.0331
printstatarima(model4)
## 
## Coefficients:
##                  s.e.         t   sign.
## ma1   -0.7469  0.0569  -13.1265  0.0000
## sar1  -0.7062  0.0877   -8.0525  0.0000
## sar2  -0.2203  0.0881   -2.5006  0.0135
printstatarima(model5)
## 
## Coefficients:
##                  s.e.         t   sign.
## ar1   -0.8792  0.0855  -10.2830  0.0000
## ar2   -0.4920  0.1145   -4.2969  0.0000
## ar3   -0.2667  0.1251   -2.1319  0.0347
## ar4   -0.3138  0.3293   -0.9529  0.3422
## ar5   -0.2578  0.3035   -0.8494  0.3971
## ar6   -0.1397  0.1160   -1.2043  0.2305
## sar1  -0.6144  0.3618   -1.6982  0.0916
## sar2  -0.1539  0.1660   -0.9271  0.3554
printstatarima(model6)
## 
## Coefficients:
##                  s.e.         t   sign.
## ma1   -0.8756  0.0845  -10.3621  0.0000
## ma2    0.2553  0.1267    2.0150  0.0458
## ma3   -0.0447  0.1352   -0.3306  0.7414
## ma4   -0.3698  0.3218   -1.1492  0.2524
## ma5    0.2395  0.2247    1.0659  0.2883
## sar1  -0.4223  0.2915   -1.4487  0.1496
## sar2  -0.0442  0.1862   -0.2374  0.8127

Diagnostik Model

Sisaan menyebar normal berdasarkan plot, sisaan bersifat acak dan tidak terdapat autokorelasi berdasarkan plot residual dan signifikasni lag 4,8,dst. Menurut Uji Jarque Bera sisaan menyebar normal (tidak tolak hipotesis data menyebar normal)

checkresiduals(model2, lag = 15)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(6,1,0)(0,1,1)[4]
## Q* = 7.0932, df = 8, p-value = 0.5266
## 
## Model df: 7.   Total lags used: 15
tsdisplay(residuals(model2), lag.max=15, main='ARIMA(6,1,0)(0,1,1)4 Model Residuals')

ljbtest <- LjungBox(residuals(model2),lags=seq(4,36,4),order=0,season=1,squared.residuals=FALSE)
ljbtest
##  lags statistic df   p-value
##     4  0.424139  4 0.9804533
##     8  1.263397  8 0.9959752
##    12  2.544854 12 0.9979929
##    16  7.130284 16 0.9706674
##    20 12.918019 20 0.8808737
##    24 14.896386 24 0.9237539
##    28 16.326364 28 0.9607360
##    32 17.746817 32 0.9803220
##    36 20.238792 36 0.9841336
jarque.bera.test(residuals(model2))
## 
##  Jarque Bera Test
## 
## data:  residuals(model2)
## X-squared = 0.10512, df = 2, p-value = 0.9488

Validasi Model

ramalan_arima = forecast(model2,4)
accuracy(ramalan_arima,ts.simts)
##                        ME      RMSE      MAE         MPE      MAPE      MASE
## Training set -0.002531851 0.8959229 0.693286 0.002289909 0.4382708 0.1619978
## Test set      1.095511822 1.4505687 1.095512 0.434157991 0.4341580 0.2559846
##                    ACF1 Theil's U
## Training set 0.01005648        NA
## Test set     0.16959753 0.3536136
plot(ramalan_arima)

forecast_arima <- cbind(ramalan_arima$mean,ramalan_arima$lower,ramalan_arima$upper)
forecast_arima <- cbind(simulasi[145:148,1:3],(forecast_arima))
forecast_arima <- as.data.frame(forecast_arima)
(forecast_arima)
##   Simulated Seasonal TS season year ramalan_arima$mean ramalan_arima$lower.80%
## 1               249.887      1   37           249.6523                248.4531
## 2               256.824      2   37           256.4269                255.2172
## 3               254.345      3   37           253.2352                251.9276
## 4               251.052      4   37           248.4115                247.0361
##   ramalan_arima$lower.95% ramalan_arima$upper.80% ramalan_arima$upper.95%
## 1                247.8182                250.8515                251.4864
## 2                254.5768                257.6367                258.2771
## 3                251.2354                254.5429                255.2351
## 4                246.3080                249.7869                250.5150
#plot ramalan perbulan
plot(forecast_arima[,2],forecast_arima[,1], xlab='Year', ylab='Simulated Seasonal', type='l', col='black', lwd=1, ylim=c(250,259))
lines(forecast_arima[,2],forecast_arima[,3], col='red', lwd=1)
lines(forecast_arima[,2],forecast_arima[,5], col='blue', lwd=1)
lines(forecast_arima[,2],forecast_arima[,7], col='green',lwd=1)
legend("topleft", c("Actual","Forecast","Lower 95%","Upper 95%"), 
col=c("black","red","blue","green"), lty=1,lwd=1,cex=0.6, inset=0.02, box.lty=0)