Library

Data

data <- read_xlsx("D:/SEMESTER 5/METODE PERAMALAN DERET WAKTU/Kelompok 15/Data Kelompok 15.xlsx")
data <- data[,3]
str(data)
## tibble [115 x 1] (S3: tbl_df/tbl/data.frame)
##  $ Premium: chr [1:115] "7797.63" "7773.26" "7576.27" "7420.72" ...
data$Premium <- as.numeric(data$Premium)

data.ts <- ts(data, start = c(2013,1), frequency = 12)

# Splitting
train <- data[1:88,]
test <- data[89:115,]
train.ts <- ts(train, start = 2013, frequency = 12)
test.ts <- ts(test, start = c(2020,5), frequency = 12)

Eksplorasi

ts.plot(data.ts, type = "l", col = "blue", lwd = 1.5)
lines(train.ts, col = "blue", lwd = 2)
lines(test.ts, col = "red", lwd = 2)
legend(2012.7,10500,c("Data Training","Data Testing"), 
       lty = 1, col=c("blue","red"), cex = 0.8)

Cek Stasioner

# ACF PACF
acf(data.ts[,1], lag.max = 35)

pacf(data.ts[,1], lag.max = 35)

# Uji Statistik
adf.test(data.ts[,1])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data.ts[, 1]
## Dickey-Fuller = -2.1214, Lag order = 4, p-value = 0.5262
## alternative hypothesis: stationary

ACF tail-off slowly, p-value > 0.05, belum stasioner

Differencing

data.diff <- diff(data.ts, differences = 1)

# cek  Stasioner Eksploratif
plot(data.diff, type = "l")

# Uji Statistik
adf.test(data.diff)
## Warning in adf.test(data.diff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data.diff
## Dickey-Fuller = -5.7563, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

Dilihat dari grafik sudah stasioner dalam nilai tengah, p-value < 0.05, sudah stasioner

Model Tentatif (Komponen non-musiman)

acf(data.diff, lag.max = 48)

pacf(data.diff, lag.max = 48)

eacf(data.diff)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o x x o o o o o o x  x  x  o 
## 1 x o x x o o o o o o o  x  x  o 
## 2 x o o x o o o o o o o  o  o  o 
## 3 x o o x o o o o o o o  o  o  o 
## 4 x o x x o o o o o o o  o  o  o 
## 5 o x x o o o o o o o o  o  o  o 
## 6 x x x o o o o o o o o  o  o  o 
## 7 o o x o o o o x o o o  o  o  o
# Model 1 ARIMA(0,1,2)
model1 <- Arima(data.diff, order = c(0,0,2))

# Model 2 ARIMA(0,1,4)
model2 <- Arima(data.diff, order = c(0,0,4))

# Model 3 <- ARIMA(2,1,1)
model3 <- Arima(data.diff, order = c(2,0,1))

Model Tentatif (Komponen Musiman)

acf.diff <- acf(data.diff, lag.max = 120, xaxt = "n", main = "ACF diff")

acf.diff$lag <- acf.diff$lag * 12
acf3.Zt.1 <- as.data.frame(cbind(acf.diff$acf,acf.diff$lag))
acf3.Zt.2 <- acf3.Zt.1[which(acf3.Zt.1$V2%%12==0),]
barplot(height = acf3.Zt.2$V1, names.arg=acf3.Zt.2$V2, ylab="ACF", xlab="Lag")

pacf3.Zt <- pacf(data.diff, lag.max = 120, xaxt = "n", main = "PACF diff")

pacf3.Zt$lag <- pacf3.Zt$lag * 12
pacf3.Zt.1 <- as.data.frame(cbind(pacf3.Zt$acf,pacf3.Zt$lag))
pacf3.Zt.2 <- pacf3.Zt.1[which(pacf3.Zt.1$V2%%12==0),]
barplot(height = pacf3.Zt.2$V1, names.arg=pacf3.Zt.2$V2, ylab="PACF", xlab="Lag")

Dilihat dari correlogram diatas ACF tail-off dan PACF cut-off di lag ke 3, maka daimbil komponen musiman ARIMA(3,1,0)12

Pendugaan Model SARIMA

# Model 1 ARIMA(0,1,2)x(3,1,0)12
model1 <- Arima(train.ts, order = c(0,1,2), seasonal = c(3,1,0))
summary(model1)
## Series: train.ts 
## ARIMA(0,1,2)(3,1,0)[12] 
## 
## Coefficients:
##          ma1      ma2     sar1     sar2     sar3
##       0.1438  -0.0696  -0.9670  -0.8758  -0.2879
## s.e.  0.1236   0.1492   0.1283   0.1380   0.1453
## 
## sigma^2 = 18041:  log likelihood = -481.04
## AIC=974.08   AICc=975.32   BIC=987.99
## 
## Training set error measures:
##                     ME     RMSE      MAE        MPE      MAPE      MASE
## Training set -12.50184 119.7953 83.67063 -0.1343795 0.8988066 0.2082282
##                     ACF1
## Training set -0.01633883
# Model 2 ARIMA(0,1,4)x(3,1,0)12
model2 <- Arima(train.ts, order = c(0,1,4), seasonal = c(3,1,0))
summary(model2)
## Series: train.ts 
## ARIMA(0,1,4)(3,1,0)[12] 
## 
## Coefficients:
##          ma1     ma2      ma3      ma4     sar1     sar2     sar3
##       0.1929  0.0421  -0.1015  -0.2435  -0.8960  -0.7892  -0.2394
## s.e.  0.1168  0.1252   0.1071   0.1255   0.1379   0.1502   0.1539
## 
## sigma^2 = 18370:  log likelihood = -479.19
## AIC=974.37   AICc=976.55   BIC=992.91
## 
## Training set error measures:
##                     ME     RMSE      MAE        MPE      MAPE      MASE
## Training set -13.72586 119.1422 82.82409 -0.1464666 0.8886819 0.2061214
##                     ACF1
## Training set -0.04649526
# Model 3 ARIMA(2,1,1)x(3,1,0)12
model3 <- Arima(train.ts, order = c(2,1,1), seasonal = c(3,1,0))
summary(model3)
## Series: train.ts 
## ARIMA(2,1,1)(3,1,0)[12] 
## 
## Coefficients:
##          ar1      ar2      ma1     sar1     sar2     sar3
##       0.7095  -0.1780  -0.5599  -0.9490  -0.8480  -0.2801
## s.e.  0.3499   0.1271   0.3308   0.1308   0.1458   0.1459
## 
## sigma^2 = 18444:  log likelihood = -480.75
## AIC=975.5   AICc=977.17   BIC=991.72
## 
## Training set error measures:
##                     ME     RMSE      MAE        MPE      MAPE     MASE
## Training set -14.31528 120.2582 83.79554 -0.1535447 0.9003225 0.208539
##                     ACF1
## Training set -0.02818101

Uji Signifikansi

coeftest(model1)
## 
## z test of coefficients:
## 
##       Estimate Std. Error z value  Pr(>|z|)    
## ma1   0.143770   0.123648  1.1627   0.24494    
## ma2  -0.069603   0.149223 -0.4664   0.64090    
## sar1 -0.967013   0.128299 -7.5372 4.803e-14 ***
## sar2 -0.875813   0.137969 -6.3479 2.183e-10 ***
## sar3 -0.287911   0.145310 -1.9814   0.04755 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(model2)
## 
## z test of coefficients:
## 
##       Estimate Std. Error z value  Pr(>|z|)    
## ma1   0.192886   0.116796  1.6515   0.09864 .  
## ma2   0.042062   0.125218  0.3359   0.73694    
## ma3  -0.101465   0.107079 -0.9476   0.34335    
## ma4  -0.243477   0.125542 -1.9394   0.05245 .  
## sar1 -0.896035   0.137850 -6.5001 8.029e-11 ***
## sar2 -0.789172   0.150244 -5.2526 1.500e-07 ***
## sar3 -0.239403   0.153908 -1.5555   0.11983    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(model3)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ar1   0.70945    0.34995  2.0273   0.04263 *  
## ar2  -0.17795    0.12706 -1.4005   0.16136    
## ma1  -0.55993    0.33080 -1.6927   0.09052 .  
## sar1 -0.94895    0.13085 -7.2523 4.097e-13 ***
## sar2 -0.84796    0.14583 -5.8146 6.079e-09 ***
## sar3 -0.28015    0.14585 -1.9207   0.05477 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Dilihat dari signifikansi peubah dan perbandingan error dan AIC, diambil model terbaik SARIMA(2,1,1)(3,1,0) yang kemudian akan di overfit

Overfitting

over1 <- Arima(train.ts, order = c(2,1,1), seasonal = c(4,1,0), include.drift = F, method = "ML")
summary(over1)
## Series: train.ts 
## ARIMA(2,1,1)(4,1,0)[12] 
## 
## Coefficients:
##          ar1      ar2      ma1     sar1     sar2     sar3     sar4
##       0.7468  -0.2441  -0.5268  -1.0190  -1.0964  -0.6087  -0.3737
## s.e.  0.3109   0.1307   0.2932   0.1363   0.1952   0.2164   0.1910
## 
## sigma^2 = 16754:  log likelihood = -479.26
## AIC=974.52   AICc=976.7   BIC=993.06
## 
## Training set error measures:
##                     ME     RMSE      MAE        MPE      MAPE      MASE
## Training set -18.91001 113.7803 80.48889 -0.2003191 0.8649459 0.2003099
##                     ACF1
## Training set -0.04283316
over2 <- Arima(train.ts, order = c(2,1,1), seasonal = c(3,1,1), include.drift = F)
summary(over2)
## Series: train.ts 
## ARIMA(2,1,1)(3,1,1)[12] 
## 
## Coefficients:
##          ar1      ar2      ma1     sar1     sar2    sar3     sma1
##       0.7288  -0.2179  -0.5258  -0.2679  -0.3240  0.1002  -0.9997
## s.e.  0.3395   0.1273   0.3236   0.1674   0.1657  0.1968   0.3395
## 
## sigma^2 = 13956:  log likelihood = -478.69
## AIC=973.37   AICc=975.56   BIC=991.91
## 
## Training set error measures:
##                     ME     RMSE      MAE        MPE      MAPE      MASE
## Training set -16.85889 103.8454 72.05311 -0.1788212 0.7755012 0.1793161
##                     ACF1
## Training set -0.03992707
coeftest(over1)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ar1   0.74678    0.31089  2.4020  0.016304 *  
## ar2  -0.24414    0.13071 -1.8679  0.061783 .  
## ma1  -0.52685    0.29321 -1.7968  0.072366 .  
## sar1 -1.01896    0.13632 -7.4750 7.722e-14 ***
## sar2 -1.09642    0.19519 -5.6171 1.942e-08 ***
## sar3 -0.60873    0.21638 -2.8133  0.004904 ** 
## sar4 -0.37375    0.19102 -1.9566  0.050392 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(over2)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)   
## ar1   0.72885    0.33953  2.1467 0.031821 * 
## ar2  -0.21790    0.12728 -1.7119 0.086916 . 
## ma1  -0.52581    0.32358 -1.6250 0.104163   
## sar1 -0.26790    0.16736 -1.6007 0.109440   
## sar2 -0.32403    0.16575 -1.9550 0.050583 . 
## sar3  0.10023    0.19684  0.5092 0.610609   
## sma1 -0.99974    0.33947 -2.9450 0.003229 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model Overfit 2 memiliki AIC dan MAPE yang sedikit lebih rendah namun tidak semua peubahnya signifikan, maka diambil model overfit 1 sebagai model terbaik yang selanjutnya akan di uji diagnostik yaitu SARIMA(2,1,1)(4,1,0)

Uji diganostik model terbaik

checkresiduals(over1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,1)(4,1,0)[12]
## Q* = 20.095, df = 11, p-value = 0.04406
## 
## Model df: 7.   Total lags used: 18
library(tseries)
jarque.bera.test(residuals(over1))
## 
##  Jarque Bera Test
## 
## data:  residuals(over1)
## X-squared = 3.0239, df = 2, p-value = 0.2205