data <- read_xlsx("D:/SEMESTER 5/METODE PERAMALAN DERET WAKTU/Kelompok 15/Data Kelompok 15.xlsx")
data <- data[,3]
str(data)
## tibble [115 x 1] (S3: tbl_df/tbl/data.frame)
## $ Premium: chr [1:115] "7797.63" "7773.26" "7576.27" "7420.72" ...
data$Premium <- as.numeric(data$Premium)
data.ts <- ts(data, start = c(2013,1), frequency = 12)
# Splitting
train <- data[1:88,]
test <- data[89:115,]
train.ts <- ts(train, start = 2013, frequency = 12)
test.ts <- ts(test, start = c(2020,5), frequency = 12)
ts.plot(data.ts, type = "l", col = "blue", lwd = 1.5)
lines(train.ts, col = "blue", lwd = 2)
lines(test.ts, col = "red", lwd = 2)
legend(2012.7,10500,c("Data Training","Data Testing"),
lty = 1, col=c("blue","red"), cex = 0.8)
# ACF PACF
acf(data.ts[,1], lag.max = 35)
pacf(data.ts[,1], lag.max = 35)
# Uji Statistik
adf.test(data.ts[,1])
##
## Augmented Dickey-Fuller Test
##
## data: data.ts[, 1]
## Dickey-Fuller = -2.1214, Lag order = 4, p-value = 0.5262
## alternative hypothesis: stationary
ACF tail-off slowly, p-value > 0.05, belum stasioner
data.diff <- diff(data.ts, differences = 1)
# cek Stasioner Eksploratif
plot(data.diff, type = "l")
# Uji Statistik
adf.test(data.diff)
## Warning in adf.test(data.diff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: data.diff
## Dickey-Fuller = -5.7563, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
Dilihat dari grafik sudah stasioner dalam nilai tengah, p-value < 0.05, sudah stasioner
acf(data.diff, lag.max = 48)
pacf(data.diff, lag.max = 48)
eacf(data.diff)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o x x o o o o o o x x x o
## 1 x o x x o o o o o o o x x o
## 2 x o o x o o o o o o o o o o
## 3 x o o x o o o o o o o o o o
## 4 x o x x o o o o o o o o o o
## 5 o x x o o o o o o o o o o o
## 6 x x x o o o o o o o o o o o
## 7 o o x o o o o x o o o o o o
# Model 1 ARIMA(0,1,2)
model1 <- Arima(data.diff, order = c(0,0,2))
# Model 2 ARIMA(0,1,4)
model2 <- Arima(data.diff, order = c(0,0,4))
# Model 3 <- ARIMA(2,1,1)
model3 <- Arima(data.diff, order = c(2,0,1))
acf.diff <- acf(data.diff, lag.max = 120, xaxt = "n", main = "ACF diff")
acf.diff$lag <- acf.diff$lag * 12
acf3.Zt.1 <- as.data.frame(cbind(acf.diff$acf,acf.diff$lag))
acf3.Zt.2 <- acf3.Zt.1[which(acf3.Zt.1$V2%%12==0),]
barplot(height = acf3.Zt.2$V1, names.arg=acf3.Zt.2$V2, ylab="ACF", xlab="Lag")
pacf3.Zt <- pacf(data.diff, lag.max = 120, xaxt = "n", main = "PACF diff")
pacf3.Zt$lag <- pacf3.Zt$lag * 12
pacf3.Zt.1 <- as.data.frame(cbind(pacf3.Zt$acf,pacf3.Zt$lag))
pacf3.Zt.2 <- pacf3.Zt.1[which(pacf3.Zt.1$V2%%12==0),]
barplot(height = pacf3.Zt.2$V1, names.arg=pacf3.Zt.2$V2, ylab="PACF", xlab="Lag")
Dilihat dari correlogram diatas ACF tail-off dan PACF cut-off di lag ke 3, maka daimbil komponen musiman ARIMA(3,1,0)12
# Model 1 ARIMA(0,1,2)x(3,1,0)12
model1 <- Arima(train.ts, order = c(0,1,2), seasonal = c(3,1,0))
summary(model1)
## Series: train.ts
## ARIMA(0,1,2)(3,1,0)[12]
##
## Coefficients:
## ma1 ma2 sar1 sar2 sar3
## 0.1438 -0.0696 -0.9670 -0.8758 -0.2879
## s.e. 0.1236 0.1492 0.1283 0.1380 0.1453
##
## sigma^2 = 18041: log likelihood = -481.04
## AIC=974.08 AICc=975.32 BIC=987.99
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -12.50184 119.7953 83.67063 -0.1343795 0.8988066 0.2082282
## ACF1
## Training set -0.01633883
# Model 2 ARIMA(0,1,4)x(3,1,0)12
model2 <- Arima(train.ts, order = c(0,1,4), seasonal = c(3,1,0))
summary(model2)
## Series: train.ts
## ARIMA(0,1,4)(3,1,0)[12]
##
## Coefficients:
## ma1 ma2 ma3 ma4 sar1 sar2 sar3
## 0.1929 0.0421 -0.1015 -0.2435 -0.8960 -0.7892 -0.2394
## s.e. 0.1168 0.1252 0.1071 0.1255 0.1379 0.1502 0.1539
##
## sigma^2 = 18370: log likelihood = -479.19
## AIC=974.37 AICc=976.55 BIC=992.91
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -13.72586 119.1422 82.82409 -0.1464666 0.8886819 0.2061214
## ACF1
## Training set -0.04649526
# Model 3 ARIMA(2,1,1)x(3,1,0)12
model3 <- Arima(train.ts, order = c(2,1,1), seasonal = c(3,1,0))
summary(model3)
## Series: train.ts
## ARIMA(2,1,1)(3,1,0)[12]
##
## Coefficients:
## ar1 ar2 ma1 sar1 sar2 sar3
## 0.7095 -0.1780 -0.5599 -0.9490 -0.8480 -0.2801
## s.e. 0.3499 0.1271 0.3308 0.1308 0.1458 0.1459
##
## sigma^2 = 18444: log likelihood = -480.75
## AIC=975.5 AICc=977.17 BIC=991.72
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -14.31528 120.2582 83.79554 -0.1535447 0.9003225 0.208539
## ACF1
## Training set -0.02818101
coeftest(model1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 0.143770 0.123648 1.1627 0.24494
## ma2 -0.069603 0.149223 -0.4664 0.64090
## sar1 -0.967013 0.128299 -7.5372 4.803e-14 ***
## sar2 -0.875813 0.137969 -6.3479 2.183e-10 ***
## sar3 -0.287911 0.145310 -1.9814 0.04755 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(model2)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 0.192886 0.116796 1.6515 0.09864 .
## ma2 0.042062 0.125218 0.3359 0.73694
## ma3 -0.101465 0.107079 -0.9476 0.34335
## ma4 -0.243477 0.125542 -1.9394 0.05245 .
## sar1 -0.896035 0.137850 -6.5001 8.029e-11 ***
## sar2 -0.789172 0.150244 -5.2526 1.500e-07 ***
## sar3 -0.239403 0.153908 -1.5555 0.11983
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(model3)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.70945 0.34995 2.0273 0.04263 *
## ar2 -0.17795 0.12706 -1.4005 0.16136
## ma1 -0.55993 0.33080 -1.6927 0.09052 .
## sar1 -0.94895 0.13085 -7.2523 4.097e-13 ***
## sar2 -0.84796 0.14583 -5.8146 6.079e-09 ***
## sar3 -0.28015 0.14585 -1.9207 0.05477 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Dilihat dari signifikansi peubah dan perbandingan error dan AIC, diambil model terbaik SARIMA(2,1,1)(3,1,0) yang kemudian akan di overfit
over1 <- Arima(train.ts, order = c(2,1,1), seasonal = c(4,1,0), include.drift = F, method = "ML")
summary(over1)
## Series: train.ts
## ARIMA(2,1,1)(4,1,0)[12]
##
## Coefficients:
## ar1 ar2 ma1 sar1 sar2 sar3 sar4
## 0.7468 -0.2441 -0.5268 -1.0190 -1.0964 -0.6087 -0.3737
## s.e. 0.3109 0.1307 0.2932 0.1363 0.1952 0.2164 0.1910
##
## sigma^2 = 16754: log likelihood = -479.26
## AIC=974.52 AICc=976.7 BIC=993.06
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -18.91001 113.7803 80.48889 -0.2003191 0.8649459 0.2003099
## ACF1
## Training set -0.04283316
over2 <- Arima(train.ts, order = c(2,1,1), seasonal = c(3,1,1), include.drift = F)
summary(over2)
## Series: train.ts
## ARIMA(2,1,1)(3,1,1)[12]
##
## Coefficients:
## ar1 ar2 ma1 sar1 sar2 sar3 sma1
## 0.7288 -0.2179 -0.5258 -0.2679 -0.3240 0.1002 -0.9997
## s.e. 0.3395 0.1273 0.3236 0.1674 0.1657 0.1968 0.3395
##
## sigma^2 = 13956: log likelihood = -478.69
## AIC=973.37 AICc=975.56 BIC=991.91
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -16.85889 103.8454 72.05311 -0.1788212 0.7755012 0.1793161
## ACF1
## Training set -0.03992707
coeftest(over1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.74678 0.31089 2.4020 0.016304 *
## ar2 -0.24414 0.13071 -1.8679 0.061783 .
## ma1 -0.52685 0.29321 -1.7968 0.072366 .
## sar1 -1.01896 0.13632 -7.4750 7.722e-14 ***
## sar2 -1.09642 0.19519 -5.6171 1.942e-08 ***
## sar3 -0.60873 0.21638 -2.8133 0.004904 **
## sar4 -0.37375 0.19102 -1.9566 0.050392 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(over2)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.72885 0.33953 2.1467 0.031821 *
## ar2 -0.21790 0.12728 -1.7119 0.086916 .
## ma1 -0.52581 0.32358 -1.6250 0.104163
## sar1 -0.26790 0.16736 -1.6007 0.109440
## sar2 -0.32403 0.16575 -1.9550 0.050583 .
## sar3 0.10023 0.19684 0.5092 0.610609
## sma1 -0.99974 0.33947 -2.9450 0.003229 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model Overfit 2 memiliki AIC dan MAPE yang sedikit lebih rendah namun tidak semua peubahnya signifikan, maka diambil model overfit 1 sebagai model terbaik yang selanjutnya akan di uji diagnostik yaitu SARIMA(2,1,1)(4,1,0)
checkresiduals(over1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,1)(4,1,0)[12]
## Q* = 20.095, df = 11, p-value = 0.04406
##
## Model df: 7. Total lags used: 18
library(tseries)
jarque.bera.test(residuals(over1))
##
## Jarque Bera Test
##
## data: residuals(over1)
## X-squared = 3.0239, df = 2, p-value = 0.2205