library("tseries")
library("forecast")
library("TTR")
library("TSA")
library("graphics")
library("astsa")
library("car")
library("portes")
library("MASS")
library("readxl")
Load the data from the Excel file.
simulasi<- read_excel("latihan Praktikum 8.xlsx",
sheet = "Sheet4")
head(simulasi)
## # A tibble: 6 x 1
## `Simulated Seasonal TS`
## <dbl>
## 1 97.3
## 2 99.4
## 3 101.
## 4 95.7
## 5 103.
## 6 102.
The data consist of 150 observations; only the first 148 (37 complete years of four seasons) are kept so that every year is complete, and the data are defined as a time series with a seasonal period of four observations per year.
simulasi<-simulasi[1:148,]
season<-rep(1:4,times=37)
year<-rep(1:37,each=4)
simulasi$season<- season
simulasi$year<- year
sim.ts <- ts(simulasi, frequency = 4, start = c(1,1), end = c(37,4))
head(sim.ts)
## Simulated Seasonal TS season year
## 1 Q1 97.323 1 1
## 1 Q2 99.386 2 1
## 1 Q3 100.581 3 1
## 1 Q4 95.671 4 1
## 2 Q1 102.601 1 2
## 2 Q2 101.751 2 2
plot.ts(simulasi$`Simulated Seasonal TS`)
seasonplot(sim.ts[,1],4,main="Simulated Seasonal", ylab="Simulated Seasonal",year.labels = TRUE, col=rainbow(18))
The month plot and box plot of the simulated data are shown below. A seasonal pattern is visible, with similar variance and central tendency within each season, so there is an indication of a seasonal time series.
monthplot(sim.ts[,1],ylab="Simulated Seasonal")
boxplot(sim.ts[,1] ~ cycle(sim.ts[,1]), xlab = "Quarter", ylab = "Simulated Seasonal", main = "Simulated Seasonal - Boxplot")
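As an additional view of the seasonal structure (an addition to the original workflow), a classical decomposition can separate the trend, seasonal, and remainder components; the additive form is only an assumption suggested by the plots above.
# Additional view (assumption: additive seasonality, as suggested by the plots).
dec <- decompose(sim.ts[,1], type = "additive")
plot(dec)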
The homogeneity test gives a p-value greater than 5%, meaning the variance is homogeneous, so no data transformation is needed.
fligner.test(sim.ts[,1]~ year, data=simulasi)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: sim.ts[, 1] by year
## Fligner-Killeen:med chi-squared = 31.816, df = 36, p-value = 0.6679
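As a complementary check on the need for a transformation (not part of the original analysis), forecast's BoxCox.lambda() estimates a Box-Cox parameter; a value close to 1 is consistent with leaving the data untransformed.
# Complementary check: estimated Box-Cox lambda for the simulated series.
BoxCox.lambda(sim.ts[,1])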
Next, the time series is split into training and testing sets. The testing set is defined as the last season of the data (4 periods); the remainder is used for training.
sim.ts<-sim.ts[,1]
tr.simts<- subset(sim.ts,start=1,end=144)
ts.simts <- subset(sim.ts,start=145,end=148)
Identify non-seasonal and seasonal stationarity from the ACF plot. The non-seasonal ACF indicates the data are not stationary because the ACF decays slowly, and the same holds for the seasonal ACF.
acf0<-acf(sim.ts,main="ACF",lag.max=60,xaxt="n")
axis(1, at=0:60/4, labels=0:60)
acf0$lag=acf0$lag*4
acf0.1 <- as.data.frame(cbind(acf0$acf,acf0$lag))
acf0.2 <- acf0.1[which(acf0.1$V2%%4==0),]
barplot(height = acf0.2$V1, names.arg=acf0.2$V2, ylab="ACF", xlab="Lag")
A non-seasonal difference d=1 is applied; the resulting plot is stationary in the mean, which is reinforced by the ADF test (reject the null hypothesis that the data are non-stationary).
diff1.simts=diff(tr.simts)
plot(diff1.simts, main = "Time series plot of simulated seasonal d=1")
adf.test(diff1.simts)
## Warning in adf.test(diff1.simts): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff1.simts
## Dickey-Fuller = -4.6946, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
Re-examining the ACF plot, the non-seasonal part is now stationary, but the seasonal ACF still indicates non-stationarity.
acf0<-acf(diff1.simts,main="ACF",lag.max=60,xaxt="n")
axis(1, at=0:60/4, labels=0:60)
acf0$lag=acf0$lag*4
acf0.1 <- as.data.frame(cbind(acf0$acf,acf0$lag))
acf0.2 <- acf0.1[which(acf0.1$V2%%4==0),]
barplot(height = acf0.2$V1, names.arg=acf0.2$V2, ylab="ACF", xlab="Lag")
A seasonal difference D=1 is applied; the seasonal series is stationary based on the ACF plot. However, the non-seasonal series is not stationary according to the ADF test (fail to reject the null hypothesis that the data are non-stationary).
diff4.simts <- diff(tr.simts,4)
plot(diff4.simts, main = "Time series plot of simulated seasonal D=1 (lag 4)")
acf2<- acf(diff4.simts,lag.max=48,xaxt="n", main="ACF D=1")
axis(1, at=0:48/4, labels=0:48)
acf2$lag <- acf2$lag * 4
acf2.1 <- as.data.frame(cbind(acf2$acf,acf2$lag))
acf2.2 <- acf2.1[which(acf2.1$V2%%4==0),]
barplot(height=acf2.2$V1,names.arg=acf2.2$V2,ylab="ACF", xlab="Lag")
adf.test(diff4.simts)
##
## Augmented Dickey-Fuller Test
##
## data: diff4.simts
## Dickey-Fuller = -3.4037, Lag order = 5, p-value = 0.05685
## alternative hypothesis: stationary
The seasonally differenced data are then differenced again with d=1, so that both the seasonal and non-seasonal series are stationary.
diff14.simts <- diff(diff4.simts ,1)
plot(diff14.simts, main = "Time series plot of simulated seasonal d=1 D=1")
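As a complementary check on the chosen differencing orders (an addition to the original workflow), forecast's ndiffs() and nsdiffs() suggest the number of non-seasonal and seasonal differences; their suggestions may differ from the visual identification above.
# Complementary check: suggested differencing orders for the training series.
ndiffs(tr.simts)   # suggested non-seasonal differencing order d
nsdiffs(tr.simts)  # suggested seasonal differencing order D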
acf2(diff14.simts,48)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## ACF -0.56 0.06 0.30 -0.53 0.25 -0.02 -0.06 0.10 0.03 -0.05 0.01 0.04
## PACF -0.56 -0.37 0.22 -0.35 -0.37 -0.28 0.05 -0.17 -0.09 -0.06 0.04 0.03
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF -0.09 -0.02 0.08 -0.08 0.03 0.11 -0.11 0.08 -0.06 0.03 -0.04 0.02
## PACF 0.07 -0.15 -0.05 -0.02 -0.08 -0.07 -0.02 0.04 -0.13 0.09 0.04 0.09
## [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF 0.02 -0.09 0.15 -0.1 -0.01 0.11 -0.18 0.08 0.06 -0.12 0.14 -0.04
## PACF 0.00 0.01 0.08 0.1 -0.06 0.01 -0.06 -0.07 -0.04 -0.01 0.03 0.02
## [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF -0.07 0.06 -0.05 0.04 0.03 -0.03 0.01 -0.03 -0.03 0.08 -0.11 0.10
## PACF -0.02 -0.08 -0.01 0.08 0.14 -0.04 0.00 0.00 -0.10 0.00 -0.13 -0.09
eacf(diff14.simts)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o x x x o o o o o o o o o
## 1 x o x x x o o o o o o o o o
## 2 x o x x o o o o o o o o o o
## 3 x o x x o o x o o o o o o o
## 4 x x o x o o x o o o o o o o
## 5 x x o x x o o o o o o o o o
## 6 x x x x o o x o o o o o o o
## 7 x x x x o o x o o o o o o o
#non-seasonal candidates
#ARIMA(0,1,1);ARIMA(6,1,0);ARIMA(0,1,5)
#seasonal candidates
#ARIMA(0,1,1)4;ARIMA(2,1,0)4
#Tentative models
#ARIMA(0,1,1)xARIMA(0,1,1)4
#ARIMA(6,1,0)xARIMA(0,1,1)4
#ARIMA(0,1,5)xARIMA(0,1,1)4
#ARIMA(0,1,1)xARIMA(2,1,0)4
#ARIMA(6,1,0)xARIMA(2,1,0)4
#ARIMA(0,1,5)xARIMA(2,1,0)4
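As a cross-check on the manually identified candidates (an addition, not part of the original identification), forecast::auto.arima can search for a SARIMA specification with the differencing orders fixed at d=1 and D=1; its result is only a reference point, not a replacement for the tentative models above.
# Cross-check: automatic SARIMA search with d=1 and D=1 fixed.
auto_fit <- auto.arima(tr.simts, d = 1, D = 1, seasonal = TRUE, stepwise = FALSE, approximation = FALSE)
summary(auto_fit)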
model1 <- Arima(tr.simts,order=c(0,1,1),seasonal=c(0,1,1))
model2 <- Arima(tr.simts,order=c(6,1,0),seasonal=c(0,1,1))
model3 <- Arima(tr.simts,order=c(0,1,5),seasonal=c(0,1,1))
model4 <- Arima(tr.simts,order=c(0,1,1),seasonal=c(2,1,0))
model5 <- Arima(tr.simts,order=c(6,1,0),seasonal=c(2,1,0))
model6 <- Arima(tr.simts,order=c(0,1,5),seasonal=c(2,1,0))
summary(model1)
## Series: tr.simts
## ARIMA(0,1,1)(0,1,1)[4]
##
## Coefficients:
## ma1 sma1
## -0.7538 -0.6134
## s.e. 0.0522 0.0626
##
## sigma^2 estimated as 0.9245: log likelihood=-192.33
## AIC=390.66 AICc=390.83 BIC=399.46
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.004204648 0.937827 0.7369922 0.009181691 0.4670374 0.1722105
## ACF1
## Training set -0.1047938
summary(model2)
## Series: tr.simts
## ARIMA(6,1,0)(0,1,1)[4]
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 sma1
## -0.8672 -0.4709 -0.2229 -0.5040 -0.4354 -0.1941 -0.3927
## s.e. 0.0830 0.1060 0.1064 0.1286 0.1293 0.0907 0.1221
##
## sigma^2 estimated as 0.8756: log likelihood=-186.12
## AIC=388.25 AICc=389.35 BIC=411.72
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.002531851 0.8959229 0.693286 0.002289909 0.4382708 0.1619978
## ACF1
## Training set 0.01005648
summary(model3)
## Series: tr.simts
## ARIMA(0,1,5)(0,1,1)[4]
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 sma1
## -0.8769 0.2349 -0.0119 -0.3993 0.2663 -0.3832
## s.e. 0.0841 0.1121 0.1152 0.2004 0.1391 0.1781
##
## sigma^2 estimated as 0.8788: log likelihood=-186.91
## AIC=387.83 AICc=388.68 BIC=408.37
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0003916168 0.9009396 0.6954765 0.005049586 0.4381382 0.1625097
## ACF1
## Training set 0.02105412
summary(model4)
## Series: tr.simts
## ARIMA(0,1,1)(2,1,0)[4]
##
## Coefficients:
## ma1 sar1 sar2
## -0.7469 -0.7062 -0.2203
## s.e. 0.0569 0.0877 0.0881
##
## sigma^2 estimated as 0.9017: log likelihood=-190.14
## AIC=388.28 AICc=388.58 BIC=400.02
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.003670576 0.9228317 0.7363059 0.003761538 0.4634708 0.1720502
## ACF1
## Training set -0.1235067
summary(model5)
## Series: tr.simts
## ARIMA(6,1,0)(2,1,0)[4]
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 sar1 sar2
## -0.8792 -0.4920 -0.2667 -0.3138 -0.2578 -0.1397 -0.6144 -0.1539
## s.e. 0.0855 0.1145 0.1251 0.3293 0.3035 0.1160 0.3618 0.1660
##
## sigma^2 estimated as 0.8834: log likelihood=-186.2
## AIC=390.39 AICc=391.79 BIC=416.8
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.003414938 0.8964725 0.7005247 0.002025855 0.4423934 0.1636893
## ACF1
## Training set 0.02215841
summary(model6)
## Series: tr.simts
## ARIMA(0,1,5)(2,1,0)[4]
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 sar1 sar2
## -0.8756 0.2553 -0.0447 -0.3698 0.2395 -0.4223 -0.0442
## s.e. 0.0845 0.1267 0.1352 0.3218 0.2247 0.2915 0.1862
##
## sigma^2 estimated as 0.8756: log likelihood=-186.15
## AIC=388.3 AICc=389.41 BIC=411.77
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0005455563 0.8959119 0.7032663 0.004935974 0.4430293 0.1643299
## ACF1
## Training set 0.008652251
The models whose parameter estimates are all significant (see the t-tests below) are model1, model2, and model4; among these, model2 is the best model because it has the smallest AIC.
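A minimal sketch for collecting the information criteria of the six candidates in one table; the AIC, AICc, and BIC values are taken directly from the fitted Arima objects above.
# Collect the information criteria of the six candidate models.
models <- list(model1 = model1, model2 = model2, model3 = model3,
               model4 = model4, model5 = model5, model6 = model6)
ic_table <- t(sapply(models, function(m) c(AIC = m$aic, AICc = m$aicc, BIC = m$bic)))
round(ic_table, 2)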
# Print the coefficients of a fitted Arima object together with their
# standard errors, t statistics, and two-sided p-values.
printstatarima <- function(x, digits = 4, se = TRUE) {
  if (length(x$coef) > 0) {
    cat("\nCoefficients:\n")
    coef <- round(x$coef, digits = digits)
    if (se && nrow(x$var.coef)) {
      ses <- rep(0, length(coef))
      ses[x$mask] <- round(sqrt(diag(x$var.coef)), digits = digits)
      coef <- matrix(coef, 1, dimnames = list(NULL, names(coef)))
      coef <- rbind(coef, s.e. = ses)
      statt <- coef[1, ] / ses
      pval <- 2 * pt(abs(statt), df = length(x$residuals) - 1, lower.tail = FALSE)
      coef <- rbind(coef, t = round(statt, digits = digits), sign. = round(pval, digits = digits))
      coef <- t(coef)
    }
    print.default(coef, print.gap = 2)
  }
}
printstatarima(model1)
##
## Coefficients:
## s.e. t sign.
## ma1 -0.7538 0.0522 -14.4406 0
## sma1 -0.6134 0.0626 -9.7987 0
printstatarima(model2)
##
## Coefficients:
## s.e. t sign.
## ar1 -0.8672 0.0830 -10.4482 0.0000
## ar2 -0.4709 0.1060 -4.4425 0.0000
## ar3 -0.2229 0.1064 -2.0949 0.0379
## ar4 -0.5040 0.1286 -3.9191 0.0001
## ar5 -0.4354 0.1293 -3.3674 0.0010
## ar6 -0.1941 0.0907 -2.1400 0.0340
## sma1 -0.3927 0.1221 -3.2162 0.0016
printstatarima(model3)
##
## Coefficients:
## s.e. t sign.
## ma1 -0.8769 0.0841 -10.4269 0.0000
## ma2 0.2349 0.1121 2.0955 0.0379
## ma3 -0.0119 0.1152 -0.1033 0.9179
## ma4 -0.3993 0.2004 -1.9925 0.0482
## ma5 0.2663 0.1391 1.9145 0.0576
## sma1 -0.3832 0.1781 -2.1516 0.0331
printstatarima(model4)
##
## Coefficients:
## s.e. t sign.
## ma1 -0.7469 0.0569 -13.1265 0.0000
## sar1 -0.7062 0.0877 -8.0525 0.0000
## sar2 -0.2203 0.0881 -2.5006 0.0135
printstatarima(model5)
##
## Coefficients:
## s.e. t sign.
## ar1 -0.8792 0.0855 -10.2830 0.0000
## ar2 -0.4920 0.1145 -4.2969 0.0000
## ar3 -0.2667 0.1251 -2.1319 0.0347
## ar4 -0.3138 0.3293 -0.9529 0.3422
## ar5 -0.2578 0.3035 -0.8494 0.3971
## ar6 -0.1397 0.1160 -1.2043 0.2305
## sar1 -0.6144 0.3618 -1.6982 0.0916
## sar2 -0.1539 0.1660 -0.9271 0.3554
printstatarima(model6)
##
## Coefficients:
## s.e. t sign.
## ma1 -0.8756 0.0845 -10.3621 0.0000
## ma2 0.2553 0.1267 2.0150 0.0458
## ma3 -0.0447 0.1352 -0.3306 0.7414
## ma4 -0.3698 0.3218 -1.1492 0.2524
## ma5 0.2395 0.2247 1.0659 0.2883
## sar1 -0.4223 0.2915 -1.4487 0.1496
## sar2 -0.0442 0.1862 -0.2374 0.8127
Based on the plots, the residuals are normally distributed; the residuals are random and show no autocorrelation according to the residual plots and the Ljung-Box statistics at lags 4, 8, and so on. The Jarque-Bera test also indicates normally distributed residuals (fail to reject the null hypothesis of normality).
checkresiduals(model2, lag = 15)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(6,1,0)(0,1,1)[4]
## Q* = 7.0932, df = 8, p-value = 0.5266
##
## Model df: 7. Total lags used: 15
tsdisplay(residuals(model2), lag.max=15, main='ARIMA(6,1,0)(0,1,1)4 Model Residuals')
ljbtest <- LjungBox(residuals(model2),lags=seq(4,36,4),order=0,season=1,squared.residuals=FALSE)
ljbtest
## lags statistic df p-value
## 4 0.424139 4 0.9804533
## 8 1.263397 8 0.9959752
## 12 2.544854 12 0.9979929
## 16 7.130284 16 0.9706674
## 20 12.918019 20 0.8808737
## 24 14.896386 24 0.9237539
## 28 16.326364 28 0.9607360
## 32 17.746817 32 0.9803220
## 36 20.238792 36 0.9841336
jarque.bera.test(residuals(model2))
##
## Jarque Bera Test
##
## data: residuals(model2)
## X-squared = 0.10512, df = 2, p-value = 0.9488
ramalan_arima = forecast(model2,4)
accuracy(ramalan_arima,ts.simts)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.002531851 0.8959229 0.693286 0.002289909 0.4382708 0.1619978
## Test set 1.095511822 1.4505687 1.095512 0.434157991 0.4341580 0.2559846
## ACF1 Theil's U
## Training set 0.01005648 NA
## Test set 0.16959753 0.3536136
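For clarity, a minimal sketch of how the test-set RMSE and MAPE reported by accuracy() are obtained from the point forecasts and the held-out observations (using ramalan_arima and ts.simts as defined above).
# Manual test-set error measures, matching the accuracy() output above.
err <- as.numeric(ts.simts) - as.numeric(ramalan_arima$mean)
sqrt(mean(err^2))                            # test-set RMSE
mean(abs(err) / as.numeric(ts.simts)) * 100  # test-set MAPE (%)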
plot(ramalan_arima)
forecast_arima <- cbind(ramalan_arima$mean,ramalan_arima$lower,ramalan_arima$upper)
forecast_arima <- cbind(simulasi[145:148,1:3],(forecast_arima))
forecast_arima <- as.data.frame(forecast_arima)
(forecast_arima)
## Simulated Seasonal TS season year ramalan_arima$mean ramalan_arima$lower.80%
## 1 249.887 1 37 249.6523 248.4531
## 2 256.824 2 37 256.4269 255.2172
## 3 254.345 3 37 253.2352 251.9276
## 4 251.052 4 37 248.4115 247.0361
## ramalan_arima$lower.95% ramalan_arima$upper.80% ramalan_arima$upper.95%
## 1 247.8182 250.8515 251.4864
## 2 254.5768 257.6367 258.2771
## 3 251.2354 254.5429 255.2351
## 4 246.3080 249.7869 250.5150
#plot of the forecasts per quarter of the last year
plot(forecast_arima[,2],forecast_arima[,1], xlab='Quarter', ylab='Simulated Seasonal', type='l', col='black', lwd=1, ylim=c(246,259))
lines(forecast_arima[,2],forecast_arima[,4], col='red', lwd=1)
lines(forecast_arima[,2],forecast_arima[,6], col='blue', lwd=1)
lines(forecast_arima[,2],forecast_arima[,8], col='green',lwd=1)
legend("topleft", c("Actual","Forecast","Lower 95%","Upper 95%"),
col=c("black","red","blue","green"), lty=1,lwd=1,cex=0.6, inset=0.02, box.lty=0)