Project EAS TDS I
LOAD PACKAGE
## Warning: package 'forecast' was built under R version 4.5.3
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Warning: package 'readr' was built under R version 4.5.3
IMPORT DATA
## Rows: 144 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Month
## dbl (1): #Passengers
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 6 × 2
## Month `#Passengers`
## <chr> <dbl>
## 1 1949-01 112
## 2 1949-02 118
## 3 1949-03 132
## 4 1949-04 129
## 5 1949-05 121
## 6 1949-06 135
## spc_tbl_ [144 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Month : chr [1:144] "1949-01" "1949-02" "1949-03" "1949-04" ...
## $ #Passengers: num [1:144] 112 118 132 129 121 135 148 148 136 119 ...
## - attr(*, "spec")=
## .. cols(
## .. Month = col_character(),
## .. `#Passengers` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
## Month #Passengers
## Length:144 Min. :104.0
## Class :character 1st Qu.:180.0
## Mode :character Median :265.5
## Mean :280.3
## 3rd Qu.:360.5
## Max. :622.0
PLOT DATA
plot(
ts_data,
main = "Jumlah Penumpang Pesawat Udara",
ylab = "Jumlah Penumpang",
xlab = "Tahun",
col = "blue"
)# Training set
train.Yt.ts <- ts_data[1:120]
# Testing set
test.Yt.ts <- ts_data[121:144]
ts.plot(
train.Yt.ts,
col = "blue",
ylab = "Jumlah Penumpang",
xlab = "Tahun"
)
title(
main = "Plot Data Training AirPassengers"
)
points(
train.Yt.ts,
pch = 20,
col = "blue"
)ts.plot(
test.Yt.ts,
col = "blue",
ylab = "Jumlah Penumpang",
xlab = "Tahun"
)
title(
main = "Plot Data Testing AirPassengers"
)
points(
test.Yt.ts,
pch = 20,
col = "blue"
)
#MOdel ARIMA # Cek kestasioneran data #
=========================================================
adf_train <- adf.test(
train.Yt.ts,
alternative = "stationary",
k = trunc((length(train.Yt.ts)-1)^(1/3)))## Warning in adf.test(train.Yt.ts, alternative = "stationary", k =
## trunc((length(train.Yt.ts) - : p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: train.Yt.ts
## Dickey-Fuller = -5.2404, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
ACF DAN PACF
## Warning: package 'TSA' was built under R version 4.5.3
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
##
## spec
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x x x x x x x x x x
## 1 x o x x o o o x o o x x x o
## 2 x o o x o o o x o o o x x o
## 3 x o o x o o o x o o o x x o
## 4 x x x x o o o o o o o x o x
## 5 x x x x o o o x o o o x x x
## 6 x o o x x o o o o o o x x x
## 7 x x o x x o o o o o o x o o
Kandidat Model yang diperoleh yaitu: \(ARIMA(1,0,1),ARIMA(2,0,1), ARIMA(3,0,1)\), dan \(ARIMA(4,0,1)\)
# ARIMA(1,0,1)
arima101 <- arima(train.Yt.ts, order = c(1,0,1), include.mean = TRUE, method = "ML")
arima101##
## Call:
## arima(x = train.Yt.ts, order = c(1, 0, 1), include.mean = TRUE, method = "ML")
##
## Coefficients:
## ar1 ma1 intercept
## 0.9252 0.4112 243.8853
## s.e. 0.0356 0.0997 41.6537
##
## sigma^2 estimated as 708.3: log likelihood = -565.43, aic = 1136.85
# ARIMA(2,0,1)
arima201 <- arima(train.Yt.ts, order = c(2,0,1), include.mean = TRUE, method = "ML")
arima201##
## Call:
## arima(x = train.Yt.ts, order = c(2, 0, 1), include.mean = TRUE, method = "ML")
##
## Coefficients:
## ar1 ar2 ma1 intercept
## 0.4620 0.4537 0.8606 243.7508
## s.e. 0.1556 0.1547 0.1116 46.8685
##
## sigma^2 estimated as 691.4: log likelihood = -564.19, aic = 1136.37
##
## Call:
## arima(x = train.Yt.ts, order = c(3, 0, 1), include.mean = TRUE, method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ma1 intercept
## 0.4369 0.6063 -0.1416 0.9291 244.3702
## s.e. 0.1226 0.1567 0.1084 0.0814 42.3870
##
## sigma^2 estimated as 681.3: log likelihood = -563.37, aic = 1136.73
AICKandidatModel <- c(1076.07, 1071.72, 1072.61)
KndidatModelARIMA <- c("ARIMA(1,0,1)", "ARIMA(2,0,1)", "ARIMA(3,0,1)")
compmodelARIMA <- cbind(KndidatModelARIMA, AICKandidatModel)
colnames(compmodelARIMA) <- c("Kandidat Model", "Nilai AIC")
compmodelARIMA <- as.data.frame(compmodelARIMA)
compmodelARIMA## Kandidat Model Nilai AIC
## 1 ARIMA(1,0,1) 1076.07
## 2 ARIMA(2,0,1) 1071.72
## 3 ARIMA(3,0,1) 1072.61
Model terbaik diperoleh Berdasarkan nilai AIC terkecil dari kandidat model. Oleh karena itu, Model terbaik yang diperoleh yaitu ARIMA(2,0,1)
Model ARIMA(2,0,1), maka Yt diperoleh dari penjabaran operator backshift sehingga untuk model ARIMA(2,0,1): \[ ϕ_p(1−B)^dY_t=μ+θ_q(B)e_t \] $$ (1−ϕ_1B−ϕ_2B2)(1−B)0Y_t=μ+(1+θ_1B)e_t
$$
$$ (1−ϕ_1B−ϕ_2B^2)Y_t=μ+(1+θ_1B)e_t
$$
$$ (1−ϕ_1B−ϕ_2B^2)Y_t=μ+e_t+θ_1Be_t
\[ \] (1−ϕ_1B−ϕ_2B^2)Y_t=μ+e_t+θ_1e_{t−1}
\[ \] Y_t−ϕ_1Y_{t−1}−ϕ_2Y_{t−2}=μ+e_t+θ_1e_{t−1}
\[ Sehingga diperoleh modelnya: \] Yt=μ+ϕ_1Y_{t−1}+ϕ_2Y_{t−2}+e_t+θ_1e_{t−1}
$$
## [1] 3.544645
## [1] 3.466715
## [1] 3.417067
## [1] 10.59619
prmtrdugaanarima201 <- c(
"mu",
"phi(1)",
"phi(2)",
"theta(1)"
)
thitungprmtrarima201 <- c(
abs(intercept_mu),
abs(ar_phi1),
abs(ar_phi2),
abs(ma_theta1)
)
ttabel <- c(
1.981,
1.981,
1.981,
1.981
)
keputusan <- ifelse(
thitungprmtrarima201 > 1.981,
"Signifikan",
"Tidak Signifikan"
)
tksignimodelarima201 <- data.frame(
"Parameter Dugaan" = prmtrdugaanarima201,
"T-Hitung" = round(thitungprmtrarima201, 4),
"T-Tabel" = ttabel,
"Keputusan" = keputusan
)
tksignimodelarima201## Parameter.Dugaan T.Hitung T.Tabel Keputusan
## 1 mu 3.5446 1.981 Signifikan
## 2 phi(1) 3.4667 1.981 Signifikan
## 3 phi(2) 3.4171 1.981 Signifikan
## 4 theta(1) 10.5962 1.981 Signifikan
Berdasarkan uji signifikansi parameter, seluruh parameter pada model ARIMA(2,0,1) memiliki nilai t hitungyang lebih besar dibandingkan t tabel=1.981. Oleh karena itu, parameter μ, ϕ_1,θ_1 ,dan θ_2 signifikan pada taraf nyata 5%, sehingga model ARIMA(2,0,1) layak digunakan untuk peramalan.
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.46199 0.15563 2.9684 0.002993 **
## ar2 0.45371 0.15471 2.9327 0.003360 **
## ma1 0.86059 0.11163 7.7094 1.265e-14 ***
## intercept 243.75080 46.86855 5.2007 1.985e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Berdasarkan tabel di atas maka pengambilan keputusan dari semua paramater dugaan dari model ARIMA(2,0,1) semua parameternya signifikan.
##
## ARIMA(2,1,2) with drift : 1087.779
## ARIMA(0,1,0) with drift : 1140.395
## ARIMA(1,1,0) with drift : 1132.545
## ARIMA(0,1,1) with drift : 1128.877
## ARIMA(0,1,0) : 1138.843
## ARIMA(1,1,2) with drift : Inf
## ARIMA(2,1,1) with drift : Inf
## ARIMA(3,1,2) with drift : Inf
## ARIMA(2,1,3) with drift : Inf
## ARIMA(1,1,1) with drift : 1127.299
## ARIMA(1,1,3) with drift : Inf
## ARIMA(3,1,1) with drift : Inf
## ARIMA(3,1,3) with drift : 1084.396
## ARIMA(4,1,3) with drift : Inf
## ARIMA(3,1,4) with drift : Inf
## ARIMA(2,1,4) with drift : Inf
## ARIMA(4,1,2) with drift : Inf
## ARIMA(4,1,4) with drift : 1083.503
## ARIMA(5,1,4) with drift : Inf
## ARIMA(4,1,5) with drift : Inf
## ARIMA(3,1,5) with drift : Inf
## ARIMA(5,1,3) with drift : Inf
## ARIMA(5,1,5) with drift : Inf
## ARIMA(4,1,4) : 1115.646
##
## Best model: ARIMA(4,1,4) with drift
## Series: train.Yt.ts
## ARIMA(4,1,4) with drift
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## 0.0040 0.9528 -0.1145 -0.7477 -0.0700 -1.5097 0.0314 0.9016
## s.e. 0.0867 0.0824 0.0808 0.0866 0.0923 0.0807 0.1013 0.0900
## drift
## 2.2084
## s.e. 0.7242
##
## sigma^2 = 444.4: log likelihood = -530.73
## AIC=1081.47 AICc=1083.5 BIC=1109.26
ARIMA201diag <- stats::arima(train.Yt.ts, order = c(2,0,1), method = "ML")
checkresiduals(ARIMA201diag)##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,1) with non-zero mean
## Q* = 18.54, df = 7, p-value = 0.009757
##
## Model df: 3. Total lags used: 10
Uji formal normalitas data
H0: sisaan menyebar normal
H1: sisaan tidak mengikuti sebaran normal
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: sisaan
## D = 0.48455, p-value < 2.2e-16
## alternative hypothesis: two-sided
Hasil pengujian, Normalitas Data: Hasil : p−value=0.000000000000000222<α=0.05yang berarti TOLAK H0 . Sisaan tidak menyebar normal
#Uji nilai tengah sisaan H0:μ=0 H1:μ≠0
##
## One Sample t-test
##
## data: sisaan
## t = 0.59452, df = 119, p-value = 0.5533
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -3.334782 6.196531
## sample estimates:
## mean of x
## 1.430875
Nilai Tengah Sisaan Hasil : \(p−value=0.41843>\alpha=0.05\) yang berarti TAK TOLAK H0 . Nilai tengah sisaan sama dengan 0
#Uji autokorelasi H0: tidak ada autokorelasi H1: terdapat autokorelasi
##
## Box-Ljung test
##
## data: sisaan
## X-squared = 109.71, df = 23, p-value = 2.841e-13
Autokorelasi Hasil : \(p−value= 2.8311e-13>\alpha=0.05\) yang berarti TOLAK H0 . terdapat gejala autokorelasi
Kesimpulan : Asumsi terpenuhi, kecuali terdapat gejala autokorelasi.
Peramalan
ts.plot(train.Yt.ts,col = "blue",ylab = "Jumlah Penumpang",xlab = "Bulan")
title(main = "Fitted Time Series Plot ARIMA(1,0,2)",cex.sub = 0.8)
points(train.Yt.ts,pch = 20,col = "blue")
par(col="red")
lines(predictARIMA201)arima102 <- arima(train.Yt.ts, order = c(1,0,2))
forecasting <- predict(arima102, n.ahead = 20)
hasilforecasting <- data.frame(Forecast = forecasting$pred,StdError = forecasting$se)
hasilforecasting## Forecast StdError
## 1 359.8124 26.53245
## 2 345.5605 44.17849
## 3 339.6621 53.09470
## 4 334.1055 59.90640
## 5 328.8708 65.35983
## 6 323.9393 69.84398
## 7 319.2935 73.59514
## 8 314.9169 76.77091
## 9 310.7938 79.48318
## 10 306.9095 81.81503
## 11 303.2503 83.83021
## 12 299.8030 85.57894
## 13 296.5555 87.10152
## 14 293.4961 88.43085
## 15 290.6139 89.59411
## 16 287.8987 90.61398
## 17 285.3408 91.50959
## 18 282.9311 92.29716
## 19 280.6609 92.99053
## 20 278.5223 93.60160
ts.plot(train.Yt.ts,col = "blue",ylab = "Jumlah Penumpang",xlab = "Bulan")
title(main = "Fitted Time Series Plot ARIMA(2,0,1)",cex.sub = 0.8)
points(train.Yt.ts,pch = 20,col = "blue")
lines(predictARIMA201,
col = "red",
lwd = 2
)
legend("topleft",legend = c("Aktual", "Fitted"),
col = c("blue", "red"),
lty = 1,
lwd = 2,
bty = "n"
)##
## Call:
## arima(x = test.Yt.ts, order = c(2, 0, 1), include.mean = TRUE, method = "ML")
##
## Coefficients:
## ar1 ar2 ma1 intercept
## 1.2704 -0.6405 -0.1869 455.5089
## s.e. 0.4773 0.3387 0.7288 18.7467
##
## sigma^2 estimated as 1657: log likelihood = -123.77, aic = 255.53
predictARIMA201test <- fitted(arima201test)
fittedARIMA201test <- cbind(test.Yt.ts,predictARIMA201test)
fittedARIMA201test## Time Series:
## Start = 1
## End = 24
## Frequency = 1
## test.Yt.ts predictARIMA201test
## 1 360 413.7466
## 2 342 377.8714
## 3 406 378.2183
## 4 396 460.1370
## 5 420 423.6025
## 6 472 449.1862
## 7 548 494.9393
## 8 559 552.5329
## 9 463 526.5379
## 10 407 410.6148
## 11 362 389.7591
## 12 405 372.9704
## 13 417 445.2470
## 14 391 444.2164
## 15 419 408.1660
## 16 461 448.4200
## 17 472 483.5178
## 18 535 475.0954
## 19 622 534.7381
## 20 606 599.8010
## 21 508 538.9011
## 22 461 431.5808
## 23 390 423.3657
## 24 432 375.0032
ts.plot(test.Yt.ts,col = "blue",ylab = "Jumlah Penumpang",xlab = "Bulan")
title(main = "Fitted Time Series Plot ARIMA(2,0,1)",cex.sub = 0.8)
points(test.Yt.ts,pch = 20,col = "blue")
lines(predictARIMA201test,col = "red",lwd = 2)
legend(
"topleft",
legend = c("Data Aktual", "Fitted"),
col = c("blue", "red"),
lty = 1,
lwd = 2,
pch = c(20, NA),
bty = "n"
)forecastingtest <- predict(arima201test, n.ahead = 20)
hasilforcastingtest <- as.data.frame(forecastingtest)
hasilforcastingtest## pred se
## 1 456.9482 40.70228
## 2 472.3950 60.01399
## 3 476.0396 67.07621
## 4 470.7762 67.79025
## 5 461.7550 68.12268
## 6 453.6654 69.71712
## 7 449.1662 71.21491
## 8 448.6316 71.77666
## 9 450.8343 71.80680
## 10 453.9751 71.87358
## 11 456.5543 72.06635
## 12 457.8195 72.21998
## 13 457.7747 72.26704
## 14 456.9075 72.26765
## 15 455.8345 72.27921
## 16 455.0267 72.30244
## 17 454.6877 72.31800
## 18 454.7745 72.32171
## 19 455.1019 72.32171
## 20 455.4621 72.32350
akurasi.arima201training <- accuracy(train.Yt.ts, predictARIMA201)
akurasi.arima201testing <- accuracy(train.Yt.ts, predictARIMA201test)
compmodelforecast <- cbind(akurasi.arima201training, akurasi.arima201testing)# Error training
error.train <- train.Yt.ts - predictARIMA201
# MAE
MAE.train <- mean(abs(error.train))
# RMSE
RMSE.train <- sqrt(mean(error.train^2))
# MAPE
MAPE.train <- mean(abs(error.train/train.Yt.ts))*100
# Error testing
error.test <- test.Yt.ts - predictARIMA201test
# MAE
MAE.test <- mean(abs(error.test))
# RMSE
RMSE.test <- sqrt(mean(error.test^2))
# MAPE
MAPE.test <- mean(abs(error.test/test.Yt.ts))*100
Akurasiall <- data.frame(
Akurasi = c("MAE", "RMSE", "MAPE"),
Training = c(MAE.train, RMSE.train, MAPE.train),
Testing = c(MAE.test, RMSE.test, MAPE.test)
)
Akurasiall## Akurasi Training Testing
## 1 MAE 20.942016 33.95274
## 2 RMSE 26.293787 40.70228
## 3 MAPE 8.675423 7.65741
Akurasi <- c("MAE","RMSE", "MAPE")
ModelARIMA201train <- c(20.9420157, 26.2937868, 8.6754231)
ModelARIMA201test <- c(33.9527438,40.7022772,7.6574101)
Akurasiall <- rbind(Akurasi, ModelARIMA201train, ModelARIMA201test)
Akurasiall <- as.data.frame(Akurasiall)
Akurasiall## V1 V2 V3
## Akurasi MAE RMSE MAPE
## ModelARIMA201train 20.9420157 26.2937868 8.6754231
## ModelARIMA201test 33.9527438 40.7022772 7.6574101
Berdasarkan hasil fitting model dengan menggunakan metode ARIMA yang diperoleh model terbaik yaitu ARIMA(2,0,1). Pemilihan model terbaik ini didapatkan berdasarkan nilai akurasi AIC yang terkecil dari model tentatif dan diperoleh akurasi dari model berdasarkan nilai RMSE sebesar 26.2937868 untuk data train dan RMSE sebesar 40.7022772 untuk data testing. #Berdasarkan range nilai MAPE bahwa kemampuan model dalam meramal:
rangeaku <- c("<10%", "10-20%", "20-50%", ">50%")
arti <- c("Kemampuan peramalan sangat baik", "Kemampuan peramalan baik", "Kemampuan peramalan layak", "Kemampuan peramalan buruk")
Akurasiall <- cbind(rangeaku, arti)
colnames(Akurasiall) <- c("Range RMSE", "Arti")
Akurasiall <- as.data.frame(Akurasiall)
Akurasiall## Range RMSE Arti
## 1 <10% Kemampuan peramalan sangat baik
## 2 10-20% Kemampuan peramalan baik
## 3 20-50% Kemampuan peramalan layak
## 4 >50% Kemampuan peramalan buruk
Sehingga berdasarkan range tersebut model ARIMA(2,0,1) dengan nilai RMSE sebesar 26.2937868untuk data train dan MAPE sebesar 40.7022772 untuk data testing memiliki tingkat peramalan yang layak.
#Model SARIMA ##Forming Data TS
#Splitting Data Sebelum dilakukan analisis lebih lanjut, dilakukan splitting data terlebih dahulu yaitu dengan membagi data menjadi dua bagian bagian pertama yaitu sebagai data training sebesar 80% dan bagian kedua sebagai data testing sebesar 20%.
#Identifikasi Kestasioneran ##ACF Awal
acf0$lag <- acf0$lag * 12
acf0.1 <- as.data.frame(
cbind(acf0$acf, acf0$lag)
)
acf0.2 <- acf0.1[
which(acf0.1$V2 %% 12 == 0),
]
barplot(
height = acf0.2$V1,
names.arg = acf0.2$V2,
ylab = "ACF",
xlab = "Lag"
)
Berdasarkan plot ACF pada lag musiman (12, 24, 36, dan 48), terlihat
bahwa nilai autokorelasi pada lag 12 masih cukup tinggi yaitu sekitar
0,72 dan menurun secara bertahap pada lag-lag berikutnya. Kondisi ini
menunjukkan adanya pola musiman tahunan yang kuat pada data
AirPassengers. Oleh karena itu, data belum stasioner terhadap komponen
musiman sehingga diperlukan proses seasonal differencing orde satu (D=1)
dengan periode musiman 12. ##Differencing Non-Musiman
dif1 <- diff(train)
ts.plot(dif1,
type = "l",
ylab = "Differencing Orde 1",
xlab = "Waktu",
col = "blue")
title(main = "Plot Differencing Orde 1 AirPassengers")
points(dif1,
pch = 20,
col = "blue")
Berdasarkan plot hasil differencing orde satu (d=1), tren yang
sebelumnya terlihat pada data AirPassengers telah berkurang sehingga
rata-rata data menjadi lebih konstan dari waktu ke waktu. Dengan
demikian, differencing non-musiman orde satu berhasil mengurangi
ketidakstasioneran dalam rataan pada data.
acf1 <- acf(dif1,
lag.max = 48,
plot = FALSE)
acf1$lag <- acf1$lag * 12
acf1.1 <- as.data.frame(cbind(acf1$acf, acf1$lag))
acf1.2 <- acf1.1[which(acf1.1$V2 %% 12 == 0),]
barplot(height = acf1.2$V1,names.arg = acf1.2$V2,ylab = "ACF",xlab = "Lag")
Berdasarkan plot ACF setelah dilakukan differencing non-musiman orde
satu (d=1), terlihat bahwa nilai autokorelasi pada lag musiman 12, 24,
36, dan 48 masih cukup tinggi serta menurun secara bertahap. Kondisi ini
menunjukkan bahwa komponen tren telah berkurang, namun pola musiman
tahunan masih kuat sehingga data belum stasioner terhadap komponen
musiman. Oleh karena itu diperlukan seasonal differencing orde satu
(D=1) dengan periode musiman 12 untuk menghilangkan pengaruh musiman
pada data AirPassengers. ##Differencing Musiman
Plot hasil differencing orde satu (d=1) menunjukkan bahwa data telah
berfluktuasi di sekitar rata-rata yang relatif konstan dan tidak lagi
memperlihatkan tren yang kuat. Dengan demikian, differencing non-musiman
orde satu berhasil mengatasi ketidakstasioneran dalam rataan.
Selanjutnya dilakukan analisis ACF dan PACF untuk mengidentifikasi
keberadaan komponen musiman pada data. ## ARIMA Seasonal
Differencing Data Seasonal
D1 <- diff(train, lag = 12)
ts.plot(D1,
type = "l",
ylab = "D1 Xt",
xlab = "Time",
col = "blue")
title(main = "Plot Seasonal Differencing Orde 1")
points(D1,
pch = 20,
col = "blue")acf2$lag <- acf2$lag * 12
acf2.1 <- as.data.frame(cbind(acf2$acf, acf2$lag))
acf2.2 <- acf2.1[which(acf2.1$V2 %% 12 == 0),]
barplot(height = acf2.2$V1,names.arg = acf2.2$V2,ylab = "ACF",xlab = "Lag")d1D1 <- diff(D1)
ts.plot(d1D1,
type = "l",
ylab = "d1D1 Xt",
xlab = "Time",
col = "blue")
title(main = "Plot Non-Seasonal dan Seasonal Differencing")
points(d1D1,
pch = 20,
col = "blue")
## Identifikasi Kandidat Model
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o o o o o o o o o o o o o
## 1 o o o o o o o o o o o o o o
## 2 x x x o o o o o o o o o o o
## 3 x x x o o o o o o o o o o o
## 4 o o x o o o o o o o o o o o
## 5 x o x x o o o o o o o o o o
## 6 o o x o o o o o o o o o o o
## 7 x x x o o o o o o o o o o o
Berdasarkan plot EACF identifikasi komponen non-seasonal yaitu \(ARIMA(1,1,0)\), \(ARIMA(0,1,1)\), dan \(ARIMA(2,1,0)\). Identifikasi komponen seasonal adalah \(ARIMA(0,1,1)_{12}\), sehingga model yang diperoleh adalah - \(ARIMA(1,1,0)\times(0,1,1)_{12}\) - \(ARIMA(0,1,1)\times(0,1,1)_{12}\) - \(ARIMA(2,1,0)\times(0,1,1)_{12}\)
tmodel1 <- Arima(
train,
order = c(1,1,0),
seasonal = list(
order = c(0,1,1),
period = 12
)
)
summary(tmodel1)## Series: train
## ARIMA(1,1,0)(0,1,1)[12]
##
## Coefficients:
## ar1 sma1
## -0.2320 -0.0453
## s.e. 0.0952 0.0927
##
## sigma^2 = 104.4: log likelihood = -399.52
## AIC=805.04 AICc=805.28 BIC=813.06
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.00348442 9.556435 7.106144 -0.02970069 2.883089 0.248692
## ACF1
## Training set 0.00847144
tmodel2 <- Arima(
train,
order = c(0,1,1),
seasonal = list(
order = c(0,1,1),
period = 12
)
)
summary(tmodel2)## Series: train
## ARIMA(0,1,1)(0,1,1)[12]
##
## Coefficients:
## ma1 sma1
## -0.2278 -0.0459
## s.e. 0.0993 0.0933
##
## sigma^2 = 104.7: log likelihood = -399.7
## AIC=805.41 AICc=805.64 BIC=813.43
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.00146199 9.572807 7.1838 -0.03015954 2.909925 0.2514097
## ACF1
## Training set -0.005036639
tmodel3 <- Arima(
train,
order = c(2,1,0),
seasonal = list(
order = c(2,1,0),
period = 12
)
)
summary(tmodel3)## Series: train
## ARIMA(2,1,0)(2,1,0)[12]
##
## Coefficients:
## ar1 ar2 sar1 sar2
## -0.2687 0.0366 -0.0363 0.1582
## s.e. 0.1020 0.0969 0.1028 0.1099
##
## sigma^2 = 103.6: log likelihood = -398.39
## AIC=806.78 AICc=807.38 BIC=820.15
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.05126405 9.42796 6.999253 -0.04307906 2.847532 0.2449512
## ACF1
## Training set 0.002542035
AICKandidatModel <- c(
tmodel1$aic,
tmodel2$aic,
tmodel3$aic
)
AICcKandidatModel <- c(
tmodel1$aicc,
tmodel2$aicc,
tmodel3$aicc
)
BICKandidatModel <- c(
BIC(tmodel1),
BIC(tmodel2),
BIC(tmodel3)
)
KandidatModelSARIMA <- c(
"SARIMA(1,1,0)(0,1,1)12",
"SARIMA(2,1,1)(0,1,1)12",
"SARIMA(3,1,2)(0,1,1)12"
)
compmodelSARIMA <- data.frame(
`Kandidat Model` = KandidatModelSARIMA,
`Nilai AIC` = AICKandidatModel,
`Nilai AICc` = AICcKandidatModel,
`Nilai BIC` = BICKandidatModel
)
compmodelSARIMA## Kandidat.Model Nilai.AIC Nilai.AICc Nilai.BIC
## 1 SARIMA(1,1,0)(0,1,1)12 805.0438 805.2768 813.0623
## 2 SARIMA(2,1,1)(0,1,1)12 805.4087 805.6417 813.4272
## 3 SARIMA(3,1,2)(0,1,1)12 806.7812 807.3752 820.1453
Model terbaik diperoleh berdasarkan nilai AIC terkecil dari kandidat model. Oleh karena itu, model terbaik yang diperoleh adalah \(ARIMA(1,1,0)\times(0,1,1)_{12}\). ## Bandingkan dengan Fungsi auto.arima
data.ts <- AirPassengers
train.ts <- window(data.ts, end = c(1958,12))
library(forecast)
auto.arima(train.ts)## Series: train.ts
## ARIMA(1,1,0)(0,1,0)[12]
##
## Coefficients:
## ar1
## -0.2397
## s.e. 0.0935
##
## sigma^2 = 103.6: log likelihood = -399.64
## AIC=803.28 AICc=803.4 BIC=808.63
## Series: train
## ARIMA(1,1,0)(0,1,0)[12]
##
## Coefficients:
## ar1
## -0.2397
## s.e. 0.0935
##
## sigma^2 = 103.6: log likelihood = -399.64
## AIC=803.28 AICc=803.4 BIC=808.63
## Series: d1
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## -0.5153 0.8765 2.0930
## s.e. 0.1493 0.1032 3.0147
##
## sigma^2 = 724.7: log likelihood = -559.47
## AIC=1126.95 AICc=1127.3 BIC=1138.06
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.01000132 26.57938 21.21506 NaN Inf 2.544856 0.03826655
## Series: d1
## ARIMA(2,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 mean
## 1.0825 -0.5024 -1.0000 2.5039
## s.e. 0.0787 0.0800 0.0306 0.1444
##
## sigma^2 = 550.5: log likelihood = -544.5
## AIC=1099 AICc=1099.54 BIC=1112.9
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -1.401017 23.06587 18.98509 -Inf Inf 2.277359 -0.05175547
## Series: d1
## ARIMA(3,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1 mean
## 1.0306 -0.3925 -0.1025 -1.000 2.5113
## s.e. 0.0912 0.1278 0.0931 0.036 0.1309
##
## sigma^2 = 548.7: log likelihood = -543.9
## AIC=1099.8 AICc=1100.55 BIC=1116.48
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -1.553648 22.92759 18.84893 -Inf Inf 2.261027 -0.01865953
##Model Terbaik
AICKandidatModel <- c(model1$aic, model2$aic, model3$aic)
AICcKandidatModel <- c(model1$aicc, model2$aicc, model3$aicc)
BICKandidatModel <- c(model1$bic, model2$bic, model3$bic)
KandidatModelARIMA <- c("ARIMA(0,1,1)", "ARIMA(1,0,1)", "ARIMA(2,0,1)", "ARIMA(3,0,1)")
compmodelARIMA <- cbind(KandidatModelARIMA, AICKandidatModel, AICcKandidatModel, BICKandidatModel)## Warning in cbind(KandidatModelARIMA, AICKandidatModel, AICcKandidatModel, :
## number of rows of result is not a multiple of vector length (arg 2)
colnames(compmodelARIMA) <- c("Kandidat Model", "Nilai AIC", "Nilai AICc", "Nilai BIC")
compmodelARIMA <- as.data.frame(compmodelARIMA)
compmodelARIMA## Kandidat Model Nilai AIC Nilai AICc Nilai BIC
## 1 ARIMA(0,1,1) 1126.94850264733 1127.29937984031 1138.06499661978
## 2 ARIMA(1,0,1) 1099.00483973914 1099.53581319046 1112.90045720469
## 3 ARIMA(2,0,1) 1099.8017929448 1100.5517929448 1116.47653390347
## 4 ARIMA(3,0,1) 1126.94850264733 1127.29937984031 1138.06499661978
Berdasarkan tabel tersebut, model ARIMA(1,0,1) memiliki nilai AIC sebesar 1099,005, nilai AICc sebesar 1099,536, dan nilai BIC sebesar 1112,900 yang merupakan nilai terkecil dibandingkan model kandidat lainnya. Meskipun model ARIMA(2,0,1) memiliki nilai AIC yang mendekati, seluruh kriteria informasi yang dimiliki ARIMA(1,0,1) tetap lebih kecil.
Maka diperoleh \(Y_t\) dari penjabaran operator backshift dari model \(ARIMA(1,1,0)\) adalah \[ϕ_1(B)Φ_0(B^{12})(1−B)^1(1−B^{12})^1X_t=Θ_1(B^{12})e_t\] \[(1−ϕ_1B)(1−B)(1−B^{12})X_t=(1+Θ_1B^{12})e_t\] Karena \[(1−B)(1−B^{12})=1−B−B^{12}+B^{13}\] maka \[(1−ϕ_1B)(1−B−B^{12}+B^{13})X_t=(1+Θ_1B^{12})e_t\] diperoleh \[(1−(1+ϕ_1)B+ϕ_1B^2−B^{12}+(1+ϕ_1)B^{13}−ϕ_1B^{14})X_t=(1+Θ_1B^{12})e_t\] Bentuk AKhir \[X_t=(1+ϕ_1)X_{t−1}−ϕ_1X_{t−2}+X_{t−12}−(1+ϕ_1)X_{t−13}+ϕ_1X_{t−14}+e_t+Θ_1e_{t−12}\] ## Diagnostic Model
library(forecast)
SARIMA110011diag <- Arima(train.ts,order = c(1,1,0),seasonal = list(order = c(0,1,1),period = 12),
method = "ML"
)
checkresiduals(SARIMA110011diag)##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)(0,1,1)[12]
## Q* = 33.136, df = 22, p-value = 0.05999
##
## Model df: 2. Total lags used: 24
tmodel1 <- Arima(
train.ts,
order = c(1,1,0),
seasonal = list(order = c(0,1,1), period = 12)
)
sisaan <- residuals(tmodel1)
shapiro.test(sisaan)##
## Shapiro-Wilk normality test
##
## data: sisaan
## W = 0.97687, p-value = 0.0365
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: sisaan
## D = 0.1047, p-value = 0.144
## alternative hypothesis: two-sided
Hipotesis H0:Residual berdistribusi normal H1:Residual tidak berdistribusi normal Kriteria Keputusan Tolak H0 jika p-value < 0,05 Gagal menolak H0 jika p-value > 0,05
Berdasarkan uji normalitas Kolmogorov-Smirnov diperoleh nilai p-value lebih besar dari 0,05 sehingga gagal menolak H0 Hal ini menunjukkan bahwa residual model ARIMA(1,1,0)×(0,1,1) 12 berdistribusi normal. ### Uji Nilai Tengah Sisaan
\(H_0\) : \(\mu=0\)
\(H_1\) : \(\mu\neq0\)
##
## One Sample t-test
##
## data: sisaan
## t = -0.0039775, df = 119, p-value = 0.9968
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -1.738125 1.731156
## sample estimates:
## mean of x
## -0.00348442
Berdasarkan uji t satu sampel diperoleh nilai p-value lebih besar dari 0,05 sehingga gagal menolak H0.Hal ini menunjukkan bahwa rata-rata residual tidak berbeda signifikan dari nol. Dengan demikian residual model ARIMA(1,1,0)×(0,1,1) 12 telah memenuhi asumsi nilai tengah sama dengan nol. ### Uji Autokorelasi
\(H_0\) : tidak ada autokorelasi
\(H_1\) : terdapat autokorelasi
##
## Box-Ljung test
##
## data: sisaan
## X-squared = 30.816, df = 23, p-value = 0.1274
Berdasarkan uji Ljung-Box diperoleh nilai p-value lebih besar dari 0,05 sehingga gagal menolak H0. Hal ini menunjukkan bahwa residual model ARIMA(1,1,0)×(0,1,1)12 tidak mengandung autokorelasi yang signifikan. Dengan demikian residual dapat dianggap bersifat white noise dan model layak digunakan untuk peramalan. ## Forecasting Selanjutnya dilakukan forecasting hasil dari pemodelan dengan menggunakan data training, yaitu sebagai berikut:
Fitted model \(ARIMA(1,1,0)\) dengan data training
## Jan Feb Mar Apr May Jun
## 1949 111.9353 117.9664 131.9662 128.9774 120.9892 134.9782
fittedSARIMA <- data.frame(
Aktual = as.numeric(train.ts),
Fitted = as.numeric(predictSARIMA)
)
head(fittedSARIMA)## Aktual Fitted
## 1 112 111.9353
## 2 118 117.9664
## 3 132 131.9662
## 4 129 128.9774
## 5 121 120.9892
## 6 135 134.9782
##MAE
## [1] 7.106144
##RMSE Training
## [1] 9.556435
##MAPE Training
MAPE_SARIMA_train <- mean(
abs((fittedSARIMA$Aktual - fittedSARIMA$Fitted) /
fittedSARIMA$Aktual)
) * 100
MAPE_SARIMA_train## [1] 2.883089
Forecast Data Testing
forecastSARIMA <- forecast(
tmodel1,
h = length(test)
)
predictSARIMAtest <- forecastSARIMA$mean
hasilSARIMAtest <- data.frame(
Aktual = as.numeric(test),
Prediksi = as.numeric(predictSARIMAtest)
)
head(hasilSARIMAtest)## Aktual Prediksi
## 1 360 342.1935
## 2 342 320.3441
## 3 406 364.8632
## 4 396 351.1313
## 5 420 365.7676
## 6 472 437.5169
##MAE Testing
##RMSE Testing
##MAPE Testing
MAPE_SARIMA_test <- mean(
abs((hasilSARIMAtest$Aktual -
hasilSARIMAtest$Prediksi) /
hasilSARIMAtest$Aktual)
) * 100##Tabel Akurasi
Akurasi <- c("MAE", "RMSE", "MAPE")
SARIMA_train <- c(
MAE_SARIMA_train,
RMSE_SARIMA_train,
MAPE_SARIMA_train
)
SARIMA_test <- c(
MAE_SARIMA_test,
RMSE_SARIMA_test,
MAPE_SARIMA_test
)
AkurasiSARIMA <- data.frame(
Akurasi,
Training = SARIMA_train,
Testing = SARIMA_test
)
AkurasiSARIMA## Akurasi Training Testing
## 1 MAE 7.106144 67.15235
## 2 RMSE 9.556435 72.73253
## 3 MAPE 2.883089 14.60425
##Forecasting
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1959 342.5163 329.4012 355.6313 322.4585 362.5741
## Feb 1959 320.8980 304.3278 337.4682 295.5561 346.2399
## Mar 1959 365.3708 345.9508 384.7908 335.6705 395.0711
## Apr 1959 351.6552 329.7531 373.5573 318.1589 385.1515
## May 1959 366.2834 342.1532 390.4136 329.3795 403.1873
## Jun 1959 438.0296 411.8603 464.1989 398.0071 478.0520
## Jul 1959 493.4253 465.3647 521.4859 450.5104 536.3403
## Aug 1959 506.8526 477.0203 536.6848 461.2281 552.4770
## Sep 1959 407.6257 376.1214 439.1301 359.4439 455.8076
## Oct 1959 362.0929 329.0007 395.1850 311.4828 412.7030
## Nov 1959 313.4288 278.8216 348.0360 260.5017 366.3559
## Dec 1959 340.6213 304.5628 376.6799 285.4745 395.7682
## Jan 1960 346.0783 303.5011 388.6554 280.9621 411.1944
## Feb 1960 324.4600 277.5084 371.4117 252.6537 396.2664
## Mar 1960 368.9328 317.9809 419.8848 291.0085 446.8571
## Apr 1960 355.2172 300.5569 409.8775 271.6215 438.8129
## May 1960 369.8454 311.7129 427.9779 280.9394 458.7515
## Jun 1960 441.5916 380.1828 503.0003 347.6750 535.5082
## Jul 1960 496.9873 432.4685 561.5062 398.3143 595.6604
## Aug 1960 510.4146 442.9289 577.9003 407.2041 613.6251
## Sep 1960 411.1878 340.8602 481.5153 303.6310 518.7445
## Oct 1960 365.6549 292.5959 438.7138 253.9209 477.3889
## Nov 1960 316.9908 241.2990 392.6827 201.2301 432.7515
## Dec 1960 344.1834 265.9472 422.4195 224.5315 463.8353