=========================================================
1. LOAD PACKAGE
=========================================================
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(readr)
## Warning: package 'readr' was built under R version 4.5.2
=========================================================
2. IMPORT DATA CSV
=========================================================
data <- read_csv(
"C:/Topik Statistik 1/AirPassengers.csv"
)
## Rows: 144 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Month
## dbl (1): #Passengers
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data)
## # A tibble: 6 × 2
## Month `#Passengers`
## <chr> <dbl>
## 1 1949-01 112
## 2 1949-02 118
## 3 1949-03 132
## 4 1949-04 129
## 5 1949-05 121
## 6 1949-06 135
=========================================================
3. MELIHAT STRUKTUR DATA
=========================================================
str(data)
## spc_tbl_ [144 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Month : chr [1:144] "1949-01" "1949-02" "1949-03" "1949-04" ...
## $ #Passengers: num [1:144] 112 118 132 129 121 135 148 148 136 119 ...
## - attr(*, "spec")=
## .. cols(
## .. Month = col_character(),
## .. `#Passengers` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
summary(data)
## Month #Passengers
## Length:144 Min. :104.0
## Class :character 1st Qu.:180.0
## Mode :character Median :265.5
## Mean :280.3
## 3rd Qu.:360.5
## Max. :622.0
=========================================================
4. DATA CLEANING
=========================================================
# Mengubah jumlah penumpang menjadi numerik
data$`#Passengers` <- as.numeric(
data$`#Passengers`
)
# Mengubah bulan menjadi format Date
data$Month <- as.Date(
paste0(data$Month, "-01")
)
# Mengurutkan data berdasarkan tanggal
data <- data[order(data$Month), ]
# Mengecek missing value
sum(is.na(data$`#Passengers`))
## [1] 0
head(data)
## # A tibble: 6 × 2
## Month `#Passengers`
## <date> <dbl>
## 1 1949-01-01 112
## 2 1949-02-01 118
## 3 1949-03-01 132
## 4 1949-04-01 129
## 5 1949-05-01 121
## 6 1949-06-01 135
=========================================================
5. MEMBUAT TIME SERIES
=========================================================
ts_data <- ts(
data$`#Passengers`,
start = c(1949, 1),
frequency = 12
)
plot(
ts_data,
main = "Plot Data AirPassengers",
ylab = "Jumlah Penumpang",
xlab = "Waktu",
col = "blue"
)

=========================================================
6. UJI STASIONERITAS AWAL
=========================================================
adf_awal <- adf.test(ts_data)
## Warning in adf.test(ts_data): p-value smaller than printed p-value
adf_awal
##
## Augmented Dickey-Fuller Test
##
## data: ts_data
## Dickey-Fuller = -7.3186, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
Interpretasi:
Jika p-value > 0.05 maka data tidak stasioner
=========================================================
7. DIFFERENCING
=========================================================
diff1 <- diff(ts_data)
plot(
diff1,
main = "Differencing Orde 1",
ylab = "Differencing",
xlab = "Waktu",
col = "red"
)

adf_diff <- adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
adf_diff
##
## Augmented Dickey-Fuller Test
##
## data: diff1
## Dickey-Fuller = -7.0177, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
=========================================================
8. PLOT ACF DAN PACF
=========================================================
acf(
diff1,
main = "Plot ACF"
)

pacf(
diff1,
main = "Plot PACF"
)

=========================================================
9. MEMBAGI DATA TRAINING DAN TESTING
=========================================================
n <- length(ts_data)
train_size <- floor(0.8 * n)
train <- ts_data[1:train_size]
test <- ts_data[(train_size + 1):n]
length(train)
## [1] 115
length(test)
## [1] 29
=========================================================
10. PEMODELAN ARIMA
=========================================================
model_arima <- auto.arima(
train,
seasonal = FALSE
)
summary(model_arima)
## Series: train
## ARIMA(0,1,4)
##
## Coefficients:
## ma1 ma2 ma3 ma4
## 0.3286 -0.2513 -0.1427 -0.4081
## s.e. 0.0879 0.1044 0.0847 0.1005
##
## sigma^2 = 549.1: log likelihood = -520.04
## AIC=1050.07 AICc=1050.63 BIC=1063.75
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.986725 22.91749 18.28994 1.582755 7.579482 0.8684102 -0.02060094
=========================================================
11. FORECASTING ARIMA
=========================================================
forecast_arima <- forecast(
model_arima,
h = length(test)
)
plot(
forecast_arima,
main = "Forecasting ARIMA"
)

=========================================================
12. EVALUASI MODEL ARIMA
=========================================================
akurasi_arima <- accuracy(
forecast_arima,
test
)
akurasi_arima
## ME RMSE MAE MPE MAPE MASE
## Training set 4.986725 22.91749 18.28994 1.582755 7.579482 0.8684102
## Test set 17.222539 81.22430 61.10674 1.041556 13.407979 2.9013613
## ACF1
## Training set -0.02060094
## Test set NA
=========================================================
13. PEMODELAN SARIMA
=========================================================
model_sarima <- auto.arima(
train,
seasonal = TRUE
)
summary(model_sarima)
## Series: train
## ARIMA(0,1,4)
##
## Coefficients:
## ma1 ma2 ma3 ma4
## 0.3286 -0.2513 -0.1427 -0.4081
## s.e. 0.0879 0.1044 0.0847 0.1005
##
## sigma^2 = 549.1: log likelihood = -520.04
## AIC=1050.07 AICc=1050.63 BIC=1063.75
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.986725 22.91749 18.28994 1.582755 7.579482 0.8684102 -0.02060094
=========================================================
14. FORECASTING SARIMA
=========================================================
forecast_sarima <- forecast(
model_sarima,
h = length(test)
)
plot(
forecast_sarima,
main = "Forecasting SARIMA"
)

=========================================================
15. EVALUASI MODEL SARIMA
=========================================================
akurasi_sarima <- accuracy(
forecast_sarima,
test
)
akurasi_sarima
## ME RMSE MAE MPE MAPE MASE
## Training set 4.986725 22.91749 18.28994 1.582755 7.579482 0.8684102
## Test set 17.222539 81.22430 61.10674 1.041556 13.407979 2.9013613
## ACF1
## Training set -0.02060094
## Test set NA
=========================================================
16. TABEL PERBANDINGAN MODEL
=========================================================
perbandingan <- data.frame(
Model = c(
"ARIMA",
"SARIMA"
),
RMSE = c(
akurasi_arima[2, "RMSE"],
akurasi_sarima[2, "RMSE"]
),
MAE = c(
akurasi_arima[2, "MAE"],
akurasi_sarima[2, "MAE"]
),
MAPE = c(
akurasi_arima[2, "MAPE"],
akurasi_sarima[2, "MAPE"]
),
AIC = c(
AIC(model_arima),
AIC(model_sarima)
)
)
perbandingan
## Model RMSE MAE MAPE AIC
## 1 ARIMA 81.2243 61.10674 13.40798 1050.072
## 2 SARIMA 81.2243 61.10674 13.40798 1050.072
=========================================================
17. MENENTUKAN MODEL TERBAIK
=========================================================
if(
min(perbandingan$RMSE) ==
perbandingan$RMSE[1]
){
cat("Model terbaik adalah ARIMA")
} else {
cat("Model terbaik adalah SARIMA")
}
## Model terbaik adalah ARIMA
=========================================================
18. DIAGNOSTIK RESIDUAL
=========================================================
checkresiduals(model_arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,4)
## Q* = 15.954, df = 6, p-value = 0.014
##
## Model df: 4. Total lags used: 10
checkresiduals(model_sarima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,4)
## Q* = 15.954, df = 6, p-value = 0.014
##
## Model df: 4. Total lags used: 10
=========================================================
19. FORECAST 30 BULAN KE DEPAN
=========================================================
forecast_30 <- forecast(
model_arima,
h = 30
)
plot(
forecast_30,
main = "Forecast 30 Bulan Kedepan"
)

forecast_30
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 116 498.5240 468.4940 528.5540 452.5971 544.4510
## 117 470.5975 420.6622 520.5328 394.2281 546.9670
## 118 438.5508 379.0527 498.0489 347.5562 529.5453
## 119 417.7644 351.9795 483.5492 317.1551 518.3736
## 120 417.7644 350.1068 485.4220 314.2910 521.2378
## 121 417.7644 348.2844 487.2443 311.5040 524.0248
## 122 417.7644 346.5087 489.0200 308.7883 526.7405
## 123 417.7644 344.7762 490.7526 306.1386 529.3902
## 124 417.7644 343.0838 492.4449 303.5504 531.9784
## 125 417.7644 341.4290 494.0998 301.0195 534.5093
## 126 417.7644 339.8093 495.7195 298.5424 536.9864
## 127 417.7644 338.2225 497.3062 296.1156 539.4131
## 128 417.7644 336.6668 498.8619 293.7364 541.7924
## 129 417.7644 335.1404 500.3883 291.4019 544.1268
## 130 417.7644 333.6417 501.8871 289.1098 546.4189
## 131 417.7644 332.1692 503.3596 286.8579 548.6709
## 132 417.7644 330.7216 504.8071 284.6440 550.8848
## 133 417.7644 329.2977 506.2310 282.4663 553.0624
## 134 417.7644 327.8964 507.6324 280.3232 555.2056
## 135 417.7644 326.5166 509.0122 278.2129 557.3159
## 136 417.7644 325.1573 510.3715 276.1341 559.3947
## 137 417.7644 323.8177 511.7110 274.0854 561.4434
## 138 417.7644 322.4970 513.0318 272.0654 563.4633
## 139 417.7644 321.1943 514.3345 270.0731 565.4557
## 140 417.7644 319.9089 515.6199 268.1073 567.4214
## 141 417.7644 318.6402 516.8886 266.1670 569.3617
## 142 417.7644 317.3875 518.1412 264.2513 571.2775
## 143 417.7644 316.1503 519.3785 262.3591 573.1697
## 144 417.7644 314.9280 520.6008 260.4897 575.0391
## 145 417.7644 313.7200 521.8088 258.6422 576.8865