=========================================================

1. LOAD PACKAGE

=========================================================

library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(readr)
## Warning: package 'readr' was built under R version 4.5.2

=========================================================

2. IMPORT DATA CSV

=========================================================

data <- read_csv(
  "C:/Topik Statistik 1/AirPassengers.csv"
)
## Rows: 144 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Month
## dbl (1): #Passengers
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data)
## # A tibble: 6 × 2
##   Month   `#Passengers`
##   <chr>           <dbl>
## 1 1949-01           112
## 2 1949-02           118
## 3 1949-03           132
## 4 1949-04           129
## 5 1949-05           121
## 6 1949-06           135

=========================================================

3. MELIHAT STRUKTUR DATA

=========================================================

str(data)
## spc_tbl_ [144 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Month      : chr [1:144] "1949-01" "1949-02" "1949-03" "1949-04" ...
##  $ #Passengers: num [1:144] 112 118 132 129 121 135 148 148 136 119 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Month = col_character(),
##   ..   `#Passengers` = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
summary(data)
##     Month            #Passengers   
##  Length:144         Min.   :104.0  
##  Class :character   1st Qu.:180.0  
##  Mode  :character   Median :265.5  
##                     Mean   :280.3  
##                     3rd Qu.:360.5  
##                     Max.   :622.0

=========================================================

4. DATA CLEANING

=========================================================

# Mengubah jumlah penumpang menjadi numerik

data$`#Passengers` <- as.numeric(
  data$`#Passengers`
)

# Mengubah bulan menjadi format Date

data$Month <- as.Date(
  paste0(data$Month, "-01")
)

# Mengurutkan data berdasarkan tanggal

data <- data[order(data$Month), ]

# Mengecek missing value

sum(is.na(data$`#Passengers`))
## [1] 0
head(data)
## # A tibble: 6 × 2
##   Month      `#Passengers`
##   <date>             <dbl>
## 1 1949-01-01           112
## 2 1949-02-01           118
## 3 1949-03-01           132
## 4 1949-04-01           129
## 5 1949-05-01           121
## 6 1949-06-01           135

=========================================================

5. MEMBUAT TIME SERIES

=========================================================

ts_data <- ts(
  data$`#Passengers`,
  start = c(1949, 1),
  frequency = 12
)

plot(
  ts_data,
  main = "Plot Data AirPassengers",
  ylab = "Jumlah Penumpang",
  xlab = "Waktu",
  col = "blue"
)

=========================================================

6. UJI STASIONERITAS AWAL

=========================================================

adf_awal <- adf.test(ts_data)
## Warning in adf.test(ts_data): p-value smaller than printed p-value
adf_awal
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -7.3186, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

Interpretasi:

Jika p-value > 0.05 maka data tidak stasioner

=========================================================

7. DIFFERENCING

=========================================================

diff1 <- diff(ts_data)

plot(
  diff1,
  main = "Differencing Orde 1",
  ylab = "Differencing",
  xlab = "Waktu",
  col = "red"
)

adf_diff <- adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
adf_diff
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1
## Dickey-Fuller = -7.0177, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

=========================================================

8. PLOT ACF DAN PACF

=========================================================

acf(
  diff1,
  main = "Plot ACF"
)

pacf(
  diff1,
  main = "Plot PACF"
)

=========================================================

9. MEMBAGI DATA TRAINING DAN TESTING

=========================================================

n <- length(ts_data)

train_size <- floor(0.8 * n)

train <- ts_data[1:train_size]

test <- ts_data[(train_size + 1):n]

length(train)
## [1] 115
length(test)
## [1] 29

=========================================================

10. PEMODELAN ARIMA

=========================================================

model_arima <- auto.arima(
  train,
  seasonal = FALSE
)

summary(model_arima)
## Series: train 
## ARIMA(0,1,4) 
## 
## Coefficients:
##          ma1      ma2      ma3      ma4
##       0.3286  -0.2513  -0.1427  -0.4081
## s.e.  0.0879   0.1044   0.0847   0.1005
## 
## sigma^2 = 549.1:  log likelihood = -520.04
## AIC=1050.07   AICc=1050.63   BIC=1063.75
## 
## Training set error measures:
##                    ME     RMSE      MAE      MPE     MAPE      MASE        ACF1
## Training set 4.986725 22.91749 18.28994 1.582755 7.579482 0.8684102 -0.02060094

=========================================================

11. FORECASTING ARIMA

=========================================================

forecast_arima <- forecast(
  model_arima,
  h = length(test)
)

plot(
  forecast_arima,
  main = "Forecasting ARIMA"
)

=========================================================

12. EVALUASI MODEL ARIMA

=========================================================

akurasi_arima <- accuracy(
  forecast_arima,
  test
)

akurasi_arima
##                     ME     RMSE      MAE      MPE      MAPE      MASE
## Training set  4.986725 22.91749 18.28994 1.582755  7.579482 0.8684102
## Test set     17.222539 81.22430 61.10674 1.041556 13.407979 2.9013613
##                     ACF1
## Training set -0.02060094
## Test set              NA

=========================================================

13. PEMODELAN SARIMA

=========================================================

model_sarima <- auto.arima(
  train,
  seasonal = TRUE
)

summary(model_sarima)
## Series: train 
## ARIMA(0,1,4) 
## 
## Coefficients:
##          ma1      ma2      ma3      ma4
##       0.3286  -0.2513  -0.1427  -0.4081
## s.e.  0.0879   0.1044   0.0847   0.1005
## 
## sigma^2 = 549.1:  log likelihood = -520.04
## AIC=1050.07   AICc=1050.63   BIC=1063.75
## 
## Training set error measures:
##                    ME     RMSE      MAE      MPE     MAPE      MASE        ACF1
## Training set 4.986725 22.91749 18.28994 1.582755 7.579482 0.8684102 -0.02060094

=========================================================

14. FORECASTING SARIMA

=========================================================

forecast_sarima <- forecast(
  model_sarima,
  h = length(test)
)

plot(
  forecast_sarima,
  main = "Forecasting SARIMA"
)

=========================================================

15. EVALUASI MODEL SARIMA

=========================================================

akurasi_sarima <- accuracy(
  forecast_sarima,
  test
)

akurasi_sarima
##                     ME     RMSE      MAE      MPE      MAPE      MASE
## Training set  4.986725 22.91749 18.28994 1.582755  7.579482 0.8684102
## Test set     17.222539 81.22430 61.10674 1.041556 13.407979 2.9013613
##                     ACF1
## Training set -0.02060094
## Test set              NA

=========================================================

16. TABEL PERBANDINGAN MODEL

=========================================================

perbandingan <- data.frame(

  Model = c(
    "ARIMA",
    "SARIMA"
  ),

  RMSE = c(
    akurasi_arima[2, "RMSE"],
    akurasi_sarima[2, "RMSE"]
  ),

  MAE = c(
    akurasi_arima[2, "MAE"],
    akurasi_sarima[2, "MAE"]
  ),

  MAPE = c(
    akurasi_arima[2, "MAPE"],
    akurasi_sarima[2, "MAPE"]
  ),

  AIC = c(
    AIC(model_arima),
    AIC(model_sarima)
  )
)

perbandingan
##    Model    RMSE      MAE     MAPE      AIC
## 1  ARIMA 81.2243 61.10674 13.40798 1050.072
## 2 SARIMA 81.2243 61.10674 13.40798 1050.072

=========================================================

17. MENENTUKAN MODEL TERBAIK

=========================================================

if(
  min(perbandingan$RMSE) ==
  perbandingan$RMSE[1]
){

  cat("Model terbaik adalah ARIMA")

} else {

  cat("Model terbaik adalah SARIMA")

}
## Model terbaik adalah ARIMA

=========================================================

18. DIAGNOSTIK RESIDUAL

=========================================================

checkresiduals(model_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,4)
## Q* = 15.954, df = 6, p-value = 0.014
## 
## Model df: 4.   Total lags used: 10
checkresiduals(model_sarima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,4)
## Q* = 15.954, df = 6, p-value = 0.014
## 
## Model df: 4.   Total lags used: 10

=========================================================

19. FORECAST 30 BULAN KE DEPAN

=========================================================

forecast_30 <- forecast(
  model_arima,
  h = 30
)

plot(
  forecast_30,
  main = "Forecast 30 Bulan Kedepan"
)

forecast_30
##     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 116       498.5240 468.4940 528.5540 452.5971 544.4510
## 117       470.5975 420.6622 520.5328 394.2281 546.9670
## 118       438.5508 379.0527 498.0489 347.5562 529.5453
## 119       417.7644 351.9795 483.5492 317.1551 518.3736
## 120       417.7644 350.1068 485.4220 314.2910 521.2378
## 121       417.7644 348.2844 487.2443 311.5040 524.0248
## 122       417.7644 346.5087 489.0200 308.7883 526.7405
## 123       417.7644 344.7762 490.7526 306.1386 529.3902
## 124       417.7644 343.0838 492.4449 303.5504 531.9784
## 125       417.7644 341.4290 494.0998 301.0195 534.5093
## 126       417.7644 339.8093 495.7195 298.5424 536.9864
## 127       417.7644 338.2225 497.3062 296.1156 539.4131
## 128       417.7644 336.6668 498.8619 293.7364 541.7924
## 129       417.7644 335.1404 500.3883 291.4019 544.1268
## 130       417.7644 333.6417 501.8871 289.1098 546.4189
## 131       417.7644 332.1692 503.3596 286.8579 548.6709
## 132       417.7644 330.7216 504.8071 284.6440 550.8848
## 133       417.7644 329.2977 506.2310 282.4663 553.0624
## 134       417.7644 327.8964 507.6324 280.3232 555.2056
## 135       417.7644 326.5166 509.0122 278.2129 557.3159
## 136       417.7644 325.1573 510.3715 276.1341 559.3947
## 137       417.7644 323.8177 511.7110 274.0854 561.4434
## 138       417.7644 322.4970 513.0318 272.0654 563.4633
## 139       417.7644 321.1943 514.3345 270.0731 565.4557
## 140       417.7644 319.9089 515.6199 268.1073 567.4214
## 141       417.7644 318.6402 516.8886 266.1670 569.3617
## 142       417.7644 317.3875 518.1412 264.2513 571.2775
## 143       417.7644 316.1503 519.3785 262.3591 573.1697
## 144       417.7644 314.9280 520.6008 260.4897 575.0391
## 145       417.7644 313.7200 521.8088 258.6422 576.8865