Contoh header

coba link

library(tidyverse)
library(forecast)
library(MLmetrics)
library(tseries)
theme_set(theme_minimal())

Read Data

birth <- read.csv("data_input/nybirth.csv")
tail(birth)
range(birth$date)
#> [1] "1946-01-01" "1959-12-01"

Membuat Object TS

birth_ts1 <- ts(data = birth$births,
                  start = c(1946,1),
                  frequency = 12)

birth_ts_q <- ts(data = birth$births,
                   start = 1946,
                   frequency = 3)

Visualisasi Object TS

birth_ts1 %>% 
  autoplot()

class(birth_ts1)
#> [1] "ts"
birth_ts1 %>% 
  decompose() %>%  
  autoplot()

# birth_ts %>%
#   decompose() %>%
#   autoplot()
birth_ts_q %>%
  decompose(type = "additive") %>%
  autoplot()

#plot trend masih fluktuatif, pilih yang tahun (12)

Splitting train dan test

Splitting data duatahun terakhir sebagai data test, dan sisanya data train

birth_test <- tail(birth_ts1, 24) #hanya mengambil 24 bulan terakhir

birth_train <- head(birth_ts1, -24) #tidak mengambil 24 bulan terakhir
birth_train %>% 
  decompose() %>% 
  autoplot()

birth_train %>% 
  decompose(type = "multiplicative") %>% 
  autoplot()

Model Fitting

Dengan mengggunakan metode smoothing auto
Dengan nilai smoothing mendefinisikan nilai alpha beta dan gamma (manual)

birth_hw <- HoltWinters(x = birth_train)

birth_hw2 <- HoltWinters(x = birth_train, alpha = 0.9, beta = 0.02 , gamma = 0.90)
#gamma = seasonal
#alpha = error (residual)
#beta = trend
birth_forecast_auto <- forecast(birth_hw, h = 24)
birth_forecast_manual <- forecast(birth_hw2, h = 24)

Visualisasi data dan hasil forecast

birth_ts1 %>% 
  autoplot()+
  autolayer(birth_hw$fitted[,1], series =  "Auto", lwd = 1)+
  autolayer(birth_hw2$fitted[,1], series =  "Alpha 0.9; beta 0.02; gamma = 0.9", lwd = 1)

birth_ts1 %>% 
  autoplot()+
  autolayer(birth_forecast_auto)

  # autolayer(birth_forecast_manual, col = "red")
birth_ts1 %>% 
  autoplot()+
  autolayer(birth_forecast_manual, col = "red")

Model Evaluation

MAPE(y_pred =  birth_forecast_manual$mean, y_true = birth_test) * 100 #$ mean adalah poin forecastnya  
#> [1] 4.152653
MAPE(y_pred =  birth_forecast_auto$mean, y_true = birth_test) * 100 #dikali 100(persen) supaya keluar nya bukan satu komaan
#> [1] 4.674416

nilai MAPE antara model auto dan manual terpaut 0.5 persen, rata-rata prosentase error dari model manual lebih kecil (lebih baik)

Seasonal ARIMA (SARIMA)

Memisahkan data untuk keperluan differencing

birth_train_a <- birth_train 
birth_test_a <- birth_test 

Memeriksa stasionaritas data

birth_train_a%>% 
  adf.test() #P-value 0.01, data sudah stasioner (?)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  .
#> Dickey-Fuller = -5.6971, Lag order = 5, p-value = 0.01
#> alternative hypothesis: stationary
birth_train_a %>% 
  autoplot()

Data secara statistik sudah stasioner (p-value < 0.05),
namun setelah dilakukan pemeriksaan plot masih perlu dilakukan differencing

Seleksi SARIMA (P,D,Q)(p, d, q) [m]

birth_train_a %>% 
  diff(lag = 12) %>% 
  diff(lag = 1 ) %>% 
  tsdisplay(lag.max = 36)

Seasonal AR

*P : 1, 2

*D : 1

*Q : 1

Keseluruhan Data

*p : 2, 3

*d : 1

*q : 2, 3

Kombinasi model yang mungkin terbentuk adalah:

SARIMA: (p, d, q)(P, D, Q)[m]

*ARIMA(2,1,2)(1,1,1) [12]

*ARIMA(3,1,3)(2,1,1) [12]

birth_arima212_111 <- arima(x = birth_train_a, order = c(2,1,2), seasonal = c(1,1,1))
birth_arima313_211 <- arima(x = birth_train_a, order = c(3,1,3), seasonal = c(2,1,1))
birth_auto_arima <- auto.arima(birth_train_a)
birth_auto_arima
#> Series: birth_train_a 
#> ARIMA(0,1,2)(0,1,2)[12] 
#> 
#> Coefficients:
#>           ma1      ma2     sma1    sma2
#>       -0.1054  -0.2263  -1.1221  0.3499
#> s.e.   0.0883   0.0822   0.1095  0.1127
#> 
#> sigma^2 estimated as 0.4161:  log likelihood=-135.08
#> AIC=280.15   AICc=280.63   BIC=294.53

Memeriksa besaran informasi yang hilang (AIC)

birth_arima212_111$aic
#> [1] 283.9502
birth_arima313_211$aic
#> [1] 277.8386
birth_auto_arima$aic
#> [1] 280.1547

Dari hasil penghitungan AIC dapat dilihat bahwa model SARIMA (3,1,3)(2,1,1)[12] memiliki lebih sedikit nilai AIC

Memeriksa besaran prosentase error

accuracy(birth_arima212_111)
#>                     ME     RMSE       MAE       MPE     MAPE      MASE
#> Training set 0.1445184 0.604505 0.4600347 0.5880691 1.860877 0.3899662
#>                   ACF1
#> Training set -0.060737
accuracy(birth_arima313_211)
#>                    ME      RMSE       MAE       MPE     MAPE      MASE
#> Training set 0.123292 0.5692868 0.4284582 0.5079665 1.738566 0.3631992
#>                      ACF1
#> Training set -0.006509432
accuracy(birth_auto_arima)
#>                     ME      RMSE      MAE       MPE     MAPE      MASE
#> Training set 0.1329977 0.6057793 0.450476 0.5482743 1.825522 0.4734038
#>                     ACF1
#> Training set -0.01184833

Dari hasil penghitungan accuracy dapat dilihat bahwa model SARIMA (3,1,3)(2,1,1)[12] memiliki nilai MAPE yang lebih kecil

Model Forecasting

birth_a_forecast <- forecast(birth_arima313_211, h = 24)
birth_au_forecast <- forecast(birth_auto_arima, h = 24)
birth_ts1 %>% 
  autoplot()+
  autolayer(birth_a_forecast, lwd = 1, series = "ARIMA (3,1,3)(2,1,1)")

Evaluasi model dengan data test

MAPE(y_pred = birth_a_forecast$mean, birth_test_a)*100
#> [1] 4.995938
MAPE(y_pred = birth_au_forecast$mean, birth_test_a)*100 #(?)
#> [1] 4.403916

Ingin ditanyakan: mengapa model manual memiliki nilai MAPE yang lebih besar saat dievaluasi dengan data testnya? Sedangkan pada saat proses membuat model, model manual lebih unggul