Dive Deeper CM2
1 Library Setup
library(tidyverse)
library(MLmetrics)
library(lubridate)
library(TSstudio)
library(forecast)
library(tseries)
theme_set(theme_minimal())
Gunakanlah data nybirth.csv
. Buatlah model time series yang tepat untuk data tersebut, analisislah pola time series yang ada pada data tersebut, apakah multiplicative atau additive? Lakukanlah forecasting dan lihatlah model evaluationnya.
2 Data Preparation
2.1 Read Data
<- read.csv("data_input/nybirth.csv") birth
<- birth %>%
birth mutate(date = ymd(date)) %>%
arrange(date)
glimpse(birth)
## Rows: 168
## Columns: 2
## $ date <date> 1946-01-01, 1946-02-01, 1946-03-01, 1946-04-01, 1946-05-01, 19~
## $ births <dbl> 26.663, 23.598, 26.931, 24.740, 25.806, 24.364, 24.477, 23.901,~
#check range from birthdate
range(birth$date)
## [1] "1946-01-01" "1959-12-01"
2.2 Buatlah object ts
<- ts(data = birth$births,
birth_ts start = c(1946,1),
frequency = 12)
2.3 Visualisasikan object ts
%>%
birth_ts autoplot()
2.4 Splitting train test, untuk data train 12 tahun, data test 2 tahun
# test
<- tail(birth_ts, 24)
birth_test
# train
<- head(birth_ts, -24) birth_train
2.5 Lakukan decompose untuk data train dan visualisasikan untuk menentukan additive atau multiplicative
%>%
birth_train decompose() %>%
autoplot()
3 Modelling Holt’s Winter Explonential
- model dengan nilai smoothing nya auto
- model dengan nilai smoothing dengan mendefinisikan nilai alpha beta dan gamma secara manual
# #Smoothing Auto
<- HoltWinters(x = birth_train)
birth_hw_auto
# #Smoothing Manual
<- HoltWinters(x = birth_train,
birth_hw_manual alpha = 0.8, #recent observation
beta = 0.3, #trend
gamma = 0.05) #seasonal
3.1 Plot HW
3.1.1 HW Auto
plot(birth_hw_auto)
3.1.2 HW Manual
plot(birth_hw_manual)
3.2 Lakukanlah forecasting untuk 2 tahun kedepan (24 bulan)
#Smoothing Auto
<- forecast(birth_hw_auto, h=24)
birth_forecast_auto
#Smoothing Manual
<- forecast(birth_hw_manual, h=24) birth_forecast_manual
3.3 Visualisasikan data train, test dan hasil forecastnya
3.3.1 Train
%>%
birth_train autoplot(lwd = 1)+
autolayer(birth_hw_auto$fitted[,1], lwd = 1, series = "Auto (a=0.8, b=0.01, y=1)")+
autolayer(birth_hw_manual$fitted[,1], lwd = 1, series = "Manual(a=0.8, b=0.2, y=0.05)")
3.3.2 Test
%>%
birth_test autoplot(lwd = 1)+
autolayer(birth_forecast_auto$mean, lwd = 1, series = "Auto (a=0.8, b=0.01, y=1)")+
autolayer(birth_forecast_manual$mean, lwd = 1, series = "Manual(a=0.8, b=0.2, y=0.05)")
3.3.3 Confidence Interval
%>%
birth_ts autoplot (lwd = 1)+
autolayer(birth_forecast_auto, lwd = 1, series = "Auto (a=0.8, b=0.01, y=1)")+
autolayer(birth_forecast_manual, lwd = 1, series = "Manual (a=0.8, b=0.2, y=0.05)")
3.4 Model Evaluation
#Smoothing Auto
MAPE(y_pred = birth_forecast_auto$mean, y_true = birth_test) *100
## [1] 4.674416
#Smoothing Manual
MAPE(y_pred = birth_forecast_manual$mean, y_true = birth_test) *100
## [1] 3.511854
Model birth_hw_manual
memiliki persentasi error sebesar 3.5%, lebih rendah dibandingkan dengan model birth_hw_auto
. Jika mengacu pada besaran error antar 2 model tersebut, model birth_hw_manual
merupakan model terbaik untuk melakukan forecast pada datasets Birth.
4 ARIMA (Autoregressive Integrated Moving Average)
4.1 Differencing
Untuk menghasilkan data yang stasioner, maka dilakukan differencing menggunakan function (diff())
%>%
birth_train diff(lag = 12) %>%
diff(lag = 1) %>%
adf.test()
##
## Augmented Dickey-Fuller Test
##
## data: .
## Dickey-Fuller = -6.5978, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
Berdasarkan hasil Differencing sebanyak 1 kali, dihasilkan p-value < 0.05 (alpha).
d = 1
Berikut perbandingan visual data sebelum dan sesudah dilakukan differencing sebanyak 1 kali.
4.1.1 Before Differencing
%>%
birth_train autoplot()
4.1.2 After Differencing
%>%
birth_train diff() %>%
autoplot()
4.2 Model Fitting
Membuat plot menggunakan tsdisplay() untuk menentukan nilai p (PACF) dan q (ACF).
%>%
birth_train %>%
diff tsdisplay()
Berdasarkan visualisasi PACF dan ACF di atas, maka dapat ditentukan nilai p dan q sebagai berikut:
- PACF : 1, 4
- ACF : 1, 2 (lag 5 melewati garis putus-putus biru, namun tidak di masukan untuk menimalkan jumlah model yang di buat.)
Kombinasi ARIMA (p,d,q):
- (1,1,1)
- (1,1,2)
- (4,1,1)
- (4,1,2)
#create model
<- arima(x = birth_train, order = c(1,1,1))
birth_arima111 <- arima(x = birth_train, order = c(1,1,2))
birth_arima112 <- arima(x = birth_train, order = c(4,1,1))
birth_arima411 <- arima(x = birth_train, order = c(4,1,2)) birth_arima412
Membuat model ARIMA menggunakan fungsi auto.arima()
<- auto.arima(birth_train, seasonal = F)
birth_auto_arima birth_auto_arima
## Series: birth_train
## ARIMA(1,1,0)
##
## Coefficients:
## ar1
## -0.5559
## s.e. 0.0706
##
## sigma^2 estimated as 1.544: log likelihood=-233.67
## AIC=471.33 AICc=471.42 BIC=477.26
Nilai p,d,q yang dihasilkan oleh model birth_auto_arima
adalah (1,1,0).
4.3 Goodness of Fit
$aic birth_arima111
## [1] 473.0518
$aic birth_arima112
## [1] 474.777
$aic birth_arima411
## [1] 460.5939
$aic birth_arima412
## [1] 461.6367
$aic birth_auto_arima
## [1] 471.3316
Berdasarkan nilai AIC dari masing-masing model, model birth_arima411
memiliki AIC terkecil sebesar 460.59.
4.4 Model Evaluation
accuracy(birth_arima111)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01222229 1.232849 0.9623417 -0.1312407 3.934987 0.8157662
## ACF1
## Training set 0.006544136
accuracy(birth_arima112)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01011292 1.231642 0.9650349 -0.133472 3.942579 0.8180492
## ACF1
## Training set 0.002596069
accuracy(birth_arima411)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.05953305 1.152643 0.9238914 0.02838998 3.783571 0.7831723
## ACF1
## Training set 0.009414609
accuracy(birth_arima412)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.05753951 1.148794 0.9033971 0.02281373 3.702157 0.7657996
## ACF1
## Training set -0.001865113
accuracy(birth_auto_arima)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01111 1.234064 0.9630305 -0.1324311 3.938786 1.012046
## ACF1
## Training set -0.01788515
Berdasarkan hasil evaluasi 5 model dengan menggunakan fungsi accuracy(), MAPE terendah sebesar 3.7% dihasilkan oleh model birth_arima412
.
5 SARIMA (Seasonal ARIMA)
5.1 Differencing
Pembuatan object ts dan splitting data sudah dilakukan pada saat membuat model Holt-Winter, sehingga kita bisa langsung masuk ke dalam tahap Differencing.
%>%
birth_train diff(lag = 12) %>%
diff(lag =1) %>%
adf.test
##
## Augmented Dickey-Fuller Test
##
## data: .
## Dickey-Fuller = -6.5978, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
Berdasarkan hasil Differencing sebanyak 1 kali pada index utama dan seasonal, dihasilkan p-value < 0.05 (alpha).
- d = 1
- D = 1
5.1.1 Before Differencing
%>%
birth_train autoplot()
5.1.2 After Differencing
%>%
birth_train diff(lag = 12) %>%
diff(lag =1) %>%
autoplot()
5.2 Model Fitting
Membuat plot menggunakan tsdisplay() untuk menentukan nilai p (PACF) dan q (ACF).
%>%
birth_train diff(lag = 12) %>%
diff(lag =1) %>%
tsdisplay(lag.max = 36)
index utama :
- p : 2,3
- d : 1
- q : 2,3
seasonal :
- P : 1,2
- D : 1
- Q : 1
Kombinasi model yang terbentuk adalah :
ARIMA (2,1,2)(1,1,1)[12] sarima1
ARIMA (3,1,2)(1,1,1)[12] sarima2
ARIMA (2,1,3)(1,1,1)[12] sarima3
ARIMA (3,1,3)(1,1,1)[12] sarima4
ARIMA (2,1,2)(2,1,1)[12] sarima5
ARIMA (3,1,2)(2,1,1)[12] sarima6
ARIMA (2,1,3)(2,1,1)[12] sarima7
ARIMA (3,1,3)(2,1,1)[12] sarima8
# create model
<- arima(x = birth_train, order = c(2,1,2), seasonal = c(1,1,1))
sarima1 <- arima(x = birth_train, order = c(3,1,2), seasonal = c(1,1,1))
sarima2 <- arima(x = birth_train, order = c(2,1,3), seasonal = c(1,1,1))
sarima3 <- arima(x = birth_train, order = c(3,1,3), seasonal = c(1,1,1))
sarima4 <- arima(x = birth_train, order = c(2,1,2), seasonal = c(2,1,1))
sarima5 <- arima(x = birth_train, order = c(3,1,2), seasonal = c(2,1,1))
sarima6 <- arima(x = birth_train, order = c(2,1,3), seasonal = c(2,1,1))
sarima7 <- arima(x = birth_train, order = c(3,1,3), seasonal = c(2,1,1)) sarima8
Membuat model ARIMA menggunakan fungsi auto.arima()
# create model
<- auto.arima(birth_train)
sarima_auto sarima_auto
## Series: birth_train
## ARIMA(0,1,2)(0,1,2)[12]
##
## Coefficients:
## ma1 ma2 sma1 sma2
## -0.1054 -0.2263 -1.1221 0.3499
## s.e. 0.0883 0.0822 0.1095 0.1127
##
## sigma^2 estimated as 0.4161: log likelihood=-135.08
## AIC=280.15 AICc=280.63 BIC=294.53
Nilai (p,d,q)(P,D,Q)[F] yang dihasilkan oleh model birth_auto_arima
adalah (0,1,2)(0,1,2)[12].
5.3 Goodness of Fit
$aic sarima1
## [1] 283.9502
$aic sarima2
## [1] 285.3937
$aic sarima3
## [1] 285.4812
$aic sarima4
## [1] 287.3461
$aic sarima5
## [1] 281.1986
$aic sarima6
## [1] 277.5694
$aic sarima7
## [1] 281.5742
$aic sarima8
## [1] 277.8386
$aic sarima_auto
## [1] 280.1547
Berdasarkan nilai AIC dari masing-masing model, model sarima6
memiliki AIC terkecil sebesar 277.56.
5.4 Model Evaluation
accuracy(sarima1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1445184 0.604505 0.4600347 0.5880691 1.860877 0.3899662
## ACF1
## Training set -0.060737
accuracy(sarima2)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1460272 0.6038102 0.457588 0.5932925 1.850959 0.3878922
## ACF1
## Training set -0.05345237
accuracy(sarima3)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1492736 0.6036553 0.4552473 0.6074197 1.842082 0.385908
## ACF1
## Training set -0.05678732
accuracy(sarima4)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1475945 0.6037679 0.456615 0.5999198 1.847341 0.3870674
## ACF1
## Training set -0.05458337
accuracy(sarima5)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.09770979 0.5786945 0.4387213 0.4022654 1.781933 0.3718991
## ACF1
## Training set -0.0948942
accuracy(sarima6)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.09476621 0.5693816 0.4233738 0.3806519 1.722433 0.3588892
## ACF1
## Training set -0.02140837
accuracy(sarima7)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1168173 0.5847007 0.437083 0.4778213 1.775556 0.3705104
## ACF1
## Training set -0.01359231
accuracy(sarima8)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.123292 0.5692868 0.4284582 0.5079665 1.738566 0.3631992
## ACF1
## Training set -0.006509432
accuracy(sarima_auto)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1329977 0.6057793 0.450476 0.5482743 1.825522 0.4734038
## ACF1
## Training set -0.01184833
Berdasarkan hasil evaluasi 9 model dengan menggunakan fungsi accuracy(), MAPE terendah sebesar 1.72% dihasilkan oleh model sarima6
.
6 Forecasting
Berdasarkan pembuatan model ARIMA dan SARIMA, didapatkan 2 model dengan MAPE terendah yaitu:
- ARIMA :
birth_arima412
- SARIMA :
sarima6
Kita akan melakukan forecasting untuk 24 bulan kedepan menggunakan 2 model tersebut dan membandingkan hasilnya.
6.1 Forecasting model
<- forecast(birth_arima412, h=24)
forecast_arima <- forecast(sarima6, h=24) forecast_sarima
6.2 Visualization
6.2.1 Whole Data
%>%
birth_ts autoplot(lwd = 1)+
autolayer(forecast_arima$mean, lwd = 1, series = "ARIMA")+
autolayer(forecast_sarima$mean, lwd = 1, series = "SARIMA")
6.2.2 Data Test
%>%
birth_test autoplot(lwd = 1)+
autolayer(forecast_arima$mean, lwd = 1, series = "ARIMA")+
autolayer(forecast_sarima$mean, lwd = 1, series = "SARIMA")
Berdasarkan hasil visualisasi, model SARIMA
terlihat memiliki pola yang mirip dengan data birth_test
, namun tingkat error yang dihasilkan cenderung besar.
6.3 Model Evaluation
MAPE(y_pred=forecast_arima$mean, birth_test)*100
## [1] 4.430088
MAPE(y_pred=forecast_sarima$mean, birth_test)*100
## [1] 8.472663
Berdasarkan pengecekan error menggunakan MAPE, model ARIMA
memiliki error yang lebih rendah dibandingkan dengan model SARIMA
.