library(tidyverse) # package
library(forecast) # forecast ARIMA & HoltWinters
library(MLmetrics) # MAPE()
library(tseries) # ts() timeseries
library(TSstudio) # to visualize our timeseries
library(lubridate) # incase if we needed ymd_hms customize
library(padr) # timestamp more specific (lubridate)
Read Data
birth <- read.csv("data_input/nybirth.csv")
glimpse(birth$date)
## chr [1:168] "1946-01-01" "1946-02-01" "1946-03-01" "1946-04-01" ...
Data Pre-processing
filtering date from 1950
birth_1950 <- birth %>%
mutate(year = year(date)) %>%
filter(year >= 1950)
Check missing value
birth %>%
is.na() %>%
colSums()
## date births
## 0 0
there is no missing value in our data thus we can proceed the our next step
Converting Dataframe timeseries
birth_ts <- ts(data = birth$births,
start = 1946,
frequency = 12)
birth_ts_1950 <- ts(data = birth_1950$births,
start = 1950,
frequency = 12)
Data Manipulation
Splitting train test and train test for the prediction later, in this case we want to forecasting for 2 years on the future (24 monhts) as the frequency
# test
birth_test <- tail(birth_ts, 24)
# train
birth_train <- head(birth_ts, -24)
visualization EDA timeseries
now we can visualize our data from library(forecast), autoplot() to observe the frequency of the data. Additionally, we can also use library(TSstudio) to perform data analysis.
1946-1960
birth_ts %>%
autoplot()
1950-1960
birth_ts_1950 %>%
autoplot()
Exploratory Data Analysis
Seasonal
TSstudio::ts_seasonal(birth_ts, type = "all")
Heatmap
ts_heatmap(birth_ts)
Interactive Plot Birth rate
ts_plot(birth_ts,
title = "New York Birth Rate",
Ytitle = "Births ratio",
Xtitle = "Years",
slider = TRUE)
Check Seasonal or Non-Seasonal
TSstudio::ts_cor(birth_ts)
HoltWinters Timeseries
birth_ts %>% autoplot()
Decomposition (additive & multiplicative)
Additive timeseries
birth_train %>%
decompose(type = "additive") %>%
autoplot()
Multiplicative timeseries
birth_train %>%
decompose(type = "multiplicative") %>%
autoplot()
Holt-Winters using Additive Seasonality
# a
birth_hw_auto <- HoltWinters(birth_train) # Auto
# b
birth_hw_manual <- HoltWinters(birth_train, alpha = 0.5, beta = 0.2, gamma = 0.6) # Manual
birth_forecast_a <- forecast(birth_hw_auto, h = 24)
birth_forecast_b <- forecast(birth_hw_manual, h = 24)
birth_ts %>%
autoplot() +
autolayer(birth_hw_auto$fitted[,1], lwd = 1) +
autolayer(birth_hw_manual$fitted[,1], lwd = 1)
birth_ts %>%
autoplot()+
autolayer(birth_forecast_a$mean, series = "auto", lwd = 1)+
autolayer(birth_forecast_b$mean, series = "manual", lwd = 1)
### MAPE RESULT
MAPE(y_pred = birth_forecast_a$mean,
y_true = birth_test)*100
## [1] 4.674416
MAPE(y_pred = birth_forecast_b$mean,
y_true = birth_test)*100
## [1] 11.39774
Model Evaluation Holtwinters
accuracy(birth_hw_auto$fitted[,1], birth_ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 0.09549013 0.7536798 0.5737626 0.3378208 2.339772 0.1415391 0.4947687
accuracy(birth_hw_manual$fitted[,1], birth_ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 0.02567983 0.8293213 0.6553268 0.03230736 2.66782 0.3416248 0.5377257
model SARIMA
Seasonal arima adalah metode arima dimana object time series yang ada memiliki pola seasonal[^10]. Tahapan dalam melakukan pemodelan menggunakan SARIMA sama seperti saat membuat pemodelan ARIMA.
birth_train %>%
diff(lag = 12) %>%
diff(lag = 1) %>%
adf.test()
##
## Augmented Dickey-Fuller Test
##
## data: .
## Dickey-Fuller = -6.5978, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
auto SARIMA
- Series: birth_ts
- ARIMA(2,1,2)(1,1,1)[12]
birth_auto <- auto.arima(birth_ts)
birth_auto
## Series: birth_ts
## ARIMA(2,1,2)(1,1,1)[12]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sar1 sma1
## 0.6539 -0.4540 -0.7255 0.2532 -0.2427 -0.8451
## s.e. 0.3004 0.2429 0.3228 0.2879 0.0985 0.0995
##
## sigma^2 estimated as 0.4076: log likelihood=-157.45
## AIC=328.91 AICc=329.67 BIC=350.21
manual SARIMA
Kesimpulan: data nya sudah di diffrencing 1x diff() untuk menghasilkan stasioner (p-value < alpha). sehingga nilai d pada model ARIMA adalah 1
birth_train %>%
diff(lag = 12) %>%
diff(lag = 1) %>%
tsdisplay(lag.max = 36)
Untuk seasosal, AR murni
* P : 1,2 * D : 1 * Q : 1
untuk keseluruhan data, MA murni * p: 2,3 * d: 1 * q: 2,3
birth_a <- arima (x = birth_train, order = c(2,1,2), seasonal = c(1,1,1))
birth_b <- arima (x = birth_train, order = c(2,1,3), seasonal = c(1,1,1))
birth_c <- arima (x = birth_train, order = c(3,1,3), seasonal = c(1,1,1))
birth_d <- arima (x = birth_train, order = c(3,1,2), seasonal = c(2,1,1))
birth_e <- arima (x = birth_train, order = c(2,1,2), seasonal = c(2,1,1))
cek nilai AIC
birth_a$aic
## [1] 283.9502
birth_b$aic
## [1] 285.4812
birth_c$aic
## [1] 287.3461
birth_d$aic # jackpot
## [1] 277.5694
birth_e$aic
## [1] 281.1986
cat(
accuracy(birth_auto), # 1.8232
accuracy(birth_a),# 1.860
accuracy(birth_b), # 1.842
accuracy(birth_c), # 1.847
accuracy(birth_d), # 1.722 # jackpot
accuracy(birth_e) # 1.781
)
## 0.1273882 0.6012674 0.4578678 0.5110816 1.823201 0.4841123 -0.04405169 0.1445184 0.604505 0.4600347 0.5880691 1.860877 0.3899662 -0.060737 0.1492736 0.6036553 0.4552473 0.6074197 1.842082 0.385908 -0.05678732 0.1475945 0.6037679 0.456615 0.5999198 1.847341 0.3870674 -0.05458337 0.09476621 0.5693816 0.4233738 0.3806519 1.722433 0.3588892 -0.02140837 0.09770979 0.5786945 0.4387213 0.4022654 1.781933 0.3718991 -0.0948942
forecasting
birth_forecast_arima <- forecast(birth_d, h = 24)
EDA
birth_ts %>%
autoplot() +
autolayer(birth_forecast_arima$mean, lwd = 1, series = "ARIMA (3,1,2) (2,1,1)")
evaluation
MAPE(y_pred = birth_forecast_arima$mean, birth_test)*100
## [1] 8.472663
birth_ts %>%
autoplot() +
autolayer(birth_forecast_arima$mean, lwd = 1, series = "SARIMA (3,1,2) (2,1,1)") +
autolayer(birth_forecast_a$mean, series = "addictive") +
autolayer(birth_forecast_b$mean, series = "multiplicative")
data.frame(y_pred = birth_forecast_arima$mean, birth_test)
## y_pred birth_test
## 1 27.36446 27.132
## 2 26.33198 24.924
## 3 29.42753 28.963
## 4 27.45003 26.589
## 5 28.70995 27.931
## 6 29.20205 28.009
## 7 31.15892 29.229
## 8 31.17317 28.759
## 9 30.47068 28.405
## 10 30.74757 27.945
## 11 28.90249 25.912
## 12 29.82579 26.619
## 13 29.32366 26.076
## 14 27.78252 25.286
## 15 30.94898 27.660
## 16 29.44201 25.951
## 17 30.80229 26.398
## 18 30.20065 25.565
## 19 31.82085 28.865
## 20 31.45662 30.000
## 21 31.13976 29.261
## 22 31.52479 29.012
## 23 29.27402 26.992
## 24 30.11243 27.897