Data From Part 1

ihsg_tbl <- read_rds("ihsg_tbl")
jkse_current_tbl <- read_rds("jkse_current_tbl")
ihsg_future_tbl <- read_rds("ihsg_future_tbl")
splits <- read_rds("splits")


regression_model_tbl <- read_rds("regression_model_tbl")
machine_learning_model_tbl <- read_rds("machine_learning_model_tbl")
boosted_model_tbl <- read_rds("boosted_model_tbl")
all_model_tbl <- read_rds("all_model_tbl")

Resampling (Time Series Split Cross Validation)

resamples_tscv_ihsg <- time_series_cv(
    data = training(splits),
    cumulative  = TRUE,
    initial     = "5 year", # train
    assess      = "2 year", # test
    skip        = "2 year",
    slice_limit = 5,
    date_var = date
)

resamples_tscv_ihsg %>% write_rds("resamples_tscv_ihsg")
resamples_tscv_ihsg <- read_rds("resamples_tscv_ihsg")

resamples_tscv_ihsg %>%
    tk_time_series_cv_plan() %>%
    plot_time_series_cv_plan(date, value)

Regression Model

regression_model_resample_fitted <- regression_model_tbl %>%
  modeltime_fit_resamples(resamples_tscv_ihsg,
                          control = control_resamples(verbose = FALSE))

regression_model_resample_fitted %>% write_rds("regression_model_resample_fitted")
regression_model_resample_fitted <- read_rds("regression_model_resample_fitted")

regression_model_resample_fitted %>% 
  plot_modeltime_resamples()
## Warning: There were 2 warnings in `dplyr::summarise()`.
## The first warning was:
## ℹ In argument: `.estimate = metric_fn(truth = .value, estimate = .pred, na_rm =
##   na_rm)`.
## ℹ In group 8: `.resample_id = "Slice2"`, `.model_desc = "5_BATS(0.089, {0,0},
##   0.972, -)"`.
## Caused by warning:
## ! A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
## ℹ Run ]8;;ide:run:dplyr::last_dplyr_warnings()dplyr::last_dplyr_warnings()]8;; to see the 1 remaining warning.
regression_model_resample_fitted %>% 
  modeltime_resample_accuracy(summary_fns = mean) %>% 
  table_modeltime_accuracy(.interactive = FALSE)
## Warning: There were 2 warnings in `dplyr::summarise()`.
## The first warning was:
## ℹ In argument: `.estimate = metric_fn(truth = value, estimate = .pred, na_rm =
##   na_rm)`.
## ℹ In group 17: `.model_id = 5`, `.model_desc = "BATS(0.089, {0,0}, 0.972, -)"`,
##   `.resample_id = "Slice2"`, `.type = "Resamples"`.
## Caused by warning:
## ! A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
## ℹ Run ]8;;ide:run:dplyr::last_dplyr_warnings()dplyr::last_dplyr_warnings()]8;; to see the 1 remaining warning.
## Warning: There was 1 warning in `dplyr::summarise()`.
## ℹ In argument: `dplyr::across(.fns = summary_fns, ...)`.
## Caused by warning:
## ! Using `across()` without supplying `.cols` was deprecated in dplyr 1.1.0.
## ℹ Please supply `.cols` instead.
Accuracy Table
.model_id .model_desc .type n mae mape mase smape rmse rsq
1 ARIMA(4,1,2)(0,0,2)[5] WITH DRIFT Resamples 5 948.04 32.81 31.49 33.85 1054.44 0.36
3 PROPHET Resamples 5 653.45 23.75 22.23 21.65 743.01 0.50
4 PROPHET Resamples 5 653.33 23.74 22.23 21.63 742.88 0.50
5 BATS(0.089, {0,0}, 0.972, -) Resamples 5 590.13 21.01 20.34 22.46 678.09 NA

Machine Learning Model

machine_learning_model_resample_fitted <- machine_learning_model_tbl %>%
  modeltime_fit_resamples(resamples_tscv_ihsg,
                          control = control_resamples(verbose = FALSE))

machine_learning_model_resample_fitted %>% write_rds("machine_learning_model_resample_fitted")

machine_learning_model_resample_fitted <- read_rds("machine_learning_model_resample_fitted")

machine_learning_model_resample_fitted %>% 
  plot_modeltime_resamples()
machine_learning_model_resample_fitted %>% 
  modeltime_resample_accuracy(summary_fns = mean) %>% 
  table_modeltime_accuracy()

Boosted Model

boosted_model_resample_fitted <- boosted_model_tbl %>%
  modeltime_fit_resamples(resamples_tscv_ihsg,
                          control = control_resamples(verbose = FALSE))

boosted_model_resample_fitted %>% write_rds("boosted_model_resample_fitted")

boosted_model_resample_fitted <- read_rds("boosted_model_resample_fitted")

boosted_model_resample_fitted %>% 
  plot_modeltime_resamples() # Slice 2
boosted_model_resample_fitted %>% 
  modeltime_resample_accuracy(summary_fns = mean) %>% 
  table_modeltime_accuracy()
boosted_model_tbl %>% 
  modeltime_accuracy(new_data = testing(splits)) %>% 
  # modeltime_resample_accuracy(summary_fns = mean) %>% 
  table_modeltime_accuracy()

!!! Resampling All Model no Transform

all_model_tbl_resample_fitted <- combine_modeltime_tables(regression_model_resample_fitted, 
                         machine_learning_model_resample_fitted, 
                         boosted_model_resample_fitted)
## Some Modeltime Tables have more than 3 columns, likely due to calibration.
## - Models: 1, 2, 3 have more than 3 columns.
## Calibration data is being removed. Please use `modeltime_calibrate()` to re-calibrate following combining modeltime tables.
all_model_tbl_resample_fitted %>% write_rds("all_model_tbl_resample_fitted")
all_model_tbl_resample_fitted <- read_rds("all_model_tbl_resample_fitted")

Accuracy Checking

all_model_accuracy_tbl <- all_model_tbl %>% 
  modeltime_calibrate(new_data = testing(splits)) %>% 
  modeltime_accuracy()

all_model_resample_fitted_accuracy_tbl <- all_model_tbl_resample_fitted %>% 
  modeltime_calibrate(new_data = testing(splits)) %>%
  modeltime_accuracy()

all_model_accuracy_tbl %>% write_rds("all_model_accuracy_tbl")
all_model_accuracy_tbl <- read_rds("all_model_accuracy_tbl")

all_model_resample_fitted_accuracy_tbl %>% write_rds("all_model_resample_fitted_accuracy_tbl")
all_model_resample_fitted_accuracy_tbl <- read_rds("all_model_resample_fitted_accuracy_tbl")

all_model_accuracy_tbl %>% 
    table_modeltime_accuracy(defaultPageSize = 30, bordered = TRUE, resizable = TRUE)
all_model_resample_fitted_accuracy_tbl %>% 
    table_modeltime_accuracy(defaultPageSize = 30, bordered = TRUE, resizable = TRUE)

Selected Model based on RMSE

all_model_accuracy_tbl %>% 
  table_modeltime_accuracy(defaultPageSize = 30, bordered = TRUE, resizable = TRUE)
# Model ID : Propet #4 (533.23), Prophet #3 (533.51), Prophet Boost #15 (674.89), ARIMA Boost #16 (839.28)

all_model_resample_fitted_accuracy_tbl %>% 
    table_modeltime_accuracy(defaultPageSize = 30, bordered = TRUE, resizable = TRUE)
# Model ID : Propet #4 (533.23), Prophet #3 (533.51), Prophet Boost #15 (674.89), ARIMA Boost #16 (839.28)

Visual Model No Transform

all_model_tbl %>% 
  pull_modeltime_model(4) %>% 
  modeltime_calibrate(new_data = testing(splits)) %>% 
  modeltime_refit(data = ihsg_tbl) %>% 
  modeltime_forecast(h = "1 year", actual_data = ihsg_tbl) %>% 
  plot_modeltime_forecast()
## Converting to Modeltime Table.
all_model_tbl %>% 
  pull_modeltime_model(3) %>% 
  modeltime_calibrate(new_data = testing(splits)) %>% 
  modeltime_refit(data = ihsg_tbl) %>% 
  modeltime_forecast(h = "1 year", actual_data = ihsg_tbl) %>% 
  plot_modeltime_forecast()
## Converting to Modeltime Table.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
all_model_tbl %>% 
  pull_modeltime_model(15) %>% 
  modeltime_calibrate(new_data = testing(splits)) %>% 
  modeltime_refit(data = ihsg_tbl) %>% 
  modeltime_forecast(h = "1 year", actual_data = ihsg_tbl) %>% 
  plot_modeltime_forecast()
## Converting to Modeltime Table.
all_model_tbl %>% 
  pull_modeltime_model(16) %>% 
  modeltime_calibrate(new_data = testing(splits)) %>% 
  modeltime_refit(data = ihsg_tbl) %>% 
  modeltime_forecast(h = "1 year", actual_data = ihsg_tbl) %>% 
  plot_modeltime_forecast()
## Converting to Modeltime Table.
all_model_tbl %>% 
  pull_modeltime_model(2) %>% 
  modeltime_calibrate(new_data = testing(splits)) %>% 
  modeltime_refit(data = ihsg_tbl) %>% 
  modeltime_forecast(h = "1 year", actual_data = ihsg_tbl) %>% 
  plot_modeltime_forecast()
## Converting to Modeltime Table.
## frequency = 5 observations per 1 week

Visual Resample Model No Transform

all_model_tbl_resample_fitted %>% 
  pull_modeltime_model(4) %>% 
  modeltime_calibrate(new_data = testing(splits)) %>% 
  modeltime_refit(data = ihsg_tbl) %>% 
  modeltime_forecast(h = "1 year", actual_data = ihsg_tbl) %>% 
  plot_modeltime_forecast()
## Converting to Modeltime Table.
all_model_tbl_resample_fitted %>% 
  pull_modeltime_model(3) %>% 
  modeltime_calibrate(new_data = testing(splits)) %>% 
  modeltime_refit(data = ihsg_tbl) %>% 
  modeltime_forecast(h = "1 year", actual_data = ihsg_tbl) %>% 
  plot_modeltime_forecast()
## Converting to Modeltime Table.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
all_model_tbl_resample_fitted %>% 
  pull_modeltime_model(12) %>% 
  modeltime_calibrate(new_data = testing(splits)) %>% 
  modeltime_refit(data = ihsg_tbl) %>% 
  modeltime_forecast(h = "1 year", actual_data = ihsg_tbl) %>% 
  plot_modeltime_forecast()
## Converting to Modeltime Table.
all_model_tbl_resample_fitted %>% 
  pull_modeltime_model(13) %>% 
  modeltime_calibrate(new_data = testing(splits)) %>% 
  modeltime_refit(data = ihsg_tbl) %>% 
  modeltime_forecast(h = "1 year", actual_data = ihsg_tbl) %>% 
  plot_modeltime_forecast()
## Converting to Modeltime Table.
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `.model = purrr::map2(...)`.
## Caused by warning:
## ! The number of neighbors should be >= 0 and <= 9. Truncating the value.