Load the following libraries.
library(tidyverse)
library(tidymodels)
library(modeltime)
library(timetk)
library(lubridate)
library(readr)
library(dplyr)
library(tidyr)
library(glmnet)
library(randomForest)
Split time series into training and testing sets. We use the last ****5 days** of data as the testing set.
splits <- covid19_confirmed_global_tbl %>%
time_series_split(assess = "5 days", cumulative = TRUE)
## Using date_var: date
splits %>%
tk_time_series_cv_plan() %>%
plot_time_series_cv_plan(date, value, .interactive = FALSE)
Several models to see this process :
Auto Arima Model fitting process.
model_fit_arima <- arima_reg() %>%
set_engine("auto_arima") %>%
fit(value ~ date, training(splits))
## frequency = 7 observations per 1 week
model_fit_arima
## parsnip model object
##
## Fit time: 342ms
## Series: outcome
## ARIMA(2,1,2)(0,0,2)[7] with drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 sma1 sma2 drift
## 1.0928 -0.5854 -1.6122 0.8593 0.1954 0.3654 2001.7886
## s.e. 0.0932 0.0879 0.0692 0.0662 0.1019 0.0941 858.2301
##
## sigma^2 estimated as 2.12e+08: log likelihood=-1759.24
## AIC=3534.47 AICc=3535.43 BIC=3559.08
Prophet Model fitting process.
model_fit_prophet <- prophet_reg() %>%
set_engine("prophet", yearly.seasonality = TRUE) %>%
fit(value ~ date, training(splits))
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
model_fit_prophet
## parsnip model object
##
## Fit time: 61ms
## PROPHET Model
## - growth: 'linear'
## - n.changepoints: 25
## - seasonality.mode: 'additive'
## - extra_regressors: 0
recipe_spec <- recipe(value ~ date, training(splits)) %>%
step_timeseries_signature(date) %>%
step_rm(contains("am.pm"), contains("hour"), contains("minute"),
contains("second"), contains("xts")) %>%
step_fourier(date, period = 365, K = 5) %>%
step_dummy(all_nominal())
recipe_spec %>% prep() %>% juice()
With a recipe in-hand, we can set up our machine learning pipelines.
Making an Elastic NET model
model_spec_glmnet <- linear_reg(penalty = 0.01, mixture = 0.5) %>%
set_engine("glmnet")
Fitted workflow
workflow_fit_glmnet <- workflow() %>%
add_model(model_spec_glmnet) %>%
add_recipe(recipe_spec %>% step_rm(date)) %>%
fit(training(splits))
Fit a Random Forest
model_spec_rf <- rand_forest(trees = 500, min_n = 50) %>%
set_engine("randomForest")
workflow_fit_rf <- workflow() %>%
add_model(model_spec_rf) %>%
add_recipe(recipe_spec %>% step_rm(date)) %>%
fit(training(splits))
Set the model up using a workflow
model_spec_prophet_boost <- prophet_boost() %>%
set_engine("prophet_xgboost", yearly.seasonality = TRUE)
workflow_fit_prophet_boost <- workflow() %>%
add_model(model_spec_prophet_boost) %>%
add_recipe(recipe_spec) %>%
fit(training(splits))
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## [11:41:37] WARNING: amalgamation/../src/learner.cc:480:
## Parameters: { validation } might not be used.
##
## This may not be accurate due to some parameters are only used in language bindings but
## passed down to XGBoost core. Or some parameters are not used but slip through this
## verification. Please open an issue if you find above cases.
workflow_fit_prophet_boost
## ══ Workflow [trained] ══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: prophet_boost()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## 4 Recipe Steps
##
## ● step_timeseries_signature()
## ● step_rm()
## ● step_fourier()
## ● step_dummy()
##
## ── Model ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## PROPHET w/ XGBoost Errors
## ---
## Model 1: PROPHET
## - growth: 'linear'
## - n.changepoints: 25
## - seasonality.mode: 'additive'
##
## ---
## Model 2: XGBoost Errors
##
## xgboost::xgb.train(params = list(eta = 0.3, max_depth = 6, gamma = 0,
## colsample_bytree = 1, min_child_weight = 1, subsample = 1),
## data = x, nrounds = 15, early_stopping_rounds = NULL, objective = "reg:squarederror",
## validation = 0)
Organize the models with IDs and creates generic descriptions.
model_table <- modeltime_table(
model_fit_arima,
model_fit_prophet,
workflow_fit_glmnet,
workflow_fit_rf,
workflow_fit_prophet_boost
)
model_table
Quantify error and estimate confidence intervals.
calibration_table <- model_table %>%
modeltime_calibrate(testing(splits))
calibration_table
Visualize the testing predictions (forecast).
calibration_table %>%
modeltime_forecast(actual_data = covid19_confirmed_global_tbl) %>%
plot_modeltime_forecast(.interactive = FALSE)
## 'new_data' is missing. Using '.calibration_data' to forecast.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
Calculate the testing accuracy to compare the models.
calibration_table %>%
modeltime_accuracy() %>%
table_modeltime_accuracy(.interactive = FALSE)
Accuracy Table | ||||||||
---|---|---|---|---|---|---|---|---|
.model_id | .model_desc | .type | mae | mape | mase | smape | rmse | rsq |
1 | ARIMA(2,1,2)(0,0,2)[7] WITH DRIFT | Test | 22526.77 | 6.26 | 1.22 | 6.51 | 25367.86 | 0.94 |
2 | PROPHET | Test | 21943.96 | 6.45 | 1.19 | 6.34 | 24090.94 | 0.02 |
3 | GLMNET | Test | 28088.56 | 7.77 | 1.53 | 8.18 | 32302.35 | 1.00 |
4 | RANDOMFOREST | Test | 82702.35 | 23.34 | 4.49 | 26.62 | 86107.41 | 0.36 |
5 | PROPHET W/ XGBOOST ERRORS | Test | 20410.54 | 6.14 | 1.11 | 5.92 | 24731.97 | 0.12 |
From the accuracy measures and forecast results, we see that:
calibration_table %>%
# Remove RANDOMFOREST model with low accuracy
filter(.model_id != 4) %>%
# Refit and Forecast Forward
modeltime_refit(covid19_confirmed_global_tbl) %>%
modeltime_forecast(h = "3 months", actual_data = covid19_confirmed_global_tbl) %>%
plot_modeltime_forecast(.interactive = FALSE, .title = "Compare all models forecast Plot", .y_lab = "New daily cases")
## frequency = 7 observations per 1 week
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## [11:41:41] WARNING: amalgamation/../src/learner.cc:480:
## Parameters: { validation } might not be used.
##
## This may not be accurate due to some parameters are only used in language bindings but
## passed down to XGBoost core. Or some parameters are not used but slip through this
## verification. Please open an issue if you find above cases.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
calibration_table %>%
filter(.model_id == 1) %>%
# Refit and Forecast Forward
modeltime_refit(covid19_confirmed_global_tbl) %>%
modeltime_forecast(h = "3 months", actual_data = covid19_confirmed_global_tbl) %>%
plot_modeltime_forecast(.interactive = FALSE, .title = "ARIMA model forecast Plot", .y_lab = "New daily cases")
## frequency = 7 observations per 1 week
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
calibration_table %>%
filter(.model_id == 2) %>%
# Refit and Forecast Forward
modeltime_refit(covid19_confirmed_global_tbl) %>%
modeltime_forecast(h = "3 months", actual_data = covid19_confirmed_global_tbl) %>%
plot_modeltime_forecast(.interactive = FALSE, .title = "PROPHET model forecast Plot", .y_lab = "New daily cases")
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
calibration_table %>%
filter(.model_id == 3) %>%
# Refit and Forecast Forward
modeltime_refit(covid19_confirmed_global_tbl) %>%
modeltime_forecast(h = "3 months", actual_data = covid19_confirmed_global_tbl) %>%
plot_modeltime_forecast(.interactive = FALSE, .title = "GLMNET model forecast Plot", .y_lab = "New daily cases")
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
calibration_table %>%
filter(.model_id == 5) %>%
# Refit and Forecast Forward
modeltime_refit(covid19_confirmed_global_tbl) %>%
modeltime_forecast(h = "3 months", actual_data = covid19_confirmed_global_tbl) %>%
plot_modeltime_forecast(.interactive = FALSE, .title = "PROPHET + XGBOOST model forecast Plot", .y_lab = "New daily cases")
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## [11:41:46] WARNING: amalgamation/../src/learner.cc:480:
## Parameters: { validation } might not be used.
##
## This may not be accurate due to some parameters are only used in language bindings but
## passed down to XGBoost core. Or some parameters are not used but slip through this
## verification. Please open an issue if you find above cases.
## Using date_var: date
Auto Arima Model fitting process.
model_fit_arima <- arima_reg() %>%
set_engine("auto_arima") %>%
fit(value ~ date, training(splits))
## frequency = 7 observations per 1 week
model_fit_arima
## parsnip model object
##
## Fit time: 527ms
## Series: outcome
## ARIMA(0,1,1)(0,0,2)[7]
##
## Coefficients:
## ma1 sma1 sma2
## -0.6935 0.1816 0.2081
## s.e. 0.0590 0.0874 0.0886
##
## sigma^2 estimated as 816656: log likelihood=-986.23
## AIC=1980.47 AICc=1980.82 BIC=1991.62
Prophet Model fitting process.
model_fit_prophet <- prophet_reg() %>%
set_engine("prophet", yearly.seasonality = TRUE) %>%
fit(value ~ date, training(splits))
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
model_fit_prophet
## parsnip model object
##
## Fit time: 55ms
## PROPHET Model
## - growth: 'linear'
## - n.changepoints: 25
## - seasonality.mode: 'additive'
## - extra_regressors: 0
Making an Elastic NET model
model_spec_glmnet <- linear_reg(penalty = 0.01, mixture = 0.5) %>%
set_engine("glmnet")
Fitted workflow
workflow_fit_glmnet <- workflow() %>%
add_model(model_spec_glmnet) %>%
add_recipe(recipe_spec %>% step_rm(date)) %>%
fit(training(splits))
Fit a Random Forest
Set the model up using a workflow
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## [11:41:51] WARNING: amalgamation/../src/learner.cc:480:
## Parameters: { validation } might not be used.
##
## This may not be accurate due to some parameters are only used in language bindings but
## passed down to XGBoost core. Or some parameters are not used but slip through this
## verification. Please open an issue if you find above cases.
## ══ Workflow [trained] ══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: prophet_boost()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## 4 Recipe Steps
##
## ● step_timeseries_signature()
## ● step_rm()
## ● step_fourier()
## ● step_dummy()
##
## ── Model ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## PROPHET w/ XGBoost Errors
## ---
## Model 1: PROPHET
## - growth: 'linear'
## - n.changepoints: 25
## - seasonality.mode: 'additive'
##
## ---
## Model 2: XGBoost Errors
##
## xgboost::xgb.train(params = list(eta = 0.3, max_depth = 6, gamma = 0,
## colsample_bytree = 1, min_child_weight = 1, subsample = 1),
## data = x, nrounds = 15, early_stopping_rounds = NULL, objective = "reg:squarederror",
## validation = 0)
Organize the models with IDs and creates generic descriptions.
Quantify error and estimate confidence intervals.
Visualize the testing predictions (forecast).
## 'new_data' is missing. Using '.calibration_data' to forecast.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
Calculate the testing accuracy to compare the models.
Accuracy Table | ||||||||
---|---|---|---|---|---|---|---|---|
.model_id | .model_desc | .type | mae | mape | mase | smape | rmse | rsq |
1 | ARIMA(0,1,1)(0,0,2)[7] | Test | 618.20 | Inf | 0.55 | 167.35 | 724.97 | 0.34 |
2 | PROPHET | Test | 664.69 | Inf | 0.59 | 167.73 | 759.30 | 0.08 |
3 | GLMNET | Test | 730.82 | Inf | 0.65 | 187.91 | 925.11 | 0.06 |
4 | RANDOMFOREST | Test | 1235.75 | Inf | 1.10 | 129.72 | 1433.47 | 0.01 |
5 | PROPHET W/ XGBOOST ERRORS | Test | 409.69 | Inf | 0.36 | 127.77 | 461.89 | 0.61 |
From the accuracy measures and forecast results, we see that:
## frequency = 7 observations per 1 week
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## [11:41:56] WARNING: amalgamation/../src/learner.cc:480:
## Parameters: { validation } might not be used.
##
## This may not be accurate due to some parameters are only used in language bindings but
## passed down to XGBoost core. Or some parameters are not used but slip through this
## verification. Please open an issue if you find above cases.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## frequency = 7 observations per 1 week
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## [11:42:01] WARNING: amalgamation/../src/learner.cc:480:
## Parameters: { validation } might not be used.
##
## This may not be accurate due to some parameters are only used in language bindings but
## passed down to XGBoost core. Or some parameters are not used but slip through this
## verification. Please open an issue if you find above cases.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf