R Markdown

library(tidymodels)
library(modeltime)
library(timetk)   
library(lubridate)
library(tidyverse)
library(dplyr)
library(glmnet)
library(randomForest)

Read in Data

sales <- walmart_sales_weekly%>%
  select(Date, Weekly_Sales)%>%
  group_by(Date)%>%
  summarise(Weekly_Sales = sum(Weekly_Sales))

Walmart Weekly Sales

sales %>%
  plot_time_series(Date, Weekly_Sales, .interactive = FALSE)

3 Month Test Data

splits <- sales %>%
  time_series_split(assess = "3 months", cumulative = TRUE)

splits %>%
    tk_time_series_cv_plan() %>%
    plot_time_series_cv_plan(Date, Weekly_Sales, .interactive = FALSE)

Create Arima and Prophet Models

model_fit_arima <- arima_reg() %>%
  set_engine("auto_arima") %>%
  fit(Weekly_Sales ~ Date, training(splits))

model_fit_prophet <- prophet_reg(seasonality_weekly = TRUE, seasonality_daily = TRUE) %>%
  set_engine("prophet") %>%
  fit(Weekly_Sales ~ Date, training(splits))

Preprocess data for ML models

recipe_spec <- recipe(Weekly_Sales ~ Date, training(splits)) %>%
  step_timeseries_signature(Date) %>%
  step_rm(contains("am.pm"), contains("hour"), contains("minute"),
          contains("second"), contains("xts")) %>%
  step_fourier(Date, period = 365, K = 5) %>%
  step_dummy(all_nominal())

recipe_spec %>% prep() %>% juice()
## # A tibble: 131 x 47
##    Date       Weekly_Sales Date_index.num Date_year Date_year.iso Date_half
##    <date>            <dbl>          <dbl>     <int>         <int>     <int>
##  1 2010-02-05      407512.     1265328000      2010          2010         1
##  2 2010-02-12      406467.     1265932800      2010          2010         1
##  3 2010-02-19      398901.     1266537600      2010          2010         1
##  4 2010-02-26      357362.     1267142400      2010          2010         1
##  5 2010-03-05      397944.     1267747200      2010          2010         1
##  6 2010-03-12      375159.     1268352000      2010          2010         1
##  7 2010-03-19      353797.     1268956800      2010          2010         1
##  8 2010-03-26      369665.     1269561600      2010          2010         1
##  9 2010-04-02      420950.     1270166400      2010          2010         1
## 10 2010-04-09      407180.     1270771200      2010          2010         1
## # ... with 121 more rows, and 41 more variables: Date_quarter <int>,
## #   Date_month <int>, Date_day <int>, Date_wday <int>, Date_mday <int>,
## #   Date_qday <int>, Date_yday <int>, Date_mweek <int>, Date_week <int>,
## #   Date_week.iso <int>, Date_week2 <int>, Date_week3 <int>, Date_week4 <int>,
## #   Date_mday7 <int>, Date_sin365_K1 <dbl>, Date_cos365_K1 <dbl>,
## #   Date_sin365_K2 <dbl>, Date_cos365_K2 <dbl>, Date_sin365_K3 <dbl>,
## #   Date_cos365_K3 <dbl>, Date_sin365_K4 <dbl>, Date_cos365_K4 <dbl>,
## #   Date_sin365_K5 <dbl>, Date_cos365_K5 <dbl>, Date_month.lbl_01 <dbl>,
## #   Date_month.lbl_02 <dbl>, Date_month.lbl_03 <dbl>, Date_month.lbl_04 <dbl>,
## #   Date_month.lbl_05 <dbl>, Date_month.lbl_06 <dbl>, Date_month.lbl_07 <dbl>,
## #   Date_month.lbl_08 <dbl>, Date_month.lbl_09 <dbl>, Date_month.lbl_10 <dbl>,
## #   Date_month.lbl_11 <dbl>, Date_wday.lbl_1 <dbl>, Date_wday.lbl_2 <dbl>,
## #   Date_wday.lbl_3 <dbl>, Date_wday.lbl_4 <dbl>, Date_wday.lbl_5 <dbl>,
## #   Date_wday.lbl_6 <dbl>

Create GLMnet, Random Forest, and Prophet xgBoost

model_spec_glmnet <- linear_reg(penalty = 0.01, mixture = 0.5) %>%
  set_engine("glmnet")

#GLM 
workflow_fit_glmnet <- workflow() %>%
  add_model(model_spec_glmnet) %>%
  add_recipe(recipe_spec %>% step_rm(Date)) %>%
  fit(training(splits))

#random forest 
model_spec_rf <- rand_forest(trees = 500, min_n = 50) %>%
  set_engine("randomForest")

workflow_fit_rf <- workflow() %>%
  add_model(model_spec_rf) %>%
  add_recipe(recipe_spec %>% step_rm(Date)) %>%
  fit(training(splits))
  
#Prophet Boost
model_spec_prophet_boost <- prophet_boost(seasonality_weekly = TRUE, seasonality_daily = TRUE) %>%
  set_engine("prophet_xgboost") 

workflow_fit_prophet_boost <- workflow() %>%
  add_model(model_spec_prophet_boost) %>%
  add_recipe(recipe_spec) %>%
  fit(training(splits))

Create table with all models

model_table <- modeltime_table(
  model_fit_arima, 
  model_fit_prophet,
  workflow_fit_glmnet,
  workflow_fit_rf,
  workflow_fit_prophet_boost
)

calibration_table <- model_table %>%
  modeltime_calibrate(testing(splits))

Visualize all models on test data

calibration_table %>%
  modeltime_forecast(actual_data = sales) %>%
  plot_modeltime_forecast(.interactive = TRUE, .conf_interval_show = FALSE,
                          .title = "Walmart Weekly Sales Forecast",
                          .plotly_slider = TRUE)

Model Performance

Prophet xgboost performs the best out of these 5 models

calibration_table %>%
  modeltime_accuracy() %>%
  table_modeltime_accuracy(.interactive = FALSE)
Accuracy Table
.model_id .model_desc .type mae mape mase smape rmse rsq
1 ARIMA(3,1,3)(0,0,2)[13] Test 17993.31 4.50 0.86 4.52 20666.06 0.23
2 PROPHET Test 17784.00 4.55 0.85 4.49 20367.40 0.24
3 GLMNET Test 13826.15 3.35 0.66 3.45 18975.04 0.61
4 RANDOMFOREST Test 17309.01 4.20 0.82 4.37 23824.04 0.43
5 PROPHET W/ XGBOOST ERRORS Test 10116.26 2.50 0.48 2.52 14616.94 0.63

Refit and create forcast for the next 12 Months

calibration_table %>%
  # Remove ARIMA model with low accuracy
  filter(.model_id == 5) %>%
modeltime_refit(sales) %>%
  modeltime_forecast(h = "12 months", actual_data = sales) %>%
  plot_modeltime_forecast(.interactive = T, .conf_interval_show = FALSE,
                          .title = "12 Month Forecast - Walmart Weekly Sales",
                          .plotly_slider = TRUE)