library(tidymodels)
library(modeltime)
library(timetk)
library(lubridate)
library(tidyverse)
library(dplyr)
library(glmnet)
library(randomForest)
sales <- walmart_sales_weekly%>%
select(Date, Weekly_Sales)%>%
group_by(Date)%>%
summarise(Weekly_Sales = sum(Weekly_Sales))
sales %>%
plot_time_series(Date, Weekly_Sales, .interactive = FALSE)
splits <- sales %>%
time_series_split(assess = "3 months", cumulative = TRUE)
splits %>%
tk_time_series_cv_plan() %>%
plot_time_series_cv_plan(Date, Weekly_Sales, .interactive = FALSE)
model_fit_arima <- arima_reg() %>%
set_engine("auto_arima") %>%
fit(Weekly_Sales ~ Date, training(splits))
model_fit_prophet <- prophet_reg(seasonality_weekly = TRUE, seasonality_daily = TRUE) %>%
set_engine("prophet") %>%
fit(Weekly_Sales ~ Date, training(splits))
recipe_spec <- recipe(Weekly_Sales ~ Date, training(splits)) %>%
step_timeseries_signature(Date) %>%
step_rm(contains("am.pm"), contains("hour"), contains("minute"),
contains("second"), contains("xts")) %>%
step_fourier(Date, period = 365, K = 5) %>%
step_dummy(all_nominal())
recipe_spec %>% prep() %>% juice()
## # A tibble: 131 x 47
## Date Weekly_Sales Date_index.num Date_year Date_year.iso Date_half
## <date> <dbl> <dbl> <int> <int> <int>
## 1 2010-02-05 407512. 1265328000 2010 2010 1
## 2 2010-02-12 406467. 1265932800 2010 2010 1
## 3 2010-02-19 398901. 1266537600 2010 2010 1
## 4 2010-02-26 357362. 1267142400 2010 2010 1
## 5 2010-03-05 397944. 1267747200 2010 2010 1
## 6 2010-03-12 375159. 1268352000 2010 2010 1
## 7 2010-03-19 353797. 1268956800 2010 2010 1
## 8 2010-03-26 369665. 1269561600 2010 2010 1
## 9 2010-04-02 420950. 1270166400 2010 2010 1
## 10 2010-04-09 407180. 1270771200 2010 2010 1
## # ... with 121 more rows, and 41 more variables: Date_quarter <int>,
## # Date_month <int>, Date_day <int>, Date_wday <int>, Date_mday <int>,
## # Date_qday <int>, Date_yday <int>, Date_mweek <int>, Date_week <int>,
## # Date_week.iso <int>, Date_week2 <int>, Date_week3 <int>, Date_week4 <int>,
## # Date_mday7 <int>, Date_sin365_K1 <dbl>, Date_cos365_K1 <dbl>,
## # Date_sin365_K2 <dbl>, Date_cos365_K2 <dbl>, Date_sin365_K3 <dbl>,
## # Date_cos365_K3 <dbl>, Date_sin365_K4 <dbl>, Date_cos365_K4 <dbl>,
## # Date_sin365_K5 <dbl>, Date_cos365_K5 <dbl>, Date_month.lbl_01 <dbl>,
## # Date_month.lbl_02 <dbl>, Date_month.lbl_03 <dbl>, Date_month.lbl_04 <dbl>,
## # Date_month.lbl_05 <dbl>, Date_month.lbl_06 <dbl>, Date_month.lbl_07 <dbl>,
## # Date_month.lbl_08 <dbl>, Date_month.lbl_09 <dbl>, Date_month.lbl_10 <dbl>,
## # Date_month.lbl_11 <dbl>, Date_wday.lbl_1 <dbl>, Date_wday.lbl_2 <dbl>,
## # Date_wday.lbl_3 <dbl>, Date_wday.lbl_4 <dbl>, Date_wday.lbl_5 <dbl>,
## # Date_wday.lbl_6 <dbl>
model_spec_glmnet <- linear_reg(penalty = 0.01, mixture = 0.5) %>%
set_engine("glmnet")
#GLM
workflow_fit_glmnet <- workflow() %>%
add_model(model_spec_glmnet) %>%
add_recipe(recipe_spec %>% step_rm(Date)) %>%
fit(training(splits))
#random forest
model_spec_rf <- rand_forest(trees = 500, min_n = 50) %>%
set_engine("randomForest")
workflow_fit_rf <- workflow() %>%
add_model(model_spec_rf) %>%
add_recipe(recipe_spec %>% step_rm(Date)) %>%
fit(training(splits))
#Prophet Boost
model_spec_prophet_boost <- prophet_boost(seasonality_weekly = TRUE, seasonality_daily = TRUE) %>%
set_engine("prophet_xgboost")
workflow_fit_prophet_boost <- workflow() %>%
add_model(model_spec_prophet_boost) %>%
add_recipe(recipe_spec) %>%
fit(training(splits))
model_table <- modeltime_table(
model_fit_arima,
model_fit_prophet,
workflow_fit_glmnet,
workflow_fit_rf,
workflow_fit_prophet_boost
)
calibration_table <- model_table %>%
modeltime_calibrate(testing(splits))
calibration_table %>%
modeltime_forecast(actual_data = sales) %>%
plot_modeltime_forecast(.interactive = TRUE, .conf_interval_show = FALSE,
.title = "Walmart Weekly Sales Forecast",
.plotly_slider = TRUE)
Prophet xgboost performs the best out of these 5 models
calibration_table %>%
modeltime_accuracy() %>%
table_modeltime_accuracy(.interactive = FALSE)
| Accuracy Table | ||||||||
|---|---|---|---|---|---|---|---|---|
| .model_id | .model_desc | .type | mae | mape | mase | smape | rmse | rsq |
| 1 | ARIMA(3,1,3)(0,0,2)[13] | Test | 17993.31 | 4.50 | 0.86 | 4.52 | 20666.06 | 0.23 |
| 2 | PROPHET | Test | 17784.00 | 4.55 | 0.85 | 4.49 | 20367.40 | 0.24 |
| 3 | GLMNET | Test | 13826.15 | 3.35 | 0.66 | 3.45 | 18975.04 | 0.61 |
| 4 | RANDOMFOREST | Test | 17309.01 | 4.20 | 0.82 | 4.37 | 23824.04 | 0.43 |
| 5 | PROPHET W/ XGBOOST ERRORS | Test | 10116.26 | 2.50 | 0.48 | 2.52 | 14616.94 | 0.63 |
calibration_table %>%
# Remove ARIMA model with low accuracy
filter(.model_id == 5) %>%
modeltime_refit(sales) %>%
modeltime_forecast(h = "12 months", actual_data = sales) %>%
plot_modeltime_forecast(.interactive = T, .conf_interval_show = FALSE,
.title = "12 Month Forecast - Walmart Weekly Sales",
.plotly_slider = TRUE)