Load data
rm(list = ls())
# Preventing clash from Dplyr and MASS package for select function
select <- dplyr::select
train_df <- vroom("D:\\Dataset\\sales_train_validation.csv", delim = ",", col_types = cols())
price_df <- vroom("D:\\Dataset\\sell_prices.csv", delim = ",",
col_types = cols())
calendar_df <- vroom("D:\\Dataset\\calendar.csv", col_types = cols())
test_df <- vroom("D:\\Dataset\\sales_train_evaluation.csv", delim = ",",
col_types = cols())
# Retrieving special holidays
holidays <- c(USThanksgivingDay(2011:2016), USChristmasDay(2011:2016))@Data
hoidays <- as.Date(holidays)
Forecasting with Category and State Level
start_date = date("2011-01-29")
temp <- train_df %>%
group_by(cat_id, state_id) %>%
summarise_at(vars(starts_with("d_")), sum) %>%
ungroup() %>%
select(ends_with("id"), starts_with("d_")) %>%
pivot_longer(starts_with("d_"), names_to = "dates", values_to = "sales") %>%
mutate(dates = as.integer(str_remove(dates, "d_"))) %>%
mutate(dates = start_date + dates - 1) %>%
mutate(dates = ymd(dates)) %>%
mutate(dummy = if_else(dates %in% holidays, 1, 0)) %>%
mutate(dow = wday(dates, label = TRUE))
# filter(!str_detect(as.character(dates), '..-12-25'))
# replace_na(list(sales = 0))
temp_ts <- temp %>%
as_tsibble(key = c(cat_id, state_id), index = dates)
FOOD Category in California
1 week forecast
food.CA <- temp_ts %>%
filter(cat_id == "FOODS" & state_id == "CA") %>%
select(dates, sales, dummy, dow)
# Forecasting task
# Forecasting with Decomposition
fit_food_CA_week <- food.CA %>% filter(dates < "2016-04-18") %>% model(stlf = decomposition_model(STL(
sales ~ season(window = 'periodic') + trend(window = 15),
robust = TRUE),
ARIMA(season_adjust ~ PDQ(0,0,0)),
SNAIVE(season_year)),
additive_ets = ETS(sales ~ error("A") + trend("A") +
season("A")),
multiplicative_ets = ETS(sales ~ error("M") + trend("A") +
season("M")),
arima = ARIMA(sales)) %>%
mutate(combination_ad = (additive_ets + stlf + arima) / 3,
combination_mul = (multiplicative_ets + stlf + arima) / 3)
# report(fit_food_CA)
# Forecasting 1 week
food_CA_fc_week <- fit_food_CA_week %>%
forecast(h = "1 week")
# Plotting the forecast
food_CA_fc_week %>%
autoplot(food.CA %>% filter(dates <= "2016-04-24" & dates >= "2016-03-24"),
level = NULL) +
labs(y = "Sale Volumes",
title = "Food Sale in California")

# Checking the accuracy
fc_accuracy_week <- accuracy(food_CA_fc_week, food.CA, measures = list(
point_accuracy_measures,
interval_accuracy_measures,
distribution_accuracy_measures))
fc_accuracy_week %>%
group_by(.model) %>%
summarise(RMSE = mean(RMSE),
MAE = mean(MAE),
MASE = mean(MASE),
MAPE = mean(MAPE),
Winkler = mean(winkler),
CRPS = mean(CRPS)
) %>%
arrange(MAPE)
## # A tibble: 6 x 7
## .model RMSE MAE MASE MAPE Winkler CRPS
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 multiplicative_ets 246. 162. 0.162 1.40 5300. 332.
## 2 combination_mul 316. 253. 0.254 2.12 4659. 308.
## 3 additive_ets 332. 239. 0.240 2.14 4594. 309.
## 4 combination_ad 334. 258. 0.259 2.16 4445. 302.
## 5 arima 354. 310. 0.311 2.59 4388. 304.
## 6 stlf 417. 346. 0.347 2.95 4774. 338.
1 month forecast
# Forecasting task
# Forecasting with Decomposition
fit_food_CA_month <- food.CA %>% filter(dates < "2016-03-28") %>% model(stlf = decomposition_model(STL(
sales ~ season(window = 'periodic') + trend(window = 15),
robust = TRUE),
ARIMA(season_adjust ~ PDQ(0,0,0)),
SNAIVE(season_year)),
additive_ets = ETS(sales ~ error("A") + trend("A") +
season("A")),
multiplicative_ets = ETS(sales ~ error("M") + trend("A") +
season("M")),
arima = ARIMA(sales)) %>%
mutate(combination_ad = (additive_ets + stlf + arima) / 3,
combination_mul = (multiplicative_ets + stlf + arima) / 3)
# report(fit_food_CA)
# Forecasting 1 week
food_CA_fc_month <- fit_food_CA_month %>%
forecast(h = "28 days")
# Plotting the forecast
food_CA_fc_month %>%
autoplot(food.CA %>% filter(dates <= "2016-04-24" & dates >= "2016-03-27"),
level = NULL) +
labs(y = "Sale Volumes",
title = "Food Sale in California")

# Checking the accuracy
fc_accuracy_month <- accuracy(food_CA_fc_month, food.CA, measures = list(
point_accuracy_measures,
interval_accuracy_measures,
distribution_accuracy_measures))
fc_accuracy_month %>%
group_by(.model) %>%
summarise(RMSE = mean(RMSE),
MAE = mean(MAE),
MASE = mean(MASE),
MAPE = mean(MAPE),
Winkler = mean(winkler),
CRPS = mean(CRPS)
) %>%
arrange(MAPE)
## # A tibble: 6 x 7
## .model RMSE MAE MASE MAPE Winkler CRPS
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 combination_mul 744. 590. 0.592 5.21 5813. 499.
## 2 combination_ad 793. 633. 0.635 5.63 5599. 506.
## 3 stlf 843. 683. 0.685 6.04 5712. 532.
## 4 arima 899. 726. 0.728 6.05 4859. 530.
## 5 multiplicative_ets 874. 735. 0.737 6.74 7551. 616.
## 6 additive_ets 1027. 877. 0.879 8.21 6587. 635.
1 year forecast
# Forecasting task
# Forecasting with Decomposition
fit_food_CA_year <- food.CA %>% filter(dates < "2015-04-26") %>% model(stlf = decomposition_model(STL(
sales ~ season(window = 'periodic') + trend(window = 13),
robust = TRUE),
ARIMA(season_adjust ~ dummy + dow + PDQ(0,0,0)),
SNAIVE(season_year)),
additive_ets = ETS(sales ~ error("A") + trend("A") +
season("A")),
multiplicative_ets = ETS(sales ~ error("M") + trend("A") +
season("M")),
arima = ARIMA(sales)) %>%
mutate(combination_ad = (additive_ets + stlf + arima) / 3,
combination_mul = (multiplicative_ets + stlf + arima) / 3)
## Warning: Provided exogenous regressors are rank deficient, removing regressors:
## `dummy`
# report(fit_food_CA)
# Forecasting 1 year
fc_xreg <- food.CA %>%
filter(dates <= "2016-04-24" & dates >= "2015-04-25") %>%
select(dummy, dow)
food_CA_fc_year <- fit_food_CA_year %>%
forecast(h = "1 year", new_data = fc_xreg)
## Warning: Input forecast horizon `h` will be ignored as `new_data` has been
## provided.
# Plotting the forecast
food_CA_fc_year %>%
autoplot(food.CA %>% filter(dates <= "2016-04-24" & dates >= "2015-04-25"),
level = NULL) +
labs(y = "Sale Volumes",
title = "Food Sale in California")

# Checking the accuracy
fc_accuracy_year <- accuracy(food_CA_fc_year, food.CA, measures = list(
point_accuracy_measures,
interval_accuracy_measures,
distribution_accuracy_measures))
fc_accuracy_year %>%
group_by(.model) %>%
summarise(RMSE = mean(RMSE),
MAE = mean(MAE),
MASE = mean(MASE),
MAPE = mean(MAPE),
Winkler = mean(winkler),
CRPS = mean(CRPS)
) %>%
arrange(MAPE)
## # A tibble: 6 x 7
## .model RMSE MAE MASE MAPE Winkler CRPS
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stlf 2663. 2053. 2.06 516. 13990. 1490.
## 2 additive_ets 2543. 1977. 1.98 526. 26925. 1991.
## 3 combination_ad 2485. 1900. 1.90 530. 15261. 1463.
## 4 combination_mul 2348. 1758. 1.76 544. 14036. 1362.
## 5 arima 2284. 1699. 1.70 548. 16946. 1269.
## 6 multiplicative_ets 2171. 1604. 1.61 569. 21902. 1662.
fit_food_CA_year %>% select(combination_ad) %>% gg_tsresiduals()
## Warning: Removed 7 row(s) containing missing values (geom_path).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing non-finite values (stat_bin).

# food.CA.temp <- food.CA %>%
# select(dates, sales)
#
# dcmp <- food.CA.temp %>%
# model(stl = STL(sales ~ trend(window = 365) +
# season(window = "periodic"), robust = TRUE))
#
#
# components(dcmp)