[1] Setup
knitr::opts_chunk$set(echo = TRUE, warning = F, comment = "", fig.width = 10)
library(tidyverse)
library(lubridate)
library(TSstudio)
library(zoo)
[2] Data Cleaning
# load data
load("./product.RData")
head(product)
week sales price discount leaflet stores
1 2011-01-03 2573 0.72 0.17 1 26
2 2011-01-10 3099 0.56 0.29 1 26
3 2011-01-17 1186 0.73 0.15 0 26
4 2011-01-24 750 0.89 0.01 0 26
5 2011-01-31 922 0.89 0.01 0 26
6 2011-02-07 885 0.89 0.01 0 26
# check missing weeks
week_check <- seq.Date(min(product$week), max(product$week), by = "week")
setdiff(week_check, product$week)
numeric(0)
# check numeber of weeks
product$nweek <- week(product$week)
table(product$nweek)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 6
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
53
1
# remove week 53 for seasonality
product <- product %>% filter(nweek < 53)
table(product$nweek)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 6
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
# remove first 3 weeks (because promotional)
product <- product[-(1:3),]
# adjust anomaly in discount values
product$discount[1:3] <- 0
# summary
knitr::kable(summary(product))
|
Min. :2011-01-24 |
Min. : 438.0 |
Min. :0.3600 |
Min. :0.00000 |
Min. :0.00000 |
Min. :19.00 |
Min. : 1.00 |
|
1st Qu.:2012-08-28 |
1st Qu.: 795.2 |
1st Qu.:0.9000 |
1st Qu.:0.00000 |
1st Qu.:0.00000 |
1st Qu.:28.00 |
1st Qu.:13.00 |
|
Median :2014-04-10 |
Median : 966.5 |
Median :0.9400 |
Median :0.00000 |
Median :0.00000 |
Median :30.00 |
Median :25.00 |
|
Mean :2014-04-08 |
Mean :1301.6 |
Mean :0.9175 |
Mean :0.02904 |
Mean :0.06587 |
Mean :30.38 |
Mean :25.71 |
|
3rd Qu.:2015-11-14 |
3rd Qu.:1387.0 |
3rd Qu.:0.9800 |
3rd Qu.:0.00000 |
3rd Qu.:0.00000 |
3rd Qu.:33.00 |
3rd Qu.:38.75 |
|
Max. :2017-06-19 |
Max. :7123.0 |
Max. :1.0000 |
Max. :0.26000 |
Max. :1.00000 |
Max. :34.00 |
Max. :52.00 |
[3] Time Series Analysis
# original series
series <- ts(product$sales, start = c(2011,4), frequency = 52)
series
Time Series:
Start = c(2011, 4)
End = c(2017, 25)
Frequency = 52
[1] 750 922 885 711 783 808 833 859 895 1049 1169 1246 1073 1214 1168
[16] 1439 1285 1515 1686 1458 1378 6057 5845 2069 2316 1700 1269 1327 1420 1677
[31] 2174 1706 1370 1487 1185 3702 3195 876 773 868 901 894 795 733 876
[46] 833 738 815 1717 1407 1013 910 824 822 915 1664 1860 1029 896 950
[61] 816 1125 1198 883 866 1038 1098 1142 1152 1056 1281 1282 1239 2251 2049
[76] 1917 2172 1787 1802 1790 1731 1406 2022 3447 3408 1384 944 947 910 859
[91] 836 802 842 888 686 777 818 2256 2600 1022 844 867 859 730 850
[106] 981 930 939 809 438 2238 4136 1004 991 908 1285 1018 1090 3282 3318
[121] 1966 1116 1231 1609 2323 1485 6754 7123 2082 2242 2377 2335 1336 1237 1106
[136] 1388 1164 934 1008 968 3115 2615 1088 993 811 879 814 782 1603 1841
[151] 993 968 951 970 852 860 1935 2020 1737 927 877 730 863 958 698
[166] 891 972 870 982 904 3427 3969 1443 1232 1149 1500 1373 1333 1222 1304
[181] 1416 1425 4895 4892 1465 1133 862 965 1067 1103 828 830 794 872 716
[196] 795 1674 1656 721 675 626 643 727 689 788 677 752 747 647 824
[211] 640 785 639 751 776 843 2506 3102 896 1178 901 812 886 902 822
[226] 797 1375 1463 1105 1088 1430 1363 1636 1611 1085 1285 5457 4421 2129 1247
[241] 905 983 789 849 693 710 1401 1527 895 758 648 643 695 685 682
[256] 740 767 817 618 559 648 689 659 572 647 772 670 694 792 1124
[271] 845 890 861 765 796 659 645 856 736 805 901 1082 1157 1124 1019
[286] 1061 4297 4583 2027 1222 1040 945 1048 1060 855 740 720 701 1097 1220
[301] 845 596 512 586 616 567 624 612 585 720 649 541 601 651 622
[316] 588 912 1228 784 712 787 780 701 914 597 741 2796 2924 1560 997
[331] 1291 1248 1248 1360
ts_plot(series, line.mode = "lines+markers", Ygrid = T, slider = T, Xtitle = "Week",
Ytitle = "Sales", title = "Product Time Series")
# create sales baseline
product$baseline <- product$sales
product$baseline[product$discount > 0] <- NA
product$baseline <- na.approx(product$baseline)
# create price baseline
product$baseprice <- product$price
product$baseprice[product$discount > 0] <- NA
product$baseprice <- na.approx(product$baseprice)
baseline <- ts(product$baseline, start = c(2011,4), frequency = 52)
ts_plot(baseline, line.mode = "lines+markers", Ygrid = T, slider = T, Xtitle = "Week",
Ytitle = "Sales", title = "Baseline Time Series")
# baseline decomposition
ts_decompose(baseline)
# baseline seasonality focus
ts_seasonal(baseline, type = "normal")
ts_seasonal(baseline, type = "cycle")
# baseline autocorrelation
ts_cor(baseline, lag.max = 104, seasonal_lags = 52)
# baseline difference
baselineDiff <- diff(baseline)
ts_plot(baselineDiff, line.mode = "lines+markers", Ygrid = T, slider = T, Xtitle = "Week",
Ytitle = "Sales", title = "Baseline Differencing")
ts_cor(baselineDiff, lag.max = 104, seasonal_lags = 52)
[4] Baseline Forecasting
# external regressors
regressor <- list(
train = data.frame(
baseprice = product$baseprice,
stores = product$stores
),
forecast = data.frame(
baseprice = rep(0.98, 52),
stores = rep(34, 52)
)
)
# models
methods <- list(
arima1 = list(
method = "auto.arima",
notes = "Arima",
method_arg = list()
),
arima2 = list(
method = "auto.arima",
notes = "Arima with lambda",
method_arg = list(lambda = "auto")
),
tslm1 = list(
method = "tslm",
notes = "TSLM",
method_arg = list(formula = input ~ trend + season)
),
tslm2 = list(
method = "tslm",
notes = "TSLM with regressors",
method_arg = list(formula = input ~ trend + season + baseprice + stores)
),
nnetar = list(
method = "nnetar",
notes = "Default nnetar",
method_arg = list()
)
)
# training and testing
models <- train_model(
input = baseline,
methods = methods,
train_method = list(
partitions = 4,
sample.out = 52,
space = 52
),
horizon = 52,
error = "MAPE",
xreg = regressor
)
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
Registered S3 methods overwritten by 'forecast':
method from
fitted.fracdiff fracdiff
residuals.fracdiff fracdiff
# A tibble: 5 x 7
model_id model notes avg_mape avg_rmse `avg_coverage_8… `avg_coverage_9…
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 tslm2 tslm TSLM wit… 0.159 236. 0.812 0.909
2 tslm1 tslm TSLM 0.159 224. 0.875 0.933
3 arima1 auto.a… Arima 0.168 229. 0.856 0.942
4 arima2 auto.a… Arima wi… 0.172 239. NA NA
5 nnetar nnetar Default … 0.201 258. NA NA
plot_model(models)
# forecasting (regular)
plot_forecast(models$forecast$tslm2$forecast)
[5] Promotional Forecasting
# external regressors
regressor_promo <- list(
train = data.frame(
price = product$price,
stores = product$stores,
discount = product$discount,
leaflet = as.factor(product$leaflet)
),
forecast = data.frame(
price = c(rep(0.72,3),rep(0.98,4),rep(0.68,3),rep(0.98,10),rep(0.88,3),rep(0.98,50)),
stores = rep(34, 73),
discount = c(rep(0.26,3),rep(0,4),rep(0.30,3),rep(0,10),rep(0.1,3),rep(0,50)),
leaflet = c(rep("1",3),rep("0",4),rep("1",3),rep("0",10),rep("0",3),rep("0",50))
)
)
# models
methods <- list(
tslm1 = list(
method = "tslm",
notes = "TSLM with price",
method_arg = list(formula = input ~ trend + season + price)
),
tslm2 = list(
method = "tslm",
notes = "TSLM with stores",
method_arg = list(formula = input ~ trend + season + stores)
),
tslm3 = list(
method = "tslm",
notes = "TSLM with discount",
method_arg = list(formula = input ~ trend + season + discount)
),
tslm4 = list(
method = "tslm",
notes = "TSLM with leaflet",
method_arg = list(formula = input ~ trend + season + leaflet)
),
tslm5 = list(
method = "tslm",
notes = "TSLM with price & stores",
method_arg = list(formula = input ~ trend + season + price + stores)
),
tslm6 = list(
method = "tslm",
notes = "TSLM with price & discount",
method_arg = list(formula = input ~ trend + season + price + discount)
),
tslm7 = list(
method = "tslm",
notes = "TSLM with price & leaflet",
method_arg = list(formula = input ~ trend + season + price + leaflet)
),
tslm8 = list(
method = "tslm",
notes = "TSLM with stores & discount",
method_arg = list(formula = input ~ trend + season + stores + discount)
),
tslm9 = list(
method = "tslm",
notes = "TSLM with stores & leaflet",
method_arg = list(formula = input ~ trend + season + stores + leaflet)
),
tslm10 = list(
method = "tslm",
notes = "TSLM with discount & leaflet",
method_arg = list(formula = input ~ trend + season + discount + leaflet)
),
tslm11 = list(
method = "tslm",
notes = "TSLM with price & stores & discount",
method_arg = list(formula = input ~ trend + season + price + stores + discount)
),
tslm12 = list(
method = "tslm",
notes = "TSLM with price & stores & leaflet",
method_arg = list(formula = input ~ trend + season + price + stores + leaflet)
),
tslm13 = list(
method = "tslm",
notes = "TSLM with price & leaflet & discount",
method_arg = list(formula = input ~ trend + season + price + discount + leaflet)
),
tslm14 = list(
method = "tslm",
notes = "TSLM with leaflet & stores & discount",
method_arg = list(formula = input ~ trend + season + stores + discount + leaflet)
),
tslm15 = list(
method = "tslm",
notes = "TSLM with all regressors",
method_arg = list(formula = input ~ trend + season + price + stores + discount + leaflet)
)
)
# training and testing
models <- train_model(
input = series,
methods = methods,
train_method = list(
partitions = 4,
sample.out = 52,
space = 52
),
horizon = 73,
error = "MAPE",
xreg = regressor_promo
)
# A tibble: 15 x 7
model_id model notes avg_mape avg_rmse `avg_coverage_8… `avg_coverage_9…
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 tslm13 tslm TSLM with… 0.256 500. 0.827 0.933
2 tslm7 tslm TSLM with… 0.262 511. 0.865 0.942
3 tslm12 tslm TSLM with… 0.262 507. 0.851 0.942
4 tslm10 tslm TSLM with… 0.275 507. 0.827 0.928
5 tslm15 tslm TSLM with… 0.277 501. 0.837 0.938
6 tslm14 tslm TSLM with… 0.301 513. 0.822 0.933
7 tslm6 tslm TSLM with… 0.307 591. 0.827 0.928
8 tslm11 tslm TSLM with… 0.320 592. 0.832 0.923
9 tslm3 tslm TSLM with… 0.324 598. 0.837 0.918
10 tslm4 tslm TSLM with… 0.335 585. 0.846 0.947
11 tslm8 tslm TSLM with… 0.339 601. 0.837 0.918
12 tslm9 tslm TSLM with… 0.349 599. 0.822 0.942
13 tslm5 tslm TSLM with… 0.350 704. 0.861 0.938
14 tslm1 tslm TSLM with… 0.364 710. 0.856 0.938
15 tslm2 tslm TSLM with… 0.527 998. 0.856 0.928
plot_model(models)
# best model
models$forecast$tslm13$model$coefficients
(Intercept) trend season2 season3 season4 season5
2433.888649 -1.093482 -119.209296 -226.165624 -179.529472 -72.761886
season6 season7 season8 season9 season10 season11
-175.363786 -57.942078 -86.062556 -224.845254 -289.738224 -14.972566
season12 season13 season14 season15 season16 season17
-161.431343 -197.412617 -58.493033 140.947579 13.959957 51.012517
season18 season19 season20 season21 season22 season23
294.744278 109.655646 -28.821772 211.203405 362.500017 453.634421
season24 season25 season26 season27 season28 season29
495.830578 772.749435 1137.972682 951.760214 758.621490 729.193232
season30 season31 season32 season33 season34 season35
1078.145352 812.350492 733.521192 470.328690 547.182381 611.490272
season36 season37 season38 season39 season40 season41
382.224418 151.081751 31.439084 -57.030028 -355.235759 -441.827210
season42 season43 season44 season45 season46 season47
-278.872619 -356.765376 -325.794141 -156.434245 -263.370849 -170.408823
season48 season49 season50 season51 season52 price
-104.008010 -307.890846 -407.550307 -57.701562 -17.844332 -1428.645392
discount leaflet1
5032.914756 1542.832332
# forecasting (regular + promotions)
plot_forecast(models$forecast$tslm13$forecast)