library(xts)
## Warning: package 'xts' was built under R version 4.0.3
## Warning: package 'zoo' was built under R version 4.0.3
library(forecast)
## Warning: package 'forecast' was built under R version 4.0.3
data <- read.csv("Bev.csv")
dates <- seq(as.Date("2014-01-19"), length = 176, by = "weeks")
bev_xts <- xts(data, order.by = dates)
# Create the individual region sales as their own objects
MET_hi <- bev_xts[,"MET.hi"]
MET_lo <- bev_xts[,"MET.lo"]
MET_sp <- bev_xts[,"MET.sp"]
# Sum the region sales together
MET_t <- MET_hi + MET_lo + MET_sp
# Plot the metropolitan region total sales
plot(MET_t)
## ARIMA models
# Split the data into training and validation
MET_t_train <- MET_t[index(MET_t) < "2017-01-01"]
MET_t_valid <- MET_t[index(MET_t) >= "2017-01-01"]
# Use auto.arima() function for metropolitan sales training data
MET_t_model <- auto.arima(MET_t_train)
MET_t_model
## Series: MET_t_train
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.6706 6252.8350
## s.e. 0.0594 181.7488
##
## sigma^2 estimated as 574014: log likelihood=-1238.86
## AIC=2483.72 AICc=2483.88 BIC=2492.83
# Forecast the first 22 weeks of 2017
forecast_MET_t <- forecast(MET_t_model, h = 22)
# Plot this forecast #
plot(forecast_MET_t)
## Model performance
# Convert to numeric for ease
for_MET_t <- as.numeric(forecast_MET_t$mean)
v_MET_t <- as.numeric(MET_t_valid)
# Calculate the MAE
MAE <- mean(abs(for_MET_t - v_MET_t))
# Calculate the MAPE
MAPE <- 100*mean(abs((for_MET_t - v_MET_t)/v_MET_t))
# Print to see how good your forecast is!
print(MAE)
## [1] 898.8403
print(MAPE)
## [1] 17.10332
# Convert your forecast to an xts object
for_dates <- seq(as.Date("2017-01-01"), length = 22, by = "weeks")
for_MET_t_xts <- xts(forecast_MET_t$mean, order.by = for_dates)
# Plot the validation data set
plot(MET_t_valid, main = 'Forecast Comparison', ylim = c(4000, 8500))
# Overlay the forecast of 2017
lines(for_MET_t_xts, col = "blue")
# Plot the validation data set
plot(MET_t_valid, main = 'Forecast Comparison', ylim = c(4000, 8500))
# Overlay the forecast of 2017
lines(for_MET_t_xts, col = "blue")
# Convert the limits to xts objects
lower <- xts(forecast_MET_t$lower[,2], order.by = for_dates)
upper <- xts(forecast_MET_t$upper[,2], order.by = for_dates)
# Adding confidence intervals of forecast to plot
lines(lower, col = "blue", lty = "dashed")
lines(upper, col = "blue", lty = "dashed")
\[ Price \space Elasticity = \frac{ \%Change \space in \space demand}{\% Change \space in \space price} \]
Elastic: Products have % changes in demand larger than the % change in price (Price elasticity > 1)
Inelastic: Products have % changes in demand smaller than the % change in price (Price elasticity < 1)
Unit elastic: (Price elasticity = 1)
We can estimate elasticity with linear regression:
bev_xts_train <- bev_xts[index(bev_xts) < "2017-01-01"]
MET_hi <- bev_xts[index(bev_xts) < "2017-01-01", "MET.hi"]
MET_hi_p <- bev_xts[index(bev_xts) < "2017-01-01", "MET.hi.p"]
# Save the prices of each product
l_MET_hi_p <- as.vector(log(bev_xts_train[,"MET.hi.p"]))
# Save as a data frame
MET_hi_train <- data.frame(as.vector(log(MET_hi)), l_MET_hi_p)
colnames(MET_hi_train) <- c("log_sales", "log_price")
# Calculate the regression
model_MET_hi <- lm(log_sales ~ log_price, data = MET_hi_train)
model_MET_hi
##
## Call:
## lm(formula = log_sales ~ log_price, data = MET_hi_train)
##
## Coefficients:
## (Intercept) log_price
## 13.977 -1.517
# Plot the product's sales
plot(MET_hi)
# Plot the product's price
plot(MET_hi_p)
# Create date indices for New Year's week
n.dates <- as.Date(c("2014-12-28", "2015-12-27", "2016-12-25"))
# Create xts objects for New Year's
newyear <- as.xts(rep(1, 3), order.by = n.dates)
# Create sequence of 154 weeks for merging
dates_train <- seq(as.Date("2014-01-19"), length = 154, by = "weeks")
# Merge training dates into New Year's object
newyear <- merge(newyear, dates_train, fill = 0)
# Create MET_hi_train_2 by adding newyear
MET_hi_train_2 <- data.frame(MET_hi_train, as.vector(newyear))
colnames(MET_hi_train_2)[3] <- "newyear"
# Build regressions for the product
model_MET_hi_full <- lm(log_sales ~ log_price + newyear, data = MET_hi_train_2)
model_MET_hi_full
##
## Call:
## lm(formula = log_sales ~ log_price + newyear, data = MET_hi_train_2)
##
## Coefficients:
## (Intercept) log_price newyear
## 13.5083 -1.4036 0.3375
bev_xts_valid <- bev_xts[index(bev_xts) >= '2017-01-01']
# Subset the validation prices #
l_MET_hi_p_valid <- as.vector(log(bev_xts_valid[,"MET.hi.p"]))
# Create a validation data frame #
MET_hi_valid <- data.frame(l_MET_hi_p_valid)
colnames(MET_hi_valid) <- "log_price"
# Predict the log of sales for your high end product
pred_MET_hi <- predict(model_MET_hi, newdata = MET_hi_valid)
# Convert predictions out of log scale
pred_MET_hi <- exp(pred_MET_hi)
# Convert to an xts object
dates_valid <- seq(as.Date("2017-01-01"), length = 22, by = "weeks")
pred_MET_hi_xts <- xts(pred_MET_hi, order.by = dates_valid)
# Plot the forecast
plot(pred_MET_hi_xts)
# Calculate and print the MAPE
MET_hi_v <- bev_xts_valid[,"MET.hi"]
MAPE <- 100*mean(abs((pred_MET_hi_xts - MET_hi_v)/MET_hi_v))
print(MAPE)
## [1] 29.57455
REsiduals might be related across time. When this happens we can use times series models to predict them!
# Calculate the residuals from the model
MET_hi_full_res <- residuals(model_MET_hi_full)
# Convert the residuals to an xts object
MET_hi_full_res <- xts(MET_hi_full_res, order.by = dates_train)
hist(MET_hi_full_res)
# Plot the residuals over time
plot(MET_hi_full_res)
# Build an ARIMA model on the residuals: MET_hi_arima
MET_hi_arima <- auto.arima(MET_hi_full_res)
# Look at a summary of the model
summary(MET_hi_arima)
## Series: MET_hi_full_res
## ARIMA(3,0,1) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1
## 1.2150 -0.1758 -0.2945 -0.4675
## s.e. 0.1578 0.1885 0.0901 0.1574
##
## sigma^2 estimated as 0.03905: log likelihood=32.51
## AIC=-55.02 AICc=-54.61 BIC=-39.83
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0004563225 0.1950202 0.1456822 -46.5269 224.0129 0.5251787
## ACF1
## Training set 0.001241359
# Forecast 22 weeks with your model: for_MET_hi_arima
for_MET_hi_arima <- forecast(MET_hi_arima, h = 22)
# Print first 10 observations
head(for_MET_hi_arima, n = 10)
## $method
## [1] "ARIMA(3,0,1) with zero mean"
##
## $model
## Series: MET_hi_full_res
## ARIMA(3,0,1) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1
## 1.2150 -0.1758 -0.2945 -0.4675
## s.e. 0.1578 0.1885 0.0901 0.1574
##
## sigma^2 estimated as 0.03905: log likelihood=32.51
## AIC=-55.02 AICc=-54.61 BIC=-39.83
##
## $level
## [1] 80 95
##
## $mean
## Time Series:
## Start = 1079
## End = 1226
## Frequency = 0.142857142857143
## [1] -0.059520971 -0.123195485 -0.108395233 -0.092513118 -0.057065746
## [6] -0.021147386 0.011584522 0.034599931 0.046231063 0.046676810
## [11] 0.038395199 0.024829004 0.009670533 -0.003923182 -0.013779374
## [16] -0.018900559 -0.019386556 -0.016173922 -0.010676814 -0.004419406
## [21] 0.001270855 0.005465546
##
## $lower
## Time Series:
## Start = 1079
## End = 1226
## Frequency = 0.142857142857143
## 80% 95%
## 1079 -0.3127598 -0.4468164
## 1086 -0.4393618 -0.6067301
## 1093 -0.4749463 -0.6689867
## 1100 -0.4774324 -0.6811964
## 1107 -0.4458092 -0.6515975
## 1114 -0.4099994 -0.6158452
## 1121 -0.3811843 -0.5891035
## 1128 -0.3665620 -0.5789243
## 1135 -0.3642032 -0.5814739
## 1142 -0.3705096 -0.5913548
## 1149 -0.3819623 -0.6044861
## 1156 -0.3962088 -0.6190928
## 1163 -0.4113721 -0.6342586
## 1170 -0.4255309 -0.6487166
## 1177 -0.4366929 -0.6605699
## 1184 -0.4433369 -0.6680199
## 1191 -0.4449813 -0.6702775
## 1198 -0.4423385 -0.6679364
## 1205 -0.4369750 -0.6626436
## 1212 -0.4307176 -0.6563862
## 1219 -0.4251130 -0.6508269
## 1226 -0.4211319 -0.6469590
##
## $upper
## Time Series:
## Start = 1079
## End = 1226
## Frequency = 0.142857142857143
## 80% 95%
## 1079 0.1937179 0.3277744
## 1086 0.1929708 0.3603392
## 1093 0.2581558 0.4521962
## 1100 0.2924062 0.4961702
## 1107 0.3316777 0.5374660
## 1114 0.3677046 0.5735505
## 1121 0.4043533 0.6122725
## 1128 0.4357619 0.6481242
## 1135 0.4566653 0.6739361
## 1142 0.4638632 0.6847084
## 1149 0.4587527 0.6812765
## 1156 0.4458668 0.6687508
## 1163 0.4307131 0.6535996
## 1170 0.4176846 0.6408702
## 1177 0.4091342 0.6330111
## 1184 0.4055358 0.6302188
## 1191 0.4062082 0.6315044
## 1198 0.4099906 0.6355885
## 1205 0.4156214 0.6412900
## 1212 0.4218788 0.6475474
## 1219 0.4276547 0.6533686
## 1226 0.4320630 0.6578900
##
## $x
## Time Series:
## Start = 1
## End = 1072
## Frequency = 0.142857142857143
## [1] -0.058999033 -0.315247314 -0.277546767 -0.110328345 -0.185057923
## [6] 0.048526306 0.206772604 0.338390379 0.151226805 -0.053650055
## [11] -0.080313306 -0.225218761 -0.406019869 -0.369101660 -0.331795950
## [16] -0.201770811 0.131847156 0.112059705 0.064097578 -0.052187658
## [21] 0.022621469 0.385533629 0.551251323 0.383513737 0.381696390
## [26] 0.047318052 -0.019221122 -0.201950896 -0.289680210 -0.445180098
## [31] -0.442613822 -0.411588404 -0.454958230 -0.460394699 -0.227813165
## [36] -0.116487166 0.206677217 0.306363735 0.348159419 0.295356662
## [41] 0.151378899 0.052733942 -0.249497110 -0.240150659 -0.132552961
## [46] -0.050660014 0.195509107 0.257980732 0.425005528 -0.092678721
## [51] 0.255219544 0.075872125 0.227307574 0.230754513 0.156006446
## [56] 0.297540772 -0.403191175 -0.403987269 -0.303061868 -0.156425653
## [61] 0.027713965 0.156550327 0.487456160 0.633098785 0.486250863
## [66] 0.421380928 0.302615159 -0.008411134 0.129749099 -0.506167070
## [71] -0.384310519 -0.288744691 -0.001615007 -0.007550550 0.133482154
## [76] -0.023723335 -0.156002790 -0.202150089 -0.197921265 -0.172237396
## [81] -0.008936655 0.130196365 -0.116448143 -0.349711702 -0.412404063
## [86] -0.422156872 -0.453129471 -0.350456107 -0.180943770 -0.154663958
## [91] 0.436683748 0.567294931 0.592620938 0.283277129 0.136579056
## [96] -0.053245550 -0.196425236 -0.520054827 -0.488988147 -0.483123266
## [101] 0.445431787 0.197347895 0.195419226 0.383594412 0.602960014
## [106] 0.570329499 0.603540729 0.683539391 -0.046731532 -0.336803694
## [111] -0.503267850 -0.575379265 -0.230232147 -0.067937744 0.262775823
## [116] 0.393201575 0.394605079 0.456772121 0.205598698 -0.101157120
## [121] 0.404977028 -0.416882804 -0.229953402 0.035500029 0.290107802
## [126] 0.374077388 0.493610848 0.385006232 0.465082489 0.547582786
## [131] 0.427940183 0.215599559 0.077339010 -0.193139487 -0.281919019
## [136] -0.502015015 -0.682780880 -0.787682450 -0.842705435 -0.697532148
## [141] -0.406923669 -0.324832500 -0.057221755 0.025107635 -0.044701895
## [146] 0.001374887 0.107768333 0.249931308 0.163666966 0.077917109
## [151] 0.154829515 -0.016125815 0.235240796 -0.104669174
##
## $series
## [1] "MET_hi_full_res"
##
## $fitted
## Time Series:
## Start = 1
## End = 1072
## Frequency = 0.142857142857143
## [1] -0.024039176 -0.067900130 -0.265081581 -0.256696683 -0.060191294
## [6] -0.065446378 0.070765187 0.233626979 0.311525456 0.138297794
## [11] -0.101688935 -0.142680988 -0.205135666 -0.336150967 -0.295346185
## [16] -0.201627178 -0.078049573 0.195253544 0.211293029 0.088168688
## [21] -0.042058390 -0.012456186 0.293747469 0.474941861 0.298261289
## [26] 0.194984467 -0.053525088 -0.160124016 -0.236376019 -0.285878603
## [31] -0.356016842 -0.333713819 -0.254750989 -0.256461118 -0.262835873
## [36] -0.078237468 0.051992187 0.266369838 0.351508441 0.309855423
## [41] 0.214204620 0.058837699 -0.046673238 -0.262170287 -0.273747390
## [46] -0.111367948 0.004094533 0.195998686 0.265018424 0.338654352
## [51] -0.061642458 0.053080596 0.063950495 0.111304523 0.162213446
## [56] 0.084938542 0.166730733 -0.321683718 -0.469110705 -0.256091450
## [61] -0.064397829 0.107363518 0.208411251 0.426119101 0.540648045
## [66] 0.361368563 0.211985197 0.108019322 -0.133089429 -0.052883185
## [71] -0.423412323 -0.434444363 -0.202314307 0.068151444 0.111540691
## [76] 0.153729301 0.032896941 -0.136368188 -0.180445992 -0.150822692
## [81] -0.104927121 0.032834237 0.164966937 -0.030173072 -0.293379079
## [86] -0.349649951 -0.303529182 -0.284938980 -0.191182645 -0.029570905
## [91] 0.005591384 0.409510176 0.584276523 0.487799449 0.168544413
## [96] -0.043444311 -0.167553018 -0.256024436 -0.458217199 -0.430460323
## [101] -0.323250817 0.410772150 0.403532478 0.168862361 0.273201215
## [106] 0.453438082 0.419327908 0.369337766 0.409532900 -0.141384849
## [111] -0.510947404 -0.542094011 -0.495862075 -0.154551733 0.086890184
## [116] 0.316795095 0.415832685 0.342857636 0.316549302 0.105157377
## [121] -0.197118175 0.167787163 -0.274578462 -0.346232698 0.027861074
## [126] 0.291357044 0.354375622 0.383441672 0.270104851 0.260862508
## [131] 0.336113672 0.243777952 0.038625584 -0.088069691 -0.262638327
## [136] -0.322342805 -0.419508289 -0.535212163 -0.571118497 -0.557354227
## [141] -0.401835901 -0.121220026 -0.022508035 0.123659364 0.182309892
## [146] 0.064262220 0.031538874 0.108223945 0.218065437 0.148611145
## [151] 0.025341495 0.065680011 -0.031515359 0.118340632
##
## $residuals
## Time Series:
## Start = 1
## End = 1072
## Frequency = 0.142857142857143
## [1] -0.0349598564 -0.2473471831 -0.0124651860 0.1463683380 -0.1248666293
## [6] 0.1139726842 0.1360074169 0.1047633996 -0.1602986514 -0.1919478486
## [11] 0.0213756290 -0.0825377733 -0.2008842037 -0.0329506929 -0.0364497642
## [16] -0.0001436333 0.2098967286 -0.0831938385 -0.1471954508 -0.1403563456
## [21] 0.0646798592 0.3979898154 0.2575038539 -0.0914281240 0.0834351009
## [26] -0.1476664148 0.0343039666 -0.0418268799 -0.0533041907 -0.1593014944
## [31] -0.0865969798 -0.0778745844 -0.2002072411 -0.2039335809 0.0350227076
## [36] -0.0382496980 0.1546850299 0.0399938967 -0.0033490229 -0.0144987614
## [41] -0.0628257214 -0.0061037572 -0.2028238716 0.0220196278 0.1411944295
## [46] 0.0607079345 0.1914145740 0.0619820458 0.1599871042 -0.4313330734
## [51] 0.3168620026 0.0227915290 0.1633570791 0.1194499903 -0.0062069997
## [56] 0.2126022306 -0.5699219087 -0.0823035512 0.1660488371 0.0996657965
## [61] 0.0921117940 0.0491868093 0.2790449090 0.2069796843 -0.0543971822
## [66] 0.0600123652 0.0906299618 -0.1164304557 0.2628385276 -0.4532838855
## [71] 0.0391018048 0.1456996719 0.2006993005 -0.0757019941 0.0219414632
## [76] -0.1774526359 -0.1888997306 -0.0657819008 -0.0174752730 -0.0214147039
## [81] 0.0959904659 0.0973621281 -0.2814150804 -0.3195386300 -0.1190249837
## [86] -0.0725069209 -0.1496002889 -0.0655171273 0.0102388755 -0.1250930523
## [91] 0.4310923641 0.1577847546 0.0083444153 -0.2045223191 -0.0319653566
## [96] -0.0098012381 -0.0288722173 -0.2640303914 -0.0307709475 -0.0526629427
## [101] 0.7686826039 -0.2134242546 -0.2081132526 0.2147320514 0.3297587994
## [106] 0.1168914166 0.1842128206 0.3142016249 -0.4562644315 -0.1954188449
## [111] 0.0076795539 -0.0332852532 0.2656299284 0.0866139892 0.1758856391
## [116] 0.0764064797 -0.0212276059 0.1139144849 -0.1109506038 -0.2063144961
## [121] 0.6020952027 -0.5846699672 0.0446250606 0.3817327274 0.2622467273
## [126] 0.0827203434 0.1392352256 0.0015645599 0.1949776384 0.2867202777
## [131] 0.0918265111 -0.0281783928 0.0387134257 -0.1050697960 -0.0192806922
## [136] -0.1796722102 -0.2632725908 -0.2524702861 -0.2715869377 -0.1401779205
## [141] -0.0050877682 -0.2036124737 -0.0347137195 -0.0985517295 -0.2270117875
## [146] -0.0628873337 0.0762294590 0.1417073621 -0.0543984708 -0.0706940356
## [151] 0.1294880202 -0.0818058263 0.2667561544 -0.2230098056
# Convert your forecasts into an xts object
dates_valid <- seq(as.Date("2017-01-01"), length = 22, by = "weeks")
for_MET_hi_arima <- xts(for_MET_hi_arima$mean, order.by = dates_valid)
# Plot the forecast
plot(for_MET_hi_arima)
Combining techniques:
\[ Log(Y_{t}) = \beta_{0} + \beta_{1} \space log(X_{1}) + \beta_{2} \space X_{2} + \cdots + \epsilon_{t} \]
\[ \epsilon_{t} = \alpha_{0} + \alpha_{1}\epsilon_{t-1} + \alpha_{2}\epsilon_{t-2} +\cdots + \epsilon \]
\[ log(Y_{t}) = log(\hat{Y_{t}}) + \hat{\epsilon_{t}} \]
\[ Y_{t} = \hat{Y_{t}} \space \times \space exp(\hat{\epsilon_{t}}) \]
# Convert your residual forecast to the exponential version
for_MET_hi_arima <- exp(for_MET_hi_arima)
# Multiply your forecasts together!
for_MET_hi_final <- for_MET_hi_arima * pred_MET_hi_xts
# Plot the final forecast - don't touch the options!
plot(for_MET_hi_final, ylim = c(1000, 4300))
# Overlay the validation data set
lines(MET_hi_v, col = "blue")
Error metrics
# Calculate the MAE
MAE <- mean(abs(MET_hi_v - for_MET_hi_final))
print(MAE)
## [1] 474.7013
# Calculate the MAPE
MAPE <- 100*mean(abs((for_MET_hi_final - MET_hi_v)/MET_hi_v))
print(MAPE)
## [1] 28.44671
# Build an ARIMA model using the auto.arima function
MET_hi_model_arima <- auto.arima(MET_hi)
# Forecast the ARIMA model you just built above
for_MET_hi <- forecast(MET_hi_model_arima, h = 22)
# Save the forecast as an xts object
dates_valid <- seq(as.Date("2017-01-01"), length = 22, by = "weeks")
for_MET_hi_xts <- xts(for_MET_hi$mean, order.by = dates_valid)
# Calculate the MAPE of the forecast
MAPE <- 100*mean(abs((for_MET_hi_xts - MET_hi_v)/MET_hi_v))
print(MAPE)
## [1] 40.82418
# Ensemble the two forecasts together
for_MET_hi_en <- 0.5*(pred_MET_hi_xts+for_MET_hi_xts)
# Calculate the MAE and MAPE
MAE <- mean(abs(for_MET_hi_en - MET_hi_v))
print(MAE)
## [1] 572.8845
MAPE <- 100*mean(abs((for_MET_hi_en - MET_hi_v)/MET_hi_v))
print(MAPE)
## [1] 34.55672
mape <- function(yhat, y){
mean(abs((y - yhat)/y))*100}
MET_sp <- bev_xts[index(bev_xts) < '2017-01-01', "MET.sp"]
MET_sp_v <- bev_xts[index(bev_xts) >= '2017-01-01', "MET.sp"]
# Build a time series model
MET_sp_model_arima <- auto.arima(MET_sp)
# Forecast the time series model you just built for 22 periods
for_MET_sp <- forecast(MET_sp_model_arima, h = 22)
# Create an xts object on the forecast object you just created
for_MET_sp_xts <- xts(for_MET_sp$mean, order.by = dates_valid)
# Calculate the MAPE on your forecast with the validation data
MAPE <- mape(for_MET_sp_xts, MET_sp_v)
print(MAPE)
## [1] 6.352973
With regression…
# Build a regression model on the training data
model_MET_sp_full <- lm(log_sales ~ log_price + christmas + valentine + newyear + mother, data = MET_sp_train)
# Forecast the regression model using the predict function
pred_MET_sp <- predict(model_MET_sp_full, newdata = MET_sp_valid)
# Exponentiate your predictions and create an xts object
pred_MET_sp <- exp(pred_MET_sp)
pred_MET_sp_xts <- xts(pred_MET_sp, order.by = dates_valid)
# Calculate MAPE
MAPE <- mape(pred_MET_sp_xts, MET_sp_v)
print(MAPE)
Ensemble forecasts
# Ensemble the two forecasts
for_MET_sp_en <- (for_MET_sp_xts + pred_MET_sp_xts)/2
# Calculate the MAPE
MAPE <- mape(for_MET_sp_en, MET_sp_v)
print(MAPE)