Data from US Census Bureau: Monthly Sales for Food Services and Drinking Places ($ in Millions)
setwd("~/Documents/MSAE/Predictive Analytics")
retaildata <- read.csv("Servicedata.csv")
retaildata <- retaildata %>%
mutate(Month = yearmonth(Month)) %>%
tsibble(index = Month)
retaildata$Value = as.numeric(retaildata$Value)
training <- retaildata %>%
filter(year(Month) < 2019 & year(Month) >2014)
test <- retaildata %>%
filter(year(Month) == 2019)
retaildata %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Value`
training %>% autoplot() +
labs(title = "Monthly Sales for Food Services and Drinking Places, 2015-2018") +
xlab("Month") +
ylab("Sales ($ in Millions)")
## Plot variable not specified, automatically selected `.vars = Value`
training %>% gg_season(Value) +
labs(title = "Seasonality of Sales for Food Services and Drinking Places") +
xlab("Month") +
ylab("Sales ($ in Millions)")
training %>%
model(classical_decomposition(Value, type = "multiplicative")) %>%
components() %>%
autoplot() +
labs(title ="Multiplicative Decomposition")
## Warning: Removed 6 rows containing missing values (`geom_line()`).
ARIMA_model <- training %>%
model(ARIMA(Value))
ARIMA_fcst <- ARIMA_model %>% forecast(test)
ARIMA_plot <- ARIMA_model %>% forecast(test) %>% autoplot(test) +
labs(title = "ARIMA model") +
xlab("Month") +
ylab("Sales ($ in Millions)")
ARIMA_accuracy <- accuracy(ARIMA_fcst, test)
report(ARIMA_model)
## Series: Value
## Model: ARIMA(0,0,0)(0,1,0)[12] w/ drift
##
## Coefficients:
## constant
## 2959.4722
## s.e. 183.2866
##
## sigma^2 estimated as 1244764: log likelihood=-303.18
## AIC=610.37 AICc=610.73 BIC=613.53
ARIMA_model %>% gg_tsresiduals(lag_max = 12) + labs(title = "ARIMA model")
ARIMA_plot
ETS_model <- training %>%
model(ETS(Value))
ETS_fcst <- ETS_model %>% forecast(test)
ETS_plot <- ETS_model %>% forecast(test) %>% autoplot(test) +
labs(title = "ETS model")+
xlab("Month") +
ylab("Sales ($ in Millions)")
ETS_accuracy <- accuracy(ETS_fcst, test)
report(ETS_model)
## Series: Value
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.006505659
## beta = 0.0001001413
## gamma = 0.0001000497
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 50656.4 239.0191 1.022326 0.9490682 1.004062 0.9750473 1.019702 1.021816
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 1.016316 1.057078 1.021613 1.048388 0.9310134 0.9335696
##
## sigma^2: 3e-04
##
## AIC AICc BIC
## 855.4233 875.8233 887.2337
ETS_model %>% gg_tsresiduals(lag_max = 12) + labs(title="ETS Model")
ETS_plot
LM_model <- training %>%
model(TSLM(Value ~ season() + trend()))
LM_fcst <- LM_model %>% forecast(test)
LM_plot <- LM_model %>% forecast(test) %>% autoplot(test) +
labs(title = "Linear model") +
xlab("Month") +
ylab("Sales ($ in Millions)")
LM_accuracy <- accuracy(LM_fcst, test)
report(LM_model)
## Series: Value
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -1275.0 -584.4 -119.1 318.2 2158.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46755.845 499.415 93.621 < 2e-16 ***
## season()year2 -199.692 653.833 -0.305 0.76186
## season()year3 6301.615 654.060 9.635 2.22e-11 ***
## season()year4 4762.173 654.438 7.277 1.68e-08 ***
## season()year5 6809.981 654.967 10.397 3.03e-12 ***
## season()year6 4626.788 655.647 7.057 3.23e-08 ***
## season()year7 4901.846 656.476 7.467 9.64e-09 ***
## season()year8 4705.903 657.455 7.158 2.39e-08 ***
## season()year9 2130.211 658.583 3.235 0.00266 **
## season()year10 3742.019 659.858 5.671 2.10e-06 ***
## season()year11 616.076 661.281 0.932 0.35790
## season()year12 4858.634 662.850 7.330 1.44e-08 ***
## trend() 246.192 9.947 24.751 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 924.6 on 35 degrees of freedom
## Multiple R-squared: 0.9654, Adjusted R-squared: 0.9535
## F-statistic: 81.27 on 12 and 35 DF, p-value: < 2.22e-16
LM_model %>% gg_tsresiduals(lag_max = 12) + labs(title="LM Model")
LM_plot
## Series: Value
## Model: COMBINATION
## Combination: Value * 0.333333333333333
##
## ======================================
##
## Series: Value
## Model: COMBINATION
## Combination: Value + Value
##
## ==========================
##
## Series: Value
## Model: COMBINATION
## Combination: Value + Value
##
## ==========================
##
## Series: Value
## Model: ARIMA(0,0,0)(0,1,0)[12] w/ drift
##
## Coefficients:
## constant
## 2959.4722
## s.e. 183.2866
##
## sigma^2 estimated as 1244764: log likelihood=-303.18
## AIC=610.37 AICc=610.73 BIC=613.53
##
## Series: Value
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.006505659
## beta = 0.0001001413
## gamma = 0.0001000497
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 50656.4 239.0191 1.022326 0.9490682 1.004062 0.9750473 1.019702 1.021816
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 1.016316 1.057078 1.021613 1.048388 0.9310134 0.9335696
##
## sigma^2: 3e-04
##
## AIC AICc BIC
## 855.4233 875.8233 887.2337
##
##
## Series: Value
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -1275.0 -584.4 -119.1 318.2 2158.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46755.845 499.415 93.621 < 2e-16 ***
## season()year2 -199.692 653.833 -0.305 0.76186
## season()year3 6301.615 654.060 9.635 2.22e-11 ***
## season()year4 4762.173 654.438 7.277 1.68e-08 ***
## season()year5 6809.981 654.967 10.397 3.03e-12 ***
## season()year6 4626.788 655.647 7.057 3.23e-08 ***
## season()year7 4901.846 656.476 7.467 9.64e-09 ***
## season()year8 4705.903 657.455 7.158 2.39e-08 ***
## season()year9 2130.211 658.583 3.235 0.00266 **
## season()year10 3742.019 659.858 5.671 2.10e-06 ***
## season()year11 616.076 661.281 0.932 0.35790
## season()year12 4858.634 662.850 7.330 1.44e-08 ***
## trend() 246.192 9.947 24.751 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 924.6 on 35 degrees of freedom
## Multiple R-squared: 0.9654, Adjusted R-squared: 0.9535
## F-statistic: 81.27 on 12 and 35 DF, p-value: < 2.22e-16
#Plot
fit <- training %>%
model('ETS' = ETS(Value),
'ARIMA' = ARIMA(Value),
'Ensemble' = ((ARIMA(Value)+ETS(Value) + SNAIVE(Value))/3),
"Linear" = TSLM(Value ~ season() + trend())
)
fcst <- fit %>% forecast(test)
fcst %>%
autoplot(test, level = NULL) +
autolayer(test) +
labs(title = "Model Forecasts of 2019 Sales") +
ylab("Sales ($ in Millions)")
## Plot variable not specified, automatically selected `.vars = Value`
accuracy_metrics = rbind("ETS" = ETS_accuracy,
"ARIMA" = ARIMA_accuracy,
"LM" = LM_accuracy,
"Ensemble" = ensemble_accuracy)
accuracy_metrics <- accuracy_metrics %>% arrange(RMSE)
accuracy_metrics
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## * <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(Value) Test 461. 1295. 1046. 0.651 1.63 NaN NaN 0.723
## 2 (ARIMA(Value) + ETS(Val… Test 600. 1493. 1310. 0.828 2.06 NaN NaN 0.590
## 3 ETS(Value) Test 740. 1693. 1488. 1.04 2.33 NaN NaN 0.520
## 4 TSLM(Value ~ season() +… Test 599. 1731. 1545. 0.795 2.43 NaN NaN 0.475