Data from US Census Bureau: Monthly Sales for Food Services and Drinking Places ($ in Millions)

Load Data

setwd("~/Documents/MSAE/Predictive Analytics")
retaildata <- read.csv("Servicedata.csv")

Creating Tsibble

retaildata  <- retaildata  %>% 
  mutate(Month = yearmonth(Month)) %>% 
  tsibble(index = Month)

retaildata$Value = as.numeric(retaildata$Value)

Training Set (2015-2018) & Test Set (2019)

training <- retaildata %>% 
  filter(year(Month) < 2019 & year(Month) >2014)

test <- retaildata %>% 
  filter(year(Month) == 2019)

Time-Series Plot

retaildata %>% autoplot()

## Plot variable not specified, automatically selected `.vars = Value`

training %>% autoplot() + 
  labs(title = "Monthly Sales for Food Services and Drinking Places, 2015-2018") +
  xlab("Month") +
  ylab("Sales ($ in Millions)")

## Plot variable not specified, automatically selected `.vars = Value`

Seasonal Plots

training %>% gg_season(Value) + 
  labs(title = "Seasonality of Sales for Food Services and Drinking Places") +
  xlab("Month") +
  ylab("Sales ($ in Millions)")

training %>% 
  model(classical_decomposition(Value, type = "multiplicative")) %>% 
  components() %>% 
  autoplot() + 
  labs(title ="Multiplicative Decomposition")

## Warning: Removed 6 rows containing missing values (`geom_line()`).

ARIMA

ARIMA_model <- training %>% 
  model(ARIMA(Value))

ARIMA_fcst <- ARIMA_model %>% forecast(test)
ARIMA_plot <- ARIMA_model %>% forecast(test) %>% autoplot(test) + 
  labs(title = "ARIMA model") +
  xlab("Month") +
  ylab("Sales ($ in Millions)") 
ARIMA_accuracy <- accuracy(ARIMA_fcst, test)

report(ARIMA_model)

## Series: Value 
## Model: ARIMA(0,0,0)(0,1,0)[12] w/ drift 
## 
## Coefficients:
##        constant
##       2959.4722
## s.e.   183.2866
## 
## sigma^2 estimated as 1244764:  log likelihood=-303.18
## AIC=610.37   AICc=610.73   BIC=613.53

ARIMA_model %>% gg_tsresiduals(lag_max = 12) + labs(title = "ARIMA model")

ARIMA_plot

ETS model

ETS_model <- training %>% 
  model(ETS(Value))


ETS_fcst <- ETS_model %>% forecast(test)
ETS_plot <- ETS_model %>% forecast(test) %>% autoplot(test) + 
  labs(title = "ETS model")+
  xlab("Month") +
  ylab("Sales ($ in Millions)") 
ETS_accuracy <- accuracy(ETS_fcst, test)


report(ETS_model)

## Series: Value 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.006505659 
##     beta  = 0.0001001413 
##     gamma = 0.0001000497 
## 
##   Initial states:
##     l[0]     b[0]     s[0]     s[-1]    s[-2]     s[-3]    s[-4]    s[-5]
##  50656.4 239.0191 1.022326 0.9490682 1.004062 0.9750473 1.019702 1.021816
##     s[-6]    s[-7]    s[-8]    s[-9]    s[-10]    s[-11]
##  1.016316 1.057078 1.021613 1.048388 0.9310134 0.9335696
## 
##   sigma^2:  3e-04
## 
##      AIC     AICc      BIC 
## 855.4233 875.8233 887.2337

ETS_model %>% gg_tsresiduals(lag_max = 12) + labs(title="ETS Model")

ETS_plot

Linear model

LM_model <- training %>% 
  model(TSLM(Value ~ season() + trend()))



LM_fcst <- LM_model %>% forecast(test)
LM_plot <- LM_model %>% forecast(test) %>% autoplot(test) + 
  labs(title = "Linear model") +
  xlab("Month") +
  ylab("Sales ($ in Millions)") 


LM_accuracy <- accuracy(LM_fcst, test)

report(LM_model)

## Series: Value 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1275.0  -584.4  -119.1   318.2  2158.8 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    46755.845    499.415  93.621  < 2e-16 ***
## season()year2   -199.692    653.833  -0.305  0.76186    
## season()year3   6301.615    654.060   9.635 2.22e-11 ***
## season()year4   4762.173    654.438   7.277 1.68e-08 ***
## season()year5   6809.981    654.967  10.397 3.03e-12 ***
## season()year6   4626.788    655.647   7.057 3.23e-08 ***
## season()year7   4901.846    656.476   7.467 9.64e-09 ***
## season()year8   4705.903    657.455   7.158 2.39e-08 ***
## season()year9   2130.211    658.583   3.235  0.00266 ** 
## season()year10  3742.019    659.858   5.671 2.10e-06 ***
## season()year11   616.076    661.281   0.932  0.35790    
## season()year12  4858.634    662.850   7.330 1.44e-08 ***
## trend()          246.192      9.947  24.751  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 924.6 on 35 degrees of freedom
## Multiple R-squared: 0.9654,  Adjusted R-squared: 0.9535
## F-statistic: 81.27 on 12 and 35 DF, p-value: < 2.22e-16

LM_model %>% gg_tsresiduals(lag_max = 12) + labs(title="LM Model")

LM_plot

Ensemble

## Series: Value 
## Model: COMBINATION 
## Combination: Value * 0.333333333333333
## 
## ======================================
## 
## Series: Value 
## Model: COMBINATION 
## Combination: Value + Value
## 
## ==========================
## 
## Series: Value 
## Model: COMBINATION 
## Combination: Value + Value
## 
## ==========================
## 
## Series: Value 
## Model: ARIMA(0,0,0)(0,1,0)[12] w/ drift 
## 
## Coefficients:
##        constant
##       2959.4722
## s.e.   183.2866
## 
## sigma^2 estimated as 1244764:  log likelihood=-303.18
## AIC=610.37   AICc=610.73   BIC=613.53
## 
## Series: Value 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.006505659 
##     beta  = 0.0001001413 
##     gamma = 0.0001000497 
## 
##   Initial states:
##     l[0]     b[0]     s[0]     s[-1]    s[-2]     s[-3]    s[-4]    s[-5]
##  50656.4 239.0191 1.022326 0.9490682 1.004062 0.9750473 1.019702 1.021816
##     s[-6]    s[-7]    s[-8]    s[-9]    s[-10]    s[-11]
##  1.016316 1.057078 1.021613 1.048388 0.9310134 0.9335696
## 
##   sigma^2:  3e-04
## 
##      AIC     AICc      BIC 
## 855.4233 875.8233 887.2337 
## 
## 
## Series: Value 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1275.0  -584.4  -119.1   318.2  2158.8 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    46755.845    499.415  93.621  < 2e-16 ***
## season()year2   -199.692    653.833  -0.305  0.76186    
## season()year3   6301.615    654.060   9.635 2.22e-11 ***
## season()year4   4762.173    654.438   7.277 1.68e-08 ***
## season()year5   6809.981    654.967  10.397 3.03e-12 ***
## season()year6   4626.788    655.647   7.057 3.23e-08 ***
## season()year7   4901.846    656.476   7.467 9.64e-09 ***
## season()year8   4705.903    657.455   7.158 2.39e-08 ***
## season()year9   2130.211    658.583   3.235  0.00266 ** 
## season()year10  3742.019    659.858   5.671 2.10e-06 ***
## season()year11   616.076    661.281   0.932  0.35790    
## season()year12  4858.634    662.850   7.330 1.44e-08 ***
## trend()          246.192      9.947  24.751  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 924.6 on 35 degrees of freedom
## Multiple R-squared: 0.9654,  Adjusted R-squared: 0.9535
## F-statistic: 81.27 on 12 and 35 DF, p-value: < 2.22e-16

#Plot

fit <- training %>% 
  model('ETS' = ETS(Value),
        'ARIMA' = ARIMA(Value),
        'Ensemble' = ((ARIMA(Value)+ETS(Value) + SNAIVE(Value))/3),
        "Linear" = TSLM(Value ~ season() + trend())
        )

fcst <- fit %>% forecast(test)

fcst %>% 
  autoplot(test, level = NULL) +
  autolayer(test) +
  labs(title = "Model Forecasts of 2019 Sales") +
  ylab("Sales ($ in Millions)")

## Plot variable not specified, automatically selected `.vars = Value`

Accuracy Metrics

accuracy_metrics = rbind("ETS" = ETS_accuracy, 
                         "ARIMA" = ARIMA_accuracy,
                         "LM" = LM_accuracy, 
                         "Ensemble" = ensemble_accuracy)
accuracy_metrics <- accuracy_metrics %>% arrange(RMSE)
accuracy_metrics

## # A tibble: 4 × 10
##   .model                   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
## * <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(Value)             Test   461. 1295. 1046. 0.651  1.63   NaN   NaN 0.723
## 2 (ARIMA(Value) + ETS(Val… Test   600. 1493. 1310. 0.828  2.06   NaN   NaN 0.590
## 3 ETS(Value)               Test   740. 1693. 1488. 1.04   2.33   NaN   NaN 0.520
## 4 TSLM(Value ~ season() +… Test   599. 1731. 1545. 0.795  2.43   NaN   NaN 0.475

Midterm Presentation

Martha Rose Oordt

2023-07-23

Load Data

Creating Tsibble

Training Set (2015-2018) & Test Set (2019)

Time-Series Plot

Seasonal Plots

ARIMA

ETS model

Linear model

Ensemble

Accuracy Metrics