Data from US Census Bureau: Monthly Sales for Food Services and Drinking Places ($ in Millions)

Load Data

setwd("~/Documents/MSAE/Predictive Analytics")
retaildata <- read.csv("Servicedata.csv")

Creating Tsibble

retaildata  <- retaildata  %>% 
  mutate(Month = yearmonth(Month)) %>% 
  tsibble(index = Month)

retaildata$Value = as.numeric(retaildata$Value)

Training Set (2000-2021) & Test Set (2022)

training <- retaildata %>% 
  filter(year(Month) < 2022 )

test <- retaildata %>% 
  filter(year(Month) == 2022)

Time-Series Plot

retaildata %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Value`

training %>% autoplot() + 
  labs(title = "Monthly Sales for Food Services and Drinking Places, 2000-2021") +
  xlab("Month") +
  ylab("Sales ($ in Millions)") 
## Plot variable not specified, automatically selected `.vars = Value`

Seasonal Plots

training %>% gg_season(Value) + 
  labs(title = "Seasonality of Sales for Food Services and Drinking Places") +
  xlab("Month") +
  ylab("Sales ($ in Millions)") 

training %>% 
  model(classical_decomposition(Value, type = "multiplicative")) %>% 
  components() %>% 
  autoplot() + 
  labs(title ="Multiplicative Decomposition")
## Warning: Removed 6 rows containing missing values (`geom_line()`).

ARIMA

ARIMA_model <- training %>% 
  model(ARIMA(Value))

ARIMA_fcst <- ARIMA_model %>% forecast(test)
ARIMA_plot <- ARIMA_model %>% forecast(test) %>% autoplot(test) + 
  labs(title = "ARIMA model") +
  xlab("Month") +
  ylab("Sales ($ in Millions)") 
ARIMA_accuracy <- accuracy(ARIMA_fcst, test)

report(ARIMA_model)
## Series: Value 
## Model: ARIMA(2,1,2)(1,0,0)[12] 
## 
## Coefficients:
##          ar1     ar2      ma1      ma2    sar1
##       0.0368  0.5739  -0.0066  -0.8800  0.4759
## s.e.  0.0755  0.0707   0.0390   0.0385  0.0624
## 
## sigma^2 estimated as 7724650:  log likelihood=-2457.79
## AIC=4927.59   AICc=4927.92   BIC=4949.02
ARIMA_model %>% gg_tsresiduals(lag_max = 12) + labs(title = "ARIMA model")

ARIMA_plot

Fourier Models

fourier_model <- training %>% 
  model(Fourier_Model=ARIMA(Value~trend(knots=yearmonth('2020 Mar'))+fourier(K=2)))

fourier_fcst <- fourier_model %>% forecast(test)
fourier_plot <- fourier_model %>% forecast(test) %>% autoplot(test) + 
  labs(title = "Fourier model (k=2)") +
  xlab("Month") +
  ylab("Sales ($ in Millions)") 
fourier_accuracy <- accuracy(fourier_fcst, test)

report(fourier_model)
## Series: Value 
## Model: LM w/ ARIMA(2,0,1)(1,0,0)[12] errors 
## 
## Coefficients:
##           ar1     ar2     ma1    sar1
##       -0.0241  0.5373  0.9332  0.4295
## s.e.   0.0662  0.0618  0.0269  0.0622
##       trend(knots = yearmonth("2020 Mar"))trend
##                                        146.2423
## s.e.                                    14.3574
##       trend(knots = yearmonth("2020 Mar"))trend_243  fourier(K = 2)C1_12
##                                            758.2750           -1700.0997
## s.e.                                       204.8425             812.9883
##       fourier(K = 2)S1_12  fourier(K = 2)C2_12  fourier(K = 2)S2_12  intercept
##                  383.5063            -176.1013            -297.9610  23268.807
## s.e.             818.1705             473.6961             474.7696   2079.874
## 
## sigma^2 estimated as 7048245:  log likelihood=-2452.12
## AIC=4928.25   AICc=4929.49   BIC=4971.16
fourier_model %>% gg_tsresiduals(lag_max = 12) + labs(title = "Fourier model (k=2)")

fourier_plot

fourier_model2 <- training %>% 
  model(Fourier_Model=ARIMA(Value~trend(knots=yearmonth('2020 Mar'))+fourier(K=3)))

fourier_fcst2 <- fourier_model2 %>% forecast(test)
fourier_plot2 <- fourier_model2 %>% forecast(test) %>% autoplot(test) + 
  labs(title = "Fourier model (k=3)") +
  xlab("Month") +
  ylab("Sales ($ in Millions)") 
fourier_accuracy2 <- accuracy(fourier_fcst2, test)

report(fourier_model2)
## Series: Value 
## Model: LM w/ ARIMA(2,0,1)(1,0,0)[12] errors 
## 
## Coefficients:
##           ar1     ar2     ma1    sar1
##       -0.0330  0.5463  0.9339  0.4053
## s.e.   0.0655  0.0609  0.0261  0.0632
##       trend(knots = yearmonth("2020 Mar"))trend
##                                        146.6728
## s.e.                                    13.8087
##       trend(knots = yearmonth("2020 Mar"))trend_243  fourier(K = 3)C1_12
##                                            740.7073           -1690.3141
## s.e.                                       201.9509             773.0843
##       fourier(K = 3)S1_12  fourier(K = 3)C2_12  fourier(K = 3)S2_12
##                  369.7693            -176.2643            -300.7616
## s.e.             778.0500             449.9278             450.9584
##       fourier(K = 3)C3_12  fourier(K = 3)S3_12  intercept
##                 -438.0900            -509.5384   23237.69
## s.e.             324.5387             324.6039    1997.41
## 
## sigma^2 estimated as 7003567:  log likelihood=-2450.09
## AIC=4928.18   AICc=4929.87   BIC=4978.24
fourier_model2 %>% gg_tsresiduals(lag_max = 12) + labs(title = "Fourier model (k=3)")

fourier_plot2

ETS model

ETS_model <- training %>% 
  model(ETS(Value))


ETS_fcst <- ETS_model %>% forecast(test)
ETS_plot <- ETS_model %>% forecast(test) %>% autoplot(test) + 
  labs(title = "ETS model")+
  xlab("Month") +
  ylab("Sales ($ in Millions)") 
ETS_accuracy <- accuracy(ETS_fcst, test)


report(ETS_model)
## Series: Value 
## Model: ETS(M,Ad,M) 
##   Smoothing parameters:
##     alpha = 0.7039439 
##     beta  = 0.0001221414 
##     gamma = 0.0001075596 
##     phi   = 0.9796545 
## 
##   Initial states:
##      l[0]     b[0]     s[0]     s[-1]    s[-2]     s[-3]    s[-4]    s[-5]
##  24785.08 149.0824 1.011296 0.9570255 1.015934 0.9854907 1.042318 1.038841
##     s[-6]    s[-7]     s[-8]    s[-9]    s[-10]    s[-11]
##  1.028025 1.044183 0.9918669 1.028442 0.9244421 0.9321344
## 
##   sigma^2:  0.0022
## 
##      AIC     AICc      BIC 
## 5481.783 5484.575 5546.150
ETS_model %>% gg_tsresiduals(lag_max = 12) + labs(title="ETS Model")

ETS_plot 

Linear model

LM_model <- training %>% 
  model(TSLM(Value ~ season() + trend()))



LM_fcst <- LM_model %>% forecast(test)
LM_plot <- LM_model %>% forecast(test) %>% autoplot(test) + 
  labs(title = "Linear model") +
  xlab("Month") +
  ylab("Sales ($ in Millions)") 


LM_accuracy <- accuracy(LM_fcst, test)

report(LM_model)
## Series: Value 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29303.3  -2039.0    303.9   1539.7  13077.6 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    18989.775    954.431  19.896  < 2e-16 ***
## season()year2   -188.701   1216.112  -0.155 0.876814    
## season()year3   3959.826   1216.125   3.256 0.001285 ** 
## season()year4   2138.443   1216.147   1.758 0.079902 .  
## season()year5   4573.879   1216.177   3.761 0.000211 ***
## season()year6   3779.042   1216.217   3.107 0.002106 ** 
## season()year7   4509.886   1216.265   3.708 0.000257 ***
## season()year8   4615.140   1216.322   3.794 0.000186 ***
## season()year9   2114.121   1216.387   1.738 0.083431 .  
## season()year10  3557.602   1216.461   2.925 0.003765 ** 
## season()year11  1005.401   1216.545   0.826 0.409339    
## season()year12  3554.655   1216.636   2.922 0.003798 ** 
## trend()          162.246      3.261  49.759  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4033 on 251 degrees of freedom
## Multiple R-squared:  0.91,   Adjusted R-squared: 0.9057
## F-statistic: 211.4 on 12 and 251 DF, p-value: < 2.22e-16
LM_model %>% gg_tsresiduals(lag_max = 12) + labs(title="LM Model")

LM_plot 

Ensemble

## Series: Value 
## Model: COMBINATION 
## Combination: Value * 0.333333333333333
## 
## ======================================
## 
## Series: Value 
## Model: COMBINATION 
## Combination: Value + Value
## 
## ==========================
## 
## Series: Value 
## Model: COMBINATION 
## Combination: Value + Value
## 
## ==========================
## 
## Series: Value 
## Model: ARIMA(2,1,2)(1,0,0)[12] 
## 
## Coefficients:
##          ar1     ar2      ma1      ma2    sar1
##       0.0368  0.5739  -0.0066  -0.8800  0.4759
## s.e.  0.0755  0.0707   0.0390   0.0385  0.0624
## 
## sigma^2 estimated as 7724650:  log likelihood=-2457.79
## AIC=4927.59   AICc=4927.92   BIC=4949.02
## 
## Series: Value 
## Model: ETS(M,Ad,M) 
##   Smoothing parameters:
##     alpha = 0.7039439 
##     beta  = 0.0001221414 
##     gamma = 0.0001075596 
##     phi   = 0.9796545 
## 
##   Initial states:
##      l[0]     b[0]     s[0]     s[-1]    s[-2]     s[-3]    s[-4]    s[-5]
##  24785.08 149.0824 1.011296 0.9570255 1.015934 0.9854907 1.042318 1.038841
##     s[-6]    s[-7]     s[-8]    s[-9]    s[-10]    s[-11]
##  1.028025 1.044183 0.9918669 1.028442 0.9244421 0.9321344
## 
##   sigma^2:  0.0022
## 
##      AIC     AICc      BIC 
## 5481.783 5484.575 5546.150 
## 
## 
## Series: Value 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29303.3  -2039.0    303.9   1539.7  13077.6 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    18989.775    954.431  19.896  < 2e-16 ***
## season()year2   -188.701   1216.112  -0.155 0.876814    
## season()year3   3959.826   1216.125   3.256 0.001285 ** 
## season()year4   2138.443   1216.147   1.758 0.079902 .  
## season()year5   4573.879   1216.177   3.761 0.000211 ***
## season()year6   3779.042   1216.217   3.107 0.002106 ** 
## season()year7   4509.886   1216.265   3.708 0.000257 ***
## season()year8   4615.140   1216.322   3.794 0.000186 ***
## season()year9   2114.121   1216.387   1.738 0.083431 .  
## season()year10  3557.602   1216.461   2.925 0.003765 ** 
## season()year11  1005.401   1216.545   0.826 0.409339    
## season()year12  3554.655   1216.636   2.922 0.003798 ** 
## trend()          162.246      3.261  49.759  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4033 on 251 degrees of freedom
## Multiple R-squared:  0.91,   Adjusted R-squared: 0.9057
## F-statistic: 211.4 on 12 and 251 DF, p-value: < 2.22e-16

#Plot

fit <- training %>% 
  model('ARIMA' = ARIMA(Value),
        'Fourier_Model'= ARIMA(Value~trend(knots=yearmonth('2020 Mar'))+fourier(K=1)),
        'Fourier_Model2'= ARIMA(Value~trend(knots=yearmonth('2020 Mar'))+fourier(K=3)),
        'ETS' = ETS(Value),
        'Ensemble' = ((ARIMA(Value)+ETS(Value) +  TSLM(Value ~ season() + trend()))/3),
        "Linear" = TSLM(Value ~ season() + trend())
       )

fcst <- fit %>% forecast(test)

fcst %>% 
  autoplot(test, level = NULL) +
  autolayer(test) +
  labs(title = "Model Forecasts of 2022 Sales") +
  ylab("Sales ($ in Millions)") 
## Plot variable not specified, automatically selected `.vars = Value`

Accuracy Metrics

accuracy_metrics = rbind("ETS" = ETS_accuracy, 
                         "ARIMA" = ARIMA_accuracy,
                         "LM" = LM_accuracy, 
                         "Ensemble" = ensemble_accuracy,
                         "Fourier 1" = fourier_accuracy,
                         "Fourier 2" = fourier_accuracy2
                         )
accuracy_metrics <- accuracy_metrics %>% arrange(RMSE)
accuracy_metrics
## # A tibble: 6 × 10
##   .model                .type     ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
## * <chr>                 <chr>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Fourier_Model         Test  -1756.  3063.  2158. -2.39  2.86   NaN   NaN 0.121
## 2 Fourier_Model         Test  -2068.  3250.  2342. -2.78  3.10   NaN   NaN 0.135
## 3 ARIMA(Value)          Test   5307.  6980.  6610.  6.09  8.03   NaN   NaN 0.423
## 4 ETS(Value)            Test   6986.  8014.  7393.  8.25  8.86   NaN   NaN 0.529
## 5 (ARIMA(Value) + ETS(… Test   9314. 10248.  9543. 11.1  11.4    NaN   NaN 0.489
## 6 TSLM(Value ~ season(… Test  15648. 16281. 15648. 18.9  18.9    NaN   NaN 0.497