Data from US Census Bureau: Monthly Sales for Food Services and Drinking Places ($ in Millions)
setwd("~/Documents/MSAE/Predictive Analytics")
retaildata <- read.csv("Servicedata.csv")
retaildata <- retaildata %>%
mutate(Month = yearmonth(Month)) %>%
tsibble(index = Month)
retaildata$Value = as.numeric(retaildata$Value)
training <- retaildata %>%
filter(year(Month) < 2022 )
test <- retaildata %>%
filter(year(Month) == 2022)
retaildata %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Value`
training %>% autoplot() +
labs(title = "Monthly Sales for Food Services and Drinking Places, 2000-2021") +
xlab("Month") +
ylab("Sales ($ in Millions)")
## Plot variable not specified, automatically selected `.vars = Value`
training %>% gg_season(Value) +
labs(title = "Seasonality of Sales for Food Services and Drinking Places") +
xlab("Month") +
ylab("Sales ($ in Millions)")
training %>%
model(classical_decomposition(Value, type = "multiplicative")) %>%
components() %>%
autoplot() +
labs(title ="Multiplicative Decomposition")
## Warning: Removed 6 rows containing missing values (`geom_line()`).
ARIMA_model <- training %>%
model(ARIMA(Value))
ARIMA_fcst <- ARIMA_model %>% forecast(test)
ARIMA_plot <- ARIMA_model %>% forecast(test) %>% autoplot(test) +
labs(title = "ARIMA model") +
xlab("Month") +
ylab("Sales ($ in Millions)")
ARIMA_accuracy <- accuracy(ARIMA_fcst, test)
report(ARIMA_model)
## Series: Value
## Model: ARIMA(2,1,2)(1,0,0)[12]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sar1
## 0.0368 0.5739 -0.0066 -0.8800 0.4759
## s.e. 0.0755 0.0707 0.0390 0.0385 0.0624
##
## sigma^2 estimated as 7724650: log likelihood=-2457.79
## AIC=4927.59 AICc=4927.92 BIC=4949.02
ARIMA_model %>% gg_tsresiduals(lag_max = 12) + labs(title = "ARIMA model")
ARIMA_plot
fourier_model <- training %>%
model(Fourier_Model=ARIMA(Value~trend(knots=yearmonth('2020 Mar'))+fourier(K=2)))
fourier_fcst <- fourier_model %>% forecast(test)
fourier_plot <- fourier_model %>% forecast(test) %>% autoplot(test) +
labs(title = "Fourier model (k=2)") +
xlab("Month") +
ylab("Sales ($ in Millions)")
fourier_accuracy <- accuracy(fourier_fcst, test)
report(fourier_model)
## Series: Value
## Model: LM w/ ARIMA(2,0,1)(1,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 ma1 sar1
## -0.0241 0.5373 0.9332 0.4295
## s.e. 0.0662 0.0618 0.0269 0.0622
## trend(knots = yearmonth("2020 Mar"))trend
## 146.2423
## s.e. 14.3574
## trend(knots = yearmonth("2020 Mar"))trend_243 fourier(K = 2)C1_12
## 758.2750 -1700.0997
## s.e. 204.8425 812.9883
## fourier(K = 2)S1_12 fourier(K = 2)C2_12 fourier(K = 2)S2_12 intercept
## 383.5063 -176.1013 -297.9610 23268.807
## s.e. 818.1705 473.6961 474.7696 2079.874
##
## sigma^2 estimated as 7048245: log likelihood=-2452.12
## AIC=4928.25 AICc=4929.49 BIC=4971.16
fourier_model %>% gg_tsresiduals(lag_max = 12) + labs(title = "Fourier model (k=2)")
fourier_plot
fourier_model2 <- training %>%
model(Fourier_Model=ARIMA(Value~trend(knots=yearmonth('2020 Mar'))+fourier(K=3)))
fourier_fcst2 <- fourier_model2 %>% forecast(test)
fourier_plot2 <- fourier_model2 %>% forecast(test) %>% autoplot(test) +
labs(title = "Fourier model (k=3)") +
xlab("Month") +
ylab("Sales ($ in Millions)")
fourier_accuracy2 <- accuracy(fourier_fcst2, test)
report(fourier_model2)
## Series: Value
## Model: LM w/ ARIMA(2,0,1)(1,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 ma1 sar1
## -0.0330 0.5463 0.9339 0.4053
## s.e. 0.0655 0.0609 0.0261 0.0632
## trend(knots = yearmonth("2020 Mar"))trend
## 146.6728
## s.e. 13.8087
## trend(knots = yearmonth("2020 Mar"))trend_243 fourier(K = 3)C1_12
## 740.7073 -1690.3141
## s.e. 201.9509 773.0843
## fourier(K = 3)S1_12 fourier(K = 3)C2_12 fourier(K = 3)S2_12
## 369.7693 -176.2643 -300.7616
## s.e. 778.0500 449.9278 450.9584
## fourier(K = 3)C3_12 fourier(K = 3)S3_12 intercept
## -438.0900 -509.5384 23237.69
## s.e. 324.5387 324.6039 1997.41
##
## sigma^2 estimated as 7003567: log likelihood=-2450.09
## AIC=4928.18 AICc=4929.87 BIC=4978.24
fourier_model2 %>% gg_tsresiduals(lag_max = 12) + labs(title = "Fourier model (k=3)")
fourier_plot2
ETS_model <- training %>%
model(ETS(Value))
ETS_fcst <- ETS_model %>% forecast(test)
ETS_plot <- ETS_model %>% forecast(test) %>% autoplot(test) +
labs(title = "ETS model")+
xlab("Month") +
ylab("Sales ($ in Millions)")
ETS_accuracy <- accuracy(ETS_fcst, test)
report(ETS_model)
## Series: Value
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.7039439
## beta = 0.0001221414
## gamma = 0.0001075596
## phi = 0.9796545
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 24785.08 149.0824 1.011296 0.9570255 1.015934 0.9854907 1.042318 1.038841
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 1.028025 1.044183 0.9918669 1.028442 0.9244421 0.9321344
##
## sigma^2: 0.0022
##
## AIC AICc BIC
## 5481.783 5484.575 5546.150
ETS_model %>% gg_tsresiduals(lag_max = 12) + labs(title="ETS Model")
ETS_plot
LM_model <- training %>%
model(TSLM(Value ~ season() + trend()))
LM_fcst <- LM_model %>% forecast(test)
LM_plot <- LM_model %>% forecast(test) %>% autoplot(test) +
labs(title = "Linear model") +
xlab("Month") +
ylab("Sales ($ in Millions)")
LM_accuracy <- accuracy(LM_fcst, test)
report(LM_model)
## Series: Value
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -29303.3 -2039.0 303.9 1539.7 13077.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18989.775 954.431 19.896 < 2e-16 ***
## season()year2 -188.701 1216.112 -0.155 0.876814
## season()year3 3959.826 1216.125 3.256 0.001285 **
## season()year4 2138.443 1216.147 1.758 0.079902 .
## season()year5 4573.879 1216.177 3.761 0.000211 ***
## season()year6 3779.042 1216.217 3.107 0.002106 **
## season()year7 4509.886 1216.265 3.708 0.000257 ***
## season()year8 4615.140 1216.322 3.794 0.000186 ***
## season()year9 2114.121 1216.387 1.738 0.083431 .
## season()year10 3557.602 1216.461 2.925 0.003765 **
## season()year11 1005.401 1216.545 0.826 0.409339
## season()year12 3554.655 1216.636 2.922 0.003798 **
## trend() 162.246 3.261 49.759 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4033 on 251 degrees of freedom
## Multiple R-squared: 0.91, Adjusted R-squared: 0.9057
## F-statistic: 211.4 on 12 and 251 DF, p-value: < 2.22e-16
LM_model %>% gg_tsresiduals(lag_max = 12) + labs(title="LM Model")
LM_plot
## Series: Value
## Model: COMBINATION
## Combination: Value * 0.333333333333333
##
## ======================================
##
## Series: Value
## Model: COMBINATION
## Combination: Value + Value
##
## ==========================
##
## Series: Value
## Model: COMBINATION
## Combination: Value + Value
##
## ==========================
##
## Series: Value
## Model: ARIMA(2,1,2)(1,0,0)[12]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sar1
## 0.0368 0.5739 -0.0066 -0.8800 0.4759
## s.e. 0.0755 0.0707 0.0390 0.0385 0.0624
##
## sigma^2 estimated as 7724650: log likelihood=-2457.79
## AIC=4927.59 AICc=4927.92 BIC=4949.02
##
## Series: Value
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.7039439
## beta = 0.0001221414
## gamma = 0.0001075596
## phi = 0.9796545
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 24785.08 149.0824 1.011296 0.9570255 1.015934 0.9854907 1.042318 1.038841
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 1.028025 1.044183 0.9918669 1.028442 0.9244421 0.9321344
##
## sigma^2: 0.0022
##
## AIC AICc BIC
## 5481.783 5484.575 5546.150
##
##
## Series: Value
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -29303.3 -2039.0 303.9 1539.7 13077.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18989.775 954.431 19.896 < 2e-16 ***
## season()year2 -188.701 1216.112 -0.155 0.876814
## season()year3 3959.826 1216.125 3.256 0.001285 **
## season()year4 2138.443 1216.147 1.758 0.079902 .
## season()year5 4573.879 1216.177 3.761 0.000211 ***
## season()year6 3779.042 1216.217 3.107 0.002106 **
## season()year7 4509.886 1216.265 3.708 0.000257 ***
## season()year8 4615.140 1216.322 3.794 0.000186 ***
## season()year9 2114.121 1216.387 1.738 0.083431 .
## season()year10 3557.602 1216.461 2.925 0.003765 **
## season()year11 1005.401 1216.545 0.826 0.409339
## season()year12 3554.655 1216.636 2.922 0.003798 **
## trend() 162.246 3.261 49.759 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4033 on 251 degrees of freedom
## Multiple R-squared: 0.91, Adjusted R-squared: 0.9057
## F-statistic: 211.4 on 12 and 251 DF, p-value: < 2.22e-16
#Plot
fit <- training %>%
model('ARIMA' = ARIMA(Value),
'Fourier_Model'= ARIMA(Value~trend(knots=yearmonth('2020 Mar'))+fourier(K=1)),
'Fourier_Model2'= ARIMA(Value~trend(knots=yearmonth('2020 Mar'))+fourier(K=3)),
'ETS' = ETS(Value),
'Ensemble' = ((ARIMA(Value)+ETS(Value) + TSLM(Value ~ season() + trend()))/3),
"Linear" = TSLM(Value ~ season() + trend())
)
fcst <- fit %>% forecast(test)
fcst %>%
autoplot(test, level = NULL) +
autolayer(test) +
labs(title = "Model Forecasts of 2022 Sales") +
ylab("Sales ($ in Millions)")
## Plot variable not specified, automatically selected `.vars = Value`
accuracy_metrics = rbind("ETS" = ETS_accuracy,
"ARIMA" = ARIMA_accuracy,
"LM" = LM_accuracy,
"Ensemble" = ensemble_accuracy,
"Fourier 1" = fourier_accuracy,
"Fourier 2" = fourier_accuracy2
)
accuracy_metrics <- accuracy_metrics %>% arrange(RMSE)
accuracy_metrics
## # A tibble: 6 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## * <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Fourier_Model Test -1756. 3063. 2158. -2.39 2.86 NaN NaN 0.121
## 2 Fourier_Model Test -2068. 3250. 2342. -2.78 3.10 NaN NaN 0.135
## 3 ARIMA(Value) Test 5307. 6980. 6610. 6.09 8.03 NaN NaN 0.423
## 4 ETS(Value) Test 6986. 8014. 7393. 8.25 8.86 NaN NaN 0.529
## 5 (ARIMA(Value) + ETS(… Test 9314. 10248. 9543. 11.1 11.4 NaN NaN 0.489
## 6 TSLM(Value ~ season(… Test 15648. 16281. 15648. 18.9 18.9 NaN NaN 0.497