df <- read.xlsx("/Users/pin.lyu/Desktop/BC_Class_Folder/Predictive_Analytics/Data_Sets/Electronic_Monthly_Sales.xlsx")
Data Information
Comments: 2% of the data is missing due to uncollected data from the remaining time in 2024. Therefore, this does not pose any issues in the project.
# ETS model
ets_model <- ets(sales_ts)
summary(ets_model)
## ETS(A,N,N)
##
## Call:
## ets(y = sales_ts)
##
## Smoothing parameters:
## alpha = 0.3986
##
## Initial states:
## l = 8295.5605
##
## sigma: 127.3708
##
## AIC AICc BIC
## 669.6733 670.2066 675.3487
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 11.34781 124.7443 99.17447 0.1177874 1.163785 0.5019775 0.05556396
# ARIMA model
sarima_model <- auto.arima(sales_ts, seasonal = T)
summary(sarima_model)
## Series: sales_ts
## ARIMA(0,1,1)(0,0,1)[12]
##
## Coefficients:
## ma1 sma1
## -0.4635 -0.5115
## s.e. 0.1531 0.1928
##
## sigma^2 = 13032: log likelihood = -296.42
## AIC=598.84 AICc=599.39 BIC=604.46
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 20.35017 110.6074 88.46596 0.2268693 1.038404 0.4477757 0.00791384
dhr_model_1 <- auto.arima(sales_ts, xreg = fourier(sales_ts, K = 1),
seasonal = T, lambda = 0)
dhr1_forcasts <- forecast(dhr_model_1, xreg = fourier(sales_ts, K = 1, h = 12))
g1 <- autoplot(dhr1_forcasts) +
labs(title = " Dynamic Harmonic Regression Model (K = 1)")
dhr_model_2 <- auto.arima(sales_ts, xreg = fourier(sales_ts, K = 2),
seasonal = T, lambda = 0)
dhr2_forcasts <- forecast(dhr_model_2, xreg = fourier(sales_ts, K = 2, h = 12))
g2 <- autoplot(dhr2_forcasts) +
labs(title = " Dynamic Harmonic Regression Model (K = 2)")
dhr_model_3 <- auto.arima(sales_ts, xreg = fourier(sales_ts, K = 3),
seasonal = T, lambda = 0)
dhr3_forcasts <- forecast(dhr_model_3, xreg = fourier(sales_ts, K = 3, h = 12))
g3 <- autoplot(dhr3_forcasts) +
labs(title = " Dynamic Harmonic Regression Model (K = 3)")
dhr_model_4 <- auto.arima(sales_ts, xreg = fourier(sales_ts, K = 4),
seasonal = T, lambda = 0)
dhr4_forcasts <- forecast(dhr_model_4, xreg = fourier(sales_ts, K = 4, h = 12))
g4 <- autoplot(dhr4_forcasts) +
labs(title = " Dynamic Harmonic Regression Model (K = 4)")
dhr_model_5 <- auto.arima(sales_ts, xreg = fourier(sales_ts, K = 5),
seasonal = T, lambda = 0)
dhr5_result <- forecast(dhr_model_5, xreg = fourier(sales_ts, K = 5, h = 12))
g5 <- autoplot(dhr5_result) +
labs(title = " Dynamic Harmonic Regression Model (K = 5)")
dhr_model_6 <- auto.arima(sales_ts, xreg = fourier(sales_ts, K = 6),
seasonal = T, lambda = 0)
dhr6_forcasts <- forecast(dhr_model_6, xreg = fourier(sales_ts, K = 6, h = 12))
g6 <- autoplot(dhr6_forcasts) +
labs(title = " Dynamic Harmonic Regression Model (K = 6)")
summary(dhr_model_5) # Most optimal
## Series: sales_ts
## Regression with ARIMA(0,1,1)(1,0,0)[12] errors
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ma1 sar1 S1-12 C1-12 S2-12 C2-12 S3-12 C3-12
## -0.4861 -0.4973 0.0015 -0.0050 0.0017 -0.0005 -0.0011 -0.0014
## s.e. 0.1352 0.1304 0.0023 0.0022 0.0016 0.0016 0.0014 0.0014
## S4-12 C4-12 S5-12 C5-12
## -0.0012 -0.0009 -0.0004 0.0015
## s.e. 0.0014 0.0014 0.0014 0.0013
##
## sigma^2 = 0.0001983: log likelihood = 141.85
## AIC=-257.71 AICc=-247 BIC=-233.38
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 22.45725 102.6735 80.01182 0.2544302 0.9390807 0.4049846
## ACF1
## Training set -0.0179369
# Perform forecasts
ets_result <- forecast(ets_model, h = 12)
sarima_result <- forecast(sarima_model, h = 12)
# Average the result of three models
average_result <- (ets_result$mean + sarima_result$mean + dhr5_result$mean) /3
# Create graphs of all models
## ETS
ets_final <- autoplot(ets_result) +
labs(y = "Total Sales (Million)", title = "ETS Model Forecasts (2014 - 2015)") +
theme_classic()
## SARIMA
sarima_final <- autoplot(sarima_result) +
labs(y = "Total Sales (Million)", title = "SARIMA Model Forecasts (2014 - 2015)") +
theme_classic()
## Dynamic Harmonic Regression
dhr5_final <- autoplot(dhr5_result) +
labs(y = "Total Sales (Million)", title = "DHR Model Forecasts (2014 - 2015)") +
theme_classic()
# Plot together
grid.arrange(ets_final, sarima_final, dhr5_final, nrow = 3)
As observed above, the mean predictions of the three models poorly matched the real trend from 2014 to 2015. However, it’s worth noting that the SARIMA and Dynamic Harmonic Regression (DHR) models performed relatively well, as the real trend from 2014 to 2015 fell within their 80% prediction interval.
In terms of AIC value, the DHR model has the lowest value among all models, at 257.7, which is significantly lower than others. Thus, the DHR model appears to be the best-performing model.
The averaged model is relatively good considering it is an average of the three models, and both the SARIMA and DHR models perform well. However, due to its inclusion of elements from the ETS model, it is not as effective as the other models, as evident from the graph above. Therefore, I would rank the quality of the models in the following order: DHR, SARIMA, Average, and ETS.