tidy data
df <- read.csv("https://raw.githubusercontent.com/yli1048/yli1048/refs/heads/607/sample.csv")
glimpse(df)
## Rows: 230,090
## Columns: 4
## $ Date <chr> "2010-01-01", "2010-01-02", "2010-01-03", "2010-01-04", "2…
## $ store <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ product <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ number_sold <int> 801, 810, 818, 796, 808, 812, 830, 812, 817, 832, 834, 826…
df$Date <- ymd(df$Date)
glimpse(df)
## Rows: 230,090
## Columns: 4
## $ Date <date> 2010-01-01, 2010-01-02, 2010-01-03, 2010-01-04, 2010-01-0…
## $ store <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ product <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ number_sold <int> 801, 810, 818, 796, 808, 812, 830, 812, 817, 832, 834, 826…
sub_df <- df %>%
filter(store == 0 & product == 0)
nrow(sub_df)
## [1] 3287
sales <- select(sub_df, Date, number_sold)
glimpse(sales)
## Rows: 3,287
## Columns: 2
## $ Date <date> 2010-01-01, 2010-01-02, 2010-01-03, 2010-01-04, 2010-01-0…
## $ number_sold <int> 801, 810, 818, 796, 808, 812, 830, 812, 817, 832, 834, 826…
test_set_start_date <- as.Date("2016-12-31")
train_set <- subset(sales, Date < test_set_start_date)
test_set <- subset(sales, Date >= test_set_start_date)
dim(train_set)
## [1] 2556 2
dim(test_set)
## [1] 731 2
averaging forecasting
train_set_avg <- mean(train_set$number_sold)
simple_avg_predictions <- data.frame(
Date = test_set$Date,
number_sold = rep(train_set_avg, nrow(test_set))
)
glimpse(train_set_avg)
## num 827
series_ts <- ts(train_set$number_sold, frequency = 12)
glimpse(series_ts)
## Time-Series [1:2556] from 1 to 214: 801 810 818 796 808 812 830 812 817 832 ...
ma3 <- ma(series_ts, order = 3, centre = FALSE)
ma6 <- ma(series_ts, order = 6, centre = FALSE)
ma12 <- ma(series_ts, order = 12, centre = FALSE)
ma3_forecast <- forecast(ma3, h = nrow(test_set))
ma6_forecast <- forecast(ma6, h = nrow(test_set))
ma12_forecast <- forecast(ma12, h = nrow(test_set))
ma_forecast_df <- data.frame(
Date = test_set$Date,
MA3 = ma3_forecast$mean,
MA6 = ma6_forecast$mean,
MA12 = ma12_forecast$mean
)
glimpse(ma_forecast_df)
## Rows: 731
## Columns: 4
## $ Date <date> 2016-12-31, 2017-01-01, 2017-01-02, 2017-01-03, 2017-01-04, 2017…
## $ MA3 <dbl> 826.3337, 826.3337, 826.3337, 826.3337, 826.3337, 826.3337, 826.3…
## $ MA6 <dbl> 827.0614, 827.4481, 827.8271, 828.1985, 828.5625, 828.9192, 829.2…
## $ MA12 <dbl> 824.6271, 825.1669, 825.6959, 826.2143, 826.7224, 827.2203, 827.7…
ses <- HoltWinters(series_ts, beta = FALSE, gamma = FALSE)
des <- HoltWinters(series_ts, gamma = FALSE)
tes <- HoltWinters(series_ts)
ses_forecast <- forecast(ses, h = nrow(test_set))
des_forecast <- forecast(des, h = nrow(test_set))
tes_forecast <- forecast(tes, h = nrow(test_set))
exsm_forecast_df <- data.frame(
Date = test_set$Date,
SES = ses_forecast$mean,
DES = des_forecast$mean,
TES = tes_forecast$mean
)
glimpse(exsm_forecast_df)
## Rows: 731
## Columns: 4
## $ Date <date> 2016-12-31, 2017-01-01, 2017-01-02, 2017-01-03, 2017-01-04, 2017…
## $ SES <dbl> 825.4735, 825.4735, 825.4735, 825.4735, 825.4735, 825.4735, 825.4…
## $ DES <dbl> 829.2283, 829.9168, 830.6053, 831.2938, 831.9824, 832.6709, 833.3…
## $ TES <dbl> 830.5500, 831.8447, 832.4659, 833.8433, 833.5984, 832.8094, 831.4…
tes_seasonal_add <- HoltWinters(series_ts, seasonal = "additive")
tes_seasonal_mul <- HoltWinters(series_ts, seasonal = "multiplicative")
tes_seasonal_add_forecast <- forecast(tes_seasonal_add, h = nrow(test_set))
tes_seasonal_mul_forecast <- forecast(tes_seasonal_mul, h = nrow(test_set))
exsm_tes_forecast_df <- data.frame(
Date = test_set$Date,
TES = tes_forecast$mean,
TESAdd = tes_seasonal_add_forecast$mean,
TESMul = tes_seasonal_mul_forecast$mean
)
glimpse(exsm_tes_forecast_df)
## Rows: 731
## Columns: 4
## $ Date <date> 2016-12-31, 2017-01-01, 2017-01-02, 2017-01-03, 2017-01-04, 20…
## $ TES <dbl> 830.5500, 831.8447, 832.4659, 833.8433, 833.5984, 832.8094, 831…
## $ TESAdd <dbl> 830.5500, 831.8447, 832.4659, 833.8433, 833.5984, 832.8094, 831…
## $ TESMul <dbl> 830.5082, 831.8705, 832.4786, 833.9290, 833.6180, 832.6958, 831…
ar_model <- Arima(series_ts, order = c(1, 0, 0))
ma_model <- Arima(series_ts, order = c(0, 0, 1))
arma_model <- Arima(series_ts, order = c(1, 0, 1))
arima_model <- Arima(series_ts, order = c(1, 1, 1))
auto_arima_no_season_model <- auto.arima(series_ts, seasonal = FALSE)
auto_arima_season_model <- auto.arima(series_ts, seasonal = TRUE)
ar_forecasts <- forecast(ar_model, h = nrow(test_set))
ma_forecasts <- forecast(ma_model, h = nrow(test_set))
arma_forecasts <- forecast(arma_model, h = nrow(test_set))
arima_forecasts <- forecast(arima_model, h = nrow(test_set))
auto_arima_no_season_forecasts <- forecast(auto_arima_no_season_model, h = nrow(test_set))
auto_arima_season_forecasts <- forecast(auto_arima_season_model, h = nrow(test_set))
arima_forcast_df <- data.frame(
Date = test_set$Date,
AR = ar_forecasts$mean,
MA = ma_forecasts$mean,
ARMA = arma_forecasts$mean,
ARIMA = arima_forecasts$mean,
AutoARIMANoSeason = auto_arima_no_season_forecasts$mean,
AutoARIMASeason = auto_arima_season_forecasts$mean
)
glimpse(arima_forcast_df)
## Rows: 731
## Columns: 7
## $ Date <date> 2016-12-31, 2017-01-01, 2017-01-02, 2017-01-03, 201…
## $ AR <dbl> 823.2499, 823.4835, 823.7017, 823.9056, 824.0961, 82…
## $ MA <dbl> 828.9101, 826.8067, 826.8067, 826.8067, 826.8067, 82…
## $ ARMA <dbl> 825.4921, 825.4956, 825.4991, 825.5026, 825.5061, 82…
## $ ARIMA <dbl> 825.8523, 825.6359, 825.6523, 825.6510, 825.6511, 82…
## $ AutoARIMANoSeason <dbl> 826.5948, 823.2160, 824.2842, 824.7978, 824.1417, 82…
## $ AutoARIMASeason <dbl> 826.5948, 823.2160, 824.2842, 824.7978, 824.1417, 82…
all_model_data <- data.frame(
Date = test_set$Date,
number_sold = test_set$number_sold,
AVG = simple_avg_predictions$number_sold,
MA = ma_forecasts$mean,
MA3 = ma3_forecast$mean,
MA6 = ma6_forecast$mean,
MA12 = ma12_forecast$mean,
SES = ses_forecast$mean,
DES = des_forecast$mean,
TES = tes_forecast$mean,
TESAdd = tes_seasonal_add_forecast$mean,
TESMul = tes_seasonal_mul_forecast$mean,
AR = ar_forecasts$mean,
ARMA = arma_forecasts$mean,
ARIMA = arima_forecasts$mean,
AutoARIMANoSeason = auto_arima_no_season_forecasts$mean,
AutoARIMASeason = auto_arima_season_forecasts$mean
)
glimpse(all_model_data)
## Rows: 731
## Columns: 17
## $ Date <date> 2016-12-31, 2017-01-01, 2017-01-02, 2017-01-03, 201…
## $ number_sold <int> 842, 850, 839, 838, 835, 835, 822, 832, 824, 849, 83…
## $ AVG <dbl> 826.8083, 826.8083, 826.8083, 826.8083, 826.8083, 82…
## $ MA <dbl> 828.9101, 826.8067, 826.8067, 826.8067, 826.8067, 82…
## $ MA3 <dbl> 826.3337, 826.3337, 826.3337, 826.3337, 826.3337, 82…
## $ MA6 <dbl> 827.0614, 827.4481, 827.8271, 828.1985, 828.5625, 82…
## $ MA12 <dbl> 824.6271, 825.1669, 825.6959, 826.2143, 826.7224, 82…
## $ SES <dbl> 825.4735, 825.4735, 825.4735, 825.4735, 825.4735, 82…
## $ DES <dbl> 829.2283, 829.9168, 830.6053, 831.2938, 831.9824, 83…
## $ TES <dbl> 830.5500, 831.8447, 832.4659, 833.8433, 833.5984, 83…
## $ TESAdd <dbl> 830.5500, 831.8447, 832.4659, 833.8433, 833.5984, 83…
## $ TESMul <dbl> 830.5082, 831.8705, 832.4786, 833.9290, 833.6180, 83…
## $ AR <dbl> 823.2499, 823.4835, 823.7017, 823.9056, 824.0961, 82…
## $ ARMA <dbl> 825.4921, 825.4956, 825.4991, 825.5026, 825.5061, 82…
## $ ARIMA <dbl> 825.8523, 825.6359, 825.6523, 825.6510, 825.6511, 82…
## $ AutoARIMANoSeason <dbl> 826.5948, 823.2160, 824.2842, 824.7978, 824.1417, 82…
## $ AutoARIMASeason <dbl> 826.5948, 823.2160, 824.2842, 824.7978, 824.1417, 82…
mae_values <- c()
mape_values <- c()
rmse_values <- c()
for (col in names(all_model_data)[3:ncol(all_model_data)]) {
mae_values <- c(mae_values, mae(all_model_data$number_sold, all_model_data[[col]]))
mape_values <- c(mape_values, mape(all_model_data$number_sold, all_model_data[[col]]))
rmse_values <- c(rmse_values, rmse(all_model_data$number_sold, all_model_data[[col]]))
}
model_test_set_metrics <- data.frame(
Model = names(all_model_data)[3:ncol(all_model_data)],
MAE = mae_values,
MAPE = mape_values,
RMSE = rmse_values
)
glimpse(model_test_set_metrics)
## Rows: 15
## Columns: 4
## $ Model <chr> "AVG", "MA", "MA3", "MA6", "MA12", "SES", "DES", "TES", "TESAdd"…
## $ MAE <dbl> 33.39954, 33.39686, 33.45732, 34.61466, 35.74166, 33.56493, 247.…
## $ MAPE <dbl> 0.03969737, 0.03969411, 0.03974447, 0.04202932, 0.04360139, 0.03…
## $ RMSE <dbl> 38.41682, 38.41612, 38.52340, 39.63951, 41.46537, 38.73067, 293.…
result <- arrange(model_test_set_metrics, MAE)
print(result)
## Model MAE MAPE RMSE
## 1 MA 33.39686 0.03969411 38.41612
## 2 AVG 33.39954 0.03969737 38.41682
## 3 MA3 33.45732 0.03974447 38.52340
## 4 AR 33.46154 0.03977048 38.45220
## 5 ARIMA 33.54208 0.03981422 38.68629
## 6 SES 33.56493 0.03983333 38.73067
## 7 ARMA 33.57911 0.03988560 38.64412
## 8 AutoARIMANoSeason 33.62708 0.03995766 38.64806
## 9 AutoARIMASeason 33.62708 0.03995766 38.64806
## 10 MA6 34.61466 0.04202932 39.63951
## 11 MA12 35.74166 0.04360139 41.46537
## 12 DES 247.00190 0.30061958 293.23411
## 13 TESMul 291.46369 0.35442087 344.80077
## 14 TES 291.51791 0.35448658 344.86567
## 15 TESAdd 291.51791 0.35448658 344.86567