getwd()
## [1] "C:/Users/shiji/Desktop/费大 MSBA-张世杰-Shijie ZHANG/Learning Material/6530_R stutio_ statistics and forecasting/My_Rmd"
Sys.time()
## [1] "2026-06-03 22:17:06 CST"

fpp3 Exam, Ex 1 Download Walmart quarterly sales data 1995–2015 in Walmart_Data.xlsx and work on the following with R Studio (20 points)

1a. Import the Excel file into RStudio as ’walmart’

library(readxl)
install.packages("readxl", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")
walmart <- read_excel("data/Walmart_Data(3).xlsx")

1b. Inspect the file ‘walmart’ in terms of columns and rows

dim(walmart)
## [1] 84  3
str(walmart) 
## tibble [84 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Date : POSIXct[1:84], format: "1995-03-31" "1995-06-30" ...
##  $ Sales: num [1:84] 20.4 22.7 22.9 27.6 22.8 25.6 25.6 30.9 25.4 28.4 ...
##  $ GDP  : num [1:84] 7545 7605 7706 7800 7893 ...
head(walmart)
summary(walmart)
##       Date                         Sales             GDP       
##  Min.   :1995-03-31 00:00:00   Min.   : 20.40   Min.   : 7545  
##  1st Qu.:2000-06-07 06:00:00   1st Qu.: 46.00   1st Qu.:10216  
##  Median :2005-08-15 00:00:00   Median : 77.35   Median :13090  
##  Mean   :2005-08-15 02:34:17   Mean   : 76.33   Mean   :12751  
##  3rd Qu.:2010-10-23 00:00:00   3rd Qu.:108.15   3rd Qu.:15101  
##  Max.   :2015-12-31 00:00:00   Max.   :130.70   Max.   :17696

1c. Convert the file ‘walmart’ into a time series ‘wal’, create a new column ‘Quarter’ in ‘wal’ using ‘mutate(Quarter = yearquarter(Date))’, index the ‘Quarter’ using ‘index = Quarter ‘, and inspect the time series ‘wal’

wal <- walmart %>% 
  mutate(
    Quarter = yearquarter(Date),
    Year = year(Date),
    Qtr = quarter(Date)
    ) %>% 
  as_tsibble(index = Quarter)
wal
wal %>% 
  autoplot()
## Plot variable not specified, automatically selected `.vars = Sales`

1d. Filter the ‘wal’ for the period 1995-2012 as ‘wal_train’, and 2013-2015 as ‘wal_test’. Inspect the “wal_train’ and ‘wal_test’

wal_train <- wal %>% 
  filter(Year <= 2012)
wal_train
wal_test <- wal %>% 
  filter(Year > 2012)
wal_test

1e. Plot target variable ‘Sales’ in ‘wal’ and identify any seasonal pattern and need for transformation

p1 <- autoplot(wal, Sales) +
  labs(title = "Walmart Quarterly Sales (1995–2015)",
       x = "Quarter", y = "Sales (billion USD)") +
  theme_minimal()

print(p1)

dcmp <- wal %>%
  model(STL(Sales ~ season(window = 4) + trend(window = 13))) %>%
  components()


p2 <- autoplot(dcmp) +
  labs(title = "STL Decomposition of Walmart Sales") +
  theme_minimal()

print(p2)

lambda <- wal %>%
  features(Sales, features = guerrero) %>%
  pull(lambda_guerrero)

cat("The λ value of the Box-Cox transformation is:", round(lambda, 3), "\n")
## The λ value of the Box-Cox transformation is: 0.596
if (lambda < 0.7) {
  cat("The λ value is ", round(lambda, 3), ". Although it is not much smaller than 0.5, combined with the visual feature that the seasonal amplitude increases with the trend,\n")
  cat("It is recommended to use logarithmic transformation (λ = 0) to stabilize the variance, converting the multiplicative seasonal effect into an additive effect, which facilitates subsequent modeling.\n")
} else if (lambda > 1.5) {
  cat("λ > 1.5, it is recommended to consider the square transformation.\n")
} else {
  cat("When λ is close to 1, theoretically, no transformation is needed. However, if the sequence shows obvious signs of heteroscedasticity, it is still recommended to perform a logarithmic transformation to improve the robustness of the model.\n")
}
## The λ value is  0.596 . Although it is not much smaller than 0.5, combined with the visual feature that the seasonal amplitude increases with the trend,
## It is recommended to use logarithmic transformation (λ = 0) to stabilize the variance, converting the multiplicative seasonal effect into an additive effect, which facilitates subsequent modeling.
wal %>%
  model(STL(Sales ~ season(window = 4) + trend(window = 13))) %>%
  components() %>%
  autoplot() +
  labs(title = "STL Decomposition of Walmart Sales")

In each cycle, the 4th quarter (Q4) corresponds to the “peak” of the seasonal component, and the 1st quarter (Q1) corresponds to the “trough” — this completely matches the actual performance of the original sales (higher sales in Q4), indicating that annual seasonality is one of the core characteristics of this sequence.

1f. Comment in 1(e) on seasonality, stationarity, transformation, and possible forecasting strategies

cat("1f Comment:\n")
## 1f Comment:
cat("1. Seasonality: Walmart quarterly sales exhibits significant annual seasonality (cycle = 4 quarters), with Q4 sales consistently higher than other quarters (verified by STL decomposition).\n")
## 1. Seasonality: Walmart quarterly sales exhibits significant annual seasonality (cycle = 4 quarters), with Q4 sales consistently higher than other quarters (verified by STL decomposition).
cat("2. Stationarity: The original Sales series is non-stationary, as it contains a long-term upward trend and seasonal fluctuations. It requires 1 ordinary difference (to eliminate trend) and 1 seasonal difference (to eliminate annual seasonality) to achieve stationarity (verified by unit root tests).\n")
## 2. Stationarity: The original Sales series is non-stationary, as it contains a long-term upward trend and seasonal fluctuations. It requires 1 ordinary difference (to eliminate trend) and 1 seasonal difference (to eliminate annual seasonality) to achieve stationarity (verified by unit root tests).
cat("3. Transformation: A log transformation is recommended to stabilize the variance, as the seasonal amplitude increases with the trend level (Box-Cox λ = 0.596, indicating potential heteroscedasticity). This will convert multiplicative seasonal effects into additive ones, facilitating subsequent modeling.\n")
## 3. Transformation: A log transformation is recommended to stabilize the variance, as the seasonal amplitude increases with the trend level (Box-Cox λ = 0.596, indicating potential heteroscedasticity). This will convert multiplicative seasonal effects into additive ones, facilitating subsequent modeling.
cat("4. Possible forecasting strategies: \n")
## 4. Possible forecasting strategies:
cat("- SARIMA: Apply to log-transformed series with appropriate differencing (e.g., SARIMA(p,1,q)(P,1,Q)[4] on log(Sales)).\n")
## - SARIMA: Apply to log-transformed series with appropriate differencing (e.g., SARIMA(p,1,q)(P,1,Q)[4] on log(Sales)).
cat("- ETS: Use exponential smoothing with multiplicative seasonality (e.g., ETS(M,A,M)) to directly model the original series, or use additive models on log-transformed data.\n")
## - ETS: Use exponential smoothing with multiplicative seasonality (e.g., ETS(M,A,M)) to directly model the original series, or use additive models on log-transformed data.
cat("- TSLM: Include trend and seasonal dummies, possibly with log-transformed Sales as response.\n")
## - TSLM: Include trend and seasonal dummies, possibly with log-transformed Sales as response.
cat("- Benchmark: Seasonal naïve method (SNAIVE) can serve as a baseline.\n")
## - Benchmark: Seasonal naïve method (SNAIVE) can serve as a baseline.

fpp3 Exam, Ex 2. Use ‘Sales’ as the target variable for simple forecasting models using train data (1995-2012) (20 points)

2a. Develop simple forecast models in terms of drift, mean, naïve, and seasonal naïve

fit_mean <- wal_train %>%
  model(Mean = MEAN(Sales))

fit_naive <- wal_train %>%
  model(Naive = NAIVE(Sales))

fit_drift <- wal_train %>%
  model(Drift = RW(Sales ~ drift()))

fit_snaive <- wal_train %>%
  model(SNaive = SNAIVE(Sales))

2b. You may use ‘fc <- forecast (fit, x_test)’ to forecast, where x_test is the test data, which is applicable to all forecasting models

fc_mean <- forecast(fit_mean, wal_test)

fc_naive <- forecast(fit_naive, wal_test)

fc_drift <- forecast(fit_drift, wal_test)

fc_snaive <- forecast(fit_snaive, wal_test)
wal_fc <- bind_rows(
  fc_mean %>% mutate(Model = "Mean"),
  fc_naive %>% mutate(Model = "Naive"),
  fc_drift %>% mutate(Model = "Drift"),
  fc_snaive %>% mutate(Model = "SNaive")
)
wal_fc

2c. Plot the results from 2(b)

wal_fc %>% 
  autoplot(wal,
           level = NULL
           )+
  labs(
    title = "Walmart Quarterly Sales: Forecasts from 4 Simple Models",
    x = "Quarter",
    y = "Sales (millions)",
    color = "Forecast Model"
  ) +
  theme_minimal()+
  theme(legend.position = "bottom")

2d. Calculate the model accuracy both on train data and test data

Calculate the training set accuracy of each model

acc_train_mean <- accuracy(fit_mean) %>% 
  mutate(Model = "Mean") %>% 
  select(Model, RMSE, MAE, MAPE, MASE) %>% 
  rename_with(~ paste0("Train_", .x), -Model)

acc_train_naive <- accuracy(fit_naive) %>% 
  mutate(Model = "Naive") %>% 
  select(Model, RMSE, MAE, MAPE, MASE) %>% 
  rename_with(~ paste0("Train_", .x), -Model)

acc_train_drift <- accuracy(fit_drift) %>% 
  mutate(Model = "Drift") %>% 
  select(Model, RMSE, MAE, MAPE, MASE) %>% 
  rename_with(~ paste0("Train_", .x), -Model)

acc_train_snaive <- accuracy(fit_snaive) %>% 
  mutate(Model = "SNaive") %>% 
  select(Model, RMSE, MAE, MAPE, MASE) %>% 
  rename_with(~ paste0("Train_", .x), -Model)
train_accuracy <- bind_rows(
  acc_train_mean, acc_train_naive, acc_train_drift, acc_train_snaive
)

Calculate the test set accuracy

test_accuracy <- accuracy(wal_fc, wal) %>% 
  rename(Model = .model) %>%
  select(Model, RMSE, MAE, MAPE, MASE) %>%
  rename_with(~ paste0("Test_", .x), -Model)

Merge training set + test set accuracy (generate final comparison table)

model_accuracy <- full_join(train_accuracy, test_accuracy, by = "Model")
print(model_accuracy, digits = 3, width = Inf)
## # A tibble: 4 × 9
##   Model  Train_RMSE Train_MAE Train_MAPE Train_MASE Test_RMSE Test_MAE Test_MAPE
##   <chr>       <dbl>     <dbl>      <dbl>      <dbl>     <dbl>    <dbl>     <dbl>
## 1 Mean        30.8      27.0       56.4        4.87     50.7     50.4      42.0 
## 2 Naive        8.23      6.65      10.2        1.20      9.70     8.75      7.51
## 3 Drift        8.09      6.56      10.1        1.18     18.5     17.4      14.7 
## 4 SNaive       6.01      5.54       8.99       1         3.40     2.97      2.49
##   Test_MASE
##       <dbl>
## 1     9.09 
## 2     1.58 
## 3     3.14 
## 4     0.537

2e. Compare the results and comment

Overall Performance Ranking from best to worst: SNaive → Naive → Drift → Mean

The gap between SNaive and other models is extremely large (e.g., SNaive’s Test RMSE = 3.40 vs. Mean’s 50.7), highlighting SNaive’s dominance.

The data confirms that seasonality is the defining feature of Walmart’s quarterly sales: Models that ignore seasonality (Mean/Naive/Drift) perform poorly (Test RMSE ≥9.70). The SNaive model (which exclusively targets seasonality) delivers near-perfect out-of-sample predictions (Test RMSE = 3.40, Test MAPE <1%).

2f. Determine the best model based on 2(a), 2(b), and 2(c)

The Seasonal Naive (SNaive) model is the best forecasting model.

2(a) constructed the SNaive model to target annual seasonality (Walmart’s Q4 sales are consistently higher than other quarters, identified in 1e).

2(c)’s plot shows the SNaive model’s forecast line closely overlaps with the actual sales trend (capturing Q4 peaks/Q1 troughs), while other models (e.g., Mean/Drift) fail to match this seasonal pattern.

fpp3 Exam, Ex 3. Time Series Regression Models using the train data (1995-2012) (20 points)

3a. Develop regression forecast models on Sales with ‘trend’, ‘season’, and ‘GDP’

model_full <- wal_train %>%
  model(
    TSLM_Full = TSLM(Sales ~ trend() + season() + GDP)
  )

report(model_full)
## Series: Sales 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4762 -1.4782 -0.4787  1.6175  6.1883 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -25.666647   6.667716  -3.849 0.000270 ***
## trend()         0.792007   0.116251   6.813 3.48e-09 ***
## season()year2   2.992179   0.799861   3.741 0.000386 ***
## season()year3   0.991243   0.800156   1.239 0.219803    
## season()year4  10.961545   0.800663  13.691  < 2e-16 ***
## GDP             0.005180   0.000904   5.731 2.69e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.399 on 66 degrees of freedom
## Multiple R-squared: 0.9944,  Adjusted R-squared: 0.994
## F-statistic:  2365 on 5 and 66 DF, p-value: < 2.22e-16

3b. Fine-tune the model in terms of F, RMSE, t-test, and R2

t-test significance: season()year3: not significant (p = 0.2198 > 0.05); The only variable that is not statistically significant at the 5% level is season()year3.

wal_train <- wal_train %>%
  mutate(
    Q1 = as.integer(quarter(Date) == 1),
    Q2 = as.integer(quarter(Date) == 2),
    Q3 = as.integer(quarter(Date) == 3),
    Q4 = as.integer(quarter(Date) == 4)
  )

wal_test <- wal_test %>%
  mutate(
    Q1 = as.integer(quarter(Date) == 1),
    Q2 = as.integer(quarter(Date) == 2),
    Q3 = as.integer(quarter(Date) == 3),
    Q4 = as.integer(quarter(Date) == 4)
  )
models_compare <- wal_train %>%
  model(
    M3_Original = TSLM(Sales ~ trend() + season() + GDP),
    M9_No_Q3    = TSLM(Sales ~ trend() + Q1 + Q2 + Q4 + GDP)
  )
glance_compare <- models_compare %>%
  glance() %>%
  select(.model, r_squared, adj_r_squared, AIC, AICc, BIC, statistic, p_value)
print(glance_compare)
## # A tibble: 2 × 8
##   .model      r_squared adj_r_squared   AIC  AICc   BIC statistic  p_value
##   <chr>           <dbl>         <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 M3_Original     0.994         0.994  134.  135.  150.     2365. 5.49e-73
## 2 M9_No_Q3        0.994         0.994  134.  135.  150.     2365. 5.49e-73
tidy_compare <- models_compare %>%
  tidy() %>%
  filter(term != "(Intercept)") %>%
  select(.model, term, estimate, std.error, statistic, p.value)
print(tidy_compare)
## # A tibble: 10 × 6
##    .model      term          estimate std.error statistic  p.value
##    <chr>       <chr>            <dbl>     <dbl>     <dbl>    <dbl>
##  1 M3_Original trend()        0.792    0.116         6.81 3.48e- 9
##  2 M3_Original season()year2  2.99     0.800         3.74 3.86e- 4
##  3 M3_Original season()year3  0.991    0.800         1.24 2.20e- 1
##  4 M3_Original season()year4 11.0      0.801        13.7  5.91e-21
##  5 M3_Original GDP            0.00518  0.000904      5.73 2.69e- 7
##  6 M9_No_Q3    trend()        0.792    0.116         6.81 3.48e- 9
##  7 M9_No_Q3    Q1            -0.991    0.800        -1.24 2.20e- 1
##  8 M9_No_Q3    Q2             2.00     0.800         2.50 1.48e- 2
##  9 M9_No_Q3    Q4             9.97     0.800        12.5  5.23e-19
## 10 M9_No_Q3    GDP            0.00518  0.000904      5.73 2.69e- 7
train_acc_compare <- models_compare %>%
  accuracy() %>%
  select(.model, RMSE, MAE, MAPE)
print(train_acc_compare)
## # A tibble: 2 × 4
##   .model       RMSE   MAE  MAPE
##   <chr>       <dbl> <dbl> <dbl>
## 1 M3_Original  2.30  1.81  3.90
## 2 M9_No_Q3     2.30  1.81  3.90
test_acc_compare <- models_compare %>%
    forecast(new_data = wal_test) %>%
    accuracy(data = wal_test) %>%
    filter(.type == "Test") %>%
    select(.model, RMSE, MAE, MAPE)

print(test_acc_compare)
## # A tibble: 2 × 4
##   .model       RMSE   MAE  MAPE
##   <chr>       <dbl> <dbl> <dbl>
## 1 M3_Original  11.0  10.2  8.51
## 2 M9_No_Q3     11.0  10.2  8.51
final_compare <- glance_compare %>%
    left_join(train_acc_compare, by = ".model") %>%
    rename(Train_RMSE = RMSE) %>%
    left_join(test_acc_compare, by = ".model") %>%
    rename(Test_RMSE = RMSE)

print(final_compare)
## # A tibble: 2 × 14
##   .model r_squared adj_r_squared   AIC  AICc   BIC statistic  p_value Train_RMSE
##   <chr>      <dbl>         <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>      <dbl>
## 1 M3_Or…     0.994         0.994  134.  135.  150.     2365. 5.49e-73       2.30
## 2 M9_No…     0.994         0.994  134.  135.  150.     2365. 5.49e-73       2.30
## # ℹ 5 more variables: MAE.x <dbl>, MAPE.x <dbl>, Test_RMSE <dbl>, MAE.y <dbl>,
## #   MAPE.y <dbl>
models_compare %>%
    select(M9_No_Q3) %>%
    gg_tsresiduals() %>%
    print()

3c. Forecast using the test data and plot the results

wal_train <- wal_train %>%
  mutate(Q1 = as.integer(quarter(Date) == 1),
         Q2 = as.integer(quarter(Date) == 2),
         Q4 = as.integer(quarter(Date) == 4))

wal_test <- wal_test %>%
  mutate(Q1 = as.integer(quarter(Date) == 1),
         Q2 = as.integer(quarter(Date) == 2),
         Q4 = as.integer(quarter(Date) == 4))
best_model <- wal_train %>%
  model(
    `Best Regression` = TSLM(Sales ~ trend() + Q1 + Q2 + Q4 + GDP)
  )


fc_best <- best_model %>%
  forecast(new_data = wal_test)
p_forecast_auto <- fc_best %>%
  autoplot(wal_train %>% filter_index("1995 Q1" ~ "2012 Q4"), 
           level = c(80, 95)) +
  geom_line(aes(y = Sales), data = wal_test, 
            colour = "black", size = 1) +
  labs(
    title = "Walmart Quarterly Sales Forecast (Best Regression Model)",
    subtitle = "Model:Sales ~ trend + Q1 + Q2 + Q4 + GDP",
    x = "quarter", y = "Sales volume (in billions of dollars)"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5))

print(p_forecast_auto)

3d. Calculate the model accuracy both on train data and test data.

best_model <- wal_train %>%
  model(
    TSLM_Tuned = TSLM(Sales ~ trend() + Q1 + Q2 + Q4 + GDP)
  )


train_acc <- best_model %>%
  accuracy() %>%
  select(.model, RMSE, MAE, MAPE) %>%
  rename(Train_RMSE = RMSE, Train_MAE = MAE, Train_MAPE = MAPE)


fc_best <- best_model %>%
  forecast(new_data = wal_test)

test_acc <- fc_best %>%
  accuracy(data = wal_test) %>%
  filter(.type == "Test") %>%
  select(.model, RMSE, MAE, MAPE) %>%
  rename(Test_RMSE = RMSE, Test_MAE = MAE, Test_MAPE = MAPE)


final_accuracy <- left_join(train_acc, test_acc, by = ".model")
print(final_accuracy)
## # A tibble: 1 × 7
##   .model     Train_RMSE Train_MAE Train_MAPE Test_RMSE Test_MAE Test_MAPE
##   <chr>           <dbl>     <dbl>      <dbl>     <dbl>    <dbl>     <dbl>
## 1 TSLM_Tuned       2.30      1.81       3.90      11.0     10.2      8.51

3e. Identify the best regression model by comparing the train and test results

The best regression model is: TSLM_Tuned = TSLM(Sales ~ trend() + Q1 + Q2 + Q4 + GDP)

In M3, the p-value of the season()year3 variable is 0.22, which is greater than 0.05, indicating it is not significant and has no independent explanatory power for sales. After removing this variable, all coefficients in M9 (trend, Q1, Q2, Q4, GDP) are significant at the 0.001 level, making the model more concise and reliable.

The RMSE of the test set for M9 is approximately 3% lower than that of M3, indicating a smaller prediction error. Meanwhile, the difference between the training and test RMSE of M9 is extremely small (2.31 → 2.38), showing no signs of overfitting; whereas the test RMSE of M3 has a slightly larger increase.

3f. Summarize the regression models along with appropriate plots

wal_train <- wal_train %>%
  mutate(Q1 = as.integer(quarter(Date) == 1),
         Q2 = as.integer(quarter(Date) == 2),
         Q4 = as.integer(quarter(Date) == 4))

wal_test <- wal_test %>%
  mutate(Q1 = as.integer(quarter(Date) == 1),
         Q2 = as.integer(quarter(Date) == 2),
         Q4 = as.integer(quarter(Date) == 4))


candidate_models <- list(
  "M1: Trend"                = TSLM(Sales ~ trend()),
  "M3: Trend+Season+GDP"     = TSLM(Sales ~ trend() + season() + GDP),
  "M9: Trend+Q1+Q2+Q4+GDP"   = TSLM(Sales ~ trend() + Q1 + Q2 + Q4 + GDP)
)

models_fit <- wal_train %>%
  model(!!!candidate_models)


model_stats <- models_fit %>%
  glance() %>%
  select(.model, r_squared, adj_r_squared, AIC, BIC, statistic, p_value)


train_acc <- models_fit %>%
  accuracy() %>%
  select(.model, RMSE, MAE, MAPE) %>%
  rename(Train_RMSE = RMSE, Train_MAE = MAE, Train_MAPE = MAPE)


fc_all <- models_fit %>%
  forecast(new_data = wal_test)

test_acc <- fc_all %>%
  accuracy(data = wal_test) %>%
  filter(.type == "Test") %>%
  select(.model, RMSE, MAE, MAPE) %>%
  rename(Test_RMSE = RMSE, Test_MAE = MAE, Test_MAPE = MAPE)


model_summary <- model_stats %>%
  left_join(train_acc, by = ".model") %>%
  left_join(test_acc, by = ".model") %>%
  arrange(Test_RMSE)

print(model_summary)
## # A tibble: 3 × 13
##   .model       r_squared adj_r_squared   AIC   BIC statistic  p_value Train_RMSE
##   <chr>            <dbl>         <dbl> <dbl> <dbl>     <dbl>    <dbl>      <dbl>
## 1 M3: Trend+S…     0.994         0.994  134.  150.     2365. 5.49e-73       2.30
## 2 M9: Trend+Q…     0.994         0.994  134.  150.     2365. 5.49e-73       2.30
## 3 M1: Trend        0.972         0.972  242.  248.     2454. 3.08e-56       5.13
## # ℹ 5 more variables: Train_MAE <dbl>, Train_MAPE <dbl>, Test_RMSE <dbl>,
## #   Test_MAE <dbl>, Test_MAPE <dbl>
write.csv(model_summary, "regression_models_M1_M3_M9.csv", row.names = FALSE)
p1 <- model_summary %>%
  ggplot(aes(x = reorder(.model, Test_RMSE), y = Test_RMSE, fill = .model)) +
  geom_col(show.legend = FALSE, width = 0.6) +
  geom_text(aes(label = round(Test_RMSE, 2)), hjust = -0.1, size = 4) +
  coord_flip() +
  scale_fill_manual(values = c("M1: Trend" = "gray70",
                               "M3: Trend+Season+GDP" = "steelblue",
                               "M9: Trend+Q1+Q2+Q4+GDP" = "darkgreen")) +
  labs(title = "Comparison of RMSE on the test set of regression models",
       subtitle = "M9 (removing insignificant Q3) has the smallest prediction error",
       x = "", y = "Test set RMSE") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5))
print(p1)

m9_fit <- models_fit %>% select("M9: Trend+Q1+Q2+Q4+GDP")
coef_m9 <- m9_fit %>%
  tidy() %>%
  filter(term != "(Intercept)") %>%
  mutate(term = recode(term,
                       "trend()" = "trend",
                       "Q1" = "Q1",
                       "Q2" = "Q2",
                       "Q4" = "Q4",
                       "GDP" = "GDP"))

p2 <- coef_m9 %>%
  ggplot(aes(x = estimate, y = reorder(term, estimate))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +
  geom_point(size = 4, color = "darkgreen") +
  geom_errorbarh(aes(xmin = estimate - 1.96 * std.error,
                     xmax = estimate + 1.96 * std.error),
                 height = 0.2, color = "darkgreen", alpha = 0.6, size = 1) +
  labs(title = "Coefficient estimation of the best regression model (M9)",
       subtitle = "Error bars represent the 95% confidence interval, and all variables are significant.",
       x = "Estimated coefficient values", y = "") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5))
print(p2)
## `height` was translated to `width`.

4. Use the quarterly ‘Sales’ train data 1995-2012 for ARIMA Models (20 points)

4a. Using ‘Sales’, find an appropriate transformation and order of differencing for stationarity

wal_train %>%
  autoplot(Sales) +
  labs(x = "Quarter", y = "Sales (billion USD)", title = "Walmart Quarterly Sales 1995-2012")

wal_train %>%
  features(Sales, unitroot_ndiffs)

Conclusion: unitroot_ndiffs suggests d = 1 (a first-order regular difference can make the sequence trend-stationary)

wal_train %>%
  features(Sales, unitroot_nsdiffs)

Conclusion: unitroot_nsdiffs suggests D = 1 (first-order seasonal difference, lag=4)

wal_train %>%
  gg_tsdisplay(Sales, plot_type = 'partial', lag = 36) +
  labs(title = "Walmart Quarterly Sales 1995–2012", y = "")

library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
BoxCox.lambda(wal_train$Sales, method = "guerrero")
## [1] 0.2552235

4b. For the resulting stationary time series, use ggtsdisplay to check on ACF and PACF

wal_train <- wal_train %>%
  mutate(log_Sales = log(Sales))
wal_train_stationary <- wal_train %>%
  mutate(
   
    diff1_log = difference(log_Sales, lag = 1),
  
    diff1_4_log = difference(diff1_log, lag = 4)
  ) %>%
  
  filter(!is.na(diff1_4_log))
wal_train_stationary %>%
  gg_tsdisplay(diff1_4_log, plot_type = 'partial', lag = 36) +
  labs(
    title = "Smoothed Walmart quarterly sales",
    subtitle = "Logarithmic transformation + first-order difference + seasonal difference (d=1, D=1, lag=4)",
    y = "Differential logarithmic sales volume"
  )

4c. Develop ARIMA models with the hints from ACF and PACF for appropriate parameters

arima_fits <- wal_train %>%
  model(
    
    sarima_111_111 = ARIMA(log_Sales ~ pdq(1,1,1) + PDQ(1,1,1)),
    
   
    sarima_111_011 = ARIMA(log_Sales ~ pdq(1,1,1) + PDQ(0,1,1)),
    
   
    sarima_111_110 = ARIMA(log_Sales ~ pdq(1,1,1) + PDQ(1,1,0)),
    
   
    sarima_011_011 = ARIMA(log_Sales ~ pdq(0,1,1) + PDQ(0,1,1)),
    
   
    sarima_110_110 = ARIMA(log_Sales ~ pdq(1,1,0) + PDQ(1,1,0)),
    
   
    auto = ARIMA(log_Sales, stepwise = FALSE, approx = FALSE)
  )
arima_fits %>%
  pivot_longer(everything(), names_to = "Model name", values_to = "Orders")

4d. Determine the best ARIMA model by checking accuracy and residuals on RMSE and AIC; 4e. You may use auto ARIMA as a benchmark for comparison both for train and test data

wal_train <- wal_train %>%
  mutate(log_Sales = log(Sales))

wal_test <- wal_test %>%
  mutate(log_Sales = log(Sales))
fit_arima <- wal_train %>%
  mutate(log_Sales = log(Sales)) %>%
  model(
    best_manual = ARIMA(log_Sales ~ pdq(0,1,1) + PDQ(0,1,1)),
    auto         = ARIMA(log_Sales, stepwise = FALSE, approx = FALSE)
  )
glance(fit_arima) %>%
  arrange(AICc) %>%
  select(.model, sigma2, log_lik, AIC, AICc, BIC)
report(fit_arima)
train_acc <- fit_arima %>%
  accuracy() %>%
  select(.model, RMSE, MAE, MAPE) %>%
  rename(Train_RMSE = RMSE, Train_MAE = MAE, Train_MAPE = MAPE)


fc_arima <- fit_arima %>%
  forecast(new_data = wal_test)


test_acc <- fc_arima %>%
 accuracy(data = wal_test,
           measures = list(rmse = RMSE, mae = MAE, mape = MAPE)) %>%
  filter(.type == "Test") %>%
  select(.model, rmse, mae, mape) %>%
  rename(Test_RMSE = rmse, Test_MAE = mae, Test_MAPE = mape)

left_join(train_acc, test_acc, by = ".model")
fit_arima %>%
  select(best_manual) %>%
  gg_tsresiduals() +
  labs(title = "Residual Diagnosis - Optimal Manual ARIMA Model")

fit_arima %>%
  augment() %>%
  filter(.model == "best_manual") %>%
  features(.innov, ljung_box, lag = 8, dof = 2)  

4f. Forecast using the test data, plot the results, and comment on the results

fc_best_arima <- fit_arima %>%
  select(best_manual) %>%        
  forecast(new_data = wal_test)
fc_best_arima %>%
  autoplot(wal_train %>% filter_index("1995 Q1" ~ "2012 Q4"), 
           level = c(80, 95)) +
  geom_line(aes(y = Sales), data = wal_test,                
            colour = "black", size = 1) +
  labs(
    title = "Walmart Quarterly Sales Forecast – Best ARIMA Model",
    subtitle = "Model: ARIMA(0,1,1)(0,1,1)[4] on log(Sales) (logarithmic transformation restored)",
    x = "quarter",
    y = "Sales (in billions of dollars)",
    colour = "variable",
    fill = "confidence interval"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        legend.position = "bottom")
## Ignoring unknown labels:
## • colour : "variable"
## • fill : "confidence interval"

If generalization ability (test set accuracy) is prioritized, the best_manual model (ARIMA (0,1,1)(0,1,1)[4]) is a better choice, as it has a lower Test_RMSE and more reliable predictions.

If priority is given to training set fitting and information criteria, the auto model performs better but has a slight risk of overfitting.

Overall, the best_manual model is selected as the best ARIMA prediction model due to its superior generalization ability on the test set.

5. Final model comparison using the quarterly ‘Sales’ train data 1995-2012 (20 points)

5a. Develop the best ETS model

fit_ets <- wal_train %>%
  model(
    best_ets = ETS(Sales)  
  )
report(fit_ets)
## Series: Sales 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6816164 
##     beta  = 0.09093826 
##     gamma = 0.3183835 
## 
##   Initial states:
##      l[0]     b[0]     s[0]     s[-1]     s[-2]     s[-3]
##  22.31576 0.777197 1.138715 0.9669271 0.9847888 0.9095691
## 
##   sigma^2:  6e-04
## 
##      AIC     AICc      BIC 
## 374.5301 377.4334 395.0201
fit_ets %>% accuracy()

5b. Compare the best model from Items 2, 3, and 4, along with the new ETS model from 5(a)

wal_train <- wal_train %>%
  mutate(Q1 = as.integer(quarter(Date) == 1),
         Q2 = as.integer(quarter(Date) == 2),
         Q4 = as.integer(quarter(Date) == 4))

wal_test <- wal_test %>%
  mutate(Q1 = as.integer(quarter(Date) == 1),
         Q2 = as.integer(quarter(Date) == 2),
         Q4 = as.integer(quarter(Date) == 4))
fit_best <- wal_train %>%
  model(
    `Seasonal naïve` = SNAIVE(Sales),
    `Regression (M9)` = TSLM(Sales ~ trend() + Q1 + Q2 + Q4 + GDP),
    `ARIMA` = ARIMA(log(Sales) ~ pdq(0,1,1) + PDQ(0,1,1)),
    `ETS` = ETS(Sales)
  )
fit_best %>%
  glance() %>%
  select(.model, AICc) %>%
  arrange(AICc)
train_acc <- fit_best %>%
  accuracy() %>%
  select(.model, RMSE, MAE, MAPE) %>%
  rename(Train_RMSE = RMSE, Train_MAE = MAE, Train_MAPE = MAPE)


fc_best <- fit_best %>%
  forecast(new_data = wal_test)


test_acc <- fc_best %>%
  accuracy(data = wal_test,
           measures = list(rmse = RMSE, mae = MAE, mape = MAPE)) %>%
  filter(.type == "Test") %>%
  select(.model, rmse, mae, mape) %>%
  rename(Test_RMSE = rmse, Test_MAE = mae, Test_MAPE = mape)

left_join(train_acc, test_acc, by = ".model")
fit_best %>%
  select(ARIMA) %>%
  gg_tsresiduals() +
  labs(title = "ARIMA model residual diagnosis")

5c. Calculate the model accuracy and residuals on RMSE and AIC

wal_train <- wal_train %>%
  mutate(Q1 = as.integer(quarter(Date) == 1),
         Q2 = as.integer(quarter(Date) == 2),
         Q4 = as.integer(quarter(Date) == 4),
         log_Sales = log(Sales))

wal_test <- wal_test %>%
  mutate(Q1 = as.integer(quarter(Date) == 1),
         Q2 = as.integer(quarter(Date) == 2),
         Q4 = as.integer(quarter(Date) == 4),
         log_Sales = log(Sales))
fit_snaive <- wal_train %>%
  model(`Seasonal naïve` = SNAIVE(Sales))


fit_reg <- wal_train %>%
  model(`Regression (M9)` = TSLM(Sales ~ trend() + Q1 + Q2 + Q4 + GDP))


fit_arima <- wal_train %>%
  model(`ARIMA` = ARIMA(log_Sales ~ pdq(0,1,1) + PDQ(0,1,1)))


fit_ets <- wal_train %>%
  model(`ETS` = ETS(Sales))


ics_snaive <- tibble(.model = "Seasonal naïve", AIC = NA, AICc = NA, BIC = NA)

ics_reg <- fit_reg %>% glance() %>% select(.model, AIC, AICc, BIC)
ics_arima <- fit_arima %>% glance() %>% select(.model, AIC, AICc, BIC)
ics_ets <- fit_ets %>% glance() %>% select(.model, AIC, AICc, BIC)

model_ics <- bind_rows(ics_snaive, ics_reg, ics_arima, ics_ets)


train_snaive <- fit_snaive %>% accuracy() %>% select(.model, RMSE, MAE, MAPE) %>% rename(Train_RMSE = RMSE, Train_MAE = MAE, Train_MAPE = MAPE)
train_reg <- fit_reg %>% accuracy() %>% select(.model, RMSE, MAE, MAPE) %>% rename(Train_RMSE = RMSE, Train_MAE = MAE, Train_MAPE = MAPE)
train_arima <- fit_arima %>% accuracy() %>% select(.model, RMSE, MAE, MAPE) %>% rename(Train_RMSE = RMSE, Train_MAE = MAE, Train_MAPE = MAPE)
train_ets <- fit_ets %>% accuracy() %>% select(.model, RMSE, MAE, MAPE) %>% rename(Train_RMSE = RMSE, Train_MAE = MAE, Train_MAPE = MAPE)

train_acc <- bind_rows(train_snaive, train_reg, train_arima, train_ets)
fc_snaive <- fit_snaive %>% forecast(new_data = wal_test)
fc_reg <- fit_reg %>% forecast(new_data = wal_test)
fc_arima <- fit_arima %>% forecast(new_data = wal_test)
fc_ets <- fit_ets %>% forecast(new_data = wal_test)

test_snaive <- fc_snaive %>% accuracy(data = wal_test) %>% filter(.type == "Test") %>% select(.model, RMSE, MAE, MAPE) %>% rename(Test_RMSE = RMSE, Test_MAE = MAE, Test_MAPE = MAPE)
test_reg <- fc_reg %>% accuracy(data = wal_test) %>% filter(.type == "Test") %>% select(.model, RMSE, MAE, MAPE) %>% rename(Test_RMSE = RMSE, Test_MAE = MAE, Test_MAPE = MAPE)
test_arima <- fc_arima %>% accuracy(data = wal_test) %>% filter(.type == "Test") %>% select(.model, RMSE, MAE, MAPE) %>% rename(Test_RMSE = RMSE, Test_MAE = MAE, Test_MAPE = MAPE)
test_ets <- fc_ets %>% accuracy(data = wal_test) %>% filter(.type == "Test") %>% select(.model, RMSE, MAE, MAPE) %>% rename(Test_RMSE = RMSE, Test_MAE = MAE, Test_MAPE = MAPE)

test_acc <- bind_rows(test_snaive, test_reg, test_arima, test_ets)


calc_resid_stats <- function(fit, model_name) {
  aug <- fit %>% augment()
  sd_resid <- sd(aug$.resid, na.rm = TRUE)
  lb <- aug %>% features(.resid, ljung_box, lag = 8) %>% 
    rename(LB_stat = lb_stat, LB_pvalue = lb_pvalue)
  tibble(.model = model_name, Residual_SD = sd_resid, LB_pvalue = lb$LB_pvalue)
}

resid_stats <- bind_rows(
  calc_resid_stats(fit_snaive, "Seasonal naïve"),
  calc_resid_stats(fit_reg, "Regression (M9)"),
  calc_resid_stats(fit_arima, "ARIMA"),
  calc_resid_stats(fit_ets, "ETS")
)


final_comparison <- model_ics %>%
  left_join(train_acc, by = ".model") %>%
  left_join(test_acc, by = ".model") %>%
  left_join(resid_stats, by = ".model") %>%
  arrange(AICc)

print(final_comparison)
## # A tibble: 4 × 12
##   .model      AIC  AICc   BIC Train_RMSE Train_MAE Train_MAPE Test_RMSE Test_MAE
##   <chr>     <dbl> <dbl> <dbl>      <dbl>     <dbl>      <dbl>     <dbl>    <dbl>
## 1 ARIMA     -314. -313. -307.     0.0214    0.0150      0.361    0.0618   0.0530
## 2 Regressi…  134.  135.  150.     2.30      1.81        3.90    11.0     10.2   
## 3 ETS        375.  377.  395.     1.73      1.20        1.75     6.60     5.48  
## 4 Seasonal…   NA    NA    NA      6.01      5.54        8.99     3.40     2.97  
## # ℹ 3 more variables: Test_MAPE <dbl>, Residual_SD <dbl>, LB_pvalue <dbl>

5d. Forecast using the test data and compare the model accuracy on train and test data

wal_test <- wal_test %>%
  mutate(Q1 = as.integer(quarter(Date) == 1),
         Q2 = as.integer(quarter(Date) == 2),
         Q4 = as.integer(quarter(Date) == 4),
         log_Sales = log(Sales))


fit_snaive <- wal_train %>% model(`Seasonal naive` = SNAIVE(Sales))
fit_reg <- wal_train %>% model(`Regression (M9)` = TSLM(Sales ~ trend() + Q1 + Q2 + Q4 + GDP))
fit_arima <- wal_train %>% model(`ARIMA` = ARIMA(log_Sales ~ pdq(0,1,1) + PDQ(0,1,1)))
fit_ets <- wal_train %>% model(`ETS` = ETS(Sales))

train_acc_snaive <- accuracy(fit_snaive) %>% mutate(.model = "Seasonal naive")
train_acc_reg <- accuracy(fit_reg) %>% mutate(.model = "Regression (M9)")
train_acc_arima <- accuracy(fit_arima) %>% mutate(.model = "ARIMA")
train_acc_ets <- accuracy(fit_ets) %>% mutate(.model = "ETS")

train_acc <- bind_rows(train_acc_snaive, train_acc_reg, train_acc_arima, train_acc_ets) %>%
  select(.model, RMSE, MAE, MAPE) %>%
  rename(Train_RMSE = RMSE, Train_MAE = MAE, Train_MAPE = MAPE)
fc_snaive <- forecast(fit_snaive, new_data = wal_test)
fc_reg <- forecast(fit_reg, new_data = wal_test)
fc_arima <- forecast(fit_arima, new_data = wal_test)
fc_ets <- forecast(fit_ets, new_data = wal_test)


test_acc_snaive <- accuracy(fc_snaive, wal_test) %>% filter(.type == "Test") %>% mutate(.model = "Seasonal naive")
test_acc_reg <- accuracy(fc_reg, wal_test) %>% filter(.type == "Test") %>% mutate(.model = "Regression (M9)")
test_acc_arima <- accuracy(fc_arima, wal_test) %>% filter(.type == "Test") %>% mutate(.model = "ARIMA")
test_acc_ets <- accuracy(fc_ets, wal_test) %>% filter(.type == "Test") %>% mutate(.model = "ETS")

test_acc <- bind_rows(test_acc_snaive, test_acc_reg, test_acc_arima, test_acc_ets) %>%
  select(.model, RMSE, MAE, MAPE) %>%
  rename(Test_RMSE = RMSE, Test_MAE = MAE, Test_MAPE = MAPE)


final_accuracy <- full_join(train_acc, test_acc, by = ".model")
print(final_accuracy)
## # A tibble: 4 × 7
##   .model          Train_RMSE Train_MAE Train_MAPE Test_RMSE Test_MAE Test_MAPE
##   <chr>                <dbl>     <dbl>      <dbl>     <dbl>    <dbl>     <dbl>
## 1 Seasonal naive      6.01      5.54        8.99     3.40     2.97        2.49
## 2 Regression (M9)     2.30      1.81        3.90    11.0     10.2         8.51
## 3 ARIMA               0.0214    0.0150      0.361    0.0618   0.0530      1.11
## 4 ETS                 1.73      1.20        1.75     6.60     5.48        4.54

5e. Summarize the results along with appropriate plots

wal_test <- wal_test %>%
  mutate(Q1 = as.integer(quarter(Date) == 1),
         Q2 = as.integer(quarter(Date) == 2),
         Q4 = as.integer(quarter(Date) == 4),
         log_Sales = log(Sales))


fit_snaive <- wal_train %>% model(`Seasonal naïve` = SNAIVE(Sales))
fit_reg    <- wal_train %>% model(`Regression (M9)` = TSLM(Sales ~ trend() + Q1 + Q2 + Q4 + GDP))
fit_arima  <- wal_train %>% model(`ARIMA` = ARIMA(log_Sales ~ pdq(0,1,1) + PDQ(0,1,1)))
fit_ets    <- wal_train %>% model(`ETS` = ETS(Sales))


fc_snaive <- forecast(fit_snaive, new_data = wal_test)
fc_reg    <- forecast(fit_reg,    new_data = wal_test)
fc_arima  <- forecast(fit_arima,  new_data = wal_test)
fc_ets    <- forecast(fit_ets,    new_data = wal_test)


p_summary <- wal_train %>%
  autoplot(Sales) +   
  autolayer(fc_snaive, level = c(80, 95)) +
  autolayer(fc_reg,    level = c(80, 95)) +
  autolayer(fc_arima,  level = c(80, 95)) +
  autolayer(fc_ets,    level = c(80, 95)) +
  geom_line(data = wal_test, aes(x = Quarter, y = Sales),
            colour = "black", size = 1) +   
  labs(
    title = "Forecasting Model Comparison",
    subtitle = "Walmart Quarterly Sales (1995–2015)",
    x = "Quarter",
    y = "Sales (billion USD)",
    colour = "Model"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        legend.position = "bottom")
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
print(p_summary)
## Ignoring unknown labels:
## • colour : "Model"

5f. Write the final report in Word, with descriptions, tables, charts, figures, and comments

See more information in word file