Load Data

sticker_data <- read.csv("/Users/emilybroderick/Desktop/R/PAF/Final/train.csv")
sticker_test <- read.csv("/Users/emilybroderick/Desktop/R/PAF/Final/test.csv")

Clean and Convert to Tsibble

# Clean raw data
sticker_data <- sticker_data %>%
  mutate(date = as.Date(date))

# Compute year cutoffs without unevenly separating groups
global_cutoff <- sticker_data %>%
  summarise(cut = sort(unique(date))[floor(0.8 * n_distinct(date))]) %>%
  pull(cut)

train_raw <- sticker_data %>% filter(date <= global_cutoff)
test_raw  <- sticker_data %>% filter(date >  global_cutoff)

# Convert to tsibble
train <- train_raw %>%
  dplyr::select(country, store, product, date, num_sold) %>%
  as_tsibble(key = c(country, store, product), index = date) 

test <- test_raw %>%
  dplyr::select(country, store, product, date, num_sold) %>%
  as_tsibble(key = c(country, store, product), index = date)

Aggregate Data

# Create hierarchical training set
train_agg <- train %>%
  aggregate_key((country / store) * product, num_sold = sum(num_sold), .auto = FALSE)

# Filter aggregated training set
train_top_level <- train_agg %>%
  filter(is_aggregated(country),
         is_aggregated(store),
         is_aggregated(product)) %>%
  dplyr::select(date, num_sold) %>% # Remove keys entirely
  as_tsibble(index = date)

# Create hierarchical test set
test_agg <- test %>%
  aggregate_key((country / store) * product, num_sold = sum(num_sold), .auto = FALSE)

# Filter aggregated test set
test_top_level <- test_agg %>%
  filter(is_aggregated(country),
         is_aggregated(store),
         is_aggregated(product)) %>%
  dplyr::select(date, num_sold) %>% # Remove keys entirely
  as_tsibble(index = date)

# Create hierarchy for full dataset (kaggle forecast)
sticker_agg <- sticker_data %>%
  rename(num_sold = num_sold) %>%
  as_tsibble(key = c(country, store, product), index = date) %>%
  aggregate_key((country / store) * product, num_sold = sum(num_sold), .auto = FALSE)

# Filter aggregated full dataset (plotting)
sticker_top_level <- sticker_agg %>%
  filter(is_aggregated(country),
         is_aggregated(store),
         is_aggregated(product)) %>%
  dplyr::select(date, num_sold) %>% # Remove keys entirely
  as_tsibble(index = date)

Create Features

# Dynamic Regression Feature Creation

all_dates <- train_top_level %>% 
  distinct(date) %>% 
  arrange(date) %>% 
  pull(date)

# Create features for aggregated data
test_top_level <- test_top_level %>%
  mutate(month = month(date),
         wday = wday(date, label = TRUE, week_start = 1),
         is_weekend = wday %in% c("Sat", "Sun"),
         business_day = !(is_weekend)
  ) %>%
  arrange(date) %>%
  mutate(lag1 = lag(num_sold, 1),
         lag7 = lag(num_sold, 7),
         lag28 = lag(num_sold, 28),
         roll7_mean = slider::slide_dbl(num_sold, mean, .before = 6, .complete = TRUE)
  )

train_top_level <- train_top_level %>%
  mutate(month = month(date),
         wday = wday(date, label = TRUE, week_start = 1),
         is_weekend = wday %in% c("Sat", "Sun"),
         business_day = !(is_weekend)
  ) %>%
  arrange(date) %>%
  mutate(lag1 = lag(num_sold, 1),
         lag7 = lag(num_sold, 7),
         lag28 = lag(num_sold, 28),
         roll7_mean = slider::slide_dbl(num_sold, mean, .before = 6, .complete = TRUE)
  )

Initial Visualizations

Time Plot

# Create a time plot to get an idea of what the data look like
time_plot <- autoplot(train_top_level, num_sold) +
  labs(title = "Sticker Sales",
       y = "Number Sold")

time_plot

Seasonal Plot

# Create a seasonal plot of the years by month
seasonal_plot_yr <- train_top_level %>% 
  gg_season(num_sold, period = "year") +
  labs(y="Number Sold", 
       title="Seasonal Plot: Sticker Sales")

seasonal_plot_yr

ACF Graph

# Check ACF features for stationarity
acf_feat <- train_top_level %>% 
  features(num_sold, feat_acf)

acf_feat %>%
  gt() %>%
  tab_header(title = "Time Series ACF Features")

acf <- acf(train_top_level$num_sold, main="ACF of Sticker Sales")

PACF Graph

# Check PACF features
pacf_feat <- train_top_level %>% 
  features(num_sold, feat_pacf)

pacf_feat %>%
  gt() %>%
  tab_header(title = "Time Series PACF Features")

pacf <- pacf(train_top_level$num_sold, main = "PACF of Sticker Sales")

Decompose the Training Set

Additive STL Decomposition

add_dcmp <- train_top_level %>%
  model(stl = STL(num_sold)) 
components(add_dcmp) %>%
  autoplot() + # Plot the decomp
  labs(title = "Additive STL Decomposition")

Multiplicative STL Decomposition

# Mult decomp
mult_dcmp <- train_top_level %>%
  model(stl = STL(log(num_sold))) # Log transform
components(mult_dcmp) %>%
  autoplot() + # Plot the decomp
  labs(title = "Multiplicative STL Decomposition")

Which is Best?

# Compare accuracy
decomp <- train_top_level %>% model(add = STL(num_sold),
                                       mult = STL(log(num_sold)))
acc_decomp <- accuracy(decomp)

# Filter out important accuracy measures with important models
acc_summary_tbl <- acc_decomp %>%
  dplyr::select(Model = .model, RMSE, MAE, MAPE, ACF1)

# Make the table presentable
acc_summary_tbl %>%
  gt() %>%
  tab_header(title = "Additive vs. Multiplicative STL Decomposition")

Aggregated Modeling

ETS (M,Ad,M)

The ETS model was chosen based on lowest AICc.

# Fit the model
ets_fit <- train_top_level %>%
  model(ETS(log(num_sold)))

# View selected model and info
report(ets_fit)

# Forecast model
ets_fc <- forecast(ets_fit, new_data = test_top_level)

# Plot forecast
ets_plot <- ets_fc %>%
  autoplot(sticker_top_level %>% 
             filter(!is.na(num_sold)), level = c(80, 95)) +
  geom_line(aes(y = .mean), data = ets_fc, linetype = 1) +
  labs(title = "Sticker Sales with ETS () Forecast",
       y = "Number Sold")

ets_plot

ARIMA (3,0,2)(1,1,0)

# Fit model
arst_fit <- train_top_level %>%
  model(ARIMA(log(num_sold)))

# View selected model and info
report(arst_fit)

# Forecast model
arst_fc <- forecast(arst_fit, new_data = test_top_level)

# Plot forecast
arst_plot <- arst_fc %>%
  autoplot(sticker_top_level %>% 
             filter(!is.na(num_sold)), level = c(80, 95)) +
  geom_line(aes(y = .mean), data = arst_fc, linetype = 1) +
  labs(title = "Sticker Sales with ARIMA () Forecast",
       y = "Number Sold")

arst_plot

Dynamic Regression

# Build models
dhr_fit <- model(train_top_level,
                 `K = 1` = ARIMA(log(num_sold) ~ month + is_weekend + lag1 + lag7 + 
                                   roll7_mean + fourier(K=1)),
                 `K = 2` = ARIMA(log(num_sold) ~ month + is_weekend + lag1 + lag7 + 
                                   roll7_mean + fourier(K=2)),
                 `K = 3` = ARIMA(log(num_sold) ~ month + is_weekend + lag1 + lag7 + 
                                   roll7_mean + fourier(K=3))
)

# Compare model AICc and BIC
glance(dhr_fit)

# Forecast model
dhr_fc <- forecast(dhr_fit, new_data = test_top_level)

# Plot model
dhr_plot <- dhr_fc %>%
  autoplot(sticker_top_level %>% 
             filter(!is.na(num_sold)), level = c(80, 95)) +
  geom_line(aes(y = .mean), data = dhr_fc, linetype = 1) +
  labs(title = "Sticker Sales with DHR Forecast",
       y = "Number Sold")

dhr_plot

Neural Network Model - Failed

nn_fit <- train_top_level %>%
  model(NNETAR = NNETAR(log(num_sold)))

nn_fc <- forecast(nn_fit, new_data = test_top_level)

nn_plot <- nn_fc %>%
  autoplot(sticker_top_level %>% 
             filter(!is.na(num_sold)), level = c(80, 95)) +
  geom_line(aes(y = .mean), data = nn_fc, linetype = 1) +
  labs(title = "Sticker Sales with Neural Network Forecast",
       y = "Number Sold")

nn_plot

Model Accuracy Comparison

All Models

# Build models all together
fits <- train_top_level %>%
  model(
    ETS = ETS(log(num_sold)),
    ARIMA = ARIMA(log(num_sold)),
    `DHR (K = 3)` = ARIMA(log(num_sold) ~ month + is_weekend + lag1 + lag7 + 
                                   roll7_mean + fourier(K=3)),
    Ensemble = ((ARIMA(log(num_sold) ~ month + is_weekend + lag1 + lag7 + 
                                   roll7_mean + fourier(K=3))) + 
                  ARIMA(log(num_sold)) +
                  ETS(log(num_sold))) / 3
  )

# Forecast models
fc <- forecast(fits, new_data = test_top_level)

Compare Accuracy

# Get accuracy stats
compare <- accuracy(fc, test_top_level) 

# Filter out important accuracy measures with important models
summary_tbl <- compare %>%
  dplyr::select(Model = .model, RMSE, MAE, MAPE, ACF1)

summary_tbl %>%
  gt() %>%
  tab_header(title = "Model Comparison")

Hierarchical Modeling

Bottom-Up ETS

# Load sample submission
sample_sub <- read.csv("/Users/emilybroderick/Desktop/R/PAF/Final/sample_submission.csv",
                       stringsAsFactors = FALSE) 

# Prepare test data
sticker_test <- sticker_test %>%
  mutate(date = as.Date(date))

# test bottom-level tsibble
test_bottom <- sticker_test %>%
  as_tsibble(key = c(country, store, product), index = date)

# Create num_sold variable
test_bottom$num_sold <- NA

# Aggregate to hierarchical structure
test_hts <- test_bottom %>%
  aggregate_key((country / store) * product, num_sold = NA)

Build Model and Forecast

# Build model on full train.csv
ets_fit <- sticker_agg %>%
  model(ets = ETS(log(num_sold))) %>%
  reconcile(ETS = bottom_up(ets))

# Forecast for test horizon
bu_fc <- ets_fit %>%
  forecast(new_data = test_hts)

Submit to Kaggle

# Convert forecasts to a clean tibble
fc_tbl <- bu_fc %>%
  as_tibble() %>%
  filter(
    .model == "ETS",
    !is_aggregated(country),
    !is_aggregated(store),
    !is_aggregated(product)
  ) %>%
  mutate(
    num_sold = exp(.mean),
    num_sold = pmax(0, num_sold),
    num_sold = round(num_sold)
  ) %>%
  dplyr::select(country, store, product, date, num_sold)

# Add ID back 
fc_tbl <- sticker_test %>%
  mutate(date = as.Date(date)) %>%
  left_join(
    fc_tbl,
    by = c("country", "store", "product", "date")
  ) %>%
  dplyr::select(id, num_sold) %>%
  arrange(id) %>% # ensure correct order

# Write CSV
write.csv(fc_tbl, "submission.csv", row.names = FALSE)

Kaggle Score

Score: 1.45 x 10^172

Username: broderem

Sticker Sales Forecasting — Final Project

Emily Broderick (Kaggle username: Broderem)

2025-12-04

Load Data

Clean and Convert to Tsibble

Aggregate Data

Create Features

Initial Visualizations

Time Plot

Seasonal Plot

ACF Graph

PACF Graph

Decompose the Training Set

Additive STL Decomposition

Multiplicative STL Decomposition

Which is Best?

Aggregated Modeling

ETS (M,Ad,M)

ARIMA (3,0,2)(1,1,0)

Dynamic Regression

Neural Network Model - Failed

Model Accuracy Comparison

All Models

Compare Accuracy

Hierarchical Modeling

Bottom-Up ETS

Build Model and Forecast

Submit to Kaggle

Kaggle Score