Forecasting Sales (An Attempt at Intermittent Demand)

Data exploration

Ho Huu Binh

9/3/2022

Load Packages

# Time series decomposition
library("feasts") # Using STL
library('tidyverse')
library('tsibble')
library('fable')
library('stR') # Seasonal-Trend decomposition using Regression (STR)
library('bsts')


# Date + forecast
library('lubridate') # date and time
library('prophet') # time series analysis
library('timetk') # time series analysis
library('timeDate') # time series analysis

# general data manipulation
library('dplyr') # data manipulation
library('readr') # input/output
library('vroom') # input/output
library('tibble') # data wrangling
library('tidyr') # data wrangling

# Prophet model 
library('fable.prophet')

# Quadratic Solver
library('quadprog')

# Decomposition and forecast components
library("stsm") # Model BSTS
require("stsm.class")
library("KFKSDS") # Smoothing, forecasting with Kalman Filter

# Forecasting with ADAM (smooth package)
library("smooth")

# General visualization
library('ggplot2') # visualisation
library('scales') # visualisation
library('patchwork') # visualisation
library('RColorBrewer') # visualisation
library('corrplot') # visualisation


# specific visualisation
library('alluvial') # visualisation
library('ggrepel') # visualisation
library('ggforce') # visualisation
library('ggridges') # visualisation
library('gganimate') # animations
library('GGally') # visualisation
library('ggthemes') # visualisation
library('wesanderson') # visualisation
library('kableExtra') # display


# install.packages("modeltime.ensemble") 


# Install Python Dependencies
# modeltime.gluonts::install_gluonts()
# 
# library(tidymodels)
# library(modeltime)
# library(modeltime.resample)
# library(modeltime.gluonts)
# library(modeltime.ensemble)

Load data

rm(list = ls())

# Prevent a clash between dplyr and MASS over the select() function

select <- dplyr::select

train_df <- vroom("D:\\Dataset\\sales_train_validation.csv", delim = ",", col_types = cols())
price_df <- vroom("D:\\Dataset\\sell_prices.csv", delim = ",",
                  col_types = cols())
calendar_df <- vroom("D:\\Dataset\\calendar.csv", col_types = cols())

test_df <- vroom("D:\\Dataset\\sales_train_evaluation.csv", delim = ",",
                 col_types = cols())

# Retrieving special holidays 
holidays <- c(USThanksgivingDay(2011:2016), USChristmasDay(2011:2016),
              USGoodFriday(2011:2016), USNewYearsDay(2011:2016),
              USIndependenceDay(2011:2016), USElectionDay(2011:2016),
              USLaborDay(2011:2016))@Data
holidays <- as.Date(holidays) 
holidays_df <- holidays %>%
  as.data.frame() %>% bind_cols(as.data.frame(rep(c("ThanksGiving", "Christmas",
                                                    "GoodFriday","Newyear",
                                                    "Independay","Elecday","Laborday"), each = 6)))
names(holidays_df) <- c("date", "holiday")
holidays_df <- holidays_df %>%
  as_tsibble(index = date)

Function to transform dataframe from wide to long format

wide_to_long <- function(data){
  start_date <- date("2011-01-29")
  
  data %>%
    select(id, starts_with("d_")) %>%
    pivot_longer(starts_with("d_"), names_to = "dates",
                 values_to = "sales") %>% # group days columns with values equal to number of sales
    mutate(dates = as.integer(str_remove(dates, "d_"))) %>% # remove the prefix "d_"
    mutate(dates = start_date + dates - 1) %>% # operation to transform into date format
    mutate(dates = ymd(dates)) %>% # transform to date class
    mutate(id = str_remove(id, "_validation")) # remove suffix "_validation" for each type of product
}
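As a quick sanity check (a minimal sketch; the two rows sliced below are arbitrary and only for illustration), the helper can be applied to a couple of series and the reshaped output inspected:

# Reshape two example series from wide to long format and inspect the first rows
train_df %>%
  slice(1:2) %>%
  wide_to_long() %>%
  head()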

Prepare data

start_date = date("2011-01-29")

temp <- train_df %>%
  group_by(cat_id, state_id) %>% 
  summarise_at(vars(starts_with("d_")), sum) %>% 
  ungroup() %>% 
  select(ends_with("id"), starts_with("d_")) %>%  
  pivot_longer(starts_with("d_"), names_to = "dates", values_to = "sales") %>%
  mutate(dates = as.integer(str_remove(dates, "d_"))) %>% 
  mutate(dates = start_date + dates - 1) %>%
  mutate(dates = ymd(dates)) %>%
  mutate(dummy = if_else(dates %in% holidays, 1, 0)) %>%
  mutate(dow = wday(dates, label = TRUE))
  # filter(!str_detect(as.character(dates), '..-12-25'))
  # replace_na(list(sales =  0))

temp_ts <- temp %>%
  as_tsibble(key = c(cat_id, state_id), index = dates)

food.CA <- temp_ts %>%
  filter(cat_id == "FOODS" & state_id == "CA") %>%
  select(dates, sales, dummy, dow) 

id_examples <- str_c(c("FOODS_2_092_CA_1", "HOUSEHOLD_2_071_TX_2",
                       "HOBBIES_1_348_WI_3"), "_validation")

sales <- train_df %>%
  filter(id %in% id_examples) %>%  
  wide_to_long() %>% 
  mutate(state_id = str_sub(id, -4, -3))

# Price dataframe

prices <- price_df %>% 
  unite("id", item_id, store_id, sep = "_") %>% 
  filter(id %in% str_remove(id_examples, "_validation")) %>%
  select(-wm_yr_wk)


example_df <- sales %>%
  mutate(holiday = if_else(dates %in% holidays, 1, 0))%>% 
  mutate(dow = wday(dates, label = TRUE))

# start and end times for holiday intervals
holiday_intervals <- example_df %>%
  group_by(state_id) %>% 
  mutate(foo = lead(holiday, 1),
         bar = lag(holiday, 1)) %>% 
  mutate(holiday_start = if_else(holiday == 1 & bar == 0, dates, date(NA_character_)),
        holiday_end = if_else(holiday == 1 & foo == 0, dates, date(NA_character_))) %>% 
  ungroup()

holiday_intervals <- holiday_intervals %>% 
  select(state_id, holiday_start) %>% 
  filter(!is.na(holiday_start)) %>% 
  bind_cols(holiday_intervals %>% 
    select(holiday_end) %>% 
    filter(!is.na(holiday_end)))
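The lead()/lag() trick above flags the first and last day of each consecutive run of holiday dates. A toy illustration with a made-up holiday flag vector:

# Toy illustration of the start/end flags for a run of holidays (made-up data)
tibble(holiday = c(0, 1, 1, 0, 1, 0)) %>%
  mutate(foo = lead(holiday, 1),
         bar = lag(holiday, 1),
         is_start = holiday == 1 & bar == 0,
         is_end   = holiday == 1 & foo == 0)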

Visualizing

example_df %>% 
  filter(between(dates, date("2015-04-01"), date("2015-10-01"))) %>% 
  left_join(holiday_intervals, by = c("state_id", "dates" = "holiday_start")) %>% 
  mutate(holiday_start = if_else(is.na(holiday_end), date(NA_character_), dates))-> example_df_plot
  
ggplot(example_df_plot, aes(dates, sales, group = id)) +
  geom_rect(data = example_df_plot[example_df_plot$id == unique(example_df$id)[1] ,], aes(xmin = holiday_start, xmax = holiday_end + ddays(1), ymin = -Inf, ymax = Inf), fill = "grey90", na.rm = TRUE, alpha = 0.2) +
  geom_rect(data = example_df_plot[example_df_plot$id == unique(example_df$id)[2],], aes(xmin = holiday_start, xmax = holiday_end + ddays(1), ymin = -Inf, ymax = Inf), fill = "grey90", na.rm = TRUE, alpha = 0.2) + 
  geom_rect(data = example_df_plot[example_df_plot$id == unique(example_df$id)[3],], aes(xmin = holiday_start, xmax = holiday_end + ddays(1), ymin = -Inf, ymax = Inf), fill = "grey90", na.rm = TRUE, alpha = 0.7) + 
  geom_line(aes(col = id), na.rm = TRUE) +
  facet_wrap(~id, nrow = 3, scales = "free") +
  theme_hc() +
  theme(legend.position = "none") +
  labs(x = "", y = "Sales", title = "Sales for 3 random items for 6 months in 2015 with typical US events",
       subtitle = "Grey background = holiday dates.")

Croston optimization function

crost.opt <- function(data,type=c("croston","sba","sbj"),cost=c("mar","msr","mae","mse","mape"),
                      w=NULL,nop=c(2,1),init,init.opt=c(TRUE,FALSE)){
# Optimisation function for Croston and variants
  
  type <- type[1]
  cost <- cost[1]
  nop <- nop[1]
  init.opt <- init.opt[1]
  
  # Croston decomposition
  nzd <- which(data != 0)               # Find locations of non-zero demand
  k <- length(nzd)
  x <- c(nzd[1],nzd[2:k]-nzd[1:(k-1)])  # Intervals
  
  if (is.null(w) == TRUE && init.opt == FALSE){
    # Optimise only w
    p0 <- c(rep(0.05,nop))
    lbound <- c(rep(0,nop))
    ubound <- c(rep(1,nop))
    if (nop != 1){
      wopt <- optim(par=p0,crost.cost,method="Nelder-Mead",data=data,cost=cost,
                    type=type,w=w,nop=nop,w.opt=is.null(w),init=init,init.opt=init.opt,
                    lbound=lbound,ubound=ubound,control=list(maxit=2000))$par    
    } else {
      # Use Brent
      wopt <- optim(par=p0,crost.cost,method="Brent",data=data,cost=cost,
                    type=type,w=w,nop=nop,w.opt=is.null(w),init=init,init.opt=init.opt,
                    lbound=lbound,ubound=ubound,lower=lbound,upper=ubound,
                    control=list(maxit=2000))$par  
    }
    wopt <- c(wopt,init)
  } else if (is.null(w) == TRUE && init.opt == TRUE){
    # Optimise w and init
    p0 <- c(rep(0.05,nop),init[1],init[2])
    lbound <- c(rep(0,nop),0,1)
    ubound <- c(rep(1,nop),max(data),max(x))
    wopt <- optim(par=p0,crost.cost,method="Nelder-Mead",data=data,cost=cost,
                  type=type,w=w,nop=nop,w.opt=is.null(w),init=init,init.opt=init.opt,
                  lbound=lbound,ubound=ubound,control=list(maxit=2000))$par
  } else if (is.null(w) == FALSE && init.opt == TRUE){
    # Optimise only init
    nop <- length(w)
    p0 <- c(init[1],init[2])
    lbound <- c(0,1)
    ubound <- c(max(data),max(x))
    wopt <- optim(par=p0,crost.cost,method="Nelder-Mead",data=data,cost=cost,
                  type=type,w=w,nop=nop,w.opt=is.null(w),init=init,init.opt=init.opt,
                  lbound=lbound,ubound=ubound,control=list(maxit=2000))$par
    wopt <- c(w,wopt)
  }
  
  return(list(w=wopt[1:nop],init=wopt[(nop+1):(nop+2)]))
  
}
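The decomposition at the top of crost.opt is the heart of Croston's method: the series is split into the sizes of the non-zero demands and the intervals between them. A toy illustration with a made-up intermittent series:

# Made-up intermittent series split into demand sizes and inter-demand intervals
demo <- c(0, 2, 0, 0, 5, 1, 0, 0, 0, 4)
nzd  <- which(demo != 0)                                        # positions of non-zero demand
z    <- demo[nzd]                                               # demand sizes: 2 5 1 4
x    <- c(nzd[1], nzd[2:length(nzd)] - nzd[1:(length(nzd)-1)])  # intervals: 2 3 1 4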

Cost function for Croston

crost.cost <- function(p0,data,cost,type,w,nop,w.opt,init,init.opt,lbound,ubound){
   if (w.opt == TRUE && init.opt == TRUE){
    frc.in <- croston(data=data,w=p0[1:nop],h=0,init=p0[(nop+1):(nop+2)],
                    type=type,opt.on=TRUE)$frc.in
  } else if (w.opt == TRUE && init.opt == FALSE){
    frc.in <- croston(data=data,w=p0[1:nop],h=0,init=init,
                    type=type,opt.on=TRUE)$frc.in
  } else if (w.opt == FALSE && init.opt == TRUE){
    frc.in <- croston(data=data,w=w,h=0,init=p0,
                   type=type,opt.on=TRUE)$frc.in
  }
  
  if (cost == "mse"){
    E <- data - frc.in  
    E <- E[!is.na(E)]
    E <- mean(E^2)
  } 
  else if (cost == "mape"){
    E <- data - frc.in
    E <- E[!is.na(E)]
    E <- mean(abs(E)/data) * 100 
  }
  else if(cost == "mae"){
    E <- data - frc.in  
    E <- E[!is.na(E)]
    E <- mean(abs(E))
  } else if(cost == "mar"){
    n <- length(data)
    temp <- cumsum(data)/(1:n)
    n <- ceiling(0.3*n)
    temp[1:n] <- temp[n]
    E <- abs(frc.in - temp)
    E <- E[!is.na(E)]
    E <- sum(E)
  } else if(cost == "msr"){
    n <- length(data)
    temp <- cumsum(data)/(1:n)
    n <- ceiling(0.3*n)
    temp[1:n] <- temp[n]
    E <- (frc.in - temp)^2
    E <- E[!is.na(E)]
    E <- sum(E)    
  }
  
  # Constraints: penalise parameter values outside the bounds
  for (i in 1:(nop*w.opt+2*init.opt)){
    if (!(p0[i]>=lbound[i]) | !(p0[i]<=ubound[i])){
      E <- 9*10^99
    }
  }
  
  return(E)
  
}
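The "mar" and "msr" costs compare the in-sample forecasts to a benchmark demand rate: the running mean of the series, with the first 30% of values flattened, which appears intended to stabilise the early periods. A minimal sketch of that benchmark on made-up data:

# Benchmark demand rate used by the "mar"/"msr" costs (made-up series)
demo <- c(0, 2, 0, 0, 5, 1, 0, 0, 0, 4)
n    <- length(demo)
rate <- cumsum(demo) / (1:n)   # running mean demand rate
n30  <- ceiling(0.3 * n)
rate[1:n30] <- rate[n30]       # flatten the first 30% of the benchmark
rate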

Croston main function

croston <- function(data, h = 10, w= NULL, init = c("mean", "naive"), nop = c(2,1),
                    type = c("croston","sba","sbj"), cost = c("mar","msr","mae","mse","mape"),
                    init.opt = c(TRUE, FALSE),
                    plot=c(FALSE,TRUE),
                    opt.on=c(FALSE,TRUE),
                    na.rm=c(FALSE,TRUE)){
  # Default settings
  type <- tolower(type[1])
  cost <- cost[1]
  init.opt <- init.opt[1]
  plot <- plot[1]
  opt.on <- opt.on[1]
  na.rm <- na.rm[1]
  
  if (!is.numeric(init)){
    init <- init[1]
  } else {
    if (length(init) >= 2){
      init <- init[1:2]
    } else {
      init <- "mean"
    }
  }
  
  # Check that nop is correctly specified
  nop <- nop[1]
  if (nop > 2 || nop < 1){
    nop <- 2
    warning("nop can be either 1 or 2. Overridden to 2.")
  }
  
  # Prepare data
  if (class(data)=="data.frame"){
    if (ncol(data) > 1){
      warning("Data frame with more than one columns. Using only first one for univariate time series.")
    }
    data <- data[[1]]
  }
  if (na.rm == TRUE){
    data <- data[!is.na(data)]
  }
  n <- length(data)
  
  # Check that there are at least two non-zero observations
  
  if (sum(data != 0 ) < 2){
    stop("Need at least two non-zero values to perform modeling")
  }
  
   # Croston decomposition
  nzd <- which(data != 0) # Find locations of non-zero demand
  k <- length(nzd)
  z <- data[nzd]   # Demand
  x <- c(nzd[1],nzd[2:k]-nzd[1:(k-1)])  # Intervals
  
  # Initialise
  if (!(is.numeric(init) && length(init)==2)){
    if (init=="mean"){
      init <- c(z[1],mean(x))
    } else {
      init <- c(z[1],x[1]) # Naive with the first interval
    }
  }
  
   # Optimise parameters if requested
  if (opt.on == FALSE){
    if (is.null(w) || init.opt == TRUE){
      wopt <- crost.opt(data,type,cost,w,nop,init,init.opt)
      w <- wopt$w
      init <- wopt$init
    }
  }
  # Pre-allocate memory
  zfit <- vector("numeric", k)
  xfit <- vector("numeric", k)
  
  
  
   # Assign initial values and parameters
  if (opt.on == FALSE){
    if (init[1]<0){
      stop("Initial demand cannot be a negative number.")
    } 
    if (init[2]<1){
      stop("Initial interval cannot be less than 1.")
    }
  } 
  zfit[1] <- init[1]
  xfit[1] <- init[2]
  
  if (length(w)==1){
    a.demand <- w[1]
    a.interval <- w[1]
  } else {
    a.demand <- w[1]
    a.interval <- w[2]
  }
  
  # Set coefficient
  if(type == "sba"){
    coeff <- 1-(a.interval/2)
  } else if(type == "sbj"){
    coeff <- (1-a.interval/(2-a.interval))
  } else {
    coeff <- 1
  }
  
  # Fit model
  for (i in 2:k){
    zfit[i] <- zfit[i-1] + a.demand * (z[i] - zfit[i-1])      # Demand
    xfit[i] <- xfit[i-1] + a.interval * (x[i] - xfit[i-1])    # Interval
  }
  cc <- coeff * zfit/xfit
  
  # Calculate in-sample demand rate
  frc.in <- x.in <- z.in <- rep(NA,n)
  tv <- c(nzd+1,n)  # Time vector used to create frc.in forecasts
  for (i in 1:k){
    if (tv[i]<=n){
      frc.in[tv[i]:min(c(tv[i+1],n))] <- cc[i]
      x.in[tv[i]:min(c(tv[i+1],n))] <- xfit[i]
      z.in[tv[i]:min(c(tv[i+1],n))] <- zfit[i]
    }
  }
  
  # Forecast out-of-sample demand rate
  if (h>0){
    frc.out <- rep(cc[k],h)
    x.out <- rep(xfit[k],h)
    z.out <- rep(zfit[k],h)
  } else {
    frc.out <- x.out <- z.out <- NULL
  }
  
  # Plotting settings
  if (plot==TRUE){
    plot(1:n,data,type="l",xlim=c(1,(n+h)),xlab="Period",ylab="",
         xaxs="i",yaxs="i",ylim=c(0,max(data)*1.1))
    lines(which(data>0),data[data>0],type="p",pch=20)
    lines(1:n,frc.in,col="green")
    lines((n+1):(n+h),frc.out,col="red",lwd=2)
  }
  
  # Prepare demand and interval vectors for output
  c.in <- array(cbind(z.in,x.in),c(n,2),dimnames=list(NULL,c("Demand","Interval")))
  if (h>0){
    c.out <- array(cbind(z.out,x.out),c(h,2),dimnames=list(NULL,c("Demand","Interval")))
  } else {
    c.out <- NULL
  }
  c.coeff <- coeff
  comp <- list(c.in=c.in,c.out=c.out,coeff=coeff)
  
  return(list(model=type,frc.in=frc.in,frc.out=frc.out,
              weights=w,initial=c(zfit[1],xfit[1]),components=comp))
}

Test on real data

y <- example_df %>%
  filter(id == unique(example_df$id)[1] & dates < "2016-04-18") %>%
  pull(sales) %>%
  ts(start = c(2011,01))

fc <- croston(y, h = 7, type = "sba", cost = "mae", init.opt = TRUE, plot = TRUE)

y_true <- example_df %>% 
  filter(id == unique(example_df$id)[1]) %>%
  slice(tail(row_number(), 7)) %>%
  pull(sales)

# Test for overdispersion (is the variance larger than the mean?)
library(AER)
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
## Loading required package: lmtest
## Loading required package: sandwich
## Loading required package: survival
dispersiontest(glm(y~1,family="poisson"))
## 
##  Overdispersion test
## 
## data:  glm(y ~ 1, family = "poisson")
## z = 11.429, p-value < 2.2e-16
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
##   2.039269
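As a rough cross-check of the same question (a minimal sketch using the y constructed above), the raw sample mean and variance can be compared directly:

# Rough overdispersion check: is the sample variance well above the sample mean?
mean(y)
var(y)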
# Check the mean = variance (Poisson) assumption behind using the expectation as the point forecast; the test above indicates overdispersion (dispersion ~ 2.04)
example_df %>% 
  filter(id == unique(example_df$id)[1] & dates < "2016-04-18") %>%
  select(dates, sales) %>%
  as_tsibble(index = dates) %>%
  fabletools::model(CROSTON(sales)) %>%
  fabletools::forecast(h = 7) -> fc_package

compare <- tibble(method1 = fc$frc.out,
                  method2 = fc_package$.mean,
                  actual = y_true)

compare %>%
  kable(caption = "Table: Compare Point Forecast of 2 Ways 
        (My Croston function and Fable Package)", escape = TRUE) %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
Table: Compare Point Forecast of 2 Ways (My Croston function and Fable Package)
method1 method2 actual
0.4231962 0.5196051 0
0.4231962 0.5196051 0
0.4231962 0.5196051 1
0.4231962 0.5196051 0
0.4231962 0.5196051 0
0.4231962 0.5196051 0
0.4231962 0.5196051 1
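A quick way to score the two sets of point forecasts against the held-out week is the mean absolute error of each column (a small sketch; the results are not reproduced here):

# Mean absolute error of each method over the 7-day holdout
compare %>%
  summarise(mae_method1 = mean(abs(method1 - actual)),
            mae_method2 = mean(abs(method2 - actual)))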

Modified Croston Method

y <- example_df %>%
  filter(id == unique(example_df$id)[1]) %>%
  pull(sales) %>%
  ts(start = c(2011,01))

plot(y, col = "black", lwd = 2,  main = "A Example of Intermittent Demand")

adamModelsiETS <- vector("list",4)
adamModelsiETS[[1]] <- adam(y, "FFF", h=10, holdout=TRUE,
                            occurrence="odds-ratio")
adamModelsiETS[[2]] <- adam(y, "FFF", h=10, holdout=TRUE,
                            occurrence="inverse-odds-ratio")
adamModelsiETS[[3]] <- adam(y, "FFF", h=10, holdout=TRUE,
                            occurrence="direct")
adamModelsiETS[[4]] <- adam(y, "FFF", h=10, holdout=TRUE,
                            occurrence="general")
adamModelsiETSAICcs <-
    setNames(sapply(adamModelsiETS,AICc),
             c("odds-ratio", "inverse-odds-ratio",
               "direct", "general"))
adamModelsiETSAICcs
##         odds-ratio inverse-odds-ratio             direct            general 
##           6473.214           6473.214           6520.805           6448.940
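The occurrence specification with the lowest AICc (here the general model) can also be picked programmatically instead of by eye (a small sketch):

# Select the occurrence model with the lowest AICc
best_occ <- which.min(adamModelsiETSAICcs)
names(adamModelsiETSAICcs)[best_occ]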
adamModelsiETS[[4]]$occurrence
## Occurrence state space model estimated: General
## Underlying ETS model: oETS[G](MNN)
## 
## Error standard deviation: NaN
## Sample size: 1903
## Number of estimated parameters: 4
## Number of degrees of freedom: 1899
## Information criteria: 
##      AIC     AICc      BIC     BICc 
## 2021.368 2021.389 2043.573 2043.652
oETSModel <- oes(y, "MNN", h=10, holdout=TRUE,
                 occurrence=names(adamModelsiETSAICcs)[4])

adamETSARIMAModel <- adam(y,  model="PPP", 
                        occurrence=oETSModel,
                        orders=list(ar=c(3,3),i=c(2,1),ma=c(3,3),select=TRUE),
                        h=10, holdout=TRUE, initial="back",
                        distribution="dlnorm",
                        bounds = "admissible")

adamETSARIMAModel
## Time elapsed: 1.68 seconds
## Model estimated using auto.adam() function: iETS(ANN)[G]
## Occurrence model type: General
## Distribution assumed in the model: Mixture of Bernoulli and Log-Normal
## Loss function type: likelihood; Loss function value: 1647.615
## Persistence vector g:
##  alpha 
## 0.0561 
## 
## Sample size: 1903
## Number of estimated parameters: 2
## Number of degrees of freedom: 1901
## Number of provided parameters: 4
## Information criteria:
##      AIC     AICc      BIC     BICc 
## 5312.598 5312.604 5323.700 5323.724 
## 
## Forecast errors:
## Asymmetry: 3.78%; sMSE: 5.779%; rRMSE: 0.886; sPIS: -311.444%; sCE: 79.141%
plot(forecast(adamETSARIMAModel, h = 365, interval = "parametric"))

plot(reapply(adamETSARIMAModel))
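To see how the final ADAM model tracks the withheld observations, the point forecasts can be set side by side with the holdout sample kept in the fitted object (a sketch, assuming the smooth model stores the holdout when fitted with holdout = TRUE):

# Compare 10-step point forecasts with the withheld observations
fc_adam <- forecast(adamETSARIMAModel, h = 10, interval = "none")
cbind(forecast = as.numeric(fc_adam$mean),
      actual   = as.numeric(adamETSARIMAModel$holdout))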