In the previous section, we have learned about how the inventory management operate in the warehouse, knew how to calculate the opening and ending inventory. So in this section, we will pratice to forecast the demand consumption in the future. If you want to know detail analyst, you can visit this page Demand Forecasting: Application of ARIMA model with R by Houssam Akkouche author. ## Call packages:

#Call packages:
pacman::p_load(rio,
               here,
               janitor,
               tidyverse,
               dplyr,
               magrittr,
               lubridate,
               stringr
               )

1.Import data:

You can download this file from this page Historical Product Demand

#Then you can import them by: #Step 1: file.choose() to select the address of file #Step 2: rio::import(“the address of file”) #Step 3: naniar::gg_miss_var() to check the missing value of dataframe

#Change to suitable class (I change the name dataset to product_demand to shortly write)
product_demand <-product_demand %>% 
    mutate(Date = as.Date(Date,format = "%Y/%m/%d"),
           Product_Category = as.factor(Product_Category))

product_demand$Order_Demand <- 
  gsub("[(]", "-", product_demand$Order_Demand)
product_demand$Order_Demand <- 
  gsub("[)]", "", product_demand$Order_Demand)
product_demand$Order_Demand <- 
  as.numeric(product_demand$Order_Demand)

#File product_demand, you can download it from the previous section. Because it's heavy so I cannot add it into this report.
product_demand <-product_demand %>%
  mutate(Month = month(Date),
         Year = year(Date),
         Week_day = wday(Date)) %>% 
  filter(Year %in% c(2016:2012))

df1<-product_demand %>% 
  group_by(Year,Month) %>% 
  summarise(month_demand = sum(Order_Demand,na.rm = T))
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
df1$Month_Year = as.Date(str_c(df1$Year,df1$Month,"1",sep = "-"))
library("TTR")
#First we will divde the data into training data and testing data in 70-30:
#Create ts object for month demand variable:
training_df<-df1[df1$Month_Year < as.Date("2015-03-22"),]
testing_df <-df1[df1$Month_Year >= as.Date("2015-03-22"),]

demand_training<-ts(training_df$month_demand,
                      frequency = 12,
                      start = c(2012,1))
demand_full<-ts(df1$month_demand,
                frequency = 12,
                      start = c(2012,1))
#Decompose the time object into 3 components: trend, seasonal, random
p<-decompose(demand_training)

plot(p)

2.PACF and ACF analyst:

#First we will calculate the different in demand product monthly:
#Check stationary assumption:
m = c(1:3)
lapply(m, function(x) {
         a<-diff(log(demand_training), lag = x)
         tseries::adf.test(a)$p.value}
       ) #p<0,05 is accepted
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Warning in tseries::adf.test(a): p-value smaller than printed p-value

## Warning in tseries::adf.test(a): p-value smaller than printed p-value
## [[1]]
## [1] 0.01
## 
## [[2]]
## [1] 0.01
## 
## [[3]]
## [1] 0.1133475
#So the value accepted is lag 1 or 2. It suggested that we should use 1 lag or 2 lag time series of diff(log(demand_components))
#Rename of two time series:
ts1<-diff(log(demand_training),1)
ts2<-diff(log(demand_training),2)
#Plot value ACF for 2 series:
library(forecast)
## Warning: package 'forecast' was built under R version 4.2.3
p1<-Acf(ts1,plot =F)
p2<-Acf(ts2,plot =F)
cowplot::plot_grid(autoplot(p1), autoplot(p2), nrow = 2)

#Plot value PACF for 2 series
p3<-Pacf(ts1,plot =F)
p4<-Pacf(ts2,plot =F)
cowplot::plot_grid(autoplot(p3), autoplot(p4), nrow = 2)

3.Forecast by ARIMA model approach:

3.1.Select the best model:

#Look the graphs, we choose optimal value for arima (p,d,q) model is (3,1,2) or we will use function in forecast package to find suitable model.
model<-auto.arima(ts1,trace = T) #We will choose model (0,1,1)
## 
##  ARIMA(2,0,2)(1,0,1)[12] with non-zero mean : -53.59472
##  ARIMA(0,0,0)            with non-zero mean : -42.93819
##  ARIMA(1,0,0)(1,0,0)[12] with non-zero mean : -48.85966
##  ARIMA(0,0,1)(0,0,1)[12] with non-zero mean : Inf
##  ARIMA(0,0,0)            with zero mean     : -44.98445
##  ARIMA(2,0,2)(0,0,1)[12] with non-zero mean : -55.38358
##  ARIMA(2,0,2)            with non-zero mean : -57.29734
##  ARIMA(2,0,2)(1,0,0)[12] with non-zero mean : -56.09742
##  ARIMA(1,0,2)            with non-zero mean : Inf
##  ARIMA(2,0,1)            with non-zero mean : -59.00472
##  ARIMA(2,0,1)(1,0,0)[12] with non-zero mean : -58.48372
##  ARIMA(2,0,1)(0,0,1)[12] with non-zero mean : -57.59841
##  ARIMA(2,0,1)(1,0,1)[12] with non-zero mean : -56.1491
##  ARIMA(1,0,1)            with non-zero mean : Inf
##  ARIMA(2,0,0)            with non-zero mean : -61.43027
##  ARIMA(2,0,0)(1,0,0)[12] with non-zero mean : -61.31788
##  ARIMA(2,0,0)(0,0,1)[12] with non-zero mean : -60.40807
##  ARIMA(2,0,0)(1,0,1)[12] with non-zero mean : -59.16376
##  ARIMA(1,0,0)            with non-zero mean : -48.92909
##  ARIMA(3,0,0)            with non-zero mean : -58.83972
##  ARIMA(3,0,1)            with non-zero mean : Inf
##  ARIMA(2,0,0)            with zero mean     : -63.2462
##  ARIMA(2,0,0)(1,0,0)[12] with zero mean     : -63.61601
##  ARIMA(2,0,0)(1,0,1)[12] with zero mean     : -61.61415
##  ARIMA(2,0,0)(0,0,1)[12] with zero mean     : -62.60866
##  ARIMA(1,0,0)(1,0,0)[12] with zero mean     : -51.2282
##  ARIMA(3,0,0)(1,0,0)[12] with zero mean     : -60.96011
##  ARIMA(2,0,1)(1,0,0)[12] with zero mean     : -60.9656
##  ARIMA(1,0,1)(1,0,0)[12] with zero mean     : -59.01621
##  ARIMA(3,0,1)(1,0,0)[12] with zero mean     : -59.12165
## 
##  Best model: ARIMA(2,0,0)(1,0,0)[12] with zero mean

3.2.Compare the training model to test value:

#Create model for training data:
model_training<-Arima(demand_training, 
             order = c(model[["arma"]][c(1,2,6)]),
             seasonal = list(order = c(model[["arma"]][c(3,4,7)]),period = model[["arma"]][5]), 
             include.drift = FALSE)
#Forecast by training model:
training_forecast<-forecast(model_training,h = 21)

#Calculate RMSE:
accuracy(training_forecast,testing_df$month_demand)
##                      ME    RMSE     MAE        MPE    MAPE      MASE
## Training set   665935.5 8049506 6471794 -0.1798406 7.89135 0.7172872
## Test set     -2673550.4 6777349 5312444 -3.6898334 6.44421 0.5887932
##                     ACF1
## Training set -0.09450096
## Test set              NA
#Use chart for presenting the differents:
plot(training_forecast,
      main = "Model ARIMA(2,0,0)(1,0,0)[12]",
      xlab = "Time",
      ylab = "Order Demand")
lines(demand_full, 
      col = "red",
      lwd = "2")
legend("topleft",
       legend = c("Actual","Forecast"),
       col = c("red","blue"),
       box.lty = 0,
       lty = 1,
       cex = 1.5,
       lwd = 2)

This plot above show that all the predicted value is surrounds the actual value in 95%.

So we will choose model ‘ARIMA(2,0,0)(1,1,0)[12]’ to predict the future value for demand_full object.

#Modeling demand product by arima model:
fit <- Arima(demand_full, 
             order = c(model[["arma"]][c(1,2,6)]),
             seasonal = list(order = c(model[["arma"]][c(3,4,7)]),period = model[["arma"]][5]), 
             include.drift = FALSE)
summary(fit)
## Series: demand_full 
## ARIMA(2,0,0)(1,0,0)[12] with non-zero mean 
## 
## Coefficients:
##          ar1     ar2    sar1      mean
##       0.3226  0.1287  0.4034  83749940
## s.e.  0.1356  0.1318  0.1309   2641906
## 
## sigma^2 = 6.023e+13:  log likelihood = -1036.1
## AIC=2082.2   AICc=2083.31   BIC=2092.67
## 
## Training set error measures:
##                    ME    RMSE     MAE        MPE     MAPE      MASE        ACF1
## Training set 371148.9 7497930 6035438 -0.3530097 7.257455 0.6865972 -0.07443246

3.2.Forecast the demand in future:

#Predicting for 18 months with 99.5% range:
predict_fit<-forecast:::forecast.Arima(fit, h = 18, level = c(99.5))
#Print predicted value:
predict_fit
##          Point Forecast  Lo 99.5   Hi 99.5
## Jan 2017       80865768 59080113 102651423
## Feb 2017       78799799 55908250 101691348
## Mar 2017       87108869 63661982 110555757
## Apr 2017       81476545 57892293 105060797
## May 2017       81800772 58170552 105430991
## Jun 2017       82968004 59324159 106611849
## Jul 2017       85573766 61925666 109221867
## Aug 2017       82234449 58585047 105883851
## Sep 2017       81260709 57610905 104910512
## Oct 2017       83784961 60135034 107434887
## Nov 2017       86206515 62556550 109856479
## Dec 2017       82358761 58708784 106008737
## Jan 2018       82585386 57351090 107819681
## Feb 2018       81752594 56358709 107146479
## Mar 2018       85104423 59627912 110580934
## Apr 2018       82832742 57335521 108329964
## May 2018       82963613 57459440 108467787
## Jun 2018       83434477 57928240 108940713
#Plot the forecast value
forecast:::plot.forecast(predict_fit, 
     xlab ="Time",
     ylab = "Order demand in Warehouse")