In the previous section, we have learned about how the inventory management operate in the warehouse, knew how to calculate the opening and ending inventory. So in this section, we will pratice to forecast the demand consumption in the future. If you want to know detail analyst, you can visit this page Demand Forecasting: Application of ARIMA model with R by Houssam Akkouche author. ## Call packages:
#Call packages:
pacman::p_load(rio,
here,
janitor,
tidyverse,
dplyr,
magrittr,
lubridate,
stringr
)
You can download this file from this page Historical Product Demand
#Then you can import them by: #Step 1: file.choose() to select the address of file #Step 2: rio::import(“the address of file”) #Step 3: naniar::gg_miss_var() to check the missing value of dataframe
#Change to suitable class (I change the name dataset to product_demand to shortly write)
product_demand <-product_demand %>%
mutate(Date = as.Date(Date,format = "%Y/%m/%d"),
Product_Category = as.factor(Product_Category))
product_demand$Order_Demand <-
gsub("[(]", "-", product_demand$Order_Demand)
product_demand$Order_Demand <-
gsub("[)]", "", product_demand$Order_Demand)
product_demand$Order_Demand <-
as.numeric(product_demand$Order_Demand)
#File product_demand, you can download it from the previous section. Because it's heavy so I cannot add it into this report.
product_demand <-product_demand %>%
mutate(Month = month(Date),
Year = year(Date),
Week_day = wday(Date)) %>%
filter(Year %in% c(2016:2012))
df1<-product_demand %>%
group_by(Year,Month) %>%
summarise(month_demand = sum(Order_Demand,na.rm = T))
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
df1$Month_Year = as.Date(str_c(df1$Year,df1$Month,"1",sep = "-"))
library("TTR")
#First we will divde the data into training data and testing data in 70-30:
#Create ts object for month demand variable:
training_df<-df1[df1$Month_Year < as.Date("2015-03-22"),]
testing_df <-df1[df1$Month_Year >= as.Date("2015-03-22"),]
demand_training<-ts(training_df$month_demand,
frequency = 12,
start = c(2012,1))
demand_full<-ts(df1$month_demand,
frequency = 12,
start = c(2012,1))
#Decompose the time object into 3 components: trend, seasonal, random
p<-decompose(demand_training)
plot(p)
#First we will calculate the different in demand product monthly:
#Check stationary assumption:
m = c(1:3)
lapply(m, function(x) {
a<-diff(log(demand_training), lag = x)
tseries::adf.test(a)$p.value}
) #p<0,05 is accepted
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Warning in tseries::adf.test(a): p-value smaller than printed p-value
## Warning in tseries::adf.test(a): p-value smaller than printed p-value
## [[1]]
## [1] 0.01
##
## [[2]]
## [1] 0.01
##
## [[3]]
## [1] 0.1133475
#So the value accepted is lag 1 or 2. It suggested that we should use 1 lag or 2 lag time series of diff(log(demand_components))
#Rename of two time series:
ts1<-diff(log(demand_training),1)
ts2<-diff(log(demand_training),2)
#Plot value ACF for 2 series:
library(forecast)
## Warning: package 'forecast' was built under R version 4.2.3
p1<-Acf(ts1,plot =F)
p2<-Acf(ts2,plot =F)
cowplot::plot_grid(autoplot(p1), autoplot(p2), nrow = 2)
#Plot value PACF for 2 series
p3<-Pacf(ts1,plot =F)
p4<-Pacf(ts2,plot =F)
cowplot::plot_grid(autoplot(p3), autoplot(p4), nrow = 2)
#Look the graphs, we choose optimal value for arima (p,d,q) model is (3,1,2) or we will use function in forecast package to find suitable model.
model<-auto.arima(ts1,trace = T) #We will choose model (0,1,1)
##
## ARIMA(2,0,2)(1,0,1)[12] with non-zero mean : -53.59472
## ARIMA(0,0,0) with non-zero mean : -42.93819
## ARIMA(1,0,0)(1,0,0)[12] with non-zero mean : -48.85966
## ARIMA(0,0,1)(0,0,1)[12] with non-zero mean : Inf
## ARIMA(0,0,0) with zero mean : -44.98445
## ARIMA(2,0,2)(0,0,1)[12] with non-zero mean : -55.38358
## ARIMA(2,0,2) with non-zero mean : -57.29734
## ARIMA(2,0,2)(1,0,0)[12] with non-zero mean : -56.09742
## ARIMA(1,0,2) with non-zero mean : Inf
## ARIMA(2,0,1) with non-zero mean : -59.00472
## ARIMA(2,0,1)(1,0,0)[12] with non-zero mean : -58.48372
## ARIMA(2,0,1)(0,0,1)[12] with non-zero mean : -57.59841
## ARIMA(2,0,1)(1,0,1)[12] with non-zero mean : -56.1491
## ARIMA(1,0,1) with non-zero mean : Inf
## ARIMA(2,0,0) with non-zero mean : -61.43027
## ARIMA(2,0,0)(1,0,0)[12] with non-zero mean : -61.31788
## ARIMA(2,0,0)(0,0,1)[12] with non-zero mean : -60.40807
## ARIMA(2,0,0)(1,0,1)[12] with non-zero mean : -59.16376
## ARIMA(1,0,0) with non-zero mean : -48.92909
## ARIMA(3,0,0) with non-zero mean : -58.83972
## ARIMA(3,0,1) with non-zero mean : Inf
## ARIMA(2,0,0) with zero mean : -63.2462
## ARIMA(2,0,0)(1,0,0)[12] with zero mean : -63.61601
## ARIMA(2,0,0)(1,0,1)[12] with zero mean : -61.61415
## ARIMA(2,0,0)(0,0,1)[12] with zero mean : -62.60866
## ARIMA(1,0,0)(1,0,0)[12] with zero mean : -51.2282
## ARIMA(3,0,0)(1,0,0)[12] with zero mean : -60.96011
## ARIMA(2,0,1)(1,0,0)[12] with zero mean : -60.9656
## ARIMA(1,0,1)(1,0,0)[12] with zero mean : -59.01621
## ARIMA(3,0,1)(1,0,0)[12] with zero mean : -59.12165
##
## Best model: ARIMA(2,0,0)(1,0,0)[12] with zero mean
#Create model for training data:
model_training<-Arima(demand_training,
order = c(model[["arma"]][c(1,2,6)]),
seasonal = list(order = c(model[["arma"]][c(3,4,7)]),period = model[["arma"]][5]),
include.drift = FALSE)
#Forecast by training model:
training_forecast<-forecast(model_training,h = 21)
#Calculate RMSE:
accuracy(training_forecast,testing_df$month_demand)
## ME RMSE MAE MPE MAPE MASE
## Training set 665935.5 8049506 6471794 -0.1798406 7.89135 0.7172872
## Test set -2673550.4 6777349 5312444 -3.6898334 6.44421 0.5887932
## ACF1
## Training set -0.09450096
## Test set NA
#Use chart for presenting the differents:
plot(training_forecast,
main = "Model ARIMA(2,0,0)(1,0,0)[12]",
xlab = "Time",
ylab = "Order Demand")
lines(demand_full,
col = "red",
lwd = "2")
legend("topleft",
legend = c("Actual","Forecast"),
col = c("red","blue"),
box.lty = 0,
lty = 1,
cex = 1.5,
lwd = 2)
This plot above show that all the predicted value is surrounds the actual value in 95%.
So we will choose model ‘ARIMA(2,0,0)(1,1,0)[12]’ to predict the future value for demand_full object.
#Modeling demand product by arima model:
fit <- Arima(demand_full,
order = c(model[["arma"]][c(1,2,6)]),
seasonal = list(order = c(model[["arma"]][c(3,4,7)]),period = model[["arma"]][5]),
include.drift = FALSE)
summary(fit)
## Series: demand_full
## ARIMA(2,0,0)(1,0,0)[12] with non-zero mean
##
## Coefficients:
## ar1 ar2 sar1 mean
## 0.3226 0.1287 0.4034 83749940
## s.e. 0.1356 0.1318 0.1309 2641906
##
## sigma^2 = 6.023e+13: log likelihood = -1036.1
## AIC=2082.2 AICc=2083.31 BIC=2092.67
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 371148.9 7497930 6035438 -0.3530097 7.257455 0.6865972 -0.07443246
#Predicting for 18 months with 99.5% range:
predict_fit<-forecast:::forecast.Arima(fit, h = 18, level = c(99.5))
#Print predicted value:
predict_fit
## Point Forecast Lo 99.5 Hi 99.5
## Jan 2017 80865768 59080113 102651423
## Feb 2017 78799799 55908250 101691348
## Mar 2017 87108869 63661982 110555757
## Apr 2017 81476545 57892293 105060797
## May 2017 81800772 58170552 105430991
## Jun 2017 82968004 59324159 106611849
## Jul 2017 85573766 61925666 109221867
## Aug 2017 82234449 58585047 105883851
## Sep 2017 81260709 57610905 104910512
## Oct 2017 83784961 60135034 107434887
## Nov 2017 86206515 62556550 109856479
## Dec 2017 82358761 58708784 106008737
## Jan 2018 82585386 57351090 107819681
## Feb 2018 81752594 56358709 107146479
## Mar 2018 85104423 59627912 110580934
## Apr 2018 82832742 57335521 108329964
## May 2018 82963613 57459440 108467787
## Jun 2018 83434477 57928240 108940713
#Plot the forecast value
forecast:::plot.forecast(predict_fit,
xlab ="Time",
ylab = "Order demand in Warehouse")