Introduction

This report presents the results of time series analysis for Google Trends data for two search terms: warehouse inventory management and warehouse automation over a time period of 4 years (2020-01-01 till 2024-01-01). Data was analysed in weekly frequency.

The aim is to search for…

Utilised methods comprise of:

  • Verifying integration order
  • Estimating properly specified Arima model
  • Testing for Granger causality
  • Estimating properly specified Vector Autoregressive Model

Note: In all statistical tests we assume 5% significance level. Plots for ACF, PACF and FEVD for VAR model reveal much more details on the slides attached to the file.

Loading Required Libraries

First, we load all the necessary libraries.

# Install required packages if not already installed
# install.packages("gtrendsR")
# install.packages("xts")
# install.packages("lmtest")
# install.packages("formattable")
# install.packages("vars")
# install.packages("kableExtra")
# install.packages("tidyverse")
# install.packages("MLmetrics")
# install.packages("forecast")
# install.packages("ggplot2")
# install.packages('fUnitRoots')
# install.packages('urca')

# Load required libraries
source("testdf.R") # Proprietary function from labs
library(urca)
library(vars)
library(tidyverse)
library(gtrendsR)
library(xts)
library(formattable)
library(kableExtra)
library(lmtest)
library(forecast)
library(MLmetrics)
library(ggplot2)

Loading and Preparing Data

Define Search Terms and Time Frame

Here we define the search terms and time frame for our analysis.

# Define search terms and time frame
# search_terms <- c("warehouse inventory management", "warehouse automation")
# time_frame <- "2020-01-01 2024-01-01"

# gtrendsR::setHandleParameters(
# user = 'user',
# password = 'pass',
# proxyport = 8080,
# proxyauth = 1,
# extra_curl_opts = list(timeout = 60)
# )

# Load Google Trends data
# gtrends_data <- gtrendsR::gtrends(search_terms, time = time_frame)

# Extract interest over time
# warehouse_manage <- gtrends_data$interest_over_time[gtrends_data$interest_over_time$keyword == "warehouse inventory management", ]
# warehouse_auto <- gtrends_data$interest_over_time[gtrends_data$interest_over_time$keyword == "warehouse automation", ]

# Merge data frames on date
# combined_data <- merge(warehouse_manage[, c("date", "hits")], warehouse_auto[, c("date", "hits")], by = "date")
# names(combined_data) <- c("date", "warehouse_inv_management", "warehouse_automation")

# write.csv(combined_data, 'warehouse_interest.csv', row.names=FALSE)

# Or read prepared data instead
gtrends_data = read.csv('warehouse_interest.csv')

Convert Data to Time Series

The data is converted to an xts object for further analysis.

warehouse = xts(gtrends_data[,2:3], as.POSIXct(gtrends_data$date))
plot(warehouse[, 1:2],
     col = c("black", "orange"),
     major.ticks = "years", 
     grid.ticks.on = "years",
     grid.ticks.lty = 3,
     main = "Interest in Warehouse Management & Automation",
     legend.loc = "topleft")

Integration order

Examining original series

First we test stationarity of original series.

# Perform ADF test on undifferenced data
testdf(warehouse$warehouse_inv_management, max.augmentations = 3)

##   augmentations       adf p_adf     bgodfrey      p_bg
## 1             0 -9.920687  0.01 1.400837e+00 0.2365834
## 2             1 -6.313657  0.01 3.238734e-01 0.5692891
## 3             2 -4.884254  0.01 4.512887e-03 0.9464400
## 4             3 -4.740063  0.01 8.601951e-05 0.9926000
testdf(warehouse$warehouse_automation, max.augmentations = 3)

##   augmentations       adf      p_adf   bgodfrey       p_bg
## 1             0 -7.177741 0.01000000 5.06612612 0.02439785
## 2             1 -4.945859 0.01000000 0.91885813 0.33777497
## 3             2 -3.674759 0.01000000 0.02745651 0.86839300
## 4             3 -3.212615 0.02203127 0.01505831 0.90233491
# Perform the KPSS test
kpss.test <- ur.kpss(warehouse$warehouse_inv_management, 
                     type = c("mu"))
summary(kpss.test)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 4 lags. 
## 
## Value of test-statistic is: 1.7446 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
kpss.test <- ur.kpss(warehouse$warehouse_automation, 
                     type = c("mu"))
summary(kpss.test)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 4 lags. 
## 
## Value of test-statistic is: 2.2939 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Decision:

ADF test suggest stationarity of both time series - the results can be interpreted with no augmentations for warehouse_inv_management and with one augmentation for warehouse_automation. However, we need to reject the null of stationarity in KPSS for boths series on assumed level of significance. Therefore we calculate the first differences.

Examining first differences of series

# First differences
diff_data <- lapply(gtrends_data[, 2:3], function(x) diff(x))
diff_warehouse <- xts(do.call(cbind, diff_data), order.by = as.POSIXct(gtrends_data$date[-1]))
diff_warehouse <- diff_warehouse[-1, ]

# Perform ADF test of differenced series
testdf(diff_warehouse$warehouse_inv_management, max.augmentations = 3)

##   augmentations       adf p_adf   bgodfrey       p_bg
## 1             0 -24.13968  0.01 3.98178676 0.04599475
## 2             1 -16.51575  0.01 0.13578785 0.71250490
## 3             2 -11.58224  0.01 0.07418190 0.78534250
## 4             3 -10.97197  0.01 0.02917258 0.86438116
testdf(diff_warehouse$warehouse_automation, max.augmentations = 3)

##   augmentations       adf p_adf   bgodfrey       p_bg
## 1             0 -22.01179  0.01 3.43831107 0.06370078
## 2             1 -16.87796  0.01 0.38010712 0.53754591
## 3             2 -12.30271  0.01 0.08585735 0.76951125
## 4             3 -10.88466  0.01 0.23521483 0.62768378
# Perform the KPSS test
# In case of warehouse without diffs it was rejected
kpss.test <- ur.kpss(diff_warehouse$warehouse_inv_management, 
                     type = c("mu"))
summary(kpss.test)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 4 lags. 
## 
## Value of test-statistic is: 0.0153 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
kpss.test <- ur.kpss(diff_warehouse$warehouse_automation, 
                     type = c("mu"))
summary(kpss.test)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 4 lags. 
## 
## Value of test-statistic is: 0.016 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Decision:

We reject the null of unit-root in Dickey-Fuller case and we cannot reject the null of stationarity in KPSS. Thus we conclude that both time series are I(1).

Granger Causality

Testing for Causality

We perform Granger causality tests on differenced series to see if one time series can predict another. By using stationary series we avoid the risk of spurious regression.

# 3, 4, 5 Lags automation ~ management
grangertest(warehouse_automation ~ warehouse_inv_management,
            data = diff_warehouse,
            order = 3)
## Granger causality test
## 
## Model 1: warehouse_automation ~ Lags(warehouse_automation, 1:3) + Lags(warehouse_inv_management, 1:3)
## Model 2: warehouse_automation ~ Lags(warehouse_automation, 1:3)
##   Res.Df Df     F   Pr(>F)   
## 1    198                     
## 2    201 -3 4.173 0.006824 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
grangertest(warehouse_automation ~ warehouse_inv_management,
            data = diff_warehouse,
            order = 4)
## Granger causality test
## 
## Model 1: warehouse_automation ~ Lags(warehouse_automation, 1:4) + Lags(warehouse_inv_management, 1:4)
## Model 2: warehouse_automation ~ Lags(warehouse_automation, 1:4)
##   Res.Df Df      F  Pr(>F)  
## 1    195                    
## 2    199 -4 3.0176 0.01914 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
grangertest(warehouse_automation ~ warehouse_inv_management,
            data = diff_warehouse,
            order = 5)
## Granger causality test
## 
## Model 1: warehouse_automation ~ Lags(warehouse_automation, 1:5) + Lags(warehouse_inv_management, 1:5)
## Model 2: warehouse_automation ~ Lags(warehouse_automation, 1:5)
##   Res.Df Df      F  Pr(>F)  
## 1    192                    
## 2    197 -5 2.3196 0.04489 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 3, 4, 5 Lags management ~ automation  
grangertest(warehouse_inv_management ~ warehouse_automation,
            data = diff_warehouse,
            order = 3)
## Granger causality test
## 
## Model 1: warehouse_inv_management ~ Lags(warehouse_inv_management, 1:3) + Lags(warehouse_automation, 1:3)
## Model 2: warehouse_inv_management ~ Lags(warehouse_inv_management, 1:3)
##   Res.Df Df      F Pr(>F)
## 1    198                 
## 2    201 -3 0.5302 0.6621
grangertest(warehouse_inv_management ~ warehouse_automation,
            data = diff_warehouse,
            order = 4)
## Granger causality test
## 
## Model 1: warehouse_inv_management ~ Lags(warehouse_inv_management, 1:4) + Lags(warehouse_automation, 1:4)
## Model 2: warehouse_inv_management ~ Lags(warehouse_inv_management, 1:4)
##   Res.Df Df      F Pr(>F)
## 1    195                 
## 2    199 -4 0.5658 0.6878
grangertest(warehouse_inv_management ~ warehouse_automation,
            data = diff_warehouse,
            order = 5)
## Granger causality test
## 
## Model 1: warehouse_inv_management ~ Lags(warehouse_inv_management, 1:5) + Lags(warehouse_automation, 1:5)
## Model 2: warehouse_inv_management ~ Lags(warehouse_inv_management, 1:5)
##   Res.Df Df      F Pr(>F)
## 1    192                 
## 2    197 -5 0.5289 0.7543

Decision:

For all tested orders the lags of warehouse_inv_management are significant for warehouse_automation, but not conversely. Thus we conclude that warehouse_inv_management Granger causes warehouse_automation.

Vector Autoregression (VAR) Model

Selecting Optimal Number of Lags

We use VAR model to capture the linear interdependencies between both time series. Once again models are estimated on diffs to be able to interprete coefficients. First we test multiple lags to find proper specification.

# 6 lags
VARselect((diff_warehouse), lag.max = 8) %>% # maximum lag
  .$criteria %>% 
  t() %>% 
  as_tibble() %>% 
  mutate(nLags = 1:nrow(.)) %>%
  select(nLags, everything()) %>%
  kbl(digits = 3) %>%
  kable_classic("striped", full_width = F) 
nLags AIC(n) HQ(n) SC(n) FPE(n)
1 10.009 10.049 10.108 22224.51
2 9.818 9.885 9.983 18363.91
3 9.831 9.925 10.062 18608.76
4 9.796 9.916 10.093 17969.36
5 9.781 9.928 10.144 17701.94
6 9.753 9.927 10.182 17215.54
7 9.774 9.974 10.269 17583.71
8 9.795 10.022 10.356 17955.72

Seasonality Adjustment

Seasonality effects should also be considered.

VARselect((diff_warehouse), lag.max = 8, season = 12) %>% # maximum lag
  .$criteria %>% 
  t() %>% 
  as_tibble() %>% 
  mutate(nLags = 1:nrow(.)) %>%
  select(nLags, everything()) %>%
  kbl(digits = 3) %>%
  kable_classic("striped", full_width = F)  
nLags AIC(n) HQ(n) SC(n) FPE(n)
1 10.143 10.330 10.605 25430.70
2 9.944 10.157 10.471 20834.73
3 9.954 10.195 10.548 21067.04
4 9.922 10.189 10.582 20404.47
5 9.911 10.204 10.636 20182.91
6 9.883 10.203 10.675 19642.83
7 9.902 10.249 10.760 20028.81
8 9.924 10.297 10.847 20483.54
VARselect((diff_warehouse), lag.max = 8, season = 4) %>% # maximum lag
  .$criteria %>% 
  t() %>% 
  as_tibble() %>%
  mutate(nLags = 1:nrow(.)) %>%
  select(nLags, everything()) %>%
  kbl(digits = 3) %>%
  kable_classic("striped", full_width = F)  
nLags AIC(n) HQ(n) SC(n) FPE(n)
1 10.052 10.132 10.250 23194.00
2 9.858 9.964 10.121 19104.57
3 9.874 10.008 10.204 19424.17
4 9.832 9.992 10.228 18630.29
5 9.818 10.005 10.280 18375.30
6 9.788 10.001 10.316 17828.31
7 9.812 10.052 10.406 18271.80
8 9.830 10.097 10.490 18607.18

Decision:

We select model with 6 lags and no seasonality adjustment because AIC and FPE information criteria are the lowest.

We reject models incorporating less lags because the 5th and 6th lags proved significant after estimation.

More complex models incorporating more lags or seasonality effects were rejected because they have higher values of all information criteria.

Estimating VAR Model

We estimate the VAR model with 6 lags.

# Estimation of model with 6 lags
warehouse.var6 <- VAR(diff_warehouse,
                    p = 6)
summary(warehouse.var6)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: warehouse_inv_management, warehouse_automation 
## Deterministic variables: const 
## Sample size: 202 
## Log Likelihood: -1532.199 
## Roots of the characteristic polynomial:
## 0.8076 0.8076 0.782 0.782 0.7701 0.7701 0.7435 0.7435 0.6977 0.6977 0.6521 0.1636
## Call:
## VAR(y = diff_warehouse, p = 6)
## 
## 
## Estimation results for equation warehouse_inv_management: 
## ========================================================= 
## warehouse_inv_management = warehouse_inv_management.l1 + warehouse_automation.l1 + warehouse_inv_management.l2 + warehouse_automation.l2 + warehouse_inv_management.l3 + warehouse_automation.l3 + warehouse_inv_management.l4 + warehouse_automation.l4 + warehouse_inv_management.l5 + warehouse_automation.l5 + warehouse_inv_management.l6 + warehouse_automation.l6 + const 
## 
##                              Estimate Std. Error t value Pr(>|t|)    
## warehouse_inv_management.l1 -0.694936   0.071665  -9.697  < 2e-16 ***
## warehouse_automation.l1      0.077933   0.076958   1.013 0.312513    
## warehouse_inv_management.l2 -0.457943   0.087943  -5.207 4.98e-07 ***
## warehouse_automation.l2      0.018524   0.089173   0.208 0.835665    
## warehouse_inv_management.l3 -0.271406   0.092018  -2.949 0.003585 ** 
## warehouse_automation.l3      0.005995   0.093506   0.064 0.948949    
## warehouse_inv_management.l4 -0.327244   0.092157  -3.551 0.000484 ***
## warehouse_automation.l4     -0.060459   0.093244  -0.648 0.517511    
## warehouse_inv_management.l5 -0.187393   0.089507  -2.094 0.037627 *  
## warehouse_automation.l5     -0.132865   0.087293  -1.522 0.129666    
## warehouse_inv_management.l6 -0.169861   0.074452  -2.281 0.023635 *  
## warehouse_automation.l6     -0.077721   0.074722  -1.040 0.299608    
## const                        0.251475   0.811723   0.310 0.757051    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 11.51 on 189 degrees of freedom
## Multiple R-Squared: 0.3652,  Adjusted R-squared: 0.3249 
## F-statistic: 9.062 on 12 and 189 DF,  p-value: 1.164e-13 
## 
## 
## Estimation results for equation warehouse_automation: 
## ===================================================== 
## warehouse_automation = warehouse_inv_management.l1 + warehouse_automation.l1 + warehouse_inv_management.l2 + warehouse_automation.l2 + warehouse_inv_management.l3 + warehouse_automation.l3 + warehouse_inv_management.l4 + warehouse_automation.l4 + warehouse_inv_management.l5 + warehouse_automation.l5 + warehouse_inv_management.l6 + warehouse_automation.l6 + const 
## 
##                             Estimate Std. Error t value Pr(>|t|)    
## warehouse_inv_management.l1 -0.15812    0.06672  -2.370 0.018799 *  
## warehouse_automation.l1     -0.61445    0.07165  -8.576 3.50e-15 ***
## warehouse_inv_management.l2 -0.17249    0.08187  -2.107 0.036456 *  
## warehouse_automation.l2     -0.46303    0.08302  -5.577 8.35e-08 ***
## warehouse_inv_management.l3  0.05261    0.08567   0.614 0.539866    
## warehouse_automation.l3     -0.29625    0.08705  -3.403 0.000813 ***
## warehouse_inv_management.l4 -0.03314    0.08580  -0.386 0.699746    
## warehouse_automation.l4     -0.32387    0.08681  -3.731 0.000252 ***
## warehouse_inv_management.l5 -0.09242    0.08333  -1.109 0.268788    
## warehouse_automation.l5     -0.25011    0.08127  -3.078 0.002397 ** 
## warehouse_inv_management.l6 -0.17284    0.06931  -2.494 0.013501 *  
## warehouse_automation.l6     -0.03911    0.06956  -0.562 0.574672    
## const                        0.24665    0.75569   0.326 0.744489    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 10.72 on 189 degrees of freedom
## Multiple R-Squared: 0.3784,  Adjusted R-squared: 0.339 
## F-statistic: 9.589 on 12 and 189 DF,  p-value: 1.897e-14 
## 
## 
## 
## Covariance matrix of residuals:
##                          warehouse_inv_management warehouse_automation
## warehouse_inv_management                  132.481                5.852
## warehouse_automation                        5.852              114.822
## 
## Correlation matrix of residuals:
##                          warehouse_inv_management warehouse_automation
## warehouse_inv_management                  1.00000              0.04745
## warehouse_automation                      0.04745              1.00000

As in case of Granger causality test we may observe significance of lags of warehouse_inv_management in the model for warehouse_automation but not conversely. More specifically first, second and sixth lag of warehouse_inv_management are significant for warehouse_automation.

In the model for warehouse_inv_management all lags of dependent variable are significant, but none of warehouse_automation.

Another general observation is that all significant lags in both models had a negative effect on the variable - this can be intepreted as a sign that in the historical data increases were succeeded by drops of values and both series concentrated around a trend. Examination of the first figure displaying original data reveals that series experienced a side-walk.

Plotting VAR Model

We plot the results of the VAR model.

plot(warehouse.var6)

After closer examination it can be observed that the only bars crossing bounds on ACF and PACF plots are seventh and tenth for warehouse_automation. The model with 7 lags could be considered, however all information criteria were higher for this specification, regardless of seasonality effects. Because the margin by which the bar crosses the bounds is tiny, we should resort to more formal residual tests.

Residual Tests

Portmanteau Test

We perform the Portmanteau test to formally check for autocorrelation in residuals.

# Autocorrelation of residuals test (Portmanteau)
serial.test(warehouse.var6)
## 
##  Portmanteau Test (asymptotic)
## 
## data:  Residuals of VAR object warehouse.var6
## Chi-squared = 43.821, df = 40, p-value = 0.3127

Decision:

The null of Portmantau is that series are independent (no autocorrelation). We cannot reject the null, so we can deem the residuals white noise.

Breusch-Godfrey Test

We also perform the Breusch-Godfrey test for autocorrelation in residuals.

# Autocorrelation test (BG)
serial.test(warehouse.var6, type='BG')
## 
##  Breusch-Godfrey LM test
## 
## data:  Residuals of VAR object warehouse.var6
## Chi-squared = 18.462, df = 20, p-value = 0.557

Decision:

BG test confirms the result of Portmanteu - we cannot reject the null of no autocorrelation of residuals.

Error Variance and Impulse responses

plot(fevd(warehouse.var6, n.ahead = 36))

Decision:

Forecast error variance decomposition allows to examine the distribution of errors with respect to variables. Here we may observe that a significant part of variance in predicting warehouse_automation stems from variance in warehouse_inv_management. On the other hand virtually all variance in predicting warehouse_inv_management comes from lags of this variable and not warehouse_automation.

plot(irf(warehouse.var6, n.ahead = 36))

Decision:

Impulse response function should gradually die away to deem the model stable. Here we can observe this effect both in case of warehouse_automation and warehouse_inv_management - the model is stable.

Forecasting

We produce a forecast for an out-of-sample period of 14 weeks, to estimate ex-post error.

# Set aside the last 14 observations for testing
train <- head((diff_warehouse), -14)
test <- tail((diff_warehouse), 14)

# Fit the VAR model on the training data
var_model <- VAR(train, p = 6)

# Forecast the next 14 periods
forecast_steps <- 14
forecast_result <- predict(var_model, n.ahead = forecast_steps, ci=0.95)

management_forecast <- xts(forecast_result$fcst$warehouse_inv_management[,-4], 
                    # we exclude the last column with CI
                    tail(index(diff_warehouse), 14))
names(management_forecast) <- c("warehouse_inv_management_fore", "warehouse_inv_management_lower", "warehouse_inv_management_upper")
automation_forecast <- xts(forecast_result$fcst$warehouse_automation[,-4], 
                    # we exclude the last column with CI
                    tail(index((diff_warehouse)), 14))
names(automation_forecast) <- c("warehouse_automation_fore", "warehouse_automation_lower", "warehouse_automation_upper")

Reconstructing forecasts:

# Extract the last observed value from the original series
last_value_management <- as.numeric(last(warehouse$warehouse_inv_management, 15)[1])
last_value_automation <- as.numeric(last(warehouse$warehouse_automation, 15)[1])

# Add the last observed value to the forecasts to reconstruct forecast with cumsum
# Upper and lower boundries are shifted by a scalar (confirmed on presentation :))
management_forecast_adjusted <- c(last_value_management, management_forecast$warehouse_inv_management_fore)
management_forecast_lower_adjusted <- management_forecast$warehouse_inv_management_lower + last_value_management
management_forecast_upper_adjusted <- management_forecast$warehouse_inv_management_upper + last_value_management

automation_forecast_adjusted <- c(last_value_automation, automation_forecast$warehouse_automation_fore)
automation_forecast_lower_adjusted <- automation_forecast$warehouse_automation_lower + last_value_automation
automation_forecast_upper_adjusted <- automation_forecast$warehouse_automation_upper + last_value_automation

# Calculate cumulative sum for management forecast + convert to xts
management_forecast_cumsum <- xts(cumsum(management_forecast_adjusted)[-1], order.by = tail(index(warehouse), 14))
management_forecast_lower <- xts(management_forecast_lower_adjusted, order.by = tail(index(warehouse), 14))
management_forecast_upper<- xts(management_forecast_upper_adjusted, order.by = tail(index(warehouse), 14))

# Calculate cumulative sum for automation forecast + convert to xts
automation_forecast_cumsum <- xts(cumsum(automation_forecast_adjusted)[-1], order.by = tail(index(warehouse), 14))
automation_forecast_lower <- xts(automation_forecast_lower_adjusted, order.by = tail(index(warehouse), 14))
automation_forecast_upper <- xts(automation_forecast_upper_adjusted, order.by = tail(index(warehouse), 14))

# Combine the forecast and original series
warehouse_combined <- merge(warehouse[, c("warehouse_inv_management",
                                          "warehouse_automation")],
                            management_forecast_cumsum,
                            management_forecast_lower,
                            management_forecast_upper,
                            automation_forecast_cumsum,
                            automation_forecast_lower,
                            automation_forecast_upper)


# Assign names to the new columns
names(warehouse_combined) <- c("warehouse_inv_management",
                               "warehouse_automation",
                               "warehouse_inv_management_fore",
                               "warehouse_inv_management_lower",
                               "warehouse_inv_management_upper",
                               "warehouse_automation_fore",
                               "warehouse_automation_lower",
                               "warehouse_automation_upper")

# Convert xts object to data frame for ggplot2
warehouse_combined_df <- fortify(warehouse_combined, melt = TRUE)

# Separate data frames for plotting
management_df <- warehouse_combined_df[warehouse_combined_df$Series %in% c("warehouse_inv_management", 
                                                                           "warehouse_inv_management_fore", 
                                                                           "warehouse_inv_management_lower", 
                                                                           "warehouse_inv_management_upper"),]

automation_df <- warehouse_combined_df[warehouse_combined_df$Series %in% c("warehouse_automation", 
                                                                           "warehouse_automation_fore", 
                                                                           "warehouse_automation_lower", 
                                                                           "warehouse_automation_upper"),]

# Assuming diff_warehouse is your combined dataframe already
# Plot for warehouse automation
plot(warehouse_combined[, c("warehouse_automation_fore", "warehouse_automation",
                       "warehouse_automation_lower", "warehouse_automation_upper")], 
     major.ticks = "years", 
     grid.ticks.on = "years",
     grid.ticks.lty = 3,
     main = "14-week forecast of warehouse automation",
     col = c("blue", "orange", "red", "red"),
     type = "l",  # type = "l" for lines
     xlab = "Time", 
     ylim = c(0, 150),
     ylab = "Values")

# Plot for warehouse inventory management
plot(warehouse_combined[, c("warehouse_inv_management_fore", "warehouse_inv_management",
                       "warehouse_inv_management_lower", "warehouse_inv_management_upper")], 
     major.ticks = "years", 
     grid.ticks.on = "years",
     grid.ticks.lty = 3,
     main = "14-week forecast of warehouse inv. management",
     col = c("blue", "black", "red", "red"),
     type = "l",  # type = "l" for lines
     ylim = c(0, 150),
     xlab = "Time", 
     ylab = "Values")

# Calculate error metrics
calculate_errors <- function(actual, forecast) {
  mae <- mean(abs(actual - forecast))
  mse <- mean((actual - forecast)^2)
  mape <- mean(abs((actual - forecast) / actual)) * 100
  amape <- mean(abs(actual - forecast) / pmax(abs(actual), abs(forecast))) * 100
  return(c(MAE = mae, MSE = mse, MAPE = mape, AMAPE = amape))
}

# Extract actual values and forecast values for error calculation
actual_management <- as.numeric(warehouse_combined$warehouse_inv_management[-(1:(nrow(warehouse_combined) - 14))])
forecast_management <- as.numeric(management_forecast_cumsum)

actual_automation <- as.numeric(warehouse_combined$warehouse_automation[-(1:(nrow(warehouse_combined) - 14))])
forecast_automation <- as.numeric(automation_forecast_cumsum)

# Calculate errors
errors_management <- calculate_errors(actual_management, forecast_management)
errors_automation <- calculate_errors(actual_automation, forecast_automation)

# Error Metrics for Warehouse Inventory Management
print(errors_management)
##       MAE       MSE      MAPE     AMAPE 
##  11.88496 203.10425  15.51050  14.85481
# Error Metrics for Warehouse Automation:
print(errors_automation)
##       MAE       MSE      MAPE     AMAPE 
##  7.494310 80.787107 10.479329  9.365967