Load packages and view the data

# Set working directory
setwd("C:/Users/DellPC/Desktop/thesis")

# Load packages
suppressMessages(library(tidyverse))
suppressMessages(library(tidymodels))
suppressMessages(library(readxl))
suppressMessages(library(timetk))
suppressMessages(library(tidyquant))
suppressMessages(library(modeltime))
suppressMessages(library(fpp3))
suppressMessages(library(skimr))
suppressMessages(library(janitor))
suppressMessages(library(DataExplorer))
suppressMessages(library(scales))
suppressMessages(library(ggthemes))
suppressMessages(library(ggsci))

Convert the datetime columns

# Shift a datetime forward by four years and return it as a Date
add_up_date <- function(x) {
    ymd(as.character(x)) %m+% years(4)
}
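
A quick illustration with a hypothetical input: the helper shifts a POSIXct timestamp forward by four years and returns a Date.

# Hypothetical example: 2017-08-01 becomes 2021-08-01
add_up_date(as.POSIXct("2017-08-01", tz = "UTC"))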

# Read the Product Master Data
df_product_master <- read_excel('data_final.xlsx', sheet = 'ProductMaster')

# Read the Historical Sales Data
df_soline <- read_excel('data_final.xlsx', sheet = 'SOLine') %>% 
   mutate_if(is.POSIXct, .funs  = add_up_date)

df_toline <- read_excel('data_final.xlsx', sheet = 'TOLine') %>% 
   mutate_if(is.POSIXct, .funs  = add_up_date)

# Read the Historical Supply Data
df_roline <- read_excel('data_final.xlsx', sheet = 'ROLine') %>% 
   mutate_if(is.POSIXct, .funs  = add_up_date)

df_poline <-  read_excel('data_final.xlsx', sheet = 'POLine') %>% 
   mutate_if(is.POSIXct, .funs  = add_up_date)

df_init_inventory <- read_excel('data_final.xlsx', sheet = 'Initial_Inventories')

View the Data

Product Master Data

glimpse(df_product_master)
## Rows: 19
## Columns: 8
## $ ProductId        <chr> "SKU01", "SKU02", "SKU03", "SKU04", "SKU05", "SKU06",~
## $ Name             <chr> "Strawberry", "Mango", "Rice", "Potato", "Tomato", "S~
## $ ProductGroup     <chr> "Fruits", "Fruits", "Rice", "Vegetables", "Vegetables~
## $ KgPerCarton      <dbl> 3.000, 12.336, 3.000, 1.884, 4.320, 6.270, 2.800, 6.2~
## $ CartonsPerPallet <dbl> 250, 75, 250, 456, 96, 136, 96, 125, 160, 136, 456, 4~
## $ KgPerPallet      <dbl> 750.000, 925.200, 750.000, 859.104, 414.720, 852.720,~
## $ SalesUnit        <chr> "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg",~
## $ SupplierUnit     <chr> "Car", "Car", "Car", "Car", "Car", "Kg", "Kg", "Kg", ~

Sales Data

glimpse(df_soline)
## Rows: 13,584
## Columns: 11
## $ SoOrderLineId  <dbl> 2082, 2083, 2094, 2098, 2138, 2154, 2167, 2168, 2185, 2~
## $ SoOrderId      <dbl> 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 40, 41,~
## $ SoEnteredDate  <date> 2021-08-02, 2021-08-02, 2021-08-02, 2021-08-02, 2021-0~
## $ SoExpectedDate <date> 2021-08-02, 2021-08-02, 2021-08-02, 2021-08-02, 2021-0~
## $ SoCustomerId   <chr> "CUS009", "CUS009", "CUS009", "CUS009", "CUS009", "CUS0~
## $ SoProductId    <chr> "SKU19", "SKU11", "SKU09", "SKU04", "SKU14", "SKU05", "~
## $ SoQuantity     <dbl> 6.000, 3.744, 1.260, 5.652, 0.660, 1.920, 1.254, 3.780,~
## $ SoInKg         <dbl> 6.000, 3.744, 1.260, 5.652, 0.660, 1.920, 1.254, 3.780,~
## $ SoInCarton     <dbl> 1.00000, 2.00000, 0.25000, 3.00000, 0.11111, 0.44444, 0~
## $ SoUnitCost     <dbl> 106684.31, 23577.89, 172811.98, 24108.71, 27312.76, 272~
## $ SoSalesUnit    <chr> "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "~
glimpse(df_toline)
## Rows: 13,465
## Columns: 10
## $ ToOrderLineId   <dbl> 1903, 1907, 1916, 1990, 2015, 2046, 2047, 2049, 2059, ~
## $ SoOrderLineId   <dbl> 2632, 2579, 2196, 2564, 4172, 2581, 2558, 2557, 4108, ~
## $ ToCustomerId    <chr> "CUS003", "CUS025", "CUS015", "CUS025", "CUS010", "CUS~
## $ ToDepartureDate <date> 2021-08-02, 2021-08-02, 2021-08-02, 2021-08-02, 2021-~
## $ ToArrivalDate   <date> 2021-08-03, 2021-08-03, 2021-08-03, 2021-08-02, 2021-~
## $ ToProductId     <chr> "SKU01", "SKU01", "SKU01", "SKU03", "SKU07", "SKU04", ~
## $ ToInKg          <dbl> 600.0000, 150.0000, 45.0000, 90.0000, 28.0000, 9.4200,~
## $ ToInCarton      <dbl> 200.00, 50.00, 15.00, 30.00, 10.00, 5.00, 5.00, 5.00, ~
## $ ToQuantity      <dbl> 600.0000, 150.0000, 45.0000, 90.0000, 28.0000, 9.4200,~
## $ ToSalesUnit     <chr> "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", ~

Supply Data

glimpse(df_roline)
## Rows: 1,777
## Columns: 10
## $ RoOrderLineId   <dbl> 29293, 29294, 29295, 29296, 29298, 29308, 29323, 29369~
## $ PoOrderLineId   <dbl> 4659, 4659, 4659, 4659, 4688, 4659, 4409, 4459, 4428, ~
## $ RoSupplierId    <chr> "SU001", "SU001", "SU001", "SU001", "SU002", "SU001", ~
## $ RoDepartureDate <date> 2021-08-02, 2021-08-02, 2021-08-02, 2021-08-02, 2021-~
## $ RoArrivalDate   <date> 2021-08-03, 2021-08-03, 2021-08-03, 2021-08-03, 2021-~
## $ RoProductId     <chr> "SKU01", "SKU01", "SKU01", "SKU01", "SKU03", "SKU01", ~
## $ RoInKg          <dbl> 3000.00, 2997.00, 2997.00, 3000.00, 2108.00, 738.00, 6~
## $ RoInCarton      <dbl> 1000.00, 999.00, 999.00, 1000.00, 702.67, 246.00, 136.~
## $ RoSupplierQty   <dbl> 1000.00, 999.00, 999.00, 1000.00, 702.67, 246.00, 685.~
## $ RoSupplierUnit  <chr> "Car", "Car", "Car", "Car", "Car", "Car", "Kg", "Car",~
glimpse(df_poline)
## Rows: 1,769
## Columns: 11
## $ PoOrderId      <dbl> 4409, 4410, 4428, 4429, 4459, 4460, 4488, 4530, 4531, 4~
## $ PoOrderLineId  <dbl> 4409, 4410, 4428, 4429, 4459, 4460, 4488, 4530, 4531, 4~
## $ PoSupplierId   <chr> "SU004", "SU004", "SU003", "SU003", "SU003", "SU003", "~
## $ PoEnteredDate  <date> 2021-08-01, 2021-08-01, 2021-08-01, 2021-08-01, 2021-0~
## $ PoExpectedDate <date> 2021-08-09, 2021-08-10, 2021-08-10, 2021-08-10, 2021-0~
## $ PoProductId    <chr> "SKU09", "SKU09", "SKU07", "SKU07", "SKU05", "SKU05", "~
## $ PoQuantity     <dbl> 685.44, 70.56, 75.60, 61.60, 40.00, 6.00, 1911.00, 73.4~
## $ PoInKg         <dbl> 685.44, 70.56, 75.60, 61.60, 172.80, 25.92, 23574.00, 7~
## $ PoInCarton     <dbl> 136, 14, 27, 22, 40, 6, 1911, 17, 3, 2, 22, 1, 1, 7, 42~
## $ PoSupplierUnit <chr> "Kg", "Kg", "Kg", "Kg", "Car", "Car", "Car", "Kg", "Kg"~
## $ PoUnitCost     <dbl> 114173.00, 114173.00, 25000.00, 25000.00, 64800.00, 648~
glimpse(df_init_inventory)
## Rows: 19
## Columns: 3
## $ ProductId <chr> "SKU01", "SKU02", "SKU03", "SKU04", "SKU05", "SKU06", "SKU07~
## $ Quantity  <dbl> 6485.0000, 29791.5000, 6154.9310, 1400.5000, 275.5000, 1336.~
## $ Unit      <chr> "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", ~

Data Processing

Aggregate Demand Data

demand <- df_soline %>% 
   group_by(SoEnteredDate) %>%
   summarise(SoQuantity = sum(SoQuantity),
             SoValue = sum(SoQuantity*SoUnitCost)) %>%
   ungroup() 

Take an overview of the data

glimpse(demand)
## Rows: 153
## Columns: 3
## $ SoEnteredDate <date> 2021-08-01, 2021-08-02, 2021-08-03, 2021-08-04, 2021-08~
## $ SoQuantity    <dbl> 1986.886, 9277.690, 6383.204, 13672.854, 11737.044, 4531~
## $ SoValue       <dbl> 2546578751, 63911263370, 25530734478, 42962880054, 53582~

Visualize the time series

demand %>%
   plot_time_series(
      .date_var = SoEnteredDate,
      .value = SoValue,
      .smooth_color = "#18BC9C",
      .smooth_size = 0.5,
      .title = "Total SO Sales Value, August to December 2021"
   )

Check missing values by column

demand %>% 
   plot_missing(
      ggtheme = theme_light(),
      title = "Percentage of Missing Values by Column"
   )

Seasonal Breakdown Visualization

demand %>%
   plot_seasonal_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue
   )

STL Decomposition Components

demand %>%  
    plot_stl_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue,
      .interactive = TRUE
    )
## frequency = 7 observations per 1 week
## trend = 31 observations per 1 month
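
The decomposition components can also be extracted as a table; a minimal sketch using timetk's tk_stl_diagnostics(), assuming the same defaults as the plot:

demand %>%  
    tk_stl_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue
    ) %>%
    glimpse()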

Anomaly Diagnostics

demand %>% 
   plot_anomaly_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue,
      .title = "Anomaly Diagnostics of the SoValue Time Series",
      .anom_color = "#FB3029", 
      .max_anomalies = 0.07, 
      .alpha = 0.05
    )
## frequency = 7 observations per 1 week
## trend = 31 observations per 1 month
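
To see which dates are flagged rather than only plotting them, the underlying anomaly table can be pulled with tk_anomaly_diagnostics(); a minimal sketch, assuming the same parameters as the plot:

demand %>% 
   tk_anomaly_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue,
      .max_anomalies = 0.07,
      .alpha = 0.05
   ) %>% 
   filter(anomaly == "Yes") %>% 
   select(SoEnteredDate, observed)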

ACF

p1 <- demand %>% 
   tk_acf_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue
   ) %>%
   ggplot(aes(x = lag, y = ACF)) +
   # Add horizontal line at y = 0
    geom_hline(yintercept = 0) +
    # Plot autocorrelations
    geom_point(size = 2, color = "steelblue") +
    geom_segment(aes(xend = lag, yend = 0), size = 1) +
    # Add cutoffs
    geom_line(aes(y = .white_noise_upper), color = "blue", linetype = 2) +
    geom_line(aes(y = .white_noise_lower), color = "red", linetype = 2) +
    # Aesthetics
    expand_limits(y = c(-1, 1)) +
    theme_light() +
    theme(
        legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1),
        plot.title = element_text(hjust = 0.5)
    ) + 
   labs(
      title = "Autocorrelation (ACF)",
      subtitle = "SoValue, August to December 2021"
    )
## Max lag exceeds data available. Using max lag: 152

PACF

p2 <- demand %>% 
   tk_acf_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue
   ) %>%
   ggplot(aes(x = lag, y = PACF)) +
   # Add horizontal line at y = 0
    geom_hline(yintercept = 0) +
    # Plot autocorrelations
    geom_point(size = 2, color = "red") +
    geom_segment(aes(xend = lag, yend = 0), size = 1) +
    # Add cutoffs
    geom_line(aes(y = .white_noise_upper), color = "black", linetype = 2) +
    geom_line(aes(y = .white_noise_lower), color = "black", linetype = 2) +
    # Aesthetics
    expand_limits(y = c(-1, 1)) +
    theme_light() +
    theme(
        legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1),
        plot.title = element_text(hjust = 0.5)
    ) + 
   labs(
      title = "Partial Autocorrelation (PACF)",
      subtitle = "SoValue, August to December 2021"
    )
## Max lag exceeds data available. Using max lag: 152
cowplot::plot_grid(p1, p2)

Moving Average

demand %>% 
   select(SoEnteredDate, SoValue) %>%
   mutate(adjusted_15_mm_close = slidify_vec(
        .x      = SoValue,
        .period = 15,
        .f      = mean,
        na.rm   = TRUE,
        .align  = "center",
        .partial = TRUE),
    adjusted_30_mm_close = slidify_vec(
        .x      = SoValue,
        .period = 30,
        .f      = mean,
        na.rm   = TRUE,
        .align  = "center",
        .partial = TRUE),
    adjusted_100_mm_close = slidify_vec(
        .x      = SoValue,
        .period = 100,
        .f      = mean,
        na.rm   = TRUE,
        .align  = "center",
        .partial = TRUE),
    adjusted_300_mm_close = slidify_vec(
        .x      = SoValue,
        .period = 300,
        .f      = mean,
        na.rm   = TRUE,
        .align  = "center",
        .partial = TRUE)
    ) %>% 
  pivot_longer(contains("close"), names_repair = "unique") %>%
  ggplot(aes(x=SoEnteredDate, y=value)) + 
   geom_line(aes(color=name), size=1) + 
  theme_light() + 
   theme(legend.position = "bottom", plot.title = element_text(hjust=0.5)) + 
  guides(color = guide_legend(nrow=2, byrow=TRUE)) + 
  labs(
    title = "Moving Average (MA) Smoothing",
    subtitle = "SO Sales Value, August to December 2021",
    y = "SO Sales Value") + 
   scale_colour_lancet() + 
  scale_x_date(expand = c(0,0))

Lag Plot

demand %>% 
   select(SoEnteredDate, SoValue) %>%
   as_tsibble(index = SoEnteredDate)  %>%
   gg_lag(SoValue, geom = "point") + 
   theme_light()

Model

demand_tbl <- demand %>%
   tk_augment_timeseries_signature() %>%
   mutate(
    # Log-interval transform (bounded below at 0), then standardize
    SoValue_transformed = log_interval_vec(SoValue, limit_lower = 0, offset = 1),
    SoValue_transformed = standardize_vec(SoValue_transformed),
    # Round trip: invert with the parameters printed below to recover SoValue
    SoValue_transformed1 = standardize_inv_vec(SoValue_transformed, mean = -1.87644984320702, sd = 1.89587378623263),
    SoValue_transformed1 = log_interval_inv_vec(SoValue_transformed1, limit_lower = 0, limit_upper = 171308577084.647, offset = 1)
  ) %>% 
  select(SoEnteredDate, SoValue, SoValue_transformed, SoValue_transformed1, everything())
## tk_augment_timeseries_signature(): Using the following .date_var variable: SoEnteredDate
## log_interval_vec(): 
##  Using limit_lower: 0
##  Using limit_upper: 171308577084.647
##  Using offset: 1
## Standardization Parameters
## mean: -1.87644984320702
## standard deviation: 1.89587378623263
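
A quick sanity check (a sketch, assuming the inverse parameters above match the ones reported by standardize_vec()) confirms the round trip recovers the original series:

all.equal(demand_tbl$SoValue, demand_tbl$SoValue_transformed1)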
demand_tbl %>% 
  glimpse()
## Rows: 153
## Columns: 33
## $ SoEnteredDate        <date> 2021-08-01, 2021-08-02, 2021-08-03, 2021-08-04, ~
## $ SoValue              <dbl> 2546578751, 63911263370, 25530734478, 42962880054~
## $ SoValue_transformed  <dbl> -1.222280096, 0.715981356, 0.070811152, 0.4125058~
## $ SoValue_transformed1 <dbl> 2546578752, 63911263371, 25530734479, 42962880055~
## $ SoQuantity           <dbl> 1986.886, 9277.690, 6383.204, 13672.854, 11737.04~
## $ index.num            <dbl> 1627776000, 1627862400, 1627948800, 1628035200, 1~
## $ diff                 <dbl> NA, 86400, 86400, 86400, 86400, 86400, 86400, 864~
## $ year                 <int> 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2~
## $ year.iso             <int> 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2~
## $ half                 <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2~
## $ quarter              <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3~
## $ month                <int> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8~
## $ month.xts            <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7~
## $ month.lbl            <ord> August, August, August, August, August, August, A~
## $ day                  <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15~
## $ hour                 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ minute               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ second               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ hour12               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ am.pm                <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ wday                 <int> 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3~
## $ wday.xts             <int> 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2~
## $ wday.lbl             <ord> Sunday, Monday, Tuesday, Wednesday, Thursday, Fri~
## $ mday                 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15~
## $ qday                 <int> 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 4~
## $ yday                 <int> 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,~
## $ mweek                <int> 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3~
## $ week                 <int> 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 3~
## $ week.iso             <int> 30, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 3~
## $ week2                <int> 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1~
## $ week3                <int> 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0~
## $ week4                <int> 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1~
## $ mday7                <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3~
p1 <- demand_tbl %>%
   plot_time_series_regression(
    SoEnteredDate,
    .formula = SoValue_transformed ~ SoEnteredDate,
    .interactive=FALSE,
    .show_summary=TRUE,
    .title = "Linear Model Without Implementing Feature Engineering",
    .legend_show = FALSE,
    .line_color = "#D1362F",
    .smooth_color = "#EA3B46",
    .line_size = 1
    )
## 
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7002 -0.1176  0.2521  0.6017  2.1961 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -19.377099  34.703933  -0.558    0.577
## SoEnteredDate   0.001024   0.001835   0.558    0.577
## 
## Residual standard error: 1.002 on 151 degrees of freedom
## Multiple R-squared:  0.00206,    Adjusted R-squared:  -0.004548 
## F-statistic: 0.3118 on 1 and 151 DF,  p-value: 0.5774
seasonality_formula <- as.formula(
  SoValue_transformed ~ 
    splines::ns(index.num) +
    day + wday.lbl + month
)

p2 <- demand_tbl %>% 
  plot_time_series_regression(
    .date_var = SoEnteredDate,
    .formula = seasonality_formula,
    .interactive=FALSE,
    .show_summary=TRUE,
    .title = "Linear Model With Feature Engineering",
    .legend_show = FALSE,
    .line_color = "#D1362F", 
    .smooth_color = "#EA3B46",
    .line_size = 1
    )
## 
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.60847 -0.27840  0.01038  0.30788  1.82567 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -52.7376    47.6150  -1.108  0.26990    
## splines::ns(index.num) -40.5837    36.8741  -1.101  0.27292    
## day                      0.2116     0.1944   1.088  0.27839    
## wday.lbl.L              -1.3430     0.1261 -10.651  < 2e-16 ***
## wday.lbl.Q              -1.2631     0.1260 -10.022  < 2e-16 ***
## wday.lbl.C              -0.9742     0.1256  -7.756 1.50e-12 ***
## wday.lbl^4              -0.5403     0.1252  -4.314 2.97e-05 ***
## wday.lbl^5              -0.3563     0.1251  -2.848  0.00506 ** 
## wday.lbl^6              -0.2379     0.1252  -1.900  0.05944 .  
## month                    6.5651     5.9326   1.107  0.27032    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5866 on 143 degrees of freedom
## Multiple R-squared:  0.6763, Adjusted R-squared:  0.6559 
## F-statistic:  33.2 on 9 and 143 DF,  p-value: < 2.2e-16
cowplot::plot_grid(p1, p2, nrow=2)

# Lambda reported by box_cox_vec(lambda = "auto") below, stored for the inverse transform later
lambda <- 0.184435805917973

ts_boxcox_SoValue <- demand_tbl %>%
   mutate(
      SoValue_box_cox = box_cox_vec(SoValue, lambda = "auto")
   )
## box_cox_vec(): Using value for lambda: 0.184435805917973
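
Rather than hard-coding lambda, it can also be estimated directly; a minimal sketch, assuming timetk::auto_lambda() with its default Guerrero method:

lambda <- timetk::auto_lambda(demand_tbl$SoValue)
lambda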
ts_boxcox_SoValue %>% 
    ggplot(aes(x = SoEnteredDate, y = SoValue_box_cox)) + 
    geom_line(size = 1, color = "#5C5C5C") + 
    geom_point(size = 0.5, color = "#5C5C5C") + 
    theme_bw() + 
  scale_y_continuous(labels = function(x) format(x, big.mark = ",", scientific = FALSE)) +
  labs(
    title = "SO Sales Value Across Time",
    subtitle = "Box-Cox Transformation"
  ) + theme(plot.title = element_text(hjust = 0.5, face = "bold")) 

ts_boxcox_SoValue <- ts_boxcox_SoValue %>% 
  select(SoEnteredDate, SoValue_box_cox)

# Hold out the last 14 days (roughly 9% of the 153-day series) as the testing set
splitted_df <- time_series_split(ts_boxcox_SoValue, assess = "14 days", cumulative = TRUE)
## Using date_var: SoEnteredDate
new_colors_pal <- c("#27223C", "#D1362F")
# Extracting training and testing sets
train_df <- training(splitted_df)
test_df <- testing(splitted_df)


splitted_df %>% 
  tk_time_series_cv_plan() %>% 
  ggplot(aes(x=SoEnteredDate, y = SoValue_box_cox, color=.key)) + 
   geom_line(size=1) + 
  scale_color_manual(values = new_colors_pal) + theme_bw() + 
  labs(
    title = "Time Series Split",
    subtitle = "Boxcox Value"
  ) +  geom_label(
    label = "Training Set",
    color = "#27223C",
    vjust = 0.5,
    aes(x=as.Date("2021-11-24"),
        y = 450)) + 
  geom_label(
    label = "Testing Set",
    color = "#D1362F",
    vjust = 0.5,
    aes(x = as.Date("2021-12-30"),
        y = 500)) +
  theme(legend.position = "none") + scale_x_date(expand = c(0,0)) + 
  geom_vline(xintercept = as.Date("2021-12-25"), lty="dashed")

prophet_fit <- prophet_reg() %>% 
  set_engine("prophet") %>% 
  # Fit on the training set only so the 14-day hold-out stays unseen
  fit(SoValue_box_cox ~ SoEnteredDate, data = train_df)
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
model_tbl <- modeltime_table(
  prophet_fit
)

calibration_tbl <- model_tbl %>% 
  modeltime_calibrate(
    new_data = test_df
  )

forecast_tbl <- calibration_tbl %>% 
  modeltime_forecast(actual_data = ts_boxcox_SoValue)
## Using '.calibration_data' to forecast.
forecast_tbl %>% 
  rename(
    "time" = ".index",
    "value" = ".value"
  ) %>% 
  ggplot(aes(x=time, y=value, color=.key)) + geom_line(size=1) + geom_point(size=0.5) + theme_bw() + scale_color_manual(values=new_colors_pal) + 
  theme(legend.position = "bottom") + 
  labs(
    title = "Forecasting with Prophet",
    subtitle = "SO Sales Value (Box-Cox scale)",
    y = "Box-Cox Transformed Value"
  ) + scale_x_date(expand = c(0,0)) 

calibration_tbl %>% 
  modeltime_accuracy() %>% 
  pivot_longer(cols = mae:rsq) %>% 
  mutate(
    name = name %>% as_factor() %>% fct_reorder(value) %>% fct_rev()
  ) %>% 
  ggplot(aes(x=name, y=value, fill=name)) + geom_col(color="black") + 
  theme_bw() + 
   ggthemes::scale_fill_tableau() + theme(legend.position = "bottom") + 
  geom_label(aes(label = round(value,2)), size=3, color="white") + 
  labs(
    title = "Metrics for evaluating Accuracy",
    subtitle = "Prophet Model with BoxCox Transformation"
  )
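
The same metrics can also be reported as a table; a minimal sketch using modeltime's accuracy table helper:

calibration_tbl %>% 
  modeltime_accuracy() %>% 
  table_modeltime_accuracy(.interactive = FALSE)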

refit_tbl <- calibration_tbl %>% 
  modeltime_refit(data = ts_boxcox_SoValue)
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# Forecast the next 60 days from the refitted model
forecast_ads_tbl <- refit_tbl %>% 
  modeltime_forecast(h = "60 days", actual_data = ts_boxcox_SoValue)

# Invert the Box-Cox transformation back to the original sales-value scale
forecast_ads_tbl <- forecast_ads_tbl %>% 
  mutate(.value = box_cox_inv_vec(.value, lambda = lambda),
         .conf_lo = box_cox_inv_vec(.conf_lo, lambda = lambda),
         .conf_hi = box_cox_inv_vec(.conf_hi, lambda = lambda))


forecast_ads_tbl %>% 
    rename(
    "time" = ".index",
    "value" = ".value"
  )  %>% 
    ggplot(aes(x=time, y=value, color=.key)) + geom_line(size=1) + 
  theme_bw() + scale_color_manual(values = new_colors_pal) + 
  labs(
    title = "Prophet Forecast for the Next 60 Days",
    subtitle = "SO Sales Value (Box-Cox inverted)",
    y = "Forecast SO Sales Value",
    caption = str_glue("Lambda: {lambda}")
  ) + 
  scale_y_continuous(labels=function(x) format(x, big.mark = ",", scientific = FALSE)) + 
  theme(legend.position = "bottom") + geom_smooth(method = "loess", color = "#191919", lty="dashed") + scale_x_date(expand = c(0,0)) 
## `geom_smooth()` using formula 'y ~ x'

forecast_ads_tbl %>% 
   filter(.key == "prediction") 
## # A tibble: 60 x 7
##    .model_id .model_desc .key       .index           .value    .conf_lo .conf_hi
##        <int> <chr>       <fct>      <date>            <dbl>       <dbl>    <dbl>
##  1         1 PROPHET     prediction 2022-01-01   874490232.    8599488.  1.02e10
##  2         1 PROPHET     prediction 2022-01-02 41524149010. 6918053368.  1.59e11
##  3         1 PROPHET     prediction 2022-01-03 39847081086. 6531743133.  1.54e11
##  4         1 PROPHET     prediction 2022-01-04 39087455003. 6358468225.  1.52e11
##  5         1 PROPHET     prediction 2022-01-05 48439162376. 8561982196.  1.80e11
##  6         1 PROPHET     prediction 2022-01-06 34204539938. 5271561017.  1.37e11
##  7         1 PROPHET     prediction 2022-01-07 44848156843. 7698387197.  1.69e11
##  8         1 PROPHET     prediction 2022-01-08   890013183.    8960987.  1.03e10
##  9         1 PROPHET     prediction 2022-01-09 41884491124. 7001719105.  1.60e11
## 10         1 PROPHET     prediction 2022-01-10 40195518407. 6611582687.  1.55e11
## # ... with 50 more rows

Top-Down Breakdown to SKU Level

sku_ratio <- df_soline %>% 
   group_by(SoProductId) %>% 
   summarise(SoValue = sum(SoUnitCost*SoQuantity)) %>% 
   ungroup() %>% 
   mutate(ratio_sales = SoValue/sum(SoValue))


forecast_df_sku <- forecast_ads_tbl %>% 
   filter(.key == "prediction") %>%
   distinct(.index, .value) %>%
   # Cross join every forecast date with every SKU via a dummy key
   mutate(key = 1) %>%
      left_join(sku_ratio %>% 
                   distinct(SoProductId) %>%
                   mutate(key = 1),
                by = "key") %>%
   transmute(SoProductId = SoProductId,
             SoEnteredDate = .index,
             forecast_SoValue = .value) %>%
   # Disaggregate the total forecast by each SKU's historical share of sales value
   left_join(sku_ratio %>% 
                select(SoProductId, ratio_sales),
             by = "SoProductId") %>%
   mutate(forecast_SoValue_SKU = forecast_SoValue*ratio_sales)


View(forecast_df_sku)
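
Because the SKU ratios sum to one, the SKU-level values should add back up to the daily total forecast; a quick check (a sketch, assuming the objects above):

forecast_df_sku %>% 
   group_by(SoEnteredDate) %>% 
   summarise(total_SKU_forecast = sum(forecast_SoValue_SKU),
             total_forecast = first(forecast_SoValue)) %>% 
   mutate(difference = total_SKU_forecast - total_forecast)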

Supply Planning

ABC classification

abc_classification <- df_soline %>%
   group_by(SoProductId) %>%
   summarise(SoValue = sum(SoQuantity*SoUnitCost)) %>%
   ungroup() %>% 
   mutate(percent_SoValue = SoValue/sum(SoValue)) %>%
   arrange(desc(percent_SoValue)) %>%
   transmute(
      SoProductId = SoProductId,
      cumulative_percent_SoValue = cumsum(percent_SoValue),
      abc_classification = case_when(cumulative_percent_SoValue < 0.8 ~ "A",
                                         cumulative_percent_SoValue > 0.95 ~ "C",
                                         TRUE ~ "B")
      ) 
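
A quick tally shows how the SKUs fall into the A, B and C classes (a sketch, assuming the classification above):

abc_classification %>% 
   count(abc_classification)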

XYZ analysis

xyz_classification <- df_soline %>%
   group_by(SoProductId, SoEnteredDate) %>%
   summarise(SoValue = sum(SoQuantity*SoUnitCost)) %>% 
   ungroup() %>%
   group_by(SoProductId) %>%
   summarise(mean_SoValue = mean(SoValue),
             std_SoValue = tidyquant::STDEV(SoValue)) %>%
   ungroup() %>%
   mutate(coeff_variation = std_SoValue/mean_SoValue,
          xyz_classification = case_when(coeff_variation < 1 ~ "X",
                                         coeff_variation > 2 ~ "Z",
                                         TRUE ~ "Y")) 
## `summarise()` has grouped output by 'SoProductId'. You can override using the `.groups` argument.

Combine ABC and XYZ into a segmentation

combine_abcxyz <- abc_classification %>% 
   left_join(xyz_classification, by = "SoProductId") %>% 
   select(SoProductId, abc_classification, xyz_classification, mean_SoValue, std_SoValue)

combine_abcxyz
## # A tibble: 19 x 5
##    SoProductId abc_classification xyz_classification mean_SoValue std_SoValue
##    <chr>       <chr>              <chr>                     <dbl>       <dbl>
##  1 SKU02       A                  X                     63432856.   41469793.
##  2 SKU01       A                  Y                     26292428.   26439009.
##  3 SKU09       A                  X                     19468751.   16252103.
##  4 SKU03       B                  Y                      9438450.   15201238.
##  5 SKU06       B                  Y                      4212497.    5965093.
##  6 SKU07       B                  X                      3170418.    2347783.
##  7 SKU10       B                  Y                      2987871.    3378320.
##  8 SKU13       B                  X                      2986203.    1814053.
##  9 SKU08       B                  X                      2657091.    1748820.
## 10 SKU05       B                  X                      2266377.    1929744.
## 11 SKU16       C                  Y                      2278063.    3406340.
## 12 SKU14       C                  X                      1381570.     914395.
## 13 SKU17       C                  X                      1324321.     942007.
## 14 SKU15       C                  X                      1264916.    1042418.
## 15 SKU04       C                  Z                      1241745.    4060086.
## 16 SKU11       C                  Z                       729414.    2004652.
## 17 SKU19       C                  Y                      1687552.    1713646.
## 18 SKU12       C                  Z                       590568.    2082984.
## 19 SKU20       C                  X                       621568.     471857.

Supply Lead Time

combine_supply_leadtime <- df_poline %>% 
   transmute(PoOrderLineId = PoOrderLineId, 
             PoEnteredDate = PoEnteredDate,
             PoProductId = PoProductId) %>% 
   inner_join(df_roline %>% 
                 select(PoOrderLineId, RoArrivalDate, RoProductId),
              by = c("PoOrderLineId" = "PoOrderLineId", "PoProductId" = "RoProductId")) %>% 
   mutate(supply_leadtime  = as.numeric(difftime(RoArrivalDate, PoEnteredDate, units = "days"))) %>%
   group_by(PoProductId) %>% 
   summarise(avg_supply_leadtime = mean(supply_leadtime),
             std_supply_leadtime = tidyquant::STDEV(supply_leadtime))

Supply Calculation

final_supply <- combine_abcxyz %>% 
   left_join(combine_supply_leadtime, by = c("SoProductId" = "PoProductId")) %>%
   mutate(abcxyz_classification = paste(abc_classification, xyz_classification, sep = "")) %>%
   transmute(SKU = SoProductId, 
             abcxyz_classification = abcxyz_classification,
             avg_demand_value = mean_SoValue,
             std_demand_value = std_SoValue,
             avg_supply_leadtime = avg_supply_leadtime,
             std_supply_leadtime = std_supply_leadtime) %>%
   mutate(target_service_level = case_when(abcxyz_classification %in% c("AX", "AY", "BX") ~ 0.95,
                                           abcxyz_classification == "BY" ~ 0.9,
                                           abcxyz_classification %in% c("CX", "CY", "CZ") ~ 0.8)) %>%
   mutate(inventory_policy = case_when(abcxyz_classification %in% c("AX", "AY", "BX", "BY") ~ "Continuous Review Policy",
                                       abcxyz_classification %in% c("CX", "CY", "CZ") ~ "Periodic Review Policy")) %>%
   # Safety stock: z * sqrt(avg lead time * demand variance + (lead time std * avg demand)^2)
   mutate(z_score_value = qnorm(target_service_level),
          est_safety_stock_value = z_score_value*sqrt((avg_supply_leadtime*std_demand_value^2) + (std_supply_leadtime*avg_demand_value)^2),
          est_inventory_value = est_safety_stock_value + (avg_supply_leadtime*avg_demand_value)
   ) %>%
   select(SKU, abcxyz_classification, est_safety_stock_value, est_inventory_value, everything())

View(final_supply)
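
For reference, est_safety_stock_value follows the combined-uncertainty safety stock formula SS = z * sqrt(L * sd_d^2 + (sd_L * d)^2), where d and sd_d are the mean and standard deviation of daily demand value and L and sd_L are the mean and standard deviation of the supply lead time. A small worked example with hypothetical numbers (not taken from the data above):

# Hypothetical inputs for illustration only
z    <- qnorm(0.95)        # 95% target service level
d    <- 1000; sd_d <- 300  # mean and std of daily demand value
L    <- 2;    sd_L <- 0.5  # mean and std of supply lead time (days)
ss   <- z * sqrt(L * sd_d^2 + (sd_L * d)^2)
ss                         # estimated safety stock value
ss + L * d                 # estimated inventory value (cycle + safety)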