Load packages and view the data

# Set working directory
setwd("C:/Users/DellPC/Desktop/thesis")

# Load packages
suppressMessages(library(tidyverse))
suppressMessages(library(tidymodels))
suppressMessages(library(readxl))
suppressMessages(library(timetk))
suppressMessages(library(tidyquant))
suppressMessages(library(modeltime))
suppressMessages(library(fpp3))
suppressMessages(library(skimr))
suppressMessages(library(janitor))
suppressMessages(library(DataExplorer))
suppressMessages(library(scales))
suppressMessages(library(ggthemes))
suppressMessages(library(ggsci))

Convert the datetime columns

# Shift a datetime forward by four years and return it as a Date
add_up_date <- function(x) {
    ymd(as.character(x)) %m+% years(4)
}
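
A quick illustration with a hypothetical input: the helper shifts a POSIXct timestamp forward by four years and returns a Date.

# Hypothetical example: 2017-08-01 becomes 2021-08-01
add_up_date(as.POSIXct("2017-08-01", tz = "UTC"))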

# Read the Product Master Data
df_product_master <- read_excel('data_final.xlsx', sheet = 'ProductMaster')

# Read the Historical Sales Data
df_soline <- read_excel('data_final.xlsx', sheet = 'SOLine') %>% 
   mutate_if(is.POSIXct, .funs  = add_up_date)

df_toline <- read_excel('data_final.xlsx', sheet = 'TOLine') %>% 
   mutate_if(is.POSIXct, .funs  = add_up_date)

# Read the Historical Supply Data
df_roline <- read_excel('data_final.xlsx', sheet = 'ROLine') %>% 
   mutate_if(is.POSIXct, .funs  = add_up_date)

df_poline <-  read_excel('data_final.xlsx', sheet = 'POLine') %>% 
   mutate_if(is.POSIXct, .funs  = add_up_date)

df_init_inventory <- read_excel('data_final.xlsx', sheet = 'Initial_Inventories')

View the Data

Product Master Data

glimpse(df_product_master)
## Rows: 19
## Columns: 8
## $ ProductId        <chr> "SKU01", "SKU02", "SKU03", "SKU04", "SKU05", "SKU06",~
## $ Name             <chr> "Strawberry", "Mango", "Rice", "Potato", "Tomato", "S~
## $ ProductGroup     <chr> "Fruits", "Fruits", "Rice", "Vegetables", "Vegetables~
## $ KgPerCarton      <dbl> 3.000, 12.336, 3.000, 1.884, 4.320, 6.270, 2.800, 6.2~
## $ CartonsPerPallet <dbl> 250, 75, 250, 456, 96, 136, 96, 125, 160, 136, 456, 4~
## $ KgPerPallet      <dbl> 750.000, 925.200, 750.000, 859.104, 414.720, 852.720,~
## $ SalesUnit        <chr> "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg",~
## $ SupplierUnit     <chr> "Car", "Car", "Car", "Car", "Car", "Kg", "Kg", "Kg", ~

Sales Data

glimpse(df_soline)
## Rows: 13,584
## Columns: 11
## $ SoOrderLineId  <dbl> 2082, 2083, 2094, 2098, 2138, 2154, 2167, 2168, 2185, 2~
## $ SoOrderId      <dbl> 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 40, 41,~
## $ SoEnteredDate  <date> 2021-08-02, 2021-08-02, 2021-08-02, 2021-08-02, 2021-0~
## $ SoExpectedDate <date> 2021-08-02, 2021-08-02, 2021-08-02, 2021-08-02, 2021-0~
## $ SoCustomerId   <chr> "CUS009", "CUS009", "CUS009", "CUS009", "CUS009", "CUS0~
## $ SoProductId    <chr> "SKU19", "SKU11", "SKU09", "SKU04", "SKU14", "SKU05", "~
## $ SoQuantity     <dbl> 6.000, 3.744, 1.260, 5.652, 0.660, 1.920, 1.254, 3.780,~
## $ SoInKg         <dbl> 6.000, 3.744, 1.260, 5.652, 0.660, 1.920, 1.254, 3.780,~
## $ SoInCarton     <dbl> 1.00000, 2.00000, 0.25000, 3.00000, 0.11111, 0.44444, 0~
## $ SoUnitCost     <dbl> 106684.31, 23577.89, 172811.98, 24108.71, 27312.76, 272~
## $ SoSalesUnit    <chr> "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "~
glimpse(df_toline)
## Rows: 13,465
## Columns: 10
## $ ToOrderLineId   <dbl> 1903, 1907, 1916, 1990, 2015, 2046, 2047, 2049, 2059, ~
## $ SoOrderLineId   <dbl> 2632, 2579, 2196, 2564, 4172, 2581, 2558, 2557, 4108, ~
## $ ToCustomerId    <chr> "CUS003", "CUS025", "CUS015", "CUS025", "CUS010", "CUS~
## $ ToDepartureDate <date> 2021-08-02, 2021-08-02, 2021-08-02, 2021-08-02, 2021-~
## $ ToArrivalDate   <date> 2021-08-03, 2021-08-03, 2021-08-03, 2021-08-02, 2021-~
## $ ToProductId     <chr> "SKU01", "SKU01", "SKU01", "SKU03", "SKU07", "SKU04", ~
## $ ToInKg          <dbl> 600.0000, 150.0000, 45.0000, 90.0000, 28.0000, 9.4200,~
## $ ToInCarton      <dbl> 200.00, 50.00, 15.00, 30.00, 10.00, 5.00, 5.00, 5.00, ~
## $ ToQuantity      <dbl> 600.0000, 150.0000, 45.0000, 90.0000, 28.0000, 9.4200,~
## $ ToSalesUnit     <chr> "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", ~

Supply Data

glimpse(df_roline)
## Rows: 1,777
## Columns: 10
## $ RoOrderLineId   <dbl> 29293, 29294, 29295, 29296, 29298, 29308, 29323, 29369~
## $ PoOrderLineId   <dbl> 4659, 4659, 4659, 4659, 4688, 4659, 4409, 4459, 4428, ~
## $ RoSupplierId    <chr> "SU001", "SU001", "SU001", "SU001", "SU002", "SU001", ~
## $ RoDepartureDate <date> 2021-08-02, 2021-08-02, 2021-08-02, 2021-08-02, 2021-~
## $ RoArrivalDate   <date> 2021-08-03, 2021-08-03, 2021-08-03, 2021-08-03, 2021-~
## $ RoProductId     <chr> "SKU01", "SKU01", "SKU01", "SKU01", "SKU03", "SKU01", ~
## $ RoInKg          <dbl> 3000.00, 2997.00, 2997.00, 3000.00, 2108.00, 738.00, 6~
## $ RoInCarton      <dbl> 1000.00, 999.00, 999.00, 1000.00, 702.67, 246.00, 136.~
## $ RoSupplierQty   <dbl> 1000.00, 999.00, 999.00, 1000.00, 702.67, 246.00, 685.~
## $ RoSupplierUnit  <chr> "Car", "Car", "Car", "Car", "Car", "Car", "Kg", "Car",~
glimpse(df_poline)
## Rows: 1,769
## Columns: 11
## $ PoOrderId      <dbl> 4409, 4410, 4428, 4429, 4459, 4460, 4488, 4530, 4531, 4~
## $ PoOrderLineId  <dbl> 4409, 4410, 4428, 4429, 4459, 4460, 4488, 4530, 4531, 4~
## $ PoSupplierId   <chr> "SU004", "SU004", "SU003", "SU003", "SU003", "SU003", "~
## $ PoEnteredDate  <date> 2021-08-01, 2021-08-01, 2021-08-01, 2021-08-01, 2021-0~
## $ PoExpectedDate <date> 2021-08-09, 2021-08-10, 2021-08-10, 2021-08-10, 2021-0~
## $ PoProductId    <chr> "SKU09", "SKU09", "SKU07", "SKU07", "SKU05", "SKU05", "~
## $ PoQuantity     <dbl> 685.44, 70.56, 75.60, 61.60, 40.00, 6.00, 1911.00, 73.4~
## $ PoInKg         <dbl> 685.44, 70.56, 75.60, 61.60, 172.80, 25.92, 23574.00, 7~
## $ PoInCarton     <dbl> 136, 14, 27, 22, 40, 6, 1911, 17, 3, 2, 22, 1, 1, 7, 42~
## $ PoSupplierUnit <chr> "Kg", "Kg", "Kg", "Kg", "Car", "Car", "Car", "Kg", "Kg"~
## $ PoUnitCost     <dbl> 114173.00, 114173.00, 25000.00, 25000.00, 64800.00, 648~
glimpse(df_init_inventory)
## Rows: 19
## Columns: 3
## $ ProductId <chr> "SKU01", "SKU02", "SKU03", "SKU04", "SKU05", "SKU06", "SKU07~
## $ Quantity  <dbl> 6485.0000, 29791.5000, 6154.9310, 1400.5000, 275.5000, 1336.~
## $ Unit      <chr> "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", ~

Data Processing

Aggregate Demand Data

demand <- df_soline %>% 
   group_by(SoEnteredDate) %>%
   summarise(SoQuantity = sum(SoQuantity),
             SoValue = sum(SoQuantity*SoUnitCost)) %>%
   ungroup() 

Take an overview of the data

glimpse(demand)
## Rows: 153
## Columns: 3
## $ SoEnteredDate <date> 2021-08-01, 2021-08-02, 2021-08-03, 2021-08-04, 2021-08~
## $ SoQuantity    <dbl> 1986.886, 9277.690, 6383.204, 13672.854, 11737.044, 4531~
## $ SoValue       <dbl> 2546578751, 63911263370, 25530734478, 42962880054, 53582~

Visualize the time series

demand %>%
   plot_time_series(
      .date_var = SoEnteredDate,
      .value = SoValue,
      .smooth_color = "#18BC9C",
      .smooth_size = 0.5,
      .title = "Total SO Sales Value, August to December 2021"
   )

Check missing values by column

demand %>% 
   plot_missing(
      ggtheme = theme_light(),
      title = "Percentage of Missing Values by Column"
   )

Seasonal Breakdown Visualization

demand %>%
   plot_seasonal_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue
   )

STL Decomposition Components

demand %>%  
    plot_stl_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue,
      .interactive = TRUE
    )
## frequency = 7 observations per 1 week
## trend = 31 observations per 1 month
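
The decomposition components can also be extracted as a table; a minimal sketch using timetk's tk_stl_diagnostics(), assuming the same defaults as the plot:

demand %>%  
    tk_stl_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue
    ) %>%
    glimpse()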

Anomaly Diagnostics

demand %>% 
   plot_anomaly_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue,
      .title = "Anomaly Diagnostics of the SoValue Time Series",
      .anom_color = "#FB3029", 
      .max_anomalies = 0.07, 
      .alpha = 0.05
    )
## frequency = 7 observations per 1 week
## trend = 31 observations per 1 month
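
To see which dates are flagged rather than only plotting them, the underlying anomaly table can be pulled with tk_anomaly_diagnostics(); a minimal sketch, assuming the same parameters as the plot:

demand %>% 
   tk_anomaly_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue,
      .max_anomalies = 0.07,
      .alpha = 0.05
   ) %>% 
   filter(anomaly == "Yes") %>% 
   select(SoEnteredDate, observed)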

ACF

p1 <- demand %>% 
   tk_acf_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue
   ) %>%
   ggplot(aes(x = lag, y = ACF)) +
   # Add horizontal line at y = 0
    geom_hline(yintercept = 0) +
    # Plot autocorrelations
    geom_point(size = 2, color = "steelblue") +
    geom_segment(aes(xend = lag, yend = 0), size = 1) +
    # Add cutoffs
    geom_line(aes(y = .white_noise_upper), color = "blue", linetype = 2) +
    geom_line(aes(y = .white_noise_lower), color = "red", linetype = 2) +
    # Aesthetics
    expand_limits(y = c(-1, 1)) +
    theme_light() +
    theme(
        legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1),
        plot.title = element_text(hjust = 0.5)
    ) + 
   labs(
      title = "Autocorrelation (ACF)",
      subtitle = "SoValue, August to December 2021"
    )
## Max lag exceeds data available. Using max lag: 152

PACF

p2 <- demand %>% 
   tk_acf_diagnostics(
      .date_var = SoEnteredDate,
      .value = SoValue
   ) %>%
   ggplot(aes(x = lag, y = PACF)) +
   # Add horizontal line at y = 0
    geom_hline(yintercept = 0) +
    # Plot autocorrelations
    geom_point(size = 2, color = "red") +
    geom_segment(aes(xend = lag, yend = 0), size = 1) +
    # Add cutoffs
    geom_line(aes(y = .white_noise_upper), color = "black", linetype = 2) +
    geom_line(aes(y = .white_noise_lower), color = "black", linetype = 2) +
    # Aesthetics
    expand_limits(y = c(-1, 1)) +
    theme_light() +
    theme(
        legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1),
        plot.title = element_text(hjust = 0.5)
    ) + 
   labs(
      title = "Partial Autocorrelation (PACF)",
      subtitle = "SoValue, August to December 2021"
    )
## Max lag exceeds data available. Using max lag: 152
cowplot::plot_grid(p1, p2)

Moving Average

demand %>% 
   select(SoEnteredDate, SoValue) %>%
   mutate(adjusted_15_mm_close = slidify_vec(
        .x      = SoValue,
        .period = 15,
        .f      = mean,
        na.rm   = TRUE,
        .align  = "center",
        .partial = TRUE),
    adjusted_30_mm_close = slidify_vec(
        .x      = SoValue,
        .period = 30,
        .f      = mean,
        na.rm   = TRUE,
        .align  = "center",
        .partial = TRUE),
    adjusted_100_mm_close = slidify_vec(
        .x      = SoValue,
        .period = 100,
        .f      = mean,
        na.rm   = TRUE,
        .align  = "center",
        .partial = TRUE),
    adjusted_300_mm_close = slidify_vec(
        .x      = SoValue,
        .period = 300,
        .f      = mean,
        na.rm   = TRUE,
        .align  = "center",
        .partial = TRUE)
    ) %>% 
  pivot_longer(contains("close"), names_repair = "unique") %>%
  ggplot(aes(x=SoEnteredDate, y=value)) + 
   geom_line(aes(color=name), size=1) + 
  theme_light() + 
   theme(legend.position = "bottom", plot.title = element_text(hjust=0.5)) + 
  guides(color = guide_legend(nrow=2, byrow=TRUE)) + 
  labs(
    title = "Moving Average (MA) Smoothing",
    subtitle = "SO Sales Value, August to December 2021",
    y = "SO Sales Value") + 
   scale_colour_lancet() + 
  scale_x_date(expand = c(0,0))

Lag Plot

demand %>% 
   select(SoEnteredDate, SoValue) %>%
   as_tsibble(index = SoEnteredDate)  %>%
   gg_lag(SoValue, geom = "point") + 
   theme_light()

Model

demand_tbl <- demand %>%
   tk_augment_timeseries_signature() %>%
   mutate(
    # Log-interval transform (bounded below at 0), then standardize
    SoValue_transformed = log_interval_vec(SoValue, limit_lower = 0, offset = 1),
    SoValue_transformed = standardize_vec(SoValue_transformed),
    # Round trip: invert with the parameters printed below to recover SoValue
    SoValue_transformed1 = standardize_inv_vec(SoValue_transformed, mean = -1.87644984320702, sd = 1.89587378623263),
    SoValue_transformed1 = log_interval_inv_vec(SoValue_transformed1, limit_lower = 0, limit_upper = 171308577084.647, offset = 1)
  ) %>% 
  select(SoEnteredDate, SoValue, SoValue_transformed, SoValue_transformed1, everything())
## tk_augment_timeseries_signature(): Using the following .date_var variable: SoEnteredDate
## log_interval_vec(): 
##  Using limit_lower: 0
##  Using limit_upper: 171308577084.647
##  Using offset: 1
## Standardization Parameters
## mean: -1.87644984320702
## standard deviation: 1.89587378623263
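
A quick sanity check (a sketch, assuming the inverse parameters above match the ones reported by standardize_vec()) confirms the round trip recovers the original series:

all.equal(demand_tbl$SoValue, demand_tbl$SoValue_transformed1)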
demand_tbl %>% 
  glimpse()
## Rows: 153
## Columns: 33
## $ SoEnteredDate        <date> 2021-08-01, 2021-08-02, 2021-08-03, 2021-08-04, ~
## $ SoValue              <dbl> 2546578751, 63911263370, 25530734478, 42962880054~
## $ SoValue_transformed  <dbl> -1.222280096, 0.715981356, 0.070811152, 0.4125058~
## $ SoValue_transformed1 <dbl> 2546578752, 63911263371, 25530734479, 42962880055~
## $ SoQuantity           <dbl> 1986.886, 9277.690, 6383.204, 13672.854, 11737.04~
## $ index.num            <dbl> 1627776000, 1627862400, 1627948800, 1628035200, 1~
## $ diff                 <dbl> NA, 86400, 86400, 86400, 86400, 86400, 86400, 864~
## $ year                 <int> 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2~
## $ year.iso             <int> 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2~
## $ half                 <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2~
## $ quarter              <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3~
## $ month                <int> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8~
## $ month.xts            <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7~
## $ month.lbl            <ord> August, August, August, August, August, August, A~
## $ day                  <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15~
## $ hour                 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ minute               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ second               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ hour12               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ am.pm                <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ wday                 <int> 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3~
## $ wday.xts             <int> 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2~
## $ wday.lbl             <ord> Sunday, Monday, Tuesday, Wednesday, Thursday, Fri~
## $ mday                 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15~
## $ qday                 <int> 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 4~
## $ yday                 <int> 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,~
## $ mweek                <int> 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3~
## $ week                 <int> 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 3~
## $ week.iso             <int> 30, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 3~
## $ week2                <int> 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1~
## $ week3                <int> 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0~
## $ week4                <int> 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1~
## $ mday7                <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3~
p1 <- demand_tbl %>%
   plot_time_series_regression(
    SoEnteredDate,
    .formula = SoValue_transformed ~ SoEnteredDate,
    .interactive=FALSE,
    .show_summary=TRUE,
    .title = "Linear Model Without Implementing Feature Engineering",
    .legend_show = FALSE,
    .line_color = "#D1362F",
    .smooth_color = "#EA3B46",
    .line_size = 1
    )
## 
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7002 -0.1176  0.2521  0.6017  2.1961 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -19.377099  34.703933  -0.558    0.577
## SoEnteredDate   0.001024   0.001835   0.558    0.577
## 
## Residual standard error: 1.002 on 151 degrees of freedom
## Multiple R-squared:  0.00206,    Adjusted R-squared:  -0.004548 
## F-statistic: 0.3118 on 1 and 151 DF,  p-value: 0.5774
seasonality_formula <- as.formula(
  SoValue_transformed ~ 
    splines::ns(index.num) +
    day + wday.lbl + month
)

p2 <- demand_tbl %>% 
  plot_time_series_regression(
    .date_var = SoEnteredDate,
    .formula = seasonality_formula,
    .interactive=FALSE,
    .show_summary=TRUE,
    .title = "Linear Model With Feature Engineering",
    .legend_show = FALSE,
    .line_color = "#D1362F", 
    .smooth_color = "#EA3B46",
    .line_size = 1
    )
## 
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.60847 -0.27840  0.01038  0.30788  1.82567 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -52.7376    47.6150  -1.108  0.26990    
## splines::ns(index.num) -40.5837    36.8741  -1.101  0.27292    
## day                      0.2116     0.1944   1.088  0.27839    
## wday.lbl.L              -1.3430     0.1261 -10.651  < 2e-16 ***
## wday.lbl.Q              -1.2631     0.1260 -10.022  < 2e-16 ***
## wday.lbl.C              -0.9742     0.1256  -7.756 1.50e-12 ***
## wday.lbl^4              -0.5403     0.1252  -4.314 2.97e-05 ***
## wday.lbl^5              -0.3563     0.1251  -2.848  0.00506 ** 
## wday.lbl^6              -0.2379     0.1252  -1.900  0.05944 .  
## month                    6.5651     5.9326   1.107  0.27032    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5866 on 143 degrees of freedom
## Multiple R-squared:  0.6763, Adjusted R-squared:  0.6559 
## F-statistic:  33.2 on 9 and 143 DF,  p-value: < 2.2e-16
cowplot::plot_grid(p1, p2, nrow=2)

# Lambda reported by box_cox_vec(lambda = "auto") below, stored for the inverse transform later
lambda <- 0.184435805917973

ts_boxcox_SoValue <- demand_tbl %>%
   mutate(
      SoValue_box_cox = box_cox_vec(SoValue, lambda = "auto")
   )
## box_cox_vec(): Using value for lambda: 0.184435805917973
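
Rather than hard-coding lambda, it can also be estimated directly; a minimal sketch, assuming timetk::auto_lambda() with its default Guerrero method:

lambda <- timetk::auto_lambda(demand_tbl$SoValue)
lambda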
ts_boxcox_SoValue %>% 
    ggplot(aes(x = SoEnteredDate, y = SoValue_box_cox)) + 
    geom_line(size = 1, color = "#5C5C5C") + 
    geom_point(size = 0.5, color = "#5C5C5C") + 
    theme_bw() + 
  scale_y_continuous(labels = function(x) format(x, big.mark = ",", scientific = FALSE)) +
  labs(
    title = "SO Sales Value Across Time",
    subtitle = "Box-Cox Transformation"
  ) + theme(plot.title = element_text(hjust = 0.5, face = "bold")) 

ts_boxcox_SoValue <- ts_boxcox_SoValue %>% 
  select(SoEnteredDate, SoValue_box_cox)

# Hold out the last 14 days (roughly 9% of the 153-day series) as the testing set
splitted_df <- time_series_split(ts_boxcox_SoValue, assess = "14 days", cumulative = TRUE)
## Using date_var: SoEnteredDate
new_colors_pal <- c("#27223C", "#D1362F")
# Extracting training and testing sets
train_df <- training(splitted_df)
test_df <- testing(splitted_df)


splitted_df %>% 
  tk_time_series_cv_plan() %>% 
  ggplot(aes(x=SoEnteredDate, y = SoValue_box_cox, color=.key)) + 
   geom_line(size=1) + 
  scale_color_manual(values = new_colors_pal) + theme_bw() + 
  labs(
    title = "Time Series Split",
    subtitle = "Boxcox Value"
  ) +  geom_label(
    label = "Training Set",
    color = "#27223C",
    vjust = 0.5,
    aes(x=as.Date("2021-11-24"),
        y = 450)) + 
  geom_label(
    label = "Testing Set",
    color = "#D1362F",
    vjust = 0.5,
    aes(x = as.Date("2021-12-30"),
        y = 500)) +
  theme(legend.position = "none") + scale_x_date(expand = c(0,0)) + 
  geom_vline(xintercept = as.Date("2021-12-25"), lty="dashed")

prophet_fit <- prophet_reg() %>% 
  set_engine("prophet") %>% 
  # Fit on the training set only so the 14-day hold-out stays unseen
  fit(SoValue_box_cox ~ SoEnteredDate, data = train_df)
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
model_tbl <- modeltime_table(
  prophet_fit
)

calibration_tbl <- model_tbl %>% 
  modeltime_calibrate(
    new_data = test_df
  )

forecast_tbl <- calibration_tbl %>% 
  modeltime_forecast(actual_data = ts_boxcox_SoValue)
## Using '.calibration_data' to forecast.
forecast_tbl %>% 
  rename(
    "time" = ".index",
    "value" = ".value"
  ) %>% 
  ggplot(aes(x=time, y=value, color=.key)) + geom_line(size=1) + geom_point(size=0.5) + theme_bw() + scale_color_manual(values=new_colors_pal) + 
  theme(legend.position = "bottom") + 
  labs(
    title = "Forecasting with Prophet",
    subtitle = "SO Sales Value (Box-Cox scale)",
    y = "Box-Cox Transformed Value"
  ) + scale_x_date(expand = c(0,0)) 

calibration_tbl %>% 
  modeltime_accuracy() %>% 
  pivot_longer(cols = mae:rsq) %>% 
  mutate(
    name = name %>% as_factor() %>% fct_reorder(value) %>% fct_rev()
  ) %>% 
  ggplot(aes(x=name, y=value, fill=name)) + geom_col(color="black") + 
  theme_bw() + 
   ggthemes::scale_fill_tableau() + theme(legend.position = "bottom") + 
  geom_label(aes(label = round(value,2)), size=3, color="white") + 
  labs(
    title = "Metrics for evaluating Accuracy",
    subtitle = "Prophet Model with BoxCox Transformation"
  )
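
The same metrics can also be reported as a table; a minimal sketch using modeltime's accuracy table helper:

calibration_tbl %>% 
  modeltime_accuracy() %>% 
  table_modeltime_accuracy(.interactive = FALSE)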

refit_tbl <- calibration_tbl %>% 
  modeltime_refit(data = ts_boxcox_SoValue)
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# Forecast the next 60 days from the refitted model
forecast_ads_tbl <- refit_tbl %>% 
  modeltime_forecast(h = "60 days", actual_data = ts_boxcox_SoValue)

# Invert the Box-Cox transformation back to the original sales-value scale
forecast_ads_tbl <- forecast_ads_tbl %>% 
  mutate(.value = box_cox_inv_vec(.value, lambda = lambda),
         .conf_lo = box_cox_inv_vec(.conf_lo, lambda = lambda),
         .conf_hi = box_cox_inv_vec(.conf_hi, lambda = lambda))


forecast_ads_tbl %>% 
    rename(
    "time" = ".index",
    "value" = ".value"
  )  %>% 
    ggplot(aes(x=time, y=value, color=.key)) + geom_line(size=1) + 
  theme_bw() + scale_color_manual(values = new_colors_pal) + 
  labs(
    title = "Prophet Forecast for the Next 60 Days",
    subtitle = "SO Sales Value (Box-Cox inverted)",
    y = "Forecast SO Sales Value",
    caption = str_glue("Lambda: {lambda}")
  ) + 
  scale_y_continuous(labels=function(x) format(x, big.mark = ",", scientific = FALSE)) + 
  theme(legend.position = "bottom") + geom_smooth(method = "loess", color = "#191919", lty="dashed") + scale_x_date(expand = c(0,0)) 
## `geom_smooth()` using formula 'y ~ x'

forecast_ads_tbl %>% 
   filter(.key == "prediction") 
## # A tibble: 60 x 7
##    .model_id .model_desc .key       .index           .value    .conf_lo .conf_hi
##        <int> <chr>       <fct>      <date>            <dbl>       <dbl>    <dbl>
##  1         1 PROPHET     prediction 2022-01-01   874490232.    8599488.  1.02e10
##  2         1 PROPHET     prediction 2022-01-02 41524149010. 6918053368.  1.59e11
##  3         1 PROPHET     prediction 2022-01-03 39847081086. 6531743133.  1.54e11
##  4         1 PROPHET     prediction 2022-01-04 39087455003. 6358468225.  1.52e11
##  5         1 PROPHET     prediction 2022-01-05 48439162376. 8561982196.  1.80e11
##  6         1 PROPHET     prediction 2022-01-06 34204539938. 5271561017.  1.37e11
##  7         1 PROPHET     prediction 2022-01-07 44848156843. 7698387197.  1.69e11
##  8         1 PROPHET     prediction 2022-01-08   890013183.    8960987.  1.03e10
##  9         1 PROPHET     prediction 2022-01-09 41884491124. 7001719105.  1.60e11
## 10         1 PROPHET     prediction 2022-01-10 40195518407. 6611582687.  1.55e11
## # ... with 50 more rows

Top-Down Breakdown to SKU Level

sku_ratio <- df_soline %>% 
   group_by(SoProductId) %>% 
   summarise(SoValue = sum(SoUnitCost*SoQuantity)) %>% 
   ungroup() %>% 
   mutate(ratio_sales = SoValue/sum(SoValue))


forecast_df_sku <- forecast_ads_tbl %>% 
   filter(.key == "prediction") %>%
   distinct(.index, .value) %>%
   # Cross join every forecast date with every SKU via a dummy key
   mutate(key = 1) %>%
      left_join(sku_ratio %>% 
                   distinct(SoProductId) %>%
                   mutate(key = 1),
                by = "key") %>%
   transmute(SoProductId = SoProductId,
             SoEnteredDate = .index,
             forecast_SoValue = .value) %>%
   # Disaggregate the total forecast by each SKU's historical share of sales value
   left_join(sku_ratio %>% 
                select(SoProductId, ratio_sales),
             by = "SoProductId") %>%
   mutate(forecast_SoValue_SKU = forecast_SoValue*ratio_sales)


View(forecast_df_sku)
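
Because the SKU ratios sum to one, the SKU-level values should add back up to the daily total forecast; a quick check (a sketch, assuming the objects above):

forecast_df_sku %>% 
   group_by(SoEnteredDate) %>% 
   summarise(total_SKU_forecast = sum(forecast_SoValue_SKU),
             total_forecast = first(forecast_SoValue)) %>% 
   mutate(difference = total_SKU_forecast - total_forecast)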

Supply Planning

ABC classification

abc_classification <- df_soline %>%
   group_by(SoProductId) %>%
   summarise(SoValue = sum(SoQuantity*SoUnitCost)) %>%
   ungroup() %>% 
   mutate(percent_SoValue = SoValue/sum(SoValue)) %>%
   arrange(desc(percent_SoValue)) %>%
   transmute(
      SoProductId = SoProductId,
      cumulative_percent_SoValue = cumsum(percent_SoValue),
      abc_classification = case_when(cumulative_percent_SoValue < 0.8 ~ "A",
                                         cumulative_percent_SoValue > 0.95 ~ "C",
                                         TRUE ~ "B")
      ) 
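
A quick tally shows how the SKUs fall into the A, B and C classes (a sketch, assuming the classification above):

abc_classification %>% 
   count(abc_classification)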

XYZ analysis

xyz_classification <- df_soline %>%
   group_by(SoProductId, SoEnteredDate) %>%
   summarise(SoValue = sum(SoQuantity*SoUnitCost)) %>% 
   ungroup() %>%
   group_by(SoProductId) %>%
   summarise(mean_SoValue = mean(SoValue),
             std_SoValue = tidyquant::STDEV(SoValue)) %>%
   ungroup() %>%
   mutate(coeff_variation = std_SoValue/mean_SoValue,
          xyz_classification = case_when(coeff_variation < 1 ~ "X",
                                         coeff_variation > 2 ~ "Z",
                                         TRUE ~ "Y")) 
## `summarise()` has grouped output by 'SoProductId'. You can override using the `.groups` argument.

Combine ABC and XYZ into a segmentation

combine_abcxyz <- abc_classification %>% 
   left_join(xyz_classification, by = "SoProductId") %>% 
   select(SoProductId, abc_classification, xyz_classification, mean_SoValue, std_SoValue)

combine_abcxyz
## # A tibble: 19 x 5
##    SoProductId abc_classification xyz_classification mean_SoValue std_SoValue
##    <chr>       <chr>              <chr>                     <dbl>       <dbl>
##  1 SKU02       A                  X                     63432856.   41469793.
##  2 SKU01       A                  Y                     26292428.   26439009.
##  3 SKU09       A                  X                     19468751.   16252103.
##  4 SKU03       B                  Y                      9438450.   15201238.
##  5 SKU06       B                  Y                      4212497.    5965093.
##  6 SKU07       B                  X                      3170418.    2347783.
##  7 SKU10       B                  Y                      2987871.    3378320.
##  8 SKU13       B                  X                      2986203.    1814053.
##  9 SKU08       B                  X                      2657091.    1748820.
## 10 SKU05       B                  X                      2266377.    1929744.
## 11 SKU16       C                  Y                      2278063.    3406340.
## 12 SKU14       C                  X                      1381570.     914395.
## 13 SKU17       C                  X                      1324321.     942007.
## 14 SKU15       C                  X                      1264916.    1042418.
## 15 SKU04       C                  Z                      1241745.    4060086.
## 16 SKU11       C                  Z                       729414.    2004652.
## 17 SKU19       C                  Y                      1687552.    1713646.
## 18 SKU12       C                  Z                       590568.    2082984.
## 19 SKU20       C                  X                       621568.     471857.

Supply Lead Time

combine_supply_leadtime <- df_poline %>% 
   transmute(PoOrderLineId = PoOrderLineId, 
             PoEnteredDate = PoEnteredDate,
             PoProductId = PoProductId) %>% 
   inner_join(df_roline %>% 
                 select(PoOrderLineId, RoArrivalDate, RoProductId),
              by = c("PoOrderLineId" = "PoOrderLineId", "PoProductId" = "RoProductId")) %>% 
   mutate(supply_leadtime  = as.numeric(difftime(RoArrivalDate, PoEnteredDate, units = "days"))) %>%
   group_by(PoProductId) %>% 
   summarise(avg_supply_leadtime = mean(supply_leadtime),
             std_supply_leadtime = tidyquant::STDEV(supply_leadtime))

Supply Calculation

final_supply <- combine_abcxyz %>% 
   left_join(combine_supply_leadtime, by = c("SoProductId" = "PoProductId")) %>%
   mutate(abcxyz_classification = paste(abc_classification, xyz_classification, sep = "")) %>%
   transmute(SKU = SoProductId, 
             abcxyz_classification = abcxyz_classification,
             avg_demand_value = mean_SoValue,
             std_demand_value = std_SoValue,
             avg_supply_leadtime = avg_supply_leadtime,
             std_supply_leadtime = std_supply_leadtime) %>%
   mutate(target_service_level = case_when(abcxyz_classification %in% c("AX", "AY", "BX") ~ 0.95,
                                           abcxyz_classification == "BY" ~ 0.9,
                                           abcxyz_classification %in% c("CX", "CY", "CZ") ~ 0.8)) %>%
   mutate(inventory_policy = case_when(abcxyz_classification %in% c("AX", "AY", "BX", "BY") ~ "Continuous Review Policy",
                                       abcxyz_classification %in% c("CX", "CY", "CZ") ~ "Periodic Review Policy")) %>%
   # Safety stock: z * sqrt(avg lead time * demand variance + (lead time std * avg demand)^2)
   mutate(z_score_value = qnorm(target_service_level),
          est_safety_stock_value = z_score_value*sqrt((avg_supply_leadtime*std_demand_value^2) + (std_supply_leadtime*avg_demand_value)^2),
          est_inventory_value = est_safety_stock_value + (avg_supply_leadtime*avg_demand_value)
   ) %>%
   select(SKU, abcxyz_classification, est_safety_stock_value, est_inventory_value, everything())

View(final_supply)
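
For reference, est_safety_stock_value follows the combined-uncertainty safety stock formula SS = z * sqrt(L * sd_d^2 + (sd_L * d)^2), where d and sd_d are the mean and standard deviation of daily demand value and L and sd_L are the mean and standard deviation of the supply lead time. A small worked example with hypothetical numbers (not taken from the data above):

# Hypothetical inputs for illustration only
z    <- qnorm(0.95)        # 95% target service level
d    <- 1000; sd_d <- 300  # mean and std of daily demand value
L    <- 2;    sd_L <- 0.5  # mean and std of supply lead time (days)
ss   <- z * sqrt(L * sd_d^2 + (sd_L * d)^2)
ss                         # estimated safety stock value
ss + L * d                 # estimated inventory value (cycle + safety)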