# Set working directory
setwd("C:/Users/DellPC/Desktop/thesis")
# Load packages
suppressMessages(library(tidyverse))
suppressMessages(library(tidymodels))
suppressMessages(library(readxl))
suppressMessages(library(timetk))
suppressMessages(library(tidyquant))
suppressMessages(library(modeltime))
suppressMessages(library(fpp3))
suppressMessages(library(skimr))
suppressMessages(library(janitor))
suppressMessages(library(DataExplorer))
suppressMessages(library(scales))
suppressMessages(library(ggthemes))
suppressMessages(library(ggsci))
# Add up date function
add_up_date <- function (x) {
ymd(as.character(x)) %m+% years(4)
}
# Read the Product Master Data
df_product_master <- read_excel('data_final.xlsx', sheet = 'ProductMaster')
# Read the Historical Sales Data
df_soline <- read_excel('data_final.xlsx', sheet = 'SOLine') %>%
mutate_if(is.POSIXct, .funs = add_up_date)
df_toline <- read_excel('data_final.xlsx', sheet = 'TOLine') %>%
mutate_if(is.POSIXct, .funs = add_up_date)
# Read the Historical Supply Data
df_roline <- read_excel('data_final.xlsx', sheet = 'ROLine') %>%
mutate_if(is.POSIXct, .funs = add_up_date)
df_poline <- read_excel('data_final.xlsx', sheet = 'POLine') %>%
mutate_if(is.POSIXct, .funs = add_up_date)
df_init_inventory <- read_excel('data_final.xlsx', sheet = 'Initial_Inventories')
glimpse(df_product_master)
## Rows: 19
## Columns: 8
## $ ProductId <chr> "SKU01", "SKU02", "SKU03", "SKU04", "SKU05", "SKU06",~
## $ Name <chr> "Strawberry", "Mango", "Rice", "Potato", "Tomato", "S~
## $ ProductGroup <chr> "Fruits", "Fruits", "Rice", "Vegetables", "Vegetables~
## $ KgPerCarton <dbl> 3.000, 12.336, 3.000, 1.884, 4.320, 6.270, 2.800, 6.2~
## $ CartonsPerPallet <dbl> 250, 75, 250, 456, 96, 136, 96, 125, 160, 136, 456, 4~
## $ KgPerPallet <dbl> 750.000, 925.200, 750.000, 859.104, 414.720, 852.720,~
## $ SalesUnit <chr> "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg",~
## $ SupplierUnit <chr> "Car", "Car", "Car", "Car", "Car", "Kg", "Kg", "Kg", ~
glimpse(df_soline)
## Rows: 13,584
## Columns: 11
## $ SoOrderLineId <dbl> 2082, 2083, 2094, 2098, 2138, 2154, 2167, 2168, 2185, 2~
## $ SoOrderId <dbl> 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 40, 41,~
## $ SoEnteredDate <date> 2021-08-02, 2021-08-02, 2021-08-02, 2021-08-02, 2021-0~
## $ SoExpectedDate <date> 2021-08-02, 2021-08-02, 2021-08-02, 2021-08-02, 2021-0~
## $ SoCustomerId <chr> "CUS009", "CUS009", "CUS009", "CUS009", "CUS009", "CUS0~
## $ SoProductId <chr> "SKU19", "SKU11", "SKU09", "SKU04", "SKU14", "SKU05", "~
## $ SoQuantity <dbl> 6.000, 3.744, 1.260, 5.652, 0.660, 1.920, 1.254, 3.780,~
## $ SoInKg <dbl> 6.000, 3.744, 1.260, 5.652, 0.660, 1.920, 1.254, 3.780,~
## $ SoInCarton <dbl> 1.00000, 2.00000, 0.25000, 3.00000, 0.11111, 0.44444, 0~
## $ SoUnitCost <dbl> 106684.31, 23577.89, 172811.98, 24108.71, 27312.76, 272~
## $ SoSalesUnit <chr> "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "~
glimpse(df_toline)
## Rows: 13,465
## Columns: 10
## $ ToOrderLineId <dbl> 1903, 1907, 1916, 1990, 2015, 2046, 2047, 2049, 2059, ~
## $ SoOrderLineId <dbl> 2632, 2579, 2196, 2564, 4172, 2581, 2558, 2557, 4108, ~
## $ ToCustomerId <chr> "CUS003", "CUS025", "CUS015", "CUS025", "CUS010", "CUS~
## $ ToDepartureDate <date> 2021-08-02, 2021-08-02, 2021-08-02, 2021-08-02, 2021-~
## $ ToArrivalDate <date> 2021-08-03, 2021-08-03, 2021-08-03, 2021-08-02, 2021-~
## $ ToProductId <chr> "SKU01", "SKU01", "SKU01", "SKU03", "SKU07", "SKU04", ~
## $ ToInKg <dbl> 600.0000, 150.0000, 45.0000, 90.0000, 28.0000, 9.4200,~
## $ ToInCarton <dbl> 200.00, 50.00, 15.00, 30.00, 10.00, 5.00, 5.00, 5.00, ~
## $ ToQuantity <dbl> 600.0000, 150.0000, 45.0000, 90.0000, 28.0000, 9.4200,~
## $ ToSalesUnit <chr> "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", ~
glimpse(df_roline)
## Rows: 1,777
## Columns: 10
## $ RoOrderLineId <dbl> 29293, 29294, 29295, 29296, 29298, 29308, 29323, 29369~
## $ PoOrderLineId <dbl> 4659, 4659, 4659, 4659, 4688, 4659, 4409, 4459, 4428, ~
## $ RoSupplierId <chr> "SU001", "SU001", "SU001", "SU001", "SU002", "SU001", ~
## $ RoDepartureDate <date> 2021-08-02, 2021-08-02, 2021-08-02, 2021-08-02, 2021-~
## $ RoArrivalDate <date> 2021-08-03, 2021-08-03, 2021-08-03, 2021-08-03, 2021-~
## $ RoProductId <chr> "SKU01", "SKU01", "SKU01", "SKU01", "SKU03", "SKU01", ~
## $ RoInKg <dbl> 3000.00, 2997.00, 2997.00, 3000.00, 2108.00, 738.00, 6~
## $ RoInCarton <dbl> 1000.00, 999.00, 999.00, 1000.00, 702.67, 246.00, 136.~
## $ RoSupplierQty <dbl> 1000.00, 999.00, 999.00, 1000.00, 702.67, 246.00, 685.~
## $ RoSupplierUnit <chr> "Car", "Car", "Car", "Car", "Car", "Car", "Kg", "Car",~
glimpse(df_poline)
## Rows: 1,769
## Columns: 11
## $ PoOrderId <dbl> 4409, 4410, 4428, 4429, 4459, 4460, 4488, 4530, 4531, 4~
## $ PoOrderLineId <dbl> 4409, 4410, 4428, 4429, 4459, 4460, 4488, 4530, 4531, 4~
## $ PoSupplierId <chr> "SU004", "SU004", "SU003", "SU003", "SU003", "SU003", "~
## $ PoEnteredDate <date> 2021-08-01, 2021-08-01, 2021-08-01, 2021-08-01, 2021-0~
## $ PoExpectedDate <date> 2021-08-09, 2021-08-10, 2021-08-10, 2021-08-10, 2021-0~
## $ PoProductId <chr> "SKU09", "SKU09", "SKU07", "SKU07", "SKU05", "SKU05", "~
## $ PoQuantity <dbl> 685.44, 70.56, 75.60, 61.60, 40.00, 6.00, 1911.00, 73.4~
## $ PoInKg <dbl> 685.44, 70.56, 75.60, 61.60, 172.80, 25.92, 23574.00, 7~
## $ PoInCarton <dbl> 136, 14, 27, 22, 40, 6, 1911, 17, 3, 2, 22, 1, 1, 7, 42~
## $ PoSupplierUnit <chr> "Kg", "Kg", "Kg", "Kg", "Car", "Car", "Car", "Kg", "Kg"~
## $ PoUnitCost <dbl> 114173.00, 114173.00, 25000.00, 25000.00, 64800.00, 648~
glimpse(df_init_inventory)
## Rows: 19
## Columns: 3
## $ ProductId <chr> "SKU01", "SKU02", "SKU03", "SKU04", "SKU05", "SKU06", "SKU07~
## $ Quantity <dbl> 6485.0000, 29791.5000, 6154.9310, 1400.5000, 275.5000, 1336.~
## $ Unit <chr> "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", "Kg", ~
demand <- df_soline %>%
group_by(SoEnteredDate) %>%
summarise(SoQuantity = sum(SoQuantity),
SoValue = sum(SoQuantity*SoUnitCost)) %>%
ungroup()
glimpse(demand)
## Rows: 153
## Columns: 3
## $ SoEnteredDate <date> 2021-08-01, 2021-08-02, 2021-08-03, 2021-08-04, 2021-08~
## $ SoQuantity <dbl> 1986.886, 9277.690, 6383.204, 13672.854, 11737.044, 4531~
## $ SoValue <dbl> 2546578751, 63911263370, 25530734478, 42962880054, 53582~
demand %>%
plot_time_series(
.date_var = SoEnteredDate,
.value = SoValue,
.smooth_color = "#18BC9C",
.smooth_size = 0.5,
.title = "The time series of total SO Sales Value During This Period"
)
demand %>%
plot_missing(
ggtheme = theme_light(),
title = "Percentage of Missing Values by Column"
)
demand %>%
plot_seasonal_diagnostics(
.date_var = SoEnteredDate,
.value = SoValue
)
demand %>%
plot_stl_diagnostics(
.date_var = SoEnteredDate,
.value = SoValue,
.interactive = TRUE
)
## frequency = 7 observations per 1 week
## trend = 31 observations per 1 month
demand %>%
plot_anomaly_diagnostics(
.date = SoEnteredDate,
.value = SoValue,
.title = "Anomaly Diagnostics SoValue Time Series",
.anom_color ="#FB3029",
.max_anomalies = 0.07,
.alpha = 0.05
)
## frequency = 7 observations per 1 week
## trend = 31 observations per 1 month
p1 <- demand %>%
tk_acf_diagnostics(
.date_var = SoEnteredDate,
.value = SoValue
) %>%
ggplot(aes(x = lag, y = ACF)) +
# Add horizontal line a y=0
geom_hline(yintercept = 0) +
# Plot autocorrelations
geom_point(size = 2, color = "steelblue") +
geom_segment(aes(xend = lag, yend = 0), size = 1) +
# Add cutoffs
geom_line(aes(y = .white_noise_upper), color = "blue", linetype = 2) +
geom_line(aes(y = .white_noise_lower), color = "red", linetype = 2) +
# Aesthetics
expand_limits(y = c(-1, 1)) +
theme_light() +
theme(
legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5)
) +
labs(
title = "AutoCorrelation (ACF)",
subtitle = "SoValue Through August 2021 to December 2021"
)
## Max lag exceeds data available. Using max lag: 152
p2 <- demand %>%
tk_acf_diagnostics(
.date_var = SoEnteredDate,
.value = SoValue
) %>%
ggplot(aes(x = lag, y = PACF)) +
# Add horizontal line a y=0
geom_hline(yintercept = 0) +
# Plot autocorrelations
geom_point(size = 2, color = "red") +
geom_segment(aes(xend = lag, yend = 0), size = 1) +
# Add cutoffs
geom_line(aes(y = .white_noise_upper), color = "black", linetype = 2) +
geom_line(aes(y = .white_noise_lower), color = "black", linetype = 2) +
# Aesthetics
expand_limits(y = c(-1, 1)) +
theme_light() +
theme(
legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5)
) +
labs(
title = "Partial Correlation",
subtitle = "SoValue Through August 2021 to December 2021"
)
## Max lag exceeds data available. Using max lag: 152
cowplot::plot_grid(p1, p2)
demand %>%
select(SoEnteredDate, SoValue) %>%
mutate(adjusted_15_mm_close = slidify_vec(
.x = SoValue,
.period = 15,
.f = mean,
na.rm = TRUE,
.align = "center",
.partial = TRUE),
adjusted_30_mm_close = slidify_vec(
.x = SoValue,
.period = 30,
.f = mean,
na.rm = TRUE,
.align = "center",
.partial = TRUE),
adjusted_100_mm_close = slidify_vec(
.x = SoValue,
.period = 100,
.f = mean,
na.rm = TRUE,
.align = "center",
.partial = TRUE),
adjusted_300_mm_close = slidify_vec(
.x = SoValue,
.period = 300,
.f = mean,
na.rm = TRUE,
.align = "center",
.partial = TRUE)
) %>%
pivot_longer(contains("close"), names_repair = "unique") %>%
ggplot(aes(x=SoEnteredDate, y=value)) +
geom_line(aes(color=name), size=1) +
theme_light() +
theme(legend.position = "bottom", plot.title = element_text(hjust=0.5)) +
guides(color = guide_legend(nrow=2, byrow=TRUE)) +
labs(
title = "Moving Average (MA) as a Metric",
subtitle = str_glue("GS Stock 2015 - 2017 "),
y = "Closing Price") +
scale_colour_lancet()+
scale_x_date(expand = c(0,0),)
demand %>%
select(SoEnteredDate, SoValue) %>%
as_tsibble(index = SoEnteredDate) %>%
gg_lag(SoValue, geom = "point") +
theme_light()
demand_tbl <- demand %>%
tk_augment_timeseries_signature() %>%
mutate(
SoValue_transformed = log_interval_vec(SoValue, limit_lower = 0, offset = 1),
SoValue_transformed = standardize_vec(SoValue_transformed),
SoValue_transformed1 = standardize_inv_vec(SoValue_transformed, mean = -1.87644984320702, sd = 1.89587378623263),
SoValue_transformed1 = log_interval_inv_vec(SoValue_transformed1, 0, limit_upper = 171308577084.647, offset = 0)
) %>%
select(SoEnteredDate, SoValue, SoValue_transformed,SoValue_transformed1 , everything())
## tk_augment_timeseries_signature(): Using the following .date_var variable: SoEnteredDate
## log_interval_vec():
## Using limit_lower: 0
## Using limit_upper: 171308577084.647
## Using offset: 1
## Standardization Parameters
## mean: -1.87644984320702
## standard deviation: 1.89587378623263
demand_tbl %>%
glimpse()
## Rows: 153
## Columns: 33
## $ SoEnteredDate <date> 2021-08-01, 2021-08-02, 2021-08-03, 2021-08-04, ~
## $ SoValue <dbl> 2546578751, 63911263370, 25530734478, 42962880054~
## $ SoValue_transformed <dbl> -1.222280096, 0.715981356, 0.070811152, 0.4125058~
## $ SoValue_transformed1 <dbl> 2546578752, 63911263371, 25530734479, 42962880055~
## $ SoQuantity <dbl> 1986.886, 9277.690, 6383.204, 13672.854, 11737.04~
## $ index.num <dbl> 1627776000, 1627862400, 1627948800, 1628035200, 1~
## $ diff <dbl> NA, 86400, 86400, 86400, 86400, 86400, 86400, 864~
## $ year <int> 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2~
## $ year.iso <int> 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2~
## $ half <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2~
## $ quarter <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3~
## $ month <int> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8~
## $ month.xts <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7~
## $ month.lbl <ord> August, August, August, August, August, August, A~
## $ day <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15~
## $ hour <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ minute <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ second <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ hour12 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ am.pm <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ wday <int> 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3~
## $ wday.xts <int> 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2~
## $ wday.lbl <ord> Sunday, Monday, Tuesday, Wednesday, Thursday, Fri~
## $ mday <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15~
## $ qday <int> 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 4~
## $ yday <int> 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,~
## $ mweek <int> 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3~
## $ week <int> 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 3~
## $ week.iso <int> 30, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 3~
## $ week2 <int> 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1~
## $ week3 <int> 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0~
## $ week4 <int> 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1~
## $ mday7 <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3~
p1 <- demand_tbl %>%
plot_time_series_regression(
SoEnteredDate,
.formula = SoValue_transformed ~ SoEnteredDate,
.interactive=FALSE,
.show_summary=TRUE,
.title = "Linear Model Without Implementing Feature Engineering",
.legend_show = FALSE,
.line_color = "#D1362F",
.smooth_color = "#EA3B46",
.line_size = 1
)
##
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7002 -0.1176 0.2521 0.6017 2.1961
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -19.377099 34.703933 -0.558 0.577
## SoEnteredDate 0.001024 0.001835 0.558 0.577
##
## Residual standard error: 1.002 on 151 degrees of freedom
## Multiple R-squared: 0.00206, Adjusted R-squared: -0.004548
## F-statistic: 0.3118 on 1 and 151 DF, p-value: 0.5774
seasonality_formula <- as.formula(
SoValue_transformed ~
splines::ns(index.num) +
+ day + wday.lbl + month
)
p2 <- demand_tbl %>%
plot_time_series_regression(
SoValue_transformed,
.formula = seasonality_formula,
.interactive=FALSE,
.show_summary=TRUE,
.title = "Linear Model With Feature Engineering",
.legend_show = FALSE,
.line_color = "#D1362F",
.smooth_color = "#EA3B46",
.line_size = 1
)
##
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.60847 -0.27840 0.01038 0.30788 1.82567
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -52.7376 47.6150 -1.108 0.26990
## splines::ns(index.num) -40.5837 36.8741 -1.101 0.27292
## day 0.2116 0.1944 1.088 0.27839
## wday.lbl.L -1.3430 0.1261 -10.651 < 2e-16 ***
## wday.lbl.Q -1.2631 0.1260 -10.022 < 2e-16 ***
## wday.lbl.C -0.9742 0.1256 -7.756 1.50e-12 ***
## wday.lbl^4 -0.5403 0.1252 -4.314 2.97e-05 ***
## wday.lbl^5 -0.3563 0.1251 -2.848 0.00506 **
## wday.lbl^6 -0.2379 0.1252 -1.900 0.05944 .
## month 6.5651 5.9326 1.107 0.27032
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5866 on 143 degrees of freedom
## Multiple R-squared: 0.6763, Adjusted R-squared: 0.6559
## F-statistic: 33.2 on 9 and 143 DF, p-value: < 2.2e-16
cowplot::plot_grid(p1, p2, nrow=2)
lambda <- 0.184435805917973
ts_boxcox_SoValue <- demand_tbl %>%
mutate(
SoValue_box_cox = box_cox_vec(SoValue, lambda = "auto")
)
## box_cox_vec(): Using value for lambda: 0.184435805917973
ts_boxcox_SoValue %>%
ggplot(aes(x=SoEnteredDate, y= SoValue_box_cox )) + geom_line(size=1, color="#5C5C5C") + geom_point(size=0.5, color="#5C5C5C") + theme_bw() +
scale_y_continuous(labels=function(x) format(x, big.mark = ",", scientific = FALSE)) +
labs(
title = "Number of ads across time",
subtitle = "Box Cox Transformation"
) + theme(plot.title = element_text(hjust=0.5, face="bold"))
ts_boxcox_SoValue <- ts_boxcox_SoValue %>%
select(SoEnteredDate, SoValue_box_cox)
# 22% is the testing set
# Splitting dataframe
splitted_df <- time_series_split(ts_boxcox_SoValue, assess = "14 days", cumulative = TRUE)
## Using date_var: SoEnteredDate
new_colors_pal <- c("#27223C", "#D1362F")
# Extracing training and test sets
train_df <- training(splitted_df)
test_df <- testing(splitted_df)
splitted_df %>%
tk_time_series_cv_plan() %>%
ggplot(aes(x=SoEnteredDate, y = SoValue_box_cox, color=.key)) +
geom_line(size=1) +
scale_color_manual(values = new_colors_pal) + theme_bw() +
labs(
title = "Time Series Split",
subtitle = "Boxcox Value"
) + geom_label(
label = "Training Set",
color = "#27223C",
vjust = 0.5,
aes(x=as.Date("2021-11-24"),
y = 450)) +
geom_label(
label = "Testing Set",
color = "#D1362F",
vjust = 0.5,
aes(x=as.Date("2021-12-30")),
y = 500) +
theme(legend.position = "none") + scale_x_date(expand = c(0,0)) +
geom_vline(xintercept = as.Date("2021-12-25"), lty="dashed")
prohet_fit <- prophet_reg() %>%
set_engine("prophet") %>%
fit(SoValue_box_cox ~ SoEnteredDate, data = ts_boxcox_SoValue)
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
model_tbl <- modeltime_table(
prohet_fit
)
calibration_tbl <- model_tbl %>%
modeltime_calibrate(
new_data = test_df
)
forecast_tbl <- calibration_tbl %>%
modeltime_forecast(actual_data = ts_boxcox_SoValue)
## Using '.calibration_data' to forecast.
forecast_tbl %>%
rename(
"time" = ".index",
"value" = ".value"
) %>%
ggplot(aes(x=time, y=value, color=.key)) + geom_line(size=1) + geom_point(size=0.5) + theme_bw() + scale_color_manual(values=new_colors_pal) +
theme(legend.position = "bottom") +
labs(
title = "Forecasting with Prophet",
subtitle = "Number of Ads",
y = "BoxCox Transform Value"
) + scale_x_date(expand = c(0,0))
forecast_tbl %>%
rename(
"time" = ".index",
"value" = ".value"
) %>%
ggplot(aes(x=time, y=value, color=.key)) + geom_line(size=1) +
geom_point(size=0.5) +
theme_bw() +
scale_color_manual(values=new_colors_pal) +
theme(legend.position = "bottom") +
labs(
title = "Forecasting with Prophet",
subtitle = "Number of Ads",
y = "BoxCox Transform Value"
) + scale_x_date(expand = c(0,0))
calibration_tbl %>%
modeltime_accuracy() %>%
pivot_longer(cols = mae:rsq) %>%
mutate(
name = name %>% as_factor() %>% fct_reorder(value) %>% fct_rev()
) %>%
ggplot(aes(x=name, y=value, fill=name)) + geom_col(color="black") +
theme_bw() +
ggthemes::scale_fill_tableau() + theme(legend.position = "bottom") +
geom_label(aes(label = round(value,2)), size=3, color="white") +
labs(
title = "Metrics for evaluating Accuracy",
subtitle = "Prophet Model with BoxCox Transformation"
)
refit_tbl <- calibration_tbl %>%
modeltime_refit(data = ts_boxcox_SoValue)
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# inverting transformation
forecast_ads_tbl <- refit_tbl %>%
modeltime_forecast(h = "60 days", actual_data = ts_boxcox_SoValue)
forecast_ads_tbl <- forecast_ads_tbl %>%
mutate(.value = box_cox_inv_vec(.value, lambda = lambda),
.conf_lo = box_cox_inv_vec(.conf_lo, lambda = lambda),
.conf_hi = box_cox_inv_vec(.conf_hi, lambda = lambda))
forecast_ads_tbl %>%
rename(
"time" = ".index",
"value" = ".value"
) %>%
ggplot(aes(x=time, y=value, color=.key)) + geom_line(size=1) +
theme_bw() + scale_color_manual(values = new_colors_pal) +
labs(
title = "Prophet Forecast for the next 60 days",
subtitle = "Number of Ads (Boxcox Inverted)",
y = "The Forecast SO Sales Value",
caption = str_glue("Lambda: {lambda}")
) +
scale_y_continuous(labels=function(x) format(x, big.mark = ",", scientific = FALSE)) +
theme(legend.position = "bottom") + geom_smooth(method = "loess", color = "#191919", lty="dashed") + scale_x_date(expand = c(0,0))
## `geom_smooth()` using formula 'y ~ x'
forecast_ads_tbl %>%
filter(.key == "prediction")
## # A tibble: 60 x 7
## .model_id .model_desc .key .index .value .conf_lo .conf_hi
## <int> <chr> <fct> <date> <dbl> <dbl> <dbl>
## 1 1 PROPHET prediction 2022-01-01 874490232. 8599488. 1.02e10
## 2 1 PROPHET prediction 2022-01-02 41524149010. 6918053368. 1.59e11
## 3 1 PROPHET prediction 2022-01-03 39847081086. 6531743133. 1.54e11
## 4 1 PROPHET prediction 2022-01-04 39087455003. 6358468225. 1.52e11
## 5 1 PROPHET prediction 2022-01-05 48439162376. 8561982196. 1.80e11
## 6 1 PROPHET prediction 2022-01-06 34204539938. 5271561017. 1.37e11
## 7 1 PROPHET prediction 2022-01-07 44848156843. 7698387197. 1.69e11
## 8 1 PROPHET prediction 2022-01-08 890013183. 8960987. 1.03e10
## 9 1 PROPHET prediction 2022-01-09 41884491124. 7001719105. 1.60e11
## 10 1 PROPHET prediction 2022-01-10 40195518407. 6611582687. 1.55e11
## # ... with 50 more rows
sku_ratio <- df_soline %>%
group_by(SoProductId) %>%
summarise(SoValue = sum(SoUnitCost*SoQuantity)) %>%
ungroup() %>%
mutate(ratio_sales = SoValue/sum(SoValue))
forecast_df_sku <- forecast_ads_tbl %>%
filter(.key == "prediction") %>%
distinct(.index, .value) %>%
mutate(key = 1) %>%
left_join(sku_ratio %>%
distinct(SoProductId) %>%
mutate(key = 1),
by = "key") %>%
transmute(SoProductId = SoProductId,
SoEnteredDate = .index,
forecast_SoValue = .value) %>%
left_join(sku_ratio %>%
select(SoProductId, ratio_sales),
by = "SoProductId") %>%
mutate(forecast_SoValue_SKU = forecast_SoValue*ratio_sales)
View(forecast_df_sku)
abc_classification <- df_soline %>%
group_by(SoProductId) %>%
summarise(SoValue = sum(SoQuantity*SoUnitCost)) %>%
ungroup() %>%
mutate(percent_SoValue = SoValue/sum(SoValue)) %>%
arrange(desc(percent_SoValue)) %>%
transmute(
SoProductId = SoProductId,
cumulative_percent_SoValue = cumsum(percent_SoValue),
abc_classification = case_when(cumulative_percent_SoValue < 0.8 ~ "A",
cumulative_percent_SoValue > 0.95 ~ "C",
TRUE ~ "B")
)
xyz_classification <- df_soline %>%
group_by(SoProductId, SoEnteredDate) %>%
summarise(SoValue = sum(SoQuantity*SoUnitCost)) %>%
ungroup() %>%
group_by(SoProductId) %>%
summarise(mean_SoValue = mean(SoValue),
std_SoValue = tidyquant::STDEV(SoValue)) %>%
ungroup() %>%
mutate(coeff_variation = std_SoValue/mean_SoValue,
xyz_classification = case_when(coeff_variation < 1 ~ "X",
coeff_variation > 2 ~ "Z",
TRUE ~ "Y"))
## `summarise()` has grouped output by 'SoProductId'. You can override using the `.groups` argument.
combine_abcxyz <- abc_classification %>%
left_join(xyz_classification, by = "SoProductId") %>%
select(SoProductId, abc_classification, xyz_classification, mean_SoValue, std_SoValue)
combine_abcxyz
## # A tibble: 19 x 5
## SoProductId abc_classification xyz_classification mean_SoValue std_SoValue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 SKU02 A X 63432856. 41469793.
## 2 SKU01 A Y 26292428. 26439009.
## 3 SKU09 A X 19468751. 16252103.
## 4 SKU03 B Y 9438450. 15201238.
## 5 SKU06 B Y 4212497. 5965093.
## 6 SKU07 B X 3170418. 2347783.
## 7 SKU10 B Y 2987871. 3378320.
## 8 SKU13 B X 2986203. 1814053.
## 9 SKU08 B X 2657091. 1748820.
## 10 SKU05 B X 2266377. 1929744.
## 11 SKU16 C Y 2278063. 3406340.
## 12 SKU14 C X 1381570. 914395.
## 13 SKU17 C X 1324321. 942007.
## 14 SKU15 C X 1264916. 1042418.
## 15 SKU04 C Z 1241745. 4060086.
## 16 SKU11 C Z 729414. 2004652.
## 17 SKU19 C Y 1687552. 1713646.
## 18 SKU12 C Z 590568. 2082984.
## 19 SKU20 C X 621568. 471857.
combine_supply_leadtime <- df_poline %>%
transmute(PoOrderLineId = PoOrderLineId,
PoEnteredDate = PoEnteredDate,
PoProductId = PoProductId) %>%
inner_join(df_roline %>%
select(PoOrderLineId, RoArrivalDate, RoProductId),
by = c("PoOrderLineId" = "PoOrderLineId", "PoProductId" = "RoProductId")) %>%
mutate(supply_leadtime = as.numeric(difftime(RoArrivalDate, PoEnteredDate, units = "days"))) %>%
group_by(PoProductId) %>%
summarise(avg_supply_leadtime = mean(supply_leadtime),
std_supply_leadtime = tidyquant::STDEV(supply_leadtime))
final_supply <- combine_abcxyz %>%
left_join(combine_supply_leadtime, by = c("SoProductId" = "PoProductId")) %>%
mutate(abcxyz_classification = paste(abc_classification, xyz_classification, sep ="")) %>%
transmute(SKU = SoProductId,
abcxyz_classification = abcxyz_classification,
avg_demand_value = mean_SoValue,
std_demand_value = std_SoValue,
avg_supply_leadtime = avg_supply_leadtime,
std_supply_leadtime = std_supply_leadtime) %>%
mutate(target_service_level = case_when(abcxyz_classification %in% c("AX", "AY", "BX") ~ 0.95,
abcxyz_classification == "BY" ~ 0.9,
abcxyz_classification %in% c("CX", "CY", "CZ") ~ 0.8)) %>%
mutate(inventory_policy = case_when(abcxyz_classification %in% c("AX", "AY", "BX") ~ "Continuous Review Policy",
abcxyz_classification == "BY" ~ "Continuous Review Policy",
abcxyz_classification %in% c("CX", "CY", "CZ") ~ "Periodic Review Policy")) %>%
mutate(z_score_value = qnorm(as.numeric(target_service_level)),
est_safety_stock_value = z_score_value*sqrt((avg_supply_leadtime*std_demand_value^2) + (std_supply_leadtime*avg_demand_value)^2),
est_inventory_value = est_safety_stock_value + (avg_supply_leadtime*avg_demand_value)
) %>%
select(SKU, abcxyz_classification, est_safety_stock_value, est_inventory_value, abcxyz_classification, everything())
View(final_supply)