R Markdown
This is an R Markdown document. Markdown is a simple formatting
syntax for authoring HTML, PDF, and MS Word documents. For more details
on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be
generated that includes both content as well as the output of any
embedded R code chunks within the document. You can embed an R code
chunk like this:
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.1.0 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ MASS::select() masks dplyr::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(corrplot)
## corrplot 0.95 loaded
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:purrr':
##
## some
##
## The following object is masked from 'package:dplyr':
##
## recode
library(GGally)
library(ROCR)
library(data.table)
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
##
## The following object is masked from 'package:purrr':
##
## transpose
##
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(Hmisc)
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
library(tibble)
library(knitr)
library(here)
## here() starts at C:/Users/aliso/Documents/UTSA/Data Analytics Applications/OnlineRetail
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
##
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(rpart)
library(rpart.plot)
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:ggplot2':
##
## margin
##
## The following object is masked from 'package:dplyr':
##
## combine
library(broom)
library(tidyr)
# Read in 2009-2010
year1 <- read_excel("online_retail.xlsx", sheet = "Year 2009-2010")
# Read in 2010-2011
year2 <- read_excel("online_retail.xlsx", sheet = "Year 2010-2011")
glimpse(year1)
## Rows: 525,461
## Columns: 8
## $ Invoice <chr> "489434", "489434", "489434", "489434", "489434", "48943…
## $ StockCode <chr> "85048", "79323P", "79323W", "22041", "21232", "22064", …
## $ Description <chr> "15CM CHRISTMAS GLASS BALL 20 LIGHTS", "PINK CHERRY LIGH…
## $ Quantity <dbl> 12, 12, 12, 48, 24, 24, 24, 10, 12, 12, 24, 12, 10, 18, …
## $ InvoiceDate <dttm> 2009-12-01 07:45:00, 2009-12-01 07:45:00, 2009-12-01 07…
## $ Price <dbl> 6.95, 6.75, 6.75, 2.10, 1.25, 1.65, 1.25, 5.95, 2.55, 3.…
## $ `Customer ID` <dbl> 13085, 13085, 13085, 13085, 13085, 13085, 13085, 13085, …
## $ Country <chr> "United Kingdom", "United Kingdom", "United Kingdom", "U…
glimpse(year2)
## Rows: 541,910
## Columns: 8
## $ Invoice <chr> "536365", "536365", "536365", "536365", "536365", "53636…
## $ StockCode <chr> "85123A", "71053", "84406B", "84029G", "84029E", "22752"…
## $ Description <chr> "WHITE HANGING HEART T-LIGHT HOLDER", "WHITE METAL LANTE…
## $ Quantity <dbl> 6, 6, 8, 6, 6, 2, 6, 6, 6, 6, 3, 3, 3, 32, 6, 6, 8, 6, 6…
## $ InvoiceDate <dttm> 2010-12-01 08:26:00, 2010-12-01 08:26:00, 2010-12-01 08…
## $ Price <dbl> 2.55, 3.39, 2.75, 3.39, 3.39, 7.65, 4.25, 1.85, 1.85, 4.…
## $ `Customer ID` <dbl> 17850, 17850, 17850, 17850, 17850, 17850, 17850, 17850, …
## $ Country <chr> "United Kingdom", "United Kingdom", "United Kingdom", "U…
anyNA(year1)
## [1] TRUE
anyNA(year2)
## [1] TRUE
na_count_yr1 <- sum(is.na(year1))
print(na_count_yr1)
## [1] 110855
na_count_yr2 <- sum(is.na(year2))
print(na_count_yr2)
## [1] 136534
no_NA_year1 <- na.omit(year1)
no_NA_year2 <- na.omit(year2)
anyNA(no_NA_year1)
## [1] FALSE
anyNA(no_NA_year2)
## [1] FALSE
summary(no_NA_year1)
## Invoice StockCode Description Quantity
## Length:417534 Length:417534 Length:417534 Min. :-9360.00
## Class :character Class :character Class :character 1st Qu.: 2.00
## Mode :character Mode :character Mode :character Median : 4.00
## Mean : 12.76
## 3rd Qu.: 12.00
## Max. :19152.00
## InvoiceDate Price Customer ID
## Min. :2009-12-01 07:45:00 Min. : 0.000 Min. :12346
## 1st Qu.:2010-03-26 11:26:00 1st Qu.: 1.250 1st Qu.:13983
## Median :2010-07-08 19:12:00 Median : 1.950 Median :15311
## Mean :2010-07-01 01:12:19 Mean : 3.888 Mean :15361
## 3rd Qu.:2010-10-14 14:08:00 3rd Qu.: 3.750 3rd Qu.:16799
## Max. :2010-12-09 20:01:00 Max. :25111.090 Max. :18287
## Country
## Length:417534
## Class :character
## Mode :character
##
##
##
summary(no_NA_year2)
## Invoice StockCode Description Quantity
## Length:406830 Length:406830 Length:406830 Min. :-80995.00
## Class :character Class :character Class :character 1st Qu.: 2.00
## Mode :character Mode :character Mode :character Median : 5.00
## Mean : 12.06
## 3rd Qu.: 12.00
## Max. : 80995.00
## InvoiceDate Price Customer ID
## Min. :2010-12-01 08:26:00 Min. : 0.000 Min. :12346
## 1st Qu.:2011-04-06 15:02:00 1st Qu.: 1.250 1st Qu.:13953
## Median :2011-07-31 11:48:00 Median : 1.950 Median :15152
## Mean :2011-07-10 16:31:30 Mean : 3.461 Mean :15288
## 3rd Qu.:2011-10-20 13:06:00 3rd Qu.: 3.750 3rd Qu.:16791
## Max. :2011-12-09 12:50:00 Max. :38970.000 Max. :18287
## Country
## Length:406830
## Class :character
## Mode :character
##
##
##
no_NA_df <- bind_rows(no_NA_year1, no_NA_year2)
# Rename column in position 7 to 'CustomerID'
colnames(no_NA_df)[7] <- 'CustomerID'
head(no_NA_df)
## # A tibble: 6 × 8
## Invoice StockCode Description Quantity InvoiceDate Price CustomerID
## <chr> <chr> <chr> <dbl> <dttm> <dbl> <dbl>
## 1 489434 85048 "15CM CHRISTM… 12 2009-12-01 07:45:00 6.95 13085
## 2 489434 79323P "PINK CHERRY … 12 2009-12-01 07:45:00 6.75 13085
## 3 489434 79323W "WHITE CHERRY… 12 2009-12-01 07:45:00 6.75 13085
## 4 489434 22041 "RECORD FRAME… 48 2009-12-01 07:45:00 2.1 13085
## 5 489434 21232 "STRAWBERRY C… 24 2009-12-01 07:45:00 1.25 13085
## 6 489434 22064 "PINK DOUGHNU… 24 2009-12-01 07:45:00 1.65 13085
## # ℹ 1 more variable: Country <chr>
library(dplyr)
library(lubridate)
library(ggplot2)
#Top selling products for 2009-2011
no_NA_revenue<-no_NA_df |> group_by(StockCode, Description)|>summarise(sales=sum(Quantity*Price))|>ungroup()|> arrange(desc(sales))
## `summarise()` has grouped output by 'StockCode'. You can override using the
## `.groups` argument.
no_NA_revenue
## # A tibble: 5,302 × 3
## StockCode Description sales
## <chr> <chr> <dbl>
## 1 22423 REGENCY CAKESTAND 3 TIER 269737.
## 2 85123A WHITE HANGING HEART T-LIGHT HOLDER 242701.
## 3 85099B JUMBO BAG RED RETROSPOT 134845.
## 4 84879 ASSORTED COLOUR BIRD ORNAMENT 126354.
## 5 POST POSTAGE 112249.
## 6 47566 PARTY BUNTING 102686.
## 7 22086 PAPER CHAIN KIT 50'S CHRISTMAS 78367.
## 8 79321 CHILLI LIGHTS 72229.
## 9 21137 BLACK RECORD COVER FRAME 67127.
## 10 85099F JUMBO BAG STRAWBERRY 64089.
## # ℹ 5,292 more rows
#Look for time series trends
pacman::p_load("pacman","tidyverse", "openxlsx", "forecast", "modeltime", "parsnip", "rsample", "timetk", "xts", "tidyquant", "feasts", "prophet")
no_NA_dlyrevenue<-no_NA_df |> group_by(InvoiceDate)|>summarise(dlysales=sum(Quantity*Price))|>ungroup()|>arrange(desc(dlysales))
no_NA_dlyrevenue
## # A tibble: 41,439 × 2
## InvoiceDate dlysales
## <dttm> <dbl>
## 1 2011-12-09 09:15:00 168470.
## 2 2011-01-18 10:01:00 77184.
## 3 2010-01-07 12:34:00 44052.
## 4 2011-06-10 15:28:00 39597.
## 5 2010-09-27 16:59:00 33168.
## 6 2010-12-07 16:43:00 32562.
## 7 2011-09-20 11:05:00 31698.
## 8 2010-10-14 09:46:00 26007.
## 9 2011-06-15 13:37:00 23427.
## 10 2010-07-15 15:29:00 22863.
## # ℹ 41,429 more rows
no_NA_top10revenue<-no_NA_df |> filter(StockCode != "POST") |> group_by(StockCode, Description)|>summarise(sales=sum(Quantity*Price))|>ungroup()|> arrange(desc(sales))|> slice_head(n=10)
## `summarise()` has grouped output by 'StockCode'. You can override using the
## `.groups` argument.
no_NA_top10revenue
## # A tibble: 10 × 3
## StockCode Description sales
## <chr> <chr> <dbl>
## 1 22423 REGENCY CAKESTAND 3 TIER 269737.
## 2 85123A WHITE HANGING HEART T-LIGHT HOLDER 242701.
## 3 85099B JUMBO BAG RED RETROSPOT 134845.
## 4 84879 ASSORTED COLOUR BIRD ORNAMENT 126354.
## 5 47566 PARTY BUNTING 102686.
## 6 22086 PAPER CHAIN KIT 50'S CHRISTMAS 78367.
## 7 79321 CHILLI LIGHTS 72229.
## 8 21137 BLACK RECORD COVER FRAME 67127.
## 9 85099F JUMBO BAG STRAWBERRY 64089.
## 10 82484 WOOD BLACK BOARD ANT WHITE FINISH 60188.
ggplot(no_NA_top10revenue, aes(x = reorder(Description, sales), y = sales)) +
geom_bar(stat = "identity", fill = "skyblue") +
coord_flip() + # To flip the bars horizontally for better readability
labs(title = "Top Selling Products (2009-2011)",
x = "Product",
y = "Total Sales (in $10k)") +
theme_minimal() +
theme(axis.text.y = element_text(size = 8))

ggplot(data=no_NA_dlyrevenue, aes(x=InvoiceDate, y=dlysales)) + geom_line() + geom_ma(ma_fun = SMA, color = "orange", n = 4) + geom_ma(ma_fun = SMA, n=5, color = "lightblue")+ theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust = 1))

no_NA_df$sales <- no_NA_df$Quantity*no_NA_df$Price
head(no_NA_df)
## # A tibble: 6 × 9
## Invoice StockCode Description Quantity InvoiceDate Price CustomerID
## <chr> <chr> <chr> <dbl> <dttm> <dbl> <dbl>
## 1 489434 85048 "15CM CHRISTM… 12 2009-12-01 07:45:00 6.95 13085
## 2 489434 79323P "PINK CHERRY … 12 2009-12-01 07:45:00 6.75 13085
## 3 489434 79323W "WHITE CHERRY… 12 2009-12-01 07:45:00 6.75 13085
## 4 489434 22041 "RECORD FRAME… 48 2009-12-01 07:45:00 2.1 13085
## 5 489434 21232 "STRAWBERRY C… 24 2009-12-01 07:45:00 1.25 13085
## 6 489434 22064 "PINK DOUGHNU… 24 2009-12-01 07:45:00 1.65 13085
## # ℹ 2 more variables: Country <chr>, sales <dbl>
ggplot(data=no_NA_df, aes(x=InvoiceDate, y=sales)) + geom_line() + geom_ma(ma_fun = SMA, color = "orange", n = 4) + geom_ma(ma_fun = SMA, n=5, color = "lightblue")+ theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust = 1))

library(dplyr)
pacman::p_load(pacman,
tidyverse, openxlsx, forecast, modeltime, parsnip, rsample, timetk, xts, tidyquant, feasts, prophet)
d3 <- no_NA_df %>%
mutate(Date = as.Date(InvoiceDate, format = "%Y-%m-%d")) %>%
group_by(Date) %>%
summarise(Daily_Sales = sum(sales))
head(d3)
## # A tibble: 6 × 2
## Date Daily_Sales
## <date> <dbl>
## 1 2009-12-01 42708.
## 2 2009-12-02 52578.
## 3 2009-12-03 61534.
## 4 2009-12-04 33687.
## 5 2009-12-05 9803.
## 6 2009-12-06 24284.
ggplot(d3, aes(x = Date, y = Daily_Sales)) +
geom_line(color = "blue") +
labs(title = "Daily Sales Trend", x = "Date", y = "Sales") + theme(axis.text.x = element_text(angle = 45, hjust = 1))

sales_ts <- ts(d3$Daily_Sales, start = c(year(min(d3$Date)), month(min(d3$Date))), frequency = 365)
# Fit ARIMA model
arima_model <- auto.arima(sales_ts)
# Forecast future demand
forecast_arima <- forecast(arima_model, h = 30) # Sales forecast for the next 30 days
# Plot the forecast
autoplot(forecast_arima) +
labs(title = "ARIMA Forecast", x = "Date", y = "Sales")

# Fit ETS model
ets_model <- ets(sales_ts)
## Warning in ets(sales_ts): I can't handle data with frequency greater than 24.
## Seasonality will be ignored. Try stlf() if you need seasonal forecasts.
# Forecast future demand
forecast_ets <- forecast(ets_model, h = 30)
# Plot the forecast
autoplot(forecast_ets) +
labs(title = "ETS Forecast", x = "Date", y = "Sales")

suppressPackageStartupMessages({
library(dplyr)
library(lubridate)
library(tidyr)
library(purrr)
library(forecast)
library(ggplot2)
library(readr)
})
# ---------------- USER INPUTS ----------------
exclude_codes <- c("POST") # e.g., c("22423")
horizon_days <- 60 # number of future days to forecast
lookback_days_for_top <- 0 # 0 = all history
# --------------------------------------------
# Ensure required columns exist: InvoiceDate, StockCode, Quantity, Price
stopifnot(all(c("InvoiceDate","StockCode","Quantity","Price") %in% names(no_NA_df)))
# 1) Standardize types
df <- no_NA_df %>%
dplyr::mutate(
InvoiceDate = as.Date(InvoiceDate),
StockCode = as.character(StockCode),
Quantity = as.numeric(Quantity),
Price = as.numeric(Price)
) %>%
dplyr::filter(!is.na(InvoiceDate), !is.na(StockCode), !is.na(Quantity), !is.na(Price))
# If Description missing, create it (keeps code robust)
if (!"Description" %in% names(df)) {
df <- df %>% dplyr::mutate(Description = NA_character_)
} else {
df <- df %>% dplyr::mutate(Description = as.character(Description))
}
# 2) Top-10 products by revenue (optional lookback)
df_top_scope <- if (lookback_days_for_top > 0) {
cutoff <- max(df$InvoiceDate, na.rm = TRUE) - lookback_days_for_top
df %>% dplyr::filter(InvoiceDate > cutoff)
} else df
top10_tbl <- df_top_scope %>%
dplyr::filter(!StockCode %in% exclude_codes) %>%
dplyr::group_by(StockCode, Description) %>%
dplyr::summarise(sales = sum(Quantity * Price, na.rm = TRUE), .groups = "drop") %>%
dplyr::arrange(dplyr::desc(sales)) %>%
dplyr::slice_head(n = 10)
top10_ids <- top10_tbl$StockCode
message("Top 10 StockCodes: ", paste(top10_ids, collapse = ", "))
## Top 10 StockCodes: 22423, 85123A, 85099B, 84879, 47566, 22086, 79321, 21137, 85099F, 82484
# 3) Aggregate DAILY demand & fill missing days
daily_raw <- df %>%
dplyr::filter(StockCode %in% top10_ids) %>%
dplyr::group_by(StockCode, InvoiceDate) %>%
dplyr::summarise(units = sum(Quantity), .groups = "drop")
daily <- daily_raw %>%
dplyr::group_by(StockCode) %>%
tidyr::complete(InvoiceDate = seq(min(InvoiceDate), max(InvoiceDate), by = "1 day"),
fill = list(units = 0)) %>%
dplyr::ungroup() %>%
dplyr::arrange(StockCode, InvoiceDate)
# 4) Modeling helpers
safe_fit <- function(y) {
fit <- tryCatch(
forecast::auto.arima(y, seasonal = TRUE, stepwise = FALSE, approximation = FALSE),
error = function(e) NULL
)
if (is.null(fit)) forecast::ets(y) else fit
}
to_ts <- function(df_prod) {
stats::ts(df_prod$units, frequency = 7) # daily with weekly seasonality
}
# 5) Fit + forecast per product
results <- top10_ids %>%
purrr::map_dfr(function(pid) {
d <- daily %>% dplyr::filter(StockCode == pid)
if (sum(d$units > 0, na.rm = TRUE) < 3) {
fc_vals <- rep(mean(d$units, na.rm = TRUE), horizon_days)
tibble::tibble(
StockCode = pid,
day_ahead = 1:horizon_days,
forecast = fc_vals,
lo95 = NA_real_,
hi95 = NA_real_
)
} else {
y <- to_ts(d)
fit <- safe_fit(y)
fc <- forecast::forecast(fit, h = horizon_days)
tibble::tibble(
StockCode = pid,
day_ahead = 1:horizon_days,
forecast = as.numeric(fc$mean),
lo95 = as.numeric(fc$lower[, "95%"]),
hi95 = as.numeric(fc$upper[, "95%"])
)
}
}) %>%
dplyr::left_join(
top10_tbl %>% dplyr::select(StockCode, Description),
by = "StockCode"
)
# 6) Save outputs
readr::write_csv(results, "top10_daily_forecasts2.csv")
message("Saved: top10_daily_forecasts2.csv")
## Saved: top10_daily_forecasts2.csv
# 7) Plot one product’s history + forecast
plot_one <- function(pid) {
df_hist <- daily %>% dplyr::filter(StockCode == pid)
df_fc <- results %>% dplyr::filter(StockCode == pid)
title_txt <- paste0(pid, if (!all(is.na(df_fc$Description))) {
paste0(" – ", unique(stats::na.omit(df_fc$Description))[1])
} else "")
last_hist <- max(df_hist$InvoiceDate, na.rm = TRUE)
ggplot2::ggplot() +
# historical demand in blue
ggplot2::geom_line(data = df_hist,
ggplot2::aes(x = InvoiceDate, y = units),
color = "steelblue", linewidth = 1) +
# forecast interval in light red
ggplot2::geom_ribbon(data = df_fc,
ggplot2::aes(x = last_hist + day_ahead,
ymin = lo95, ymax = hi95),
fill = "salmon", alpha = 0.3) +
# forecast line in dark red
ggplot2::geom_line(data = df_fc,
ggplot2::aes(x = last_hist + day_ahead, y = forecast),
color = "firebrick", linewidth = 1) +
ggplot2::labs(title = paste("Daily Demand & Forecast:", title_txt),
x = "Date", y = "Units") +
ggplot2::theme_minimal()
}
# Example:
# plot_one(top10_ids[1])
for (pid in top10_ids) {
print(plot_one(pid))
}










suppressPackageStartupMessages({
library(dplyr); library(lubridate); library(tidyr)
})
# ---- Choose how to define a "Period" ----
# Set fiscal_start_month = 1 for calendar years (Jan–Dec -> "YYYY-YYYY+1" labels still okay),
# or use e.g. 7 for fiscal (Jul–Jun), 4 for Apr–Mar, etc.
fiscal_start_month <- 7 # << change to 1 for calendar-year style periods
no_NA_df <- no_NA_df %>%
dplyr::mutate(
InvoiceDate = as.POSIXct(InvoiceDate),
Date = as.Date(InvoiceDate),
StockCode = as.character(StockCode),
Description = as.character(Description),
Quantity = as.numeric(Quantity),
Price = as.numeric(Price),
sales = Quantity * Price
) %>%
# Create Period dynamically from Date and fiscal start
dplyr::mutate(
Year = lubridate::year(Date),
Month = lubridate::month(Date),
PeriodStartYear = dplyr::if_else(Month < fiscal_start_month, Year - 1L, Year),
PeriodEndYear = PeriodStartYear + 1L,
Period = paste0(PeriodStartYear, "-", PeriodEndYear)
)
# Sanity check (optional)
# print(table(no_NA_df$Period, useNA="ifany"))
### 1) Auto-select current & prior periods, compute KPIs (dedup safe)
suppressPackageStartupMessages({
library(dplyr); library(lubridate); library(tidyr); library(purrr); library(scales)
})
# ---- Pick the latest two observed Periods dynamically ----
periods_found <- no_NA_df %>%
dplyr::distinct(Period, PeriodStartYear, PeriodEndYear) %>%
dplyr::arrange(dplyr::desc(PeriodStartYear), dplyr::desc(PeriodEndYear))
stopifnot(nrow(periods_found) >= 1)
current_period <- periods_found$Period[1]
prior_period <- if (nrow(periods_found) >= 2) periods_found$Period[2] else NA_character_
# ---- Revenue / Units / Returns by Period + SKU + Description ----
rev_by_period <- no_NA_df %>%
dplyr::group_by(Period, StockCode, Description) %>%
dplyr::summarise(
Revenue = sum(sales, na.rm = TRUE),
Units_shipped = sum(Quantity[Quantity > 0], na.rm = TRUE),
Units_return = sum(abs(Quantity[Quantity < 0]), na.rm = TRUE),
.groups = "drop"
)
# ---- Collapse to one row per SKU for each period (dedupe fix) ----
cur <- rev_by_period %>%
dplyr::filter(Period == current_period) %>%
dplyr::group_by(StockCode) %>%
dplyr::summarise(
# pick representative non-NA Description by highest Revenue
Description = {
tmp <- dplyr::arrange(pick(dplyr::everything()), dplyr::desc(Revenue))
dplyr::first(stats::na.omit(tmp$Description))
},
cur_rev = sum(Revenue, na.rm = TRUE),
cur_units = sum(Units_shipped, na.rm = TRUE),
cur_ret = sum(Units_return, na.rm = TRUE),
.groups = "drop"
)
prior <- rev_by_period %>%
dplyr::filter(Period == prior_period) %>%
dplyr::group_by(StockCode) %>%
dplyr::summarise(
prior_rev = sum(Revenue, na.rm = TRUE),
.groups = "drop"
)
# ---- YoY + ReturnRate ----
yoy_tbl <- cur %>%
dplyr::left_join(prior, by = "StockCode") %>%
dplyr::mutate(
YoY = dplyr::case_when(
is.na(prior_rev) ~ NA_real_,
prior_rev == 0 & cur_rev > 0 ~ NA_real_,
TRUE ~ (cur_rev - prior_rev) / prior_rev
),
ReturnRate = dplyr::case_when(
is.na(cur_units) | cur_units <= 0 ~ NA_real_,
TRUE ~ cur_ret / cur_units
)
)
# ---- Build helpers for Momentum (current period) & 6M Trend ----
cur_df <- no_NA_df %>% dplyr::filter(Period == current_period)
last_date <- max(cur_df$Date, na.rm = TRUE)
last_complete_month <- lubridate::floor_date(last_date, "month") - lubridate::days(1)
end_3m <- last_complete_month
start_3m <- lubridate::floor_date(end_3m %m-% months(2), "month")
end_prev <- start_3m - lubridate::days(1)
start_prev <- lubridate::floor_date(end_prev %m-% months(2), "month")
momentum_tbl <- cur_df %>%
dplyr::filter(Date >= start_prev & Date <= end_3m) %>%
dplyr::mutate(
bucket = dplyr::case_when(
Date >= start_3m & Date <= end_3m ~ "last3",
Date >= start_prev & Date <= end_prev ~ "prev3",
TRUE ~ NA_character_
)
) %>%
dplyr::filter(!is.na(bucket)) %>%
dplyr::group_by(StockCode, bucket) %>%
dplyr::summarise(rev = sum(sales, na.rm = TRUE), .groups = "drop") %>%
tidyr::pivot_wider(names_from = bucket, values_from = rev, values_fill = 0) %>%
dplyr::mutate(
Momentum3M = dplyr::if_else(prev3 > 0, (last3 / prev3) - 1, NA_real_),
MomentumLabel = dplyr::case_when(
is.na(Momentum3M) ~ "—",
Momentum3M > 0.05 ~ "↑ Accelerating",
Momentum3M < -0.05 ~ "↓ Softening",
TRUE ~ "→ Stable"
)
) %>%
dplyr::select(StockCode, Momentum3M, MomentumLabel)
six_end <- last_complete_month
six_start <- lubridate::floor_date(six_end %m-% months(5), "month")
monthly_cur <- cur_df %>%
dplyr::filter(Date >= six_start & Date <= six_end) %>%
dplyr::mutate(ym = lubridate::floor_date(Date, "month")) %>%
dplyr::group_by(StockCode, ym) %>%
dplyr::summarise(rev = sum(sales, na.rm = TRUE), .groups = "drop")
# ---- 6-Month Trend (safe version) ----
trend_tbl <- monthly_cur %>%
dplyr::group_by(StockCode) %>%
dplyr::mutate(t = as.numeric(ym)) %>%
tidyr::nest() %>%
dplyr::mutate(
model = purrr::map(data, ~ tryCatch(
stats::lm(rev ~ t, data = .x),
error = function(e) NULL
)),
slope = purrr::map_dbl(model, ~ {
if (is.null(.x)) NA_real_ else stats::coef(.x)[["t"]]
})
) %>%
dplyr::select(StockCode, slope) %>%
dplyr::ungroup()
# ---- Base for current period ----
base_tbl <- yoy_tbl %>%
dplyr::left_join(momentum_tbl, by = "StockCode")
### 2) Limit to Top-10 most frequently purchased SKUs (current period) and render tables
suppressPackageStartupMessages({ library(kableExtra); library(scales) })
# ---- Top 10 by purchase frequency (units) in current period ----
top10_freq <- no_NA_df %>%
dplyr::filter(Period == current_period, Quantity > 0) %>%
dplyr::group_by(StockCode, Description) %>%
dplyr::summarise(total_units = sum(Quantity, na.rm = TRUE), .groups = "drop") %>%
dplyr::arrange(dplyr::desc(total_units)) %>%
dplyr::slice_head(n = 10)
top10_ids <- top10_freq$StockCode
base_top <- base_tbl %>% dplyr::filter(StockCode %in% top10_ids)
trend_top <- trend_tbl %>% dplyr::filter(StockCode %in% top10_ids)
# ---- Table 6: Product Leaderboard (exact columns) ----
tbl6 <- base_top %>%
dplyr::arrange(dplyr::desc(cur_rev)) %>%
dplyr::transmute(
SKU = StockCode,
Description = dplyr::coalesce(Description, "(No description)"),
Revenue_num = dplyr::if_else(is.na(cur_rev), 0, cur_rev),
Units_num = dplyr::if_else(is.na(cur_units), 0, cur_units),
YoY_chr = dplyr::if_else(is.na(YoY), "—", scales::percent(YoY, accuracy = 0.1)),
Momentum_chr = dplyr::coalesce(MomentumLabel, "—"),
Return_chr = dplyr::if_else(is.na(ReturnRate), "—", scales::percent(ReturnRate, accuracy = 0.1))
) %>%
dplyr::mutate(
Revenue = scales::dollar(Revenue_num, accuracy = 1),
Units = formatC(Units_num, big.mark = ",", format = "f", digits = 0)
) %>%
dplyr::transmute(
SKU, Description, Revenue, Units,
`YoY %` = YoY_chr,
`3M Momentum` = Momentum_chr,
`Return Rate` = Return_chr
) %>%
dplyr::mutate(dplyr::across(dplyr::everything(), ~ ifelse(is.na(.), "—", as.character(.))))
tbl6 %>%
kableExtra::kbl(
caption = paste0("Table 6. Product Leaderboard — Top 10 Most Frequently Purchased (", current_period, ")"),
align = c("l","l","r","r","r","l","r")
) %>%
kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover"))
Table 6. Product Leaderboard — Top 10 Most Frequently Purchased
(2011-2012)
SKU
|
Description
|
Revenue
|
Units
|
YoY %
|
3M Momentum
|
Return Rate
|
23084
|
RABBIT NIGHT LIGHT
|
$46,013
|
24,484
|
797.8%
|
↑ Accelerating
|
0.4%
|
85099B
|
JUMBO BAG RED RETROSPOT
|
$44,289
|
24,395
|
-45.2%
|
↑ Accelerating
|
4.0%
|
85123A
|
WHITE HANGING HEART T-LIGHT HOLDER
|
$41,020
|
14,774
|
-65.3%
|
↑ Accelerating
|
0.3%
|
22086
|
PAPER CHAIN KIT 50’S CHRISTMAS
|
$35,850
|
13,473
|
6.4%
|
↑ Accelerating
|
3.0%
|
84879
|
ASSORTED COLOUR BIRD ORNAMENT
|
$32,092
|
20,339
|
-51.6%
|
↑ Accelerating
|
0.2%
|
22197
|
POPCORN HOLDER
|
$22,598
|
30,309
|
-22.4%
|
↑ Accelerating
|
1.4%
|
22952
|
60 CAKE CASES VINTAGE CHRISTMAS
|
$6,705
|
13,561
|
-20.1%
|
↑ Accelerating
|
1.9%
|
21212
|
PACK OF 72 RETROSPOT CAKE CASES
|
$6,493
|
13,354
|
-70.7%
|
↑ Accelerating
|
1.5%
|
84077
|
WORLD WAR 2 GLIDERS ASSTD DESIGNS
|
$6,233
|
23,355
|
-54.7%
|
↑ Accelerating
|
0.2%
|
23843
|
PAPER CRAFT , LITTLE BIRDIE
|
$0
|
80,995
|
—
|
—
|
100.0%
|
# ---- Table 7: Decliner Watchlist (no 3M momentum) ----
decliners_top <- base_top %>%
dplyr::left_join(trend_top, by = "StockCode") %>%
dplyr::mutate(
yoy_flag = !is.na(YoY) & YoY < 0,
slope_flag = !is.na(slope) & slope < 0
) %>%
dplyr::filter(yoy_flag | slope_flag) %>%
dplyr::transmute(
SKU = StockCode,
Description = dplyr::coalesce(Description, "(No description)"),
Revenue_num = dplyr::if_else(is.na(cur_rev), 0, cur_rev),
Units_num = dplyr::if_else(is.na(cur_units), 0, cur_units),
YoY_chr = dplyr::if_else(is.na(YoY), "—", scales::percent(YoY, accuracy = 0.1)),
Return_chr = dplyr::if_else(is.na(ReturnRate), "—", scales::percent(ReturnRate, accuracy = 0.1)),
Trend_chr = dplyr::case_when(
is.na(slope) ~ "—",
slope < 0 ~ "↓ Declining",
slope > 0 ~ "↑ Improving",
TRUE ~ "→ Flat"
)
) %>%
dplyr::arrange(dplyr::desc(Revenue_num))
if (nrow(decliners_top) == 0) {
tibble::tibble(Note = "No top-10 SKUs meet decliner criteria (negative YoY or negative 6M trend).") %>%
kableExtra::kbl(
caption = paste0("Table 7. Decliner Watchlist — Top 10 Most Frequently Purchased (", current_period, ")")
) %>%
kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover"))
} else {
decliners_top %>%
dplyr::mutate(
Revenue = scales::dollar(Revenue_num, accuracy = 1),
Units = formatC(Units_num, big.mark = ",", format = "f", digits = 0)
) %>%
dplyr::transmute(
SKU, Description, Revenue, Units,
`YoY %` = YoY_chr,
`Return Rate` = Return_chr,
`6M Trend` = Trend_chr
) %>%
dplyr::mutate(dplyr::across(dplyr::everything(), ~ ifelse(is.na(.), "—", as.character(.)))) %>%
kableExtra::kbl(
caption = paste0("Table 7. Decliner Watchlist — Top 10 Most Frequently Purchased (", current_period, ")"),
align = c("l","l","r","r","r","r","l")
) %>%
kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover"))
}
Table 7. Decliner Watchlist — Top 10 Most Frequently Purchased
(2011-2012)
SKU
|
Description
|
Revenue
|
Units
|
YoY %
|
Return Rate
|
6M Trend
|
85099B
|
JUMBO BAG RED RETROSPOT
|
$44,289
|
24,395
|
-45.2%
|
4.0%
|
↑ Improving
|
85123A
|
WHITE HANGING HEART T-LIGHT HOLDER
|
$41,020
|
14,774
|
-65.3%
|
0.3%
|
↑ Improving
|
84879
|
ASSORTED COLOUR BIRD ORNAMENT
|
$32,092
|
20,339
|
-51.6%
|
0.2%
|
↑ Improving
|
22197
|
POPCORN HOLDER
|
$22,598
|
30,309
|
-22.4%
|
1.4%
|
↑ Improving
|
22952
|
60 CAKE CASES VINTAGE CHRISTMAS
|
$6,705
|
13,561
|
-20.1%
|
1.9%
|
↑ Improving
|
21212
|
PACK OF 72 RETROSPOT CAKE CASES
|
$6,493
|
13,354
|
-70.7%
|
1.5%
|
↓ Declining
|
84077
|
WORLD WAR 2 GLIDERS ASSTD DESIGNS
|
$6,233
|
23,355
|
-54.7%
|
0.2%
|
↑ Improving
|
suppressPackageStartupMessages({
library(dplyr); library(lubridate); library(tidyr)
library(readr); library(kableExtra); library(scales)
})
# --- Identify top 10 most frequently purchased items ---
top10_freq <- no_NA_df %>%
dplyr::filter(Quantity > 0) %>%
dplyr::group_by(StockCode, Description) %>%
dplyr::summarise(total_units = sum(Quantity, na.rm = TRUE), .groups = "drop") %>%
dplyr::arrange(dplyr::desc(total_units)) %>%
dplyr::slice_head(n = 10)
top10_ids <- top10_freq$StockCode
# --- Filter dataset to top 10 items only ---
df_top10 <- no_NA_df %>%
dplyr::filter(StockCode %in% top10_ids)
# --- Inventory Parameters ---
service_level <- 0.95
z_value <- qnorm(service_level)
holding_cost_per_unit_per_year <- 2.00
order_cost_per_order <- 50.00
working_days_per_year <- 365
lead_time_days <- 7
# --- Daily demand (recent 90 days for top 10) ---
df_daily <- df_top10 %>%
dplyr::mutate(Date = as.Date(InvoiceDate)) %>%
dplyr::group_by(StockCode, Description, Date) %>%
dplyr::summarise(units = sum(Quantity[Quantity > 0], na.rm = TRUE), .groups = "drop")
last_date <- max(df_daily$Date, na.rm = TRUE)
start_lookback <- last_date - days(89)
df_scope <- df_daily %>%
dplyr::filter(Date >= start_lookback & Date <= last_date)
# --- Demand stats per SKU ---
demand_stats <- df_scope %>%
dplyr::group_by(StockCode, Description) %>%
dplyr::summarise(
mean_daily = mean(units, na.rm = TRUE),
sd_daily = sd(units, na.rm = TRUE),
days_obs = n(),
.groups = "drop"
)
# --- Compute Safety Stock, ROP, EOQ ---
inv_opt <- demand_stats %>%
dplyr::mutate(
sd_lead = sd_daily * sqrt(lead_time_days),
SafetyStock = z_value * sd_lead,
ReorderPoint = pmax(0, mean_daily * lead_time_days + SafetyStock),
AnnualDemand = mean_daily * working_days_per_year,
EOQ = ifelse(holding_cost_per_unit_per_year > 0,
sqrt((2 * AnnualDemand * order_cost_per_order) /
holding_cost_per_unit_per_year),
NA_real_)
) %>%
dplyr::mutate(
SafetyStock = round(SafetyStock, 0),
ReorderPoint = round(ReorderPoint, 0),
EOQ = round(EOQ, 0)
)
# --- Table 8: Inventory Optimization (Top 10 SKUs) ---
inv_opt %>%
dplyr::select(
SKU = StockCode,
Description,
`Mean Daily Demand` = mean_daily,
`Daily SD` = sd_daily,
`Safety Stock` = SafetyStock,
`Reorder Point` = ReorderPoint,
`EOQ (units)` = EOQ
) %>%
dplyr::mutate(
`Mean Daily Demand` = round(`Mean Daily Demand`, 2),
`Daily SD` = round(`Daily SD`, 2)
) %>%
kableExtra::kbl(
caption = "Table 8. Inventory Stock Optimization — Top 10 Most Frequently Purchased Items",
align = c("l","l","r","r","r","r","r")
) %>%
kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover"))
Table 8. Inventory Stock Optimization — Top 10 Most Frequently Purchased
Items
SKU
|
Description
|
Mean Daily Demand
|
Daily SD
|
Safety Stock
|
Reorder Point
|
EOQ (units)
|
17003
|
BROCADE RING PURSE
|
124.69
|
254.46
|
1107
|
1980
|
1508
|
21212
|
PACK OF 72 RETROSPOT CAKE CASES
|
93.35
|
121.02
|
527
|
1180
|
1508
|
21977
|
PACK OF 60 PINK PAISLEY CAKE CASES
|
65.51
|
120.62
|
525
|
983
|
1508
|
23166
|
MEDIUM CERAMIC TOP STORAGE JAR
|
24.76
|
41.32
|
180
|
353
|
1508
|
23843
|
PAPER CRAFT , LITTLE BIRDIE
|
80995.00
|
NA
|
NA
|
NA
|
1508
|
84077
|
WORLD WAR 2 GLIDERS ASSTD DESIGNS
|
239.03
|
566.44
|
2465
|
4138
|
1508
|
84879
|
ASSORTED COLOUR BIRD ORNAMENT
|
148.60
|
173.45
|
755
|
1795
|
1508
|
84991
|
60 TEATIME FAIRY CAKE CASES
|
75.70
|
119.47
|
520
|
1050
|
1508
|
85099B
|
JUMBO BAG RED RETROSPOT
|
194.88
|
248.15
|
1080
|
2444
|
1508
|
85123A
|
CREAM HANGING HEART T-LIGHT HOLDER
|
28.50
|
38.89
|
169
|
369
|
1508
|
85123A
|
WHITE HANGING HEART T-LIGHT HOLDER
|
117.86
|
160.82
|
700
|
1525
|
1508
|