#install packages
install.packages('readr')
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages('dplyr')
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages('ggplot2')
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages('scales')
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages('lubridate')
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages('zoo')
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages('ggrepel')
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
#load libraries
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggrepel)
#load data
sourcing_data <- read_csv("Procurement_Cleaned_Data.csv")
## Rows: 767 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): PO_ID, Supplier, Order_Date, Delivery_Date, Item_Category, Order_S...
## dbl (11): Delivery_Year, Delivery_Month, Quantity, Unit_Price, Negotiated_Pr...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#view data
head(sourcing_data)
## # A tibble: 6 × 18
## PO_ID Supplier Order_Date Delivery_Date Delivery_Year Delivery_Month
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 PO-00001 Alpha_Inc 10/17/2023 10/25/2023 2023 10
## 2 PO-00002 Delta_Logistics 4/25/2022 5/5/2022 2022 5
## 3 PO-00003 Gamma_Co 1/26/2022 2/15/2022 2022 2
## 4 PO-00004 Beta_Supplies 10/9/2022 10/28/2022 2022 10
## 5 PO-00005 Delta_Logistics 9/8/2022 9/20/2022 2022 9
## 6 PO-00006 Epsilon_Group 8/17/2022 8/29/2022 2022 8
## # ℹ 12 more variables: Item_Category <chr>, Order_Status <chr>, Quantity <dbl>,
## # Unit_Price <dbl>, Negotiated_Price <dbl>, Defective_Units <dbl>,
## # Compliance <chr>, Comp_True_Flag <dbl>, Total_Purchase_Amount <dbl>,
## # Defect_Value <dbl>, Actual_Time <dbl>, Negotiated_Savings <dbl>
#structure of the data
str(sourcing_data)
## spc_tbl_ [767 × 18] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ PO_ID : chr [1:767] "PO-00001" "PO-00002" "PO-00003" "PO-00004" ...
## $ Supplier : chr [1:767] "Alpha_Inc" "Delta_Logistics" "Gamma_Co" "Beta_Supplies" ...
## $ Order_Date : chr [1:767] "10/17/2023" "4/25/2022" "1/26/2022" "10/9/2022" ...
## $ Delivery_Date : chr [1:767] "10/25/2023" "5/5/2022" "2/15/2022" "10/28/2022" ...
## $ Delivery_Year : num [1:767] 2023 2022 2022 2022 2022 ...
## $ Delivery_Month : num [1:767] 10 5 2 10 9 8 6 4 11 7 ...
## $ Item_Category : chr [1:767] "Office Supplies" "Office Supplies" "MRO" "Packaging" ...
## $ Order_Status : chr [1:767] "Cancelled" "Delivered" "Delivered" "Delivered" ...
## $ Quantity : num [1:767] 1176 1509 910 1344 1180 ...
## $ Unit_Price : num [1:767] 20.1 39.3 95.5 99.8 64.1 ...
## $ Negotiated_Price : num [1:767] 17.8 37.3 92.3 95.5 60.5 ...
## $ Defective_Units : num [1:767] 0 235 41 112 171 39 96 22 89 8 ...
## $ Compliance : chr [1:767] "Yes" "Yes" "Yes" "Yes" ...
## $ Comp_True_Flag : num [1:767] 1 1 1 1 0 1 0 1 1 1 ...
## $ Total_Purchase_Amount: num [1:767] 20945 56346 83957 128379 71425 ...
## $ Defect_Value : num [1:767] 0 8775 3783 10698 10351 ...
## $ Actual_Time : num [1:767] 8 10 20 19 12 12 11 14 4 12 ...
## $ Negotiated_Savings : num [1:767] 2728 2988 2958 5820 4177 ...
## - attr(*, "spec")=
## .. cols(
## .. PO_ID = col_character(),
## .. Supplier = col_character(),
## .. Order_Date = col_character(),
## .. Delivery_Date = col_character(),
## .. Delivery_Year = col_double(),
## .. Delivery_Month = col_double(),
## .. Item_Category = col_character(),
## .. Order_Status = col_character(),
## .. Quantity = col_double(),
## .. Unit_Price = col_double(),
## .. Negotiated_Price = col_double(),
## .. Defective_Units = col_double(),
## .. Compliance = col_character(),
## .. Comp_True_Flag = col_double(),
## .. Total_Purchase_Amount = col_double(),
## .. Defect_Value = col_double(),
## .. Actual_Time = col_double(),
## .. Negotiated_Savings = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
#change the dates to date datatype
sourcing_data$Delivery_Date <- as.Date(sourcing_data$Delivery_Date, format = "%m/%d/%Y")
sourcing_data$Order_Date <- as.Date(sourcing_data$Order_Date, format = "%m/%d/%y")
#verify the datatype change
str(sourcing_data$Delivery_Date)
## Date[1:767], format: "2023-10-25" "2022-05-05" "2022-02-15" "2022-10-28" "2022-09-20" ...
summary(sourcing_data$Delivery_Date)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## "2022-01-06" "2022-07-10" "2022-12-24" "2023-01-03" "2023-07-10" "2023-12-31"
str(sourcing_data$Order_Date)
## Date[1:767], format: "2020-10-17" "2020-04-25" "2020-01-26" "2020-10-09" "2020-09-08" ...
summary(sourcing_data$Order_Date)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## "2020-01-01" "2020-03-29" "2020-07-06" "2020-06-30" "2020-09-28" "2020-12-30"
#column names
names(sourcing_data)
## [1] "PO_ID" "Supplier" "Order_Date"
## [4] "Delivery_Date" "Delivery_Year" "Delivery_Month"
## [7] "Item_Category" "Order_Status" "Quantity"
## [10] "Unit_Price" "Negotiated_Price" "Defective_Units"
## [13] "Compliance" "Comp_True_Flag" "Total_Purchase_Amount"
## [16] "Defect_Value" "Actual_Time" "Negotiated_Savings"
#summarize $ amount purchased by supplier by year
summary_data <- sourcing_data %>%
group_by(Supplier, Delivery_Year) %>%
summarise(Total_Purchase_Amount = sum(Total_Purchase_Amount, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'Supplier'. You can override using the
## `.groups` argument.
#identify desc order of suppliers based on 2023 spend
top_2023_suppliers <- summary_data %>%
filter(Delivery_Year == 2023) %>%
arrange(desc(Total_Purchase_Amount)) %>%
pull(Supplier)
#reorder supplier factor levels
summary_data <- summary_data %>%
mutate(Supplier = factor(Supplier, levels = top_2023_suppliers))
#plot the graph
ggplot(summary_data, aes(x = Supplier, y = Total_Purchase_Amount, fill = as.factor(Delivery_Year))) +
geom_bar(stat = "identity", position = "dodge") +
scale_y_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
labs(title = "Total Purchase Amount by Supplier and Year",
x = "Supplier",
y = "Total Purchase Amount",
fill = "Delivery Year") +
theme_minimal()
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## List of 1
## $ axis.text.x:List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 1
## ..$ vjust : NULL
## ..$ angle : num 45
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
#summarize $ amount purchased by category by year
summary_category_data <- sourcing_data %>%
group_by(Item_Category, Delivery_Year) %>%
summarise(Total_Purchase_Amount = sum(Total_Purchase_Amount, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'Item_Category'. You can override using the
## `.groups` argument.
#identify desc order of category based on 2023 spend
top_2023_categories <- summary_category_data %>%
filter(Delivery_Year == 2023) %>%
arrange(desc(Total_Purchase_Amount)) %>%
pull(Item_Category)
#reorder supplier factor levels
summary_category_data <- summary_category_data %>%
mutate(Item_Category = factor(Item_Category, levels = top_2023_categories))
#plot the graph
ggplot(summary_category_data, aes(x = Item_Category, y = Total_Purchase_Amount, fill = as.factor(Delivery_Year))) +
geom_bar(stat = "identity", position = "dodge") +
scale_y_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
labs(title = "Total Purchase Amount by Category and Year",
x = "Category",
y = "Total Purchase Amount",
fill = "Delivery Year") +
theme_minimal()
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## List of 1
## $ axis.text.x:List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 1
## ..$ vjust : NULL
## ..$ angle : num 45
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
#summarize $ amount saved by supplier by year
summary_savings_data <- sourcing_data %>%
group_by(Supplier, Delivery_Year) %>%
summarise(Negotiated_Savings = sum(Negotiated_Savings, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'Supplier'. You can override using the
## `.groups` argument.
#identify desc order of savings by supplier based on 2023 spend
top_2023_savings <- summary_savings_data %>%
filter(Delivery_Year == 2023) %>%
arrange(desc(Negotiated_Savings)) %>%
pull(Supplier)
#reorder supplier factor levels
summary_savings_data <- summary_savings_data %>%
mutate(Supplier = factor(Supplier, levels = top_2023_savings))
#plot the graph
ggplot(summary_savings_data, aes(x = Supplier, y = Negotiated_Savings, fill = as.factor(Delivery_Year))) +
geom_bar(stat = "identity", position = "dodge") +
scale_y_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
labs(title = "Total Savings Amount by Supplier and Year",
x = "Supplier",
y = "Negotiated Savings",
fill = "Delivery Year") +
theme_minimal()
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## List of 1
## $ axis.text.x:List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 1
## ..$ vjust : NULL
## ..$ angle : num 45
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
#monthly summary comparison
monthly_summary <- sourcing_data %>%
group_by(Delivery_Year, Delivery_Month) %>%
summarise(Total_Spend = sum(Total_Purchase_Amount, na.rm = TRUE), Orders = n()) %>%
arrange(Delivery_Year, Delivery_Month)
## `summarise()` has grouped output by 'Delivery_Year'. You can override using the
## `.groups` argument.
#bar chart monthly spend
# Prepare data
monthly_summary <- sourcing_data %>%
mutate(Delivery_MonthDate = floor_date(Delivery_Date, unit = "month")) %>%
group_by(Delivery_MonthDate) %>%
summarise(
Total_Spend = sum(Total_Purchase_Amount, na.rm = TRUE),
Orders = n()
) %>%
arrange(Delivery_MonthDate) %>%
ungroup() %>%
# Create labels for x-axis:
mutate(
MonthLabel = ifelse(
month(Delivery_MonthDate) == 1,
paste0("Jan ", year(Delivery_MonthDate)),
format(Delivery_MonthDate, "%b") # This gives "Feb", "Mar", etc.
)
)
# Plot with explicit breaks and labels:
ggplot(monthly_summary, aes(x = Delivery_MonthDate, y = Total_Spend)) +
geom_col(fill = "steelblue") +
scale_y_continuous(labels = label_dollar(scale = 1e-6, suffix = "M", prefix = "$")) +
scale_x_date(
breaks = monthly_summary$Delivery_MonthDate, # exact breaks
labels = monthly_summary$MonthLabel, # exact labels
expand = c(0, 0)
) +
labs(title = "Monthly Spend Totals",
x = NULL,
y = "Total Spend") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#line graph for monthly data to compare 2022 to 2023
# Prepare data: get year, month number, and total spend per month-year
monthly_summary <- sourcing_data %>%
mutate(
Delivery_Year = year(Delivery_Date),
Delivery_Month = month(Delivery_Date)
) %>%
group_by(Delivery_Year, Delivery_Month) %>%
summarise(
Total_Spend = sum(Total_Purchase_Amount, na.rm = TRUE),
Orders = n()
) %>%
ungroup()
## `summarise()` has grouped output by 'Delivery_Year'. You can override using the
## `.groups` argument.
# Create factor for month to ensure Jan-Dec order
monthly_summary <- monthly_summary %>%
mutate(
Month_Factor = factor(
month.abb[Delivery_Month], levels = month.abb
)
)
# Plot line chart
ggplot(monthly_summary, aes(x = Month_Factor, y = Total_Spend, group = factor(Delivery_Year), color = factor(Delivery_Year))) +
geom_line(linewidth = 1.2) + # changed size to linewidth here
geom_point(size = 2) + # size is still used for points
scale_y_continuous(labels = label_dollar(scale = 1e-6, suffix = "M", prefix = "$")) +
labs(
title = "Monthly Spend by Year",
x = "Month",
y = "Total Spend",
color = "Year"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 0, hjust = 0.5),
legend.position = "top"
)
#rolling 3 month average chart
# Step 1: Create a date column for grouping by month
monthly_summary <- sourcing_data %>%
mutate(Delivery_MonthDate = floor_date(as.Date(Delivery_Date), "month")) %>%
group_by(Delivery_MonthDate) %>%
summarise(
Total_Spend = sum(Total_Purchase_Amount, na.rm = TRUE),
Orders = n(),
.groups = "drop"
) %>%
arrange(Delivery_MonthDate)
# Step 2: Rolling average over full timeline
monthly_summary <- monthly_summary %>%
mutate(Rolling_Spend = rollmean(Total_Spend, k = 3, fill = NA, align = "right")) %>%
mutate(
Delivery_Year = year(Delivery_MonthDate),
Delivery_Month = month(Delivery_MonthDate),
Month_Factor = factor(month.abb[Delivery_Month], levels = month.abb)
)
# Step 3: Plot rolling average
ggplot(monthly_summary, aes(x = Month_Factor, y = Rolling_Spend, group = factor(Delivery_Year), color = factor(Delivery_Year))) +
geom_line(linewidth = 1.2) +
geom_point(size = 2) +
scale_y_continuous(labels = label_dollar(scale = 1e-6, suffix = "M", prefix = "$")) +
labs(
title = "Rolling 3-Month Average Spend by Year",
x = "Month",
y = "Rolling 3-Month Avg Spend",
color = "Year"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 0, hjust = 0.5),
legend.position = "top"
)
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
#getting crazy with graphing - overlaying the 3 month trend and
actual
# Step 1: Summarize monthly spend and compute rolling average
monthly_summary <- sourcing_data %>%
mutate(Delivery_MonthDate = floor_date(as.Date(Delivery_Date), "month")) %>%
group_by(Delivery_MonthDate) %>%
summarise(
Total_Spend = sum(Total_Purchase_Amount, na.rm = TRUE),
Orders = n(),
.groups = "drop"
) %>%
arrange(Delivery_MonthDate) %>%
mutate(
Rolling_Spend = rollmean(Total_Spend, k = 3, fill = NA, align = "right"),
Delivery_Year = year(Delivery_MonthDate),
Delivery_Month = month(Delivery_MonthDate),
Month_Factor = factor(month.abb[Delivery_Month], levels = month.abb)
)
# Step 2: Plot both actual spend and rolling average
ggplot(monthly_summary, aes(x = Month_Factor)) +
# Bars for monthly spend
geom_col(aes(y = Total_Spend, fill = factor(Delivery_Year)), position = position_dodge(width = 0.8), width = 0.7, alpha = 0.6) +
# Line for rolling 3-month average
geom_line(aes(y = Rolling_Spend, group = factor(Delivery_Year), color = factor(Delivery_Year)), linewidth = 1.2) +
geom_point(aes(y = Rolling_Spend, color = factor(Delivery_Year)), size = 2) +
scale_y_continuous(labels = label_dollar(scale = 1e-6, suffix = "M", prefix = "$")) +
labs(
title = "Monthly Spend with Rolling 3-Month Average",
x = "Month",
y = "Spend",
fill = "Year",
color = "Rolling Avg Year"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 0, hjust = 0.5),
legend.position = "top"
)
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
#percent savings by supplier, not dollar value savings next #summarize both savings and spend by supplier and year
summary_savings_data <- sourcing_data %>%
group_by(Supplier, Delivery_Year) %>%
summarise(
Total_Spend = sum(Total_Purchase_Amount, na.rm = TRUE),
Negotiated_Savings = sum(Negotiated_Savings, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(Savings_Percent = Negotiated_Savings / Total_Spend)
#order suppliers by 2023 savings percentage
top_2023_savings <- summary_savings_data %>%
filter(Delivery_Year == 2023) %>%
arrange(desc(Savings_Percent)) %>%
pull(Supplier)
summary_savings_data <- summary_savings_data %>%
mutate(Supplier = factor(Supplier, levels = top_2023_savings))
#plot percentage savings by supplier
library(ggplot2)
library(scales)
ggplot(summary_savings_data, aes(x = Supplier, y = Savings_Percent, fill = as.factor(Delivery_Year))) +
geom_bar(stat = "identity", position = "dodge") +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
labs(
title = "Negotiated Savings by Supplier and Year",
x = "Supplier",
y = "Percent Saved",
fill = "Delivery Year"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#defect 2nd attempt - defect value based on compliant orders
# Summarize compliance vs. defects
compliance_summary <- sourcing_data %>%
group_by(Delivery_Year, Compliance) %>%
summarise(
Total_Spend = sum(Total_Purchase_Amount, na.rm = TRUE),
Total_Defects = sum(Defect_Value, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(Defect_Rate = Total_Defects / Total_Spend)
# Plot
ggplot(compliance_summary, aes(x = factor(Delivery_Year), y = Defect_Rate, fill = Compliance)) +
geom_col(position = "dodge") +
geom_text(aes(label = percent(Defect_Rate, accuracy = 0.1)),
position = position_dodge(width = 0.9),
vjust = -0.3, size = 3) +
scale_y_continuous(labels = percent_format(accuracy = 0.1), expand = expansion(mult = c(0, 0.1))) +
scale_fill_manual(values = c("Yes" = "#00C400", "No" = "#D00000")) +
labs(
title = "Defect Rate by Year and Compliance Status",
x = "Year",
y = "Defect Rate (% of Spend)",
fill = "Compliance"
) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(face = "bold"),
legend.position = "top"
)
#Defect Rate Per Supplier
defect_summary <- sourcing_data %>%
group_by(Supplier, Delivery_Year) %>%
summarise(Total_Defects = sum(Defect_Value, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'Supplier'. You can override using the
## `.groups` argument.
top_2023_defects <- defect_summary %>%
filter(Delivery_Year == 2023) %>%
arrange(desc(Total_Defects)) %>%
pull(Supplier)
defect_summary <- defect_summary %>%
mutate(Supplier = factor(Supplier, levels = top_2023_defects))
ggplot(defect_summary, aes(x = Supplier, y = Total_Defects, fill = as.factor(Delivery_Year))) +
geom_col(position = "dodge") +
scale_y_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
labs(
title = "Total Defect Value by Supplier and Year",
x = "Supplier",
y = "Defect Value ($)",
fill = "Year"
) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top"
)
#defect rate by supplier, with percent of spend
defect_summary <- sourcing_data %>%
group_by(Supplier, Delivery_Year) %>%
summarise(
Total_Defects = sum(Defect_Value, na.rm = TRUE),
Total_Spend = sum(Total_Purchase_Amount, na.rm = TRUE)
) %>%
mutate(Defect_Rate = Total_Defects / Total_Spend * 100) %>%
ungroup()
## `summarise()` has grouped output by 'Supplier'. You can override using the
## `.groups` argument.
top_2023_defects <- defect_summary %>%
filter(Delivery_Year == 2023) %>%
arrange(desc(Defect_Rate)) %>%
pull(Supplier)
defect_summary <- defect_summary %>%
mutate(Supplier = factor(Supplier, levels = top_2023_defects))
ggplot(defect_summary, aes(x = Supplier, y = Defect_Rate, fill = as.factor(Delivery_Year))) +
geom_col(position = "dodge") +
scale_y_continuous(labels = scales::percent_format(scale = 1)) +
labs(
title = "Defect Rate by Supplier and Year",
x = "Supplier",
y = "Defect Rate (%)",
fill = "Year"
) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#defect rate with percentage of spend and totals
scale_factor <- max(defect_summary$Total_Defects) / max(defect_summary$Defect_Rate)
ggplot(defect_summary, aes(x = Supplier, y = Total_Defects)) +
geom_col(aes(fill = as.factor(Delivery_Year)), position = position_dodge(width = 0.8), width = 0.6) +
geom_line(
aes(y = Defect_Rate * scale_factor, group = as.factor(Delivery_Year), color = as.factor(Delivery_Year)),
position = position_dodge(width = 0.8),
size = 1
) +
scale_y_continuous(
name = "Total Defect Value ($)",
labels = scales::label_dollar(scale_cut = scales::cut_short_scale()),
sec.axis = sec_axis(
~ . / scale_factor,
name = "Defect Rate (%)",
labels = scales::percent_format(scale = 1)
)
) +
labs(
title = "Defect Value and Defect Rate by Supplier and Year",
x = "Supplier",
fill = "Year (Bars)",
color = "Year (Line)"
) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top"
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#defect Rate by category by year
# Summarize by Category and Year
defect_summary <- sourcing_data %>%
group_by(Item_Category, Delivery_Year) %>%
summarise(
Total_Defects = sum(Defect_Value, na.rm = TRUE),
Total_Spend = sum(Total_Purchase_Amount, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(Defect_Rate = Total_Defects / Total_Spend * 100)
# Get order based on most recent year's defect rate (e.g., 2023)
category_order <- defect_summary %>%
filter(Delivery_Year == 2023) %>%
arrange(desc(Defect_Rate)) %>%
pull(Item_Category)
# Apply factor order to control bar sort
defect_summary <- defect_summary %>%
mutate(Item_Category = factor(Item_Category, levels = category_order))
# Compute scale factor to align percent with dollar values
scale_factor <- max(defect_summary$Total_Defects) / max(defect_summary$Defect_Rate)
# Add scaled y-position for line and labels
defect_summary <- defect_summary %>%
mutate(
scaled_rate_y = Defect_Rate * scale_factor,
label_y = scaled_rate_y + max(Total_Defects) * 0.05 # small space above the line
)
# Plot
ggplot(defect_summary, aes(x = Item_Category, y = Total_Defects)) +
geom_col(aes(fill = as.factor(Delivery_Year)),
position = position_dodge(width = 0.8),
width = 0.6) +
geom_line(
aes(y = scaled_rate_y, group = as.factor(Delivery_Year), color = as.factor(Delivery_Year)),
position = position_dodge(width = 0.8),
linewidth = 1
) +
# Add percentage labels
geom_text(
aes(y = label_y, label = paste0(round(Defect_Rate, 1), "%"), group = as.factor(Delivery_Year), color = as.factor(Delivery_Year)),
position = position_dodge(width = 0.8),
size = 3,
vjust = 0
) +
scale_y_continuous(
name = "Total Defect Value ($)",
labels = scales::label_dollar(scale_cut = scales::cut_short_scale()),
sec.axis = sec_axis(
~ . / scale_factor,
name = "Defect Rate (%)",
labels = scales::percent_format(scale = 1)
)
) +
labs(
title = "Defect Value and Defect Rate by Item Category and Year",
x = "Item Category",
fill = "Year (Bars)",
color = "Year (Line)"
) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top"
)
#compliance status by supplier by year
# Summarize total spend by Supplier, Year, and Compliance
compliance_summary <- sourcing_data %>%
group_by(Supplier, Delivery_Year, Compliance) %>%
summarise(Total_Spend = sum(Total_Purchase_Amount, na.rm = TRUE), .groups = "drop")
# Calculate percent of spend that is compliant per supplier and year
compliance_summary <- compliance_summary %>%
group_by(Supplier, Delivery_Year) %>%
mutate(Spend_Share = Total_Spend / sum(Total_Spend)) %>%
ungroup()
# Order suppliers by non-compliant spend share in most recent year (e.g., 2023)
supplier_order <- compliance_summary %>%
filter(Delivery_Year == 2023, Compliance == "No") %>%
arrange(desc(Spend_Share)) %>%
pull(Supplier)
# Apply factor order
compliance_summary <- compliance_summary %>%
mutate(Supplier = factor(Supplier, levels = unique(supplier_order)))
# Plot
ggplot(compliance_summary, aes(x = Supplier, y = Spend_Share, fill = Compliance)) +
geom_col(position = "stack", width = 0.7) +
facet_wrap(~Delivery_Year, nrow = 1) +
scale_y_continuous(
labels = percent_format(accuracy = 1),
name = "Share of Total Spend"
) +
scale_fill_manual(
values = c("No" = "#d73027", "Yes" = "#1a9850"),
name = "Compliance"
) +
labs(
title = "Compliance Status by Supplier and Year",
x = "Supplier"
) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top"
)