# Load libraries
library(readxl)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load dataset
df <- read_xlsx("C:/Users/Ayesha/Desktop/Dataset - SC R Lang.xlsx")
# Basic analysis commands
head(df) # View first 6 rows
## # A tibble: 6 × 24
## `Product type` SKU Price Availability `Number of products sold`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 haircare SKU0 69.8 55 802
## 2 skincare SKU1 14.8 95 736
## 3 haircare SKU2 11.3 34 8
## 4 skincare SKU3 61.2 68 83
## 5 skincare SKU4 4.81 26 871
## 6 haircare SKU5 1.70 87 147
## # ℹ 19 more variables: `Revenue generated` <dbl>,
## # `Customer demographics` <chr>, `Stock levels` <dbl>, `Lead times` <dbl>,
## # `Order quantities` <dbl>, `Shipping times` <dbl>,
## # `Shipping carriers` <chr>, `Shipping costs` <dbl>, `Supplier name` <chr>,
## # Location <chr>, `Lead time` <dbl>, `Production volumes` <dbl>,
## # `Manufacturing lead time` <dbl>, `Manufacturing costs` <dbl>,
## # `Inspection results` <chr>, `Defect rates` <dbl>, …
summary(df) # Statistical summary of numerical columns
## Product type SKU Price Availability
## Length:100 Length:100 Min. : 1.70 Min. : 1.00
## Class :character Class :character 1st Qu.:19.60 1st Qu.: 22.75
## Mode :character Mode :character Median :51.24 Median : 43.50
## Mean :49.46 Mean : 48.40
## 3rd Qu.:77.20 3rd Qu.: 75.00
## Max. :99.17 Max. :100.00
## Number of products sold Revenue generated Customer demographics
## Min. : 8.0 Min. :1062 Length:100
## 1st Qu.:184.2 1st Qu.:2813 Class :character
## Median :392.5 Median :6006 Mode :character
## Mean :461.0 Mean :5776
## 3rd Qu.:704.2 3rd Qu.:8254
## Max. :996.0 Max. :9866
## Stock levels Lead times Order quantities Shipping times
## Min. : 0.00 Min. : 1.00 Min. : 1.00 Min. : 1.00
## 1st Qu.: 16.75 1st Qu.: 8.00 1st Qu.:26.00 1st Qu.: 3.75
## Median : 47.50 Median :17.00 Median :52.00 Median : 6.00
## Mean : 47.77 Mean :15.96 Mean :49.22 Mean : 5.75
## 3rd Qu.: 73.00 3rd Qu.:24.00 3rd Qu.:71.25 3rd Qu.: 8.00
## Max. :100.00 Max. :30.00 Max. :96.00 Max. :10.00
## Shipping carriers Shipping costs Supplier name Location
## Length:100 Min. :1.013 Length:100 Length:100
## Class :character 1st Qu.:3.540 Class :character Class :character
## Mode :character Median :5.321 Mode :character Mode :character
## Mean :5.548
## 3rd Qu.:7.602
## Max. :9.930
## Lead time Production volumes Manufacturing lead time Manufacturing costs
## Min. : 1.00 Min. :104.0 Min. : 1.00 Min. : 1.085
## 1st Qu.:10.00 1st Qu.:352.0 1st Qu.: 7.00 1st Qu.:22.983
## Median :18.00 Median :568.5 Median :14.00 Median :45.906
## Mean :17.08 Mean :567.8 Mean :14.77 Mean :47.267
## 3rd Qu.:25.00 3rd Qu.:797.0 3rd Qu.:23.00 3rd Qu.:68.621
## Max. :30.00 Max. :985.0 Max. :30.00 Max. :99.466
## Inspection results Defect rates Transportation modes Routes
## Length:100 Min. :0.01861 Length:100 Length:100
## Class :character 1st Qu.:1.00965 Class :character Class :character
## Mode :character Median :2.14186 Mode :character Mode :character
## Mean :2.27716
## 3rd Qu.:3.56400
## Max. :4.93926
## Costs
## Min. :103.9
## 1st Qu.:318.8
## Median :520.4
## Mean :529.2
## 3rd Qu.:763.1
## Max. :997.4
str(df) # Structure of the dataset
## tibble [100 × 24] (S3: tbl_df/tbl/data.frame)
## $ Product type : chr [1:100] "haircare" "skincare" "haircare" "skincare" ...
## $ SKU : chr [1:100] "SKU0" "SKU1" "SKU2" "SKU3" ...
## $ Price : num [1:100] 69.81 14.84 11.32 61.16 4.81 ...
## $ Availability : num [1:100] 55 95 34 68 26 87 48 59 78 35 ...
## $ Number of products sold: num [1:100] 802 736 8 83 871 147 65 426 150 980 ...
## $ Revenue generated : num [1:100] 8662 7461 9578 7767 2687 ...
## $ Customer demographics : chr [1:100] "Non-binary" "Female" "Unknown" "Non-binary" ...
## $ Stock levels : num [1:100] 58 53 1 23 5 90 11 93 5 14 ...
## $ Lead times : num [1:100] 7 30 10 13 3 27 15 17 10 27 ...
## $ Order quantities : num [1:100] 96 37 88 59 56 66 58 11 15 83 ...
## $ Shipping times : num [1:100] 4 2 2 6 8 3 8 1 7 1 ...
## $ Shipping carriers : chr [1:100] "Carrier B" "Carrier A" "Carrier B" "Carrier C" ...
## $ Shipping costs : num [1:100] 2.96 9.72 8.05 1.73 3.89 ...
## $ Supplier name : chr [1:100] "Supplier 3" "Supplier 3" "Supplier 1" "Supplier 5" ...
## $ Location : chr [1:100] "Mumbai" "Mumbai" "Mumbai" "Kolkata" ...
## $ Lead time : num [1:100] 29 23 12 24 5 10 14 22 13 29 ...
## $ Production volumes : num [1:100] 215 517 971 937 414 104 314 564 769 963 ...
## $ Manufacturing lead time: num [1:100] 29 30 27 18 3 17 24 1 8 23 ...
## $ Manufacturing costs : num [1:100] 46.3 33.6 30.7 35.6 92.1 ...
## $ Inspection results : chr [1:100] "Pending" "Pending" "Pending" "Fail" ...
## $ Defect rates : num [1:100] 0.226 4.854 4.581 4.747 3.146 ...
## $ Transportation modes : chr [1:100] "Road" "Road" "Air" "Rail" ...
## $ Routes : chr [1:100] "Route B" "Route B" "Route C" "Route A" ...
## $ Costs : num [1:100] 188 503 142 255 923 ...
colnames(df)
## [1] "Product type" "SKU"
## [3] "Price" "Availability"
## [5] "Number of products sold" "Revenue generated"
## [7] "Customer demographics" "Stock levels"
## [9] "Lead times" "Order quantities"
## [11] "Shipping times" "Shipping carriers"
## [13] "Shipping costs" "Supplier name"
## [15] "Location" "Lead time"
## [17] "Production volumes" "Manufacturing lead time"
## [19] "Manufacturing costs" "Inspection results"
## [21] "Defect rates" "Transportation modes"
## [23] "Routes" "Costs"
# Simple IF ELSE: Identify High Revenue items
df$Revenue_Status <- ifelse(df$`Revenue generated` > 5000, "High", "Low")
# NESTED IF: Categorize Stock Levels
df$Stock_Priority <- ifelse(df$`Stock levels` < 20, "Critical",
ifelse(df$`Stock levels` < 50, "Low", "Sufficient"))
#View(df)
table(df$Revenue_Status)
##
## High Low
## 61 39
table(df$Stock_Priority)
##
## Critical Low Sufficient
## 27 26 47
# Assigning priority based on Transportation Mode
get_priority <- function(mode) {
switch(mode,
"Air" = "Express",
"Rail" = "Standard",
"Road" = "Economy",
"Sea" = "Bulk",
"Unknown")
}
df$Priority <- sapply(df$`Transportation modes`, get_priority)
# FOR loop: Print status of first 10 products
for (i in 1:10) {
cat("Product SKU:", df$SKU[i], "Type:", df$`Product type`[i], "\n")
}
## Product SKU: SKU0 Type: haircare
## Product SKU: SKU1 Type: skincare
## Product SKU: SKU2 Type: haircare
## Product SKU: SKU3 Type: skincare
## Product SKU: SKU4 Type: skincare
## Product SKU: SKU5 Type: haircare
## Product SKU: SKU6 Type: skincare
## Product SKU: SKU7 Type: cosmetics
## Product SKU: SKU8 Type: cosmetics
## Product SKU: SKU9 Type: skincare
# WHILE loop: Example of depleting a safety stock
inventory <- 100
days <- 0
while(inventory > 10) {
inventory <- inventory - sample(5:15, 1) # Subtract random daily demand
days <- days + 1
}
print(paste("Stock hit critical level in", days, "days"))
## [1] "Stock hit critical level in 11 days"
calc_margin <- function(rev, cost) {
margin <- (rev - cost) / rev * 100
return(round(margin, 2))
}
# Apply function
df$Margin_Percent <- calc_margin(df$`Revenue generated`, df$Costs)
# Create an S3 object
product_item <- list(sku = "SKU0", type = "haircare", stock = 58)
class(product_item) <- "SupplyChainItem"
# Define a generic method for the class
print.SupplyChainItem <- function(obj) {
cat("Supply Chain Item [", obj$sku, "] Category:", obj$type, "Stock:", obj$stock, "\n")
}
print(product_item)
## Supply Chain Item [ SKU0 ] Category: haircare Stock: 58
ggplot(df, aes(x = `Product type`, y = `Revenue generated`, fill = `Product type`)) +
geom_bar(stat = "summary", fun = "sum") +
labs(title = "Total Revenue by Product Type",
x = "Product Type",
y = "Total Revenue")

# Create a summary table first
demo_data <- as.data.frame(table(df$`Customer demographics`))
ggplot(demo_data, aes(x = "", y = Freq, fill = Var1)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0) +
theme_void() +
labs(title = "Market Share by Customer Demographics", fill = "Demographic")

ggplot(df, aes(x = Price, y = `Revenue generated`)) +
geom_point(aes(color = `Product type`)) +
geom_smooth(method = "lm", color = "black", linetype = "dashed") +
labs(title = "Price vs Revenue Correlation")
## `geom_smooth()` using formula = 'y ~ x'

# Creating a sequence for the x-axis to show distribution
df$Order_Index <- 1:nrow(df)
ggplot(df, aes(x = Order_Index, y = `Production volumes`)) +
geom_area(fill = "steelblue", alpha = 0.4) +
labs(title = "Trend of Production Volumes", x = "Order Sequence")

# Take a subset of the first 10 items for clarity
df_subset <- df[1:10, ]
# 1. Create the subset (First 10 items)
df_subset <- df[1:10, ]
# 2. Now run the plot using that subset
ggplot(df_subset) +
geom_bar(aes(x = SKU, y = `Revenue generated`),
stat = "identity", fill = "lightblue") +
geom_line(aes(x = SKU, y = Costs, group = 1),
color = "red", size = 1) +
geom_point(aes(x = SKU, y = Costs),
color = "darkred", size = 3) +
labs(title = "Revenue (Bars) vs Manufacturing Costs (Line)",
subtitle = "Analysis of first 10 SKUs",
x = "Product SKU",
y = "Value") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# 1. Create the index column inside df
df$Order_Index <- 1:nrow(df)
# 2. Plot using the new column and backticks for 'Production volumes'
ggplot(df, aes(x = Order_Index, y = `Production volumes`)) +
geom_area(fill = "steelblue", alpha = 0.4) +
labs(title = "Trend of Production Volumes",
x = "Order Sequence (Item Index)",
y = "Production Volume")
