# Load libraries
library(readxl)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load dataset
df <- read_xlsx("C:/Users/Ayesha/Desktop/Dataset - SC R Lang.xlsx")

# Basic analysis commands
head(df)        # View first 6 rows
## # A tibble: 6 × 24
##   `Product type` SKU   Price Availability `Number of products sold`
##   <chr>          <chr> <dbl>        <dbl>                     <dbl>
## 1 haircare       SKU0  69.8            55                       802
## 2 skincare       SKU1  14.8            95                       736
## 3 haircare       SKU2  11.3            34                         8
## 4 skincare       SKU3  61.2            68                        83
## 5 skincare       SKU4   4.81           26                       871
## 6 haircare       SKU5   1.70           87                       147
## # ℹ 19 more variables: `Revenue generated` <dbl>,
## #   `Customer demographics` <chr>, `Stock levels` <dbl>, `Lead times` <dbl>,
## #   `Order quantities` <dbl>, `Shipping times` <dbl>,
## #   `Shipping carriers` <chr>, `Shipping costs` <dbl>, `Supplier name` <chr>,
## #   Location <chr>, `Lead time` <dbl>, `Production volumes` <dbl>,
## #   `Manufacturing lead time` <dbl>, `Manufacturing costs` <dbl>,
## #   `Inspection results` <chr>, `Defect rates` <dbl>, …
summary(df)     # Statistical summary of numerical columns
##  Product type           SKU                Price        Availability   
##  Length:100         Length:100         Min.   : 1.70   Min.   :  1.00  
##  Class :character   Class :character   1st Qu.:19.60   1st Qu.: 22.75  
##  Mode  :character   Mode  :character   Median :51.24   Median : 43.50  
##                                        Mean   :49.46   Mean   : 48.40  
##                                        3rd Qu.:77.20   3rd Qu.: 75.00  
##                                        Max.   :99.17   Max.   :100.00  
##  Number of products sold Revenue generated Customer demographics
##  Min.   :  8.0           Min.   :1062      Length:100           
##  1st Qu.:184.2           1st Qu.:2813      Class :character     
##  Median :392.5           Median :6006      Mode  :character     
##  Mean   :461.0           Mean   :5776                           
##  3rd Qu.:704.2           3rd Qu.:8254                           
##  Max.   :996.0           Max.   :9866                           
##   Stock levels      Lead times    Order quantities Shipping times 
##  Min.   :  0.00   Min.   : 1.00   Min.   : 1.00    Min.   : 1.00  
##  1st Qu.: 16.75   1st Qu.: 8.00   1st Qu.:26.00    1st Qu.: 3.75  
##  Median : 47.50   Median :17.00   Median :52.00    Median : 6.00  
##  Mean   : 47.77   Mean   :15.96   Mean   :49.22    Mean   : 5.75  
##  3rd Qu.: 73.00   3rd Qu.:24.00   3rd Qu.:71.25    3rd Qu.: 8.00  
##  Max.   :100.00   Max.   :30.00   Max.   :96.00    Max.   :10.00  
##  Shipping carriers  Shipping costs  Supplier name        Location        
##  Length:100         Min.   :1.013   Length:100         Length:100        
##  Class :character   1st Qu.:3.540   Class :character   Class :character  
##  Mode  :character   Median :5.321   Mode  :character   Mode  :character  
##                     Mean   :5.548                                        
##                     3rd Qu.:7.602                                        
##                     Max.   :9.930                                        
##    Lead time     Production volumes Manufacturing lead time Manufacturing costs
##  Min.   : 1.00   Min.   :104.0      Min.   : 1.00           Min.   : 1.085     
##  1st Qu.:10.00   1st Qu.:352.0      1st Qu.: 7.00           1st Qu.:22.983     
##  Median :18.00   Median :568.5      Median :14.00           Median :45.906     
##  Mean   :17.08   Mean   :567.8      Mean   :14.77           Mean   :47.267     
##  3rd Qu.:25.00   3rd Qu.:797.0      3rd Qu.:23.00           3rd Qu.:68.621     
##  Max.   :30.00   Max.   :985.0      Max.   :30.00           Max.   :99.466     
##  Inspection results  Defect rates     Transportation modes    Routes         
##  Length:100         Min.   :0.01861   Length:100           Length:100        
##  Class :character   1st Qu.:1.00965   Class :character     Class :character  
##  Mode  :character   Median :2.14186   Mode  :character     Mode  :character  
##                     Mean   :2.27716                                          
##                     3rd Qu.:3.56400                                          
##                     Max.   :4.93926                                          
##      Costs      
##  Min.   :103.9  
##  1st Qu.:318.8  
##  Median :520.4  
##  Mean   :529.2  
##  3rd Qu.:763.1  
##  Max.   :997.4
str(df)         # Structure of the dataset
## tibble [100 × 24] (S3: tbl_df/tbl/data.frame)
##  $ Product type           : chr [1:100] "haircare" "skincare" "haircare" "skincare" ...
##  $ SKU                    : chr [1:100] "SKU0" "SKU1" "SKU2" "SKU3" ...
##  $ Price                  : num [1:100] 69.81 14.84 11.32 61.16 4.81 ...
##  $ Availability           : num [1:100] 55 95 34 68 26 87 48 59 78 35 ...
##  $ Number of products sold: num [1:100] 802 736 8 83 871 147 65 426 150 980 ...
##  $ Revenue generated      : num [1:100] 8662 7461 9578 7767 2687 ...
##  $ Customer demographics  : chr [1:100] "Non-binary" "Female" "Unknown" "Non-binary" ...
##  $ Stock levels           : num [1:100] 58 53 1 23 5 90 11 93 5 14 ...
##  $ Lead times             : num [1:100] 7 30 10 13 3 27 15 17 10 27 ...
##  $ Order quantities       : num [1:100] 96 37 88 59 56 66 58 11 15 83 ...
##  $ Shipping times         : num [1:100] 4 2 2 6 8 3 8 1 7 1 ...
##  $ Shipping carriers      : chr [1:100] "Carrier B" "Carrier A" "Carrier B" "Carrier C" ...
##  $ Shipping costs         : num [1:100] 2.96 9.72 8.05 1.73 3.89 ...
##  $ Supplier name          : chr [1:100] "Supplier 3" "Supplier 3" "Supplier 1" "Supplier 5" ...
##  $ Location               : chr [1:100] "Mumbai" "Mumbai" "Mumbai" "Kolkata" ...
##  $ Lead time              : num [1:100] 29 23 12 24 5 10 14 22 13 29 ...
##  $ Production volumes     : num [1:100] 215 517 971 937 414 104 314 564 769 963 ...
##  $ Manufacturing lead time: num [1:100] 29 30 27 18 3 17 24 1 8 23 ...
##  $ Manufacturing costs    : num [1:100] 46.3 33.6 30.7 35.6 92.1 ...
##  $ Inspection results     : chr [1:100] "Pending" "Pending" "Pending" "Fail" ...
##  $ Defect rates           : num [1:100] 0.226 4.854 4.581 4.747 3.146 ...
##  $ Transportation modes   : chr [1:100] "Road" "Road" "Air" "Rail" ...
##  $ Routes                 : chr [1:100] "Route B" "Route B" "Route C" "Route A" ...
##  $ Costs                  : num [1:100] 188 503 142 255 923 ...
colnames(df)
##  [1] "Product type"            "SKU"                    
##  [3] "Price"                   "Availability"           
##  [5] "Number of products sold" "Revenue generated"      
##  [7] "Customer demographics"   "Stock levels"           
##  [9] "Lead times"              "Order quantities"       
## [11] "Shipping times"          "Shipping carriers"      
## [13] "Shipping costs"          "Supplier name"          
## [15] "Location"                "Lead time"              
## [17] "Production volumes"      "Manufacturing lead time"
## [19] "Manufacturing costs"     "Inspection results"     
## [21] "Defect rates"            "Transportation modes"   
## [23] "Routes"                  "Costs"
# Simple IF ELSE: Identify High Revenue items
df$Revenue_Status <- ifelse(df$`Revenue generated` > 5000, "High", "Low")

# NESTED IF: Categorize Stock Levels
df$Stock_Priority <- ifelse(df$`Stock levels` < 20, "Critical",
                            ifelse(df$`Stock levels` < 50, "Low", "Sufficient"))

#View(df)

table(df$Revenue_Status)
## 
## High  Low 
##   61   39
table(df$Stock_Priority)
## 
##   Critical        Low Sufficient 
##         27         26         47
# Assigning priority based on Transportation Mode
get_priority <- function(mode) {
  switch(mode,
         "Air" = "Express",
         "Rail" = "Standard",
         "Road" = "Economy",
         "Sea" = "Bulk",
         "Unknown")
}
df$Priority <- sapply(df$`Transportation modes`, get_priority)

# FOR loop: Print status of first 10 products
for (i in 1:10) {
  cat("Product SKU:", df$SKU[i], "Type:", df$`Product type`[i], "\n")
}
## Product SKU: SKU0 Type: haircare 
## Product SKU: SKU1 Type: skincare 
## Product SKU: SKU2 Type: haircare 
## Product SKU: SKU3 Type: skincare 
## Product SKU: SKU4 Type: skincare 
## Product SKU: SKU5 Type: haircare 
## Product SKU: SKU6 Type: skincare 
## Product SKU: SKU7 Type: cosmetics 
## Product SKU: SKU8 Type: cosmetics 
## Product SKU: SKU9 Type: skincare
# WHILE loop: Example of depleting a safety stock
inventory <- 100
days <- 0
while(inventory > 10) {
  inventory <- inventory - sample(5:15, 1) # Subtract random daily demand
  days <- days + 1
}
print(paste("Stock hit critical level in", days, "days"))
## [1] "Stock hit critical level in 11 days"
calc_margin <- function(rev, cost) {
  margin <- (rev - cost) / rev * 100
  return(round(margin, 2))
}

# Apply function
df$Margin_Percent <- calc_margin(df$`Revenue generated`, df$Costs)

# Create an S3 object
product_item <- list(sku = "SKU0", type = "haircare", stock = 58)
class(product_item) <- "SupplyChainItem"

# Define a generic method for the class
print.SupplyChainItem <- function(obj) {
  cat("Supply Chain Item [", obj$sku, "] Category:", obj$type, "Stock:", obj$stock, "\n")
}

print(product_item)
## Supply Chain Item [ SKU0 ] Category: haircare Stock: 58
ggplot(df, aes(x = `Product type`, y = `Revenue generated`, fill = `Product type`)) +
  geom_bar(stat = "summary", fun = "sum") +
  labs(title = "Total Revenue by Product Type", 
       x = "Product Type", 
       y = "Total Revenue")

# Create a summary table first
demo_data <- as.data.frame(table(df$`Customer demographics`))

ggplot(demo_data, aes(x = "", y = Freq, fill = Var1)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  theme_void() +
  labs(title = "Market Share by Customer Demographics", fill = "Demographic")

ggplot(df, aes(x = Price, y = `Revenue generated`)) +
  geom_point(aes(color = `Product type`)) +
  geom_smooth(method = "lm", color = "black", linetype = "dashed") +
  labs(title = "Price vs Revenue Correlation")
## `geom_smooth()` using formula = 'y ~ x'

# Creating a sequence for the x-axis to show distribution
df$Order_Index <- 1:nrow(df)

ggplot(df, aes(x = Order_Index, y = `Production volumes`)) +
  geom_area(fill = "steelblue", alpha = 0.4) +
  labs(title = "Trend of Production Volumes", x = "Order Sequence")

# Take a subset of the first 10 items for clarity
df_subset <- df[1:10, ]

# 1. Create the subset (First 10 items)
df_subset <- df[1:10, ]

# 2. Now run the plot using that subset
ggplot(df_subset) +
  geom_bar(aes(x = SKU, y = `Revenue generated`), 
           stat = "identity", fill = "lightblue") +
  geom_line(aes(x = SKU, y = Costs, group = 1), 
            color = "red", size = 1) +
  geom_point(aes(x = SKU, y = Costs), 
             color = "darkred", size = 3) +
  labs(title = "Revenue (Bars) vs Manufacturing Costs (Line)", 
       subtitle = "Analysis of first 10 SKUs",
       x = "Product SKU", 
       y = "Value") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# 1. Create the index column inside df
df$Order_Index <- 1:nrow(df)

# 2. Plot using the new column and backticks for 'Production volumes'
ggplot(df, aes(x = Order_Index, y = `Production volumes`)) +
  geom_area(fill = "steelblue", alpha = 0.4) +
  labs(title = "Trend of Production Volumes", 
       x = "Order Sequence (Item Index)", 
       y = "Production Volume")