# ----------------------------------------------------------
# STEP 1: Create Retail Supermarket Data Frame
# ----------------------------------------------------------
# Six sample supermarket bills, one row per bill.
# Gender, Membership, Category and DeliveryType are built with factor(), so
# they are categorical; CustomerName is kept as plain character text.
# stringsAsFactors = FALSE is spelled out because data.frame() defaulted to
# TRUE before R 4.0.0, which would silently turn CustomerName into a factor.
retail <- data.frame(
  BillID = c(501, 502, 503, 504, 505, 506),
  CustomerName = c("Aman", "Riya", "Karan", "Neha", "Rohit", "Simran"),
  Gender = factor(c("Male", "Female", "Male", "Female", "Male", "Female")),
  Membership = factor(c("Gold", "Silver", "Gold", "None", "Silver", "Gold")),
  Category = factor(
    c("Grocery", "Electronics", "Clothing", "Grocery", "Electronics", "Clothing")
  ),
  Quantity = c(10, 1, 3, 8, 2, 5),
  CostPrice = c(50, 20000, 800, 60, 18000, 700),
  SellingPrice = c(70, 25000, 1000, 80, 22000, 900),
  Discount = c(100, 2000, 150, 50, 500, 300),
  DeliveryType = factor(c("Home", "Store", "Home", "Store", "Home", "Home")),
  stringsAsFactors = FALSE
)
retail
## BillID CustomerName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 4 504 Neha Female None Grocery 8 60
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType
## 1 70 100 Home
## 2 25000 2000 Store
## 3 1000 150 Home
## 4 80 50 Store
## 5 22000 500 Home
## 6 900 300 Home
# ----------------------------------------------------------
# STEP 2: Add Calculation-Based Columns
# ----------------------------------------------------------
# Per-bill financial columns derived from the base data:
#   GrossRevenue = Quantity * SellingPrice
#   TotalCost    = Quantity * CostPrice
#   NetRevenue   = GrossRevenue - Discount
#   Profit       = NetRevenue - TotalCost
# The columns are added one at a time because the later ones read the
# earlier ones.
retail[["GrossRevenue"]] <- with(retail, Quantity * SellingPrice)
retail[["TotalCost"]] <- with(retail, Quantity * CostPrice)
retail[["NetRevenue"]] <- with(retail, GrossRevenue - Discount)
retail[["Profit"]] <- with(retail, NetRevenue - TotalCost)
retail
## BillID CustomerName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 4 504 Neha Female None Grocery 8 60
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 Home 700 500 600 100
## 2 25000 2000 Store 25000 20000 23000 3000
## 3 1000 150 Home 3000 2400 2850 450
## 4 80 50 Store 640 480 590 110
## 5 22000 500 Home 44000 36000 43500 7500
## 6 900 300 Home 4500 3500 4200 700
# ----------------------------------------------------------
# STEP 3: Multi-Condition Based Analysis
# ----------------------------------------------------------
# 1. Bills where Membership is "Gold", Profit exceeds 5000 and DeliveryType
#    is "Home" (all three conditions must hold).
#    which() keeps only rows where the test is TRUE, so a row with an NA
#    comparison is left out rather than returned as an all-NA row.
retail[which(
  retail$Membership == "Gold" &
    retail$Profit > 5000 &
    retail$DeliveryType == "Home"
), ]
## [1] BillID CustomerName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit
## <0 rows> (or 0-length row.names)
# 2. Loss-making transactions: bills whose Profit is negative.
# NOTE(review): the numbering in this step goes from 2 straight to 4 --
# confirm whether a query 3 was intended here.
subset(retail, Profit < 0)
## [1] BillID CustomerName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit
## <0 rows> (or 0-length row.names)
# 4. Premium customers: bills with NetRevenue above 20000 or with Gold
#    membership (either condition alone is enough).
#    which() keeps only rows where the combined test is TRUE.
retail[which(retail$NetRevenue > 20000 | retail$Membership == "Gold"), ]
## BillID CustomerName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 Home 700 500 600 100
## 2 25000 2000 Store 25000 20000 23000 3000
## 3 1000 150 Home 3000 2400 2850 450
## 5 22000 500 Home 44000 36000 43500 7500
## 6 900 300 Home 4500 3500 4200 700
# ----------------------------------------------------------
# STEP 4: Advanced Logical Classification
# ----------------------------------------------------------
# 1. ProfitCategory: label each bill by its Profit.
#      Profit <= 0              -> "Loss"
#      0 < Profit <= 10000      -> "Moderate Profit"
#      Profit > 10000           -> "High Profit"
#    The bands are tested from the bottom up; each ifelse() only sees the
#    rows that were not matched by the band before it.
retail[["ProfitCategory"]] <- with(
  retail,
  ifelse(
    Profit <= 0, "Loss",
    ifelse(Profit <= 10000, "Moderate Profit", "High Profit")
  )
)
retail
## BillID CustomerName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 4 504 Neha Female None Grocery 8 60
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 Home 700 500 600 100
## 2 25000 2000 Store 25000 20000 23000 3000
## 3 1000 150 Home 3000 2400 2850 450
## 4 80 50 Store 640 480 590 110
## 5 22000 500 Home 44000 36000 43500 7500
## 6 900 300 Home 4500 3500 4200 700
## ProfitCategory
## 1 Moderate Profit
## 2 Moderate Profit
## 3 Moderate Profit
## 4 Moderate Profit
## 5 Moderate Profit
## 6 Moderate Profit
# 2. RiskFlag: a bill is "Risky" when it made a loss (Profit < 0) or when its
#    Discount is more than 20% of GrossRevenue; every other bill is "Safe".
retail[["RiskFlag"]] <- with(
  retail,
  ifelse(Profit < 0 | Discount > 0.20 * GrossRevenue, "Risky", "Safe")
)
retail
## BillID CustomerName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 4 504 Neha Female None Grocery 8 60
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 Home 700 500 600 100
## 2 25000 2000 Store 25000 20000 23000 3000
## 3 1000 150 Home 3000 2400 2850 450
## 4 80 50 Store 640 480 590 110
## 5 22000 500 Home 44000 36000 43500 7500
## 6 900 300 Home 4500 3500 4200 700
## ProfitCategory RiskFlag
## 1 Moderate Profit Safe
## 2 Moderate Profit Safe
## 3 Moderate Profit Safe
## 4 Moderate Profit Safe
## 5 Moderate Profit Safe
## 6 Moderate Profit Safe
# ----------------------------------------------------------
# STEP 5: Aggregated Analysis
# ----------------------------------------------------------
# 1. Total Profit by Membership and Category.
#    Profit is summed within each Membership/Category pair; only pairs that
#    actually occur in the data appear in the result.
# NOTE(review): this step contains a single aggregation -- confirm whether
# further summaries were intended.
aggregate(Profit ~ Membership + Category, data = retail, FUN = sum)
## Membership Category Profit
## 1 Gold Clothing 1150
## 2 Silver Electronics 10500
## 3 Gold Grocery 100
## 4 None Grocery 110