Retail <- data.frame(
bill_id = c(501, 502, 503, 504, 505, 506),
customer_name = c('Aman', 'Riya', 'Karan', 'Neha', 'Rohit', 'Simran'),
gender = factor(c('male', 'female', 'male', 'female', 'male', 'female')),
membership = factor(c('gold', 'red', 'gold', 'none', 'silver', 'gold')),
Category = factor(c('Grocery', 'Electronics', 'Clothing', 'Grocery', 'Electronics', 'Clothing')),
Quantity = c(10, 1, 3, 8, 2, 5),
CostPrice = c(50, 20000, 800, 60, 18000, 700),
SellingPrice = c(70, 25000, 1000, 80, 22000, 500),
Discount = c(100, 2000, 150, 50, 500, 300),
DeliveryType = factor(c('home', 'store', 'home', 'store', 'home', 'home'))
)
Retail
## bill_id customer_name gender membership Category Quantity CostPrice
## 1 501 Aman male gold Grocery 10 50
## 2 502 Riya female red Electronics 1 20000
## 3 503 Karan male gold Clothing 3 800
## 4 504 Neha female none Grocery 8 60
## 5 505 Rohit male silver Electronics 2 18000
## 6 506 Simran female gold Clothing 5 700
## SellingPrice Discount DeliveryType
## 1 70 100 home
## 2 25000 2000 store
## 3 1000 150 home
## 4 80 50 store
## 5 22000 500 home
## 6 500 300 home
#Step 2: Add Calculation-Based Columns
#1. Add GrossRevenue = Quantity * SellingPrice.
Retail$GrossRevenue<- Retail$Quantity * Retail$SellingPrice
Retail
## bill_id customer_name gender membership Category Quantity CostPrice
## 1 501 Aman male gold Grocery 10 50
## 2 502 Riya female red Electronics 1 20000
## 3 503 Karan male gold Clothing 3 800
## 4 504 Neha female none Grocery 8 60
## 5 505 Rohit male silver Electronics 2 18000
## 6 506 Simran female gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue
## 1 70 100 home 700
## 2 25000 2000 store 25000
## 3 1000 150 home 3000
## 4 80 50 store 640
## 5 22000 500 home 44000
## 6 500 300 home 2500
#2. Add TotalCost = Quantity * CostPrice.
Retail$TotalCost<- Retail$Quantity * Retail$CostPrice
Retail
## bill_id customer_name gender membership Category Quantity CostPrice
## 1 501 Aman male gold Grocery 10 50
## 2 502 Riya female red Electronics 1 20000
## 3 503 Karan male gold Clothing 3 800
## 4 504 Neha female none Grocery 8 60
## 5 505 Rohit male silver Electronics 2 18000
## 6 506 Simran female gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost
## 1 70 100 home 700 500
## 2 25000 2000 store 25000 20000
## 3 1000 150 home 3000 2400
## 4 80 50 store 640 480
## 5 22000 500 home 44000 36000
## 6 500 300 home 2500 3500
#3. Add NetRevenue = GrossRevenue - Discount.
Retail$NetRevenue <- Retail$GrossRevenue - Retail$Discount
Retail
## bill_id customer_name gender membership Category Quantity CostPrice
## 1 501 Aman male gold Grocery 10 50
## 2 502 Riya female red Electronics 1 20000
## 3 503 Karan male gold Clothing 3 800
## 4 504 Neha female none Grocery 8 60
## 5 505 Rohit male silver Electronics 2 18000
## 6 506 Simran female gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue
## 1 70 100 home 700 500 600
## 2 25000 2000 store 25000 20000 23000
## 3 1000 150 home 3000 2400 2850
## 4 80 50 store 640 480 590
## 5 22000 500 home 44000 36000 43500
## 6 500 300 home 2500 3500 2200
#4. Add Profit = NetRevenue - TotalCost.
Retail$Profit <- Retail$NetRevenue - Retail$TotalCost
Retail
## bill_id customer_name gender membership Category Quantity CostPrice
## 1 501 Aman male gold Grocery 10 50
## 2 502 Riya female red Electronics 1 20000
## 3 503 Karan male gold Clothing 3 800
## 4 504 Neha female none Grocery 8 60
## 5 505 Rohit male silver Electronics 2 18000
## 6 506 Simran female gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 home 700 500 600 100
## 2 25000 2000 store 25000 20000 23000 3000
## 3 1000 150 home 3000 2400 2850 450
## 4 80 50 store 640 480 590 110
## 5 22000 500 home 44000 36000 43500 7500
## 6 500 300 home 2500 3500 2200 -1300
# Step 3: Multi-Condition Based Analysis
#1. Find Gold members who have Profit > 5000 and chose Home delivery.
member<-Retail[Retail$Profit > 100 & Retail$DeliveryType =='home' & Retail$membership =="gold", ]
member
## bill_id customer_name gender membership Category Quantity CostPrice
## 3 503 Karan male gold Clothing 3 800
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 3 1000 150 home 3000 2400 2850 450
#2. Find all loss-making transactions (Profit < 0).
loss_making<-Retail[Retail$Profit < 0, ]
loss_making
## bill_id customer_name gender membership Category Quantity CostPrice
## 6 506 Simran female gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 6 500 300 home 2500 3500 2200 -1300
#3. Find Electronics orders where Quantity >= 2, Discount > 1000, and Profit > 0.
elc<-Retail[Retail$Quantity >= 2 & Retail$Discount > 1000 & Retail$Profit > 0 & Retail$Category=='Electronics', ]
elc
## [1] bill_id customer_name gender membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit
## <0 rows> (or 0-length row.names)
#4. Find premium customers where NetRevenue > 20000 OR Membership is Gold.
prm<-Retail[Retail$NetRevenue>20000 | Retail$membership=='gold', ]
prm
## bill_id customer_name gender membership Category Quantity CostPrice
## 1 501 Aman male gold Grocery 10 50
## 2 502 Riya female red Electronics 1 20000
## 3 503 Karan male gold Clothing 3 800
## 5 505 Rohit male silver Electronics 2 18000
## 6 506 Simran female gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 home 700 500 600 100
## 2 25000 2000 store 25000 20000 23000 3000
## 3 1000 150 home 3000 2400 2850 450
## 5 22000 500 home 44000 36000 43500 7500
## 6 500 300 home 2500 3500 2200 -1300
#Step 4: Advanced Logical Classification
#1. Create ProfitCategory: High Profit (>10000), Moderate Profit (>0), otherwise Loss.
Retail$ProfitCategory<- ifelse(Retail$Profit > 10000,'High Profit',
ifelse(Retail$Profit > 0,'Moderate Profit','loss'))
print(Retail)
## bill_id customer_name gender membership Category Quantity CostPrice
## 1 501 Aman male gold Grocery 10 50
## 2 502 Riya female red Electronics 1 20000
## 3 503 Karan male gold Clothing 3 800
## 4 504 Neha female none Grocery 8 60
## 5 505 Rohit male silver Electronics 2 18000
## 6 506 Simran female gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 home 700 500 600 100
## 2 25000 2000 store 25000 20000 23000 3000
## 3 1000 150 home 3000 2400 2850 450
## 4 80 50 store 640 480 590 110
## 5 22000 500 home 44000 36000 43500 7500
## 6 500 300 home 2500 3500 2200 -1300
## ProfitCategory
## 1 Moderate Profit
## 2 Moderate Profit
## 3 Moderate Profit
## 4 Moderate Profit
## 5 Moderate Profit
## 6 loss
#2. Create RiskFlag: Risky if Discount > 20% of GrossRevenue OR Profit < 0, otherwise Safe.
Retail$RiskFlag<- ifelse(Retail$Discount > 0.2*Retail$GrossRevenue | Retail$Profit <0,'Risky', 'Safe')
print(Retail)
## bill_id customer_name gender membership Category Quantity CostPrice
## 1 501 Aman male gold Grocery 10 50
## 2 502 Riya female red Electronics 1 20000
## 3 503 Karan male gold Clothing 3 800
## 4 504 Neha female none Grocery 8 60
## 5 505 Rohit male silver Electronics 2 18000
## 6 506 Simran female gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 home 700 500 600 100
## 2 25000 2000 store 25000 20000 23000 3000
## 3 1000 150 home 3000 2400 2850 450
## 4 80 50 store 640 480 590 110
## 5 22000 500 home 44000 36000 43500 7500
## 6 500 300 home 2500 3500 2200 -1300
## ProfitCategory RiskFlag
## 1 Moderate Profit Safe
## 2 Moderate Profit Safe
## 3 Moderate Profit Safe
## 4 Moderate Profit Safe
## 5 Moderate Profit Safe
## 6 loss Risky
#Step 5: Aggregated Analysis
#1. Calculate total Profit by Membership and Category.
aggregate(Profit ~ membership +Category, data=Retail,sum)
## membership Category Profit
## 1 gold Clothing -850
## 2 red Electronics 3000
## 3 silver Electronics 7500
## 4 gold Grocery 100
## 5 none Grocery 110
#2. Calculate average Profit by Gender and DeliveryType.
aggregate(Profit ~ gender+ DeliveryType,data=Retail,mean)
## gender DeliveryType Profit
## 1 female home -1300.000
## 2 male home 2683.333
## 3 female store 1555.000
#Step 6: KPI-Based Calculations
#1. Create ProfitMargin (%) = (Profit / NetRevenue) * 100.
Retail$ProfitMargin<-(Retail$Profit / Retail$NetRevenue)*100
print(Retail)
## bill_id customer_name gender membership Category Quantity CostPrice
## 1 501 Aman male gold Grocery 10 50
## 2 502 Riya female red Electronics 1 20000
## 3 503 Karan male gold Clothing 3 800
## 4 504 Neha female none Grocery 8 60
## 5 505 Rohit male silver Electronics 2 18000
## 6 506 Simran female gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 home 700 500 600 100
## 2 25000 2000 store 25000 20000 23000 3000
## 3 1000 150 home 3000 2400 2850 450
## 4 80 50 store 640 480 590 110
## 5 22000 500 home 44000 36000 43500 7500
## 6 500 300 home 2500 3500 2200 -1300
## ProfitCategory RiskFlag ProfitMargin
## 1 Moderate Profit Safe 16.66667
## 2 Moderate Profit Safe 13.04348
## 3 Moderate Profit Safe 15.78947
## 4 Moderate Profit Safe 18.64407
## 5 Moderate Profit Safe 17.24138
## 6 loss Risky -59.09091
#2. Find orders where ProfitMargin > 30 and NetRevenue > 20000.
mar<-Retail[Retail$ProfitMargin > 30 & Retail$NetRevenue >200000, ]
print(mar)
## [1] bill_id customer_name gender membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit ProfitCategory
## [16] RiskFlag ProfitMargin
## <0 rows> (or 0-length row.names)