# Retail database
retail <- data.frame(
BillID = c(501, 502, 503, 504, 505, 506),
CustomertName = c("Aman", "Riya", "Karan", "Neha", "Rohit", "Simran"),
Gender = factor(c("Male", "Female", "Male", "Female", "Male", "Female")),
Membership = factor(c("Gold", "Silver", "Gold", "None", "Silver", "Gold")),
Category = factor(c("Grocery", "Electronics", "Clothing", "Grocery", "Electronics", "Clothing")),
Quantity = c(10, 1, 3, 8, 2, 5),
CostPrice = c(50, 20000, 800, 60, 18000, 700),
SellingPrice=c(70, 25000, 1000, 80, 22000,900),
Discount=c(100, 2000, 150, 50, 500, 300),
DeliveryType=factor(c("Home", "Store", "Home", "Store", "Home", "Home"))
)
retail
## BillID CustomertName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 4 504 Neha Female None Grocery 8 60
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType
## 1 70 100 Home
## 2 25000 2000 Store
## 3 1000 150 Home
## 4 80 50 Store
## 5 22000 500 Home
## 6 900 300 Home
# Basic----------------------------------
str(retail) # Structure of data frame
## 'data.frame': 6 obs. of 10 variables:
## $ BillID : num 501 502 503 504 505 506
## $ CustomertName: chr "Aman" "Riya" "Karan" "Neha" ...
## $ Gender : Factor w/ 2 levels "Female","Male": 2 1 2 1 2 1
## $ Membership : Factor w/ 3 levels "Gold","None",..: 1 3 1 2 3 1
## $ Category : Factor w/ 3 levels "Clothing","Electronics",..: 3 2 1 3 2 1
## $ Quantity : num 10 1 3 8 2 5
## $ CostPrice : num 50 20000 800 60 18000 700
## $ SellingPrice : num 70 25000 1000 80 22000 900
## $ Discount : num 100 2000 150 50 500 300
## $ DeliveryType : Factor w/ 2 levels "Home","Store": 1 2 1 2 1 1
summary(retail) # Summary statistics
## BillID CustomertName Gender Membership Category
## Min. :501.0 Length:6 Female:3 Gold :3 Clothing :2
## 1st Qu.:502.2 Class :character Male :3 None :1 Electronics:2
## Median :503.5 Mode :character Silver:2 Grocery :2
## Mean :503.5
## 3rd Qu.:504.8
## Max. :506.0
## Quantity CostPrice SellingPrice Discount DeliveryType
## Min. : 1.000 Min. : 50 Min. : 70 Min. : 50.0 Home :4
## 1st Qu.: 2.250 1st Qu.: 220 1st Qu.: 285 1st Qu.: 112.5 Store:2
## Median : 4.000 Median : 750 Median : 950 Median : 225.0
## Mean : 4.833 Mean : 6602 Mean : 8175 Mean : 516.7
## 3rd Qu.: 7.250 3rd Qu.:13700 3rd Qu.:16750 3rd Qu.: 450.0
## Max. :10.000 Max. :20000 Max. :25000 Max. :2000.0
dim(retail) # Dimensions
## [1] 6 10
nrow(retail) # Number of rows
## [1] 6
ncol(retail) # Number of columns
## [1] 10
colnames(retail) # Column names
## [1] "BillID" "CustomertName" "Gender" "Membership"
## [5] "Category" "Quantity" "CostPrice" "SellingPrice"
## [9] "Discount" "DeliveryType"
# Step 2: Add Calculation-Based Columns
# 1. Add GrossRevenue = Quantity * SellingPrice
retail$GrossRevenue <- retail$Quantity * retail$SellingPrice
# 2. Add TotalCost = Quantity * CostPrice.
retail$TotalCost <- retail$Quantity * retail$CostPrice
# 3. Add NetRevenue = GrossRevenue - Discount.
retail$NetRevenue <- retail$GrossRevenue - retail$Discount
#4. Add Profit = NetRevenue - TotalCost.
retail$Profit <- retail$NetRevenue - retail$TotalCost
retail
## BillID CustomertName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 4 504 Neha Female None Grocery 8 60
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 Home 700 500 600 100
## 2 25000 2000 Store 25000 20000 23000 3000
## 3 1000 150 Home 3000 2400 2850 450
## 4 80 50 Store 640 480 590 110
## 5 22000 500 Home 44000 36000 43500 7500
## 6 900 300 Home 4500 3500 4200 700
# Step 3: Multi-Condition Based Analysis (use subset function )
# 1. Find Gold members who have Profit > 5000 and chose Home delivery.
subset(retail,
Membership == "Gold" &
Profit > 5000 &
DeliveryType == "Home")
## [1] BillID CustomertName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit
## <0 rows> (or 0-length row.names)
# 2. Find all loss-making transactions (Profit < 0).
subset(retail, Profit < 0)
## [1] BillID CustomertName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit
## <0 rows> (or 0-length row.names)
# 3. Find Electronics orders where Quantity >= 2, Discount > 1000, and Profit > 0.
subset(retail,
Category == "Electronics" &
Quantity >= 2 &
Discount > 1000 &
Profit > 0)
## [1] BillID CustomertName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit
## <0 rows> (or 0-length row.names)
# 4. Find premium customers where NetRevenue > 20000 OR Membership is Gold.
subset(retail,
NetRevenue > 20000 |
Membership == "Gold")
## BillID CustomertName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 Home 700 500 600 100
## 2 25000 2000 Store 25000 20000 23000 3000
## 3 1000 150 Home 3000 2400 2850 450
## 5 22000 500 Home 44000 36000 43500 7500
## 6 900 300 Home 4500 3500 4200 700
# STEP 4: Advanced Logical Classification
# 1. ProfitCategory
retail$ProfitCategory <- ifelse(retail$Profit > 10000, "High Profit",
ifelse(retail$Profit > 0, "Moderate Profit",
"Loss"))
# 2. RiskFlag
retail$RiskFlag <- ifelse(
retail$Discount > 0.20 * retail$GrossRevenue |
retail$Profit < 0,
"Risky",
"Safe"
)
# Step 5: Aggregated Analysis
# 1. Calculate total Profit by Membership and Category.
aggregate(Profit ~ Membership + Category,
data = retail,
sum)
## Membership Category Profit
## 1 Gold Clothing 1150
## 2 Silver Electronics 10500
## 3 Gold Grocery 100
## 4 None Grocery 110
# 2. Calculate average Profit by Gender and DeliveryType.
aggregate(Profit ~ Gender + DeliveryType,
data = retail,
mean)
## Gender DeliveryType Profit
## 1 Female Home 700.000
## 2 Male Home 2683.333
## 3 Female Store 1555.000
# Step 6: KPI-Based Calculations
# 1. Create ProfitMargin (%) = (Profit / NetRevenue) * 100.
retail$ProfitMargin <- (retail$Profit / retail$NetRevenue) * 100
# 2. Find orders where ProfitMargin > 30 and NetRevenue > 20000.
subset(retail,
ProfitMargin > 30 &
NetRevenue > 20000)
## [1] BillID CustomertName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit ProfitCategory
## [16] RiskFlag ProfitMargin
## <0 rows> (or 0-length row.names)
# Step 7: Sorting
# Sort the data by Profit (descending) and then by NetRevenue (descending).
retail_sorted <- retail[order(-retail$Profit,
-retail$NetRevenue), ]
# Step 8: Scenario-Based Analysis Questions (Answer using R)
# 1. Find Gold members who placed Home delivery orders and earned profit above 8000.
subset(retail,
Membership == "Gold" &
DeliveryType == "Home" &
Profit > 8000)
## [1] BillID CustomertName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit ProfitCategory
## [16] RiskFlag ProfitMargin
## <0 rows> (or 0-length row.names)
# 2. Identify transactions where discount exceeds 15% AND category is Electronics.
subset(retail,
Discount > 0.15 * GrossRevenue &
Category == "Electronics")
## [1] BillID CustomertName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit ProfitCategory
## [16] RiskFlag ProfitMargin
## <0 rows> (or 0-length row.names)
# 3. Find female customers who made profit > 5000 OR purchased Grocery.
subset(retail,
Gender == "Female" &
(Profit > 5000 | Category == "Grocery"))
## BillID CustomertName Gender Membership Category Quantity CostPrice
## 4 504 Neha Female None Grocery 8 60
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 4 80 50 Store 640 480 590 110
## ProfitCategory RiskFlag ProfitMargin
## 4 Moderate Profit Safe 18.64407
# 4. Detect risky transactions where profit is negative OR discount ratio > 25%.
subset(retail,
Profit < 0 |
Discount > 0.25 * GrossRevenue)
## [1] BillID CustomertName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit ProfitCategory
## [16] RiskFlag ProfitMargin
## <0 rows> (or 0-length row.names)
# 5. Identify top 3 most profitable transactions.
head(retail[order(-retail$Profit), ], 3)
## BillID CustomertName Gender Membership Category Quantity CostPrice
## 5 505 Rohit Male Silver Electronics 2 18000
## 2 502 Riya Female Silver Electronics 1 20000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 5 22000 500 Home 44000 36000 43500 7500
## 2 25000 2000 Store 25000 20000 23000 3000
## 6 900 300 Home 4500 3500 4200 700
## ProfitCategory RiskFlag ProfitMargin
## 5 Moderate Profit Safe 17.24138
## 2 Moderate Profit Safe 13.04348
## 6 Moderate Profit Safe 16.66667
# 6. Compare average profit of Gold vs Silver members.
aggregate(Profit ~ Membership,
data = retail,
mean)
## Membership Profit
## 1 Gold 416.6667
## 2 None 110.0000
## 3 Silver 5250.0000
# 7. Find transactions where TotalCost exceeds NetRevenue.
subset(retail,
TotalCost > NetRevenue)
## [1] BillID CustomertName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit ProfitCategory
## [16] RiskFlag ProfitMargin
## <0 rows> (or 0-length row.names)
# 8. Determine membership type contributing highest total profit.
membership_profit <- aggregate(Profit ~ Membership,
data = retail,
sum)
membership_profit[which.max(membership_profit$Profit), ]
## Membership Profit
## 3 Silver 10500
# 9. Find delivery type having lowest average profit.
delivery_profit <- aggregate(Profit ~ DeliveryType,
data = retail,
mean)
delivery_profit[which.min(delivery_profit$Profit), ]
## DeliveryType Profit
## 2 Store 1555
# 10. Identify high revenue but low margin transactions (NetRevenue > 20000 and ProfitMargin < 20).
subset(retail,
NetRevenue > 20000 &
ProfitMargin < 20)
## BillID CustomertName Gender Membership Category Quantity CostPrice
## 2 502 Riya Female Silver Electronics 1 20000
## 5 505 Rohit Male Silver Electronics 2 18000
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 2 25000 2000 Store 25000 20000 23000 3000
## 5 22000 500 Home 44000 36000 43500 7500
## ProfitCategory RiskFlag ProfitMargin
## 2 Moderate Profit Safe 13.04348
## 5 Moderate Profit Safe 17.24138