# Retail database

retail <- data.frame(
  BillID = c(501, 502, 503, 504, 505, 506),
  CustomertName = c("Aman", "Riya", "Karan", "Neha", "Rohit", "Simran"),
  Gender = factor(c("Male", "Female", "Male", "Female", "Male", "Female")),
  Membership = factor(c("Gold", "Silver", "Gold", "None", "Silver", "Gold")),
  Category = factor(c("Grocery", "Electronics", "Clothing", "Grocery", "Electronics", "Clothing")),
  Quantity = c(10, 1, 3, 8, 2, 5),
  CostPrice = c(50, 20000, 800, 60, 18000, 700),
  SellingPrice=c(70, 25000, 1000, 80, 22000,900),
  Discount=c(100, 2000, 150, 50, 500, 300),
  DeliveryType=factor(c("Home", "Store", "Home", "Store", "Home", "Home"))
)
retail
##   BillID CustomertName Gender Membership    Category Quantity CostPrice
## 1    501          Aman   Male       Gold     Grocery       10        50
## 2    502          Riya Female     Silver Electronics        1     20000
## 3    503         Karan   Male       Gold    Clothing        3       800
## 4    504          Neha Female       None     Grocery        8        60
## 5    505         Rohit   Male     Silver Electronics        2     18000
## 6    506        Simran Female       Gold    Clothing        5       700
##   SellingPrice Discount DeliveryType
## 1           70      100         Home
## 2        25000     2000        Store
## 3         1000      150         Home
## 4           80       50        Store
## 5        22000      500         Home
## 6          900      300         Home
# Basic----------------------------------
str(retail)        # Structure of data frame
## 'data.frame':    6 obs. of  10 variables:
##  $ BillID       : num  501 502 503 504 505 506
##  $ CustomertName: chr  "Aman" "Riya" "Karan" "Neha" ...
##  $ Gender       : Factor w/ 2 levels "Female","Male": 2 1 2 1 2 1
##  $ Membership   : Factor w/ 3 levels "Gold","None",..: 1 3 1 2 3 1
##  $ Category     : Factor w/ 3 levels "Clothing","Electronics",..: 3 2 1 3 2 1
##  $ Quantity     : num  10 1 3 8 2 5
##  $ CostPrice    : num  50 20000 800 60 18000 700
##  $ SellingPrice : num  70 25000 1000 80 22000 900
##  $ Discount     : num  100 2000 150 50 500 300
##  $ DeliveryType : Factor w/ 2 levels "Home","Store": 1 2 1 2 1 1
summary(retail)    # Summary statistics
##      BillID      CustomertName         Gender   Membership        Category
##  Min.   :501.0   Length:6           Female:3   Gold  :3    Clothing   :2  
##  1st Qu.:502.2   Class :character   Male  :3   None  :1    Electronics:2  
##  Median :503.5   Mode  :character              Silver:2    Grocery    :2  
##  Mean   :503.5                                                            
##  3rd Qu.:504.8                                                            
##  Max.   :506.0                                                            
##     Quantity        CostPrice      SellingPrice      Discount      DeliveryType
##  Min.   : 1.000   Min.   :   50   Min.   :   70   Min.   :  50.0   Home :4     
##  1st Qu.: 2.250   1st Qu.:  220   1st Qu.:  285   1st Qu.: 112.5   Store:2     
##  Median : 4.000   Median :  750   Median :  950   Median : 225.0               
##  Mean   : 4.833   Mean   : 6602   Mean   : 8175   Mean   : 516.7               
##  3rd Qu.: 7.250   3rd Qu.:13700   3rd Qu.:16750   3rd Qu.: 450.0               
##  Max.   :10.000   Max.   :20000   Max.   :25000   Max.   :2000.0
dim(retail)        # Dimensions
## [1]  6 10
nrow(retail)       # Number of rows
## [1] 6
ncol(retail)       # Number of columns
## [1] 10
colnames(retail)   # Column names
##  [1] "BillID"        "CustomertName" "Gender"        "Membership"   
##  [5] "Category"      "Quantity"      "CostPrice"     "SellingPrice" 
##  [9] "Discount"      "DeliveryType"
# Step 2: Add Calculation-Based Columns
# 1. Add GrossRevenue = Quantity * SellingPrice
retail$GrossRevenue <- retail$Quantity * retail$SellingPrice
# 2. Add TotalCost = Quantity * CostPrice.
retail$TotalCost <- retail$Quantity * retail$CostPrice
# 3. Add NetRevenue = GrossRevenue - Discount.
retail$NetRevenue <- retail$GrossRevenue - retail$Discount
#4. Add Profit = NetRevenue - TotalCost.
retail$Profit <- retail$NetRevenue - retail$TotalCost

retail
##   BillID CustomertName Gender Membership    Category Quantity CostPrice
## 1    501          Aman   Male       Gold     Grocery       10        50
## 2    502          Riya Female     Silver Electronics        1     20000
## 3    503         Karan   Male       Gold    Clothing        3       800
## 4    504          Neha Female       None     Grocery        8        60
## 5    505         Rohit   Male     Silver Electronics        2     18000
## 6    506        Simran Female       Gold    Clothing        5       700
##   SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1           70      100         Home          700       500        600    100
## 2        25000     2000        Store        25000     20000      23000   3000
## 3         1000      150         Home         3000      2400       2850    450
## 4           80       50        Store          640       480        590    110
## 5        22000      500         Home        44000     36000      43500   7500
## 6          900      300         Home         4500      3500       4200    700
# Step 3: Multi-Condition Based Analysis (use subset function )
# 1. Find Gold members who have Profit > 5000 and chose Home delivery.
subset(retail,
       Membership == "Gold" &
         Profit > 5000 &
         DeliveryType == "Home")
##  [1] BillID        CustomertName Gender        Membership    Category     
##  [6] Quantity      CostPrice     SellingPrice  Discount      DeliveryType 
## [11] GrossRevenue  TotalCost     NetRevenue    Profit       
## <0 rows> (or 0-length row.names)
# 2. Find all loss-making transactions (Profit < 0).
subset(retail, Profit < 0)
##  [1] BillID        CustomertName Gender        Membership    Category     
##  [6] Quantity      CostPrice     SellingPrice  Discount      DeliveryType 
## [11] GrossRevenue  TotalCost     NetRevenue    Profit       
## <0 rows> (or 0-length row.names)
# 3. Find Electronics orders where Quantity >= 2, Discount > 1000, and Profit > 0.
subset(retail,
       Category == "Electronics" &
         Quantity >= 2 &
         Discount > 1000 &
         Profit > 0)
##  [1] BillID        CustomertName Gender        Membership    Category     
##  [6] Quantity      CostPrice     SellingPrice  Discount      DeliveryType 
## [11] GrossRevenue  TotalCost     NetRevenue    Profit       
## <0 rows> (or 0-length row.names)
# 4. Find premium customers where NetRevenue > 20000 OR Membership is Gold.
subset(retail,
       NetRevenue > 20000 |
         Membership == "Gold")
##   BillID CustomertName Gender Membership    Category Quantity CostPrice
## 1    501          Aman   Male       Gold     Grocery       10        50
## 2    502          Riya Female     Silver Electronics        1     20000
## 3    503         Karan   Male       Gold    Clothing        3       800
## 5    505         Rohit   Male     Silver Electronics        2     18000
## 6    506        Simran Female       Gold    Clothing        5       700
##   SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1           70      100         Home          700       500        600    100
## 2        25000     2000        Store        25000     20000      23000   3000
## 3         1000      150         Home         3000      2400       2850    450
## 5        22000      500         Home        44000     36000      43500   7500
## 6          900      300         Home         4500      3500       4200    700
# STEP 4: Advanced Logical Classification
# 1. ProfitCategory
retail$ProfitCategory <- ifelse(retail$Profit > 10000, "High Profit",
                                ifelse(retail$Profit > 0, "Moderate Profit",
                                       "Loss"))

# 2. RiskFlag
retail$RiskFlag <- ifelse(
  retail$Discount > 0.20 * retail$GrossRevenue |
    retail$Profit < 0,
  "Risky",
  "Safe"
)

# Step 5: Aggregated Analysis
# 1. Calculate total Profit by Membership and Category.
aggregate(Profit ~ Membership + Category,
          data = retail,
          sum)
##   Membership    Category Profit
## 1       Gold    Clothing   1150
## 2     Silver Electronics  10500
## 3       Gold     Grocery    100
## 4       None     Grocery    110
# 2. Calculate average Profit by Gender and DeliveryType.
aggregate(Profit ~ Gender + DeliveryType,
          data = retail,
          mean)
##   Gender DeliveryType   Profit
## 1 Female         Home  700.000
## 2   Male         Home 2683.333
## 3 Female        Store 1555.000
# Step 6: KPI-Based Calculations
# 1. Create ProfitMargin (%) = (Profit / NetRevenue) * 100.
retail$ProfitMargin <- (retail$Profit / retail$NetRevenue) * 100

# 2. Find orders where ProfitMargin > 30 and NetRevenue > 20000.
subset(retail,
       ProfitMargin > 30 &
         NetRevenue > 20000)
##  [1] BillID         CustomertName  Gender         Membership     Category      
##  [6] Quantity       CostPrice      SellingPrice   Discount       DeliveryType  
## [11] GrossRevenue   TotalCost      NetRevenue     Profit         ProfitCategory
## [16] RiskFlag       ProfitMargin  
## <0 rows> (or 0-length row.names)
# Step 7: Sorting
# Sort the data by Profit (descending) and then by NetRevenue (descending).
retail_sorted <- retail[order(-retail$Profit,
                              -retail$NetRevenue), ]

# Step 8: Scenario-Based Analysis Questions (Answer using R)
# 1. Find Gold members who placed Home delivery orders and earned profit above 8000.
subset(retail,
       Membership == "Gold" &
         DeliveryType == "Home" &
         Profit > 8000)
##  [1] BillID         CustomertName  Gender         Membership     Category      
##  [6] Quantity       CostPrice      SellingPrice   Discount       DeliveryType  
## [11] GrossRevenue   TotalCost      NetRevenue     Profit         ProfitCategory
## [16] RiskFlag       ProfitMargin  
## <0 rows> (or 0-length row.names)
# 2. Identify transactions where discount exceeds 15% AND category is Electronics.
subset(retail,
       Discount > 0.15 * GrossRevenue &
         Category == "Electronics")
##  [1] BillID         CustomertName  Gender         Membership     Category      
##  [6] Quantity       CostPrice      SellingPrice   Discount       DeliveryType  
## [11] GrossRevenue   TotalCost      NetRevenue     Profit         ProfitCategory
## [16] RiskFlag       ProfitMargin  
## <0 rows> (or 0-length row.names)
# 3. Find female customers who made profit > 5000 OR purchased Grocery.
subset(retail,
       Gender == "Female" &
         (Profit > 5000 | Category == "Grocery"))
##   BillID CustomertName Gender Membership Category Quantity CostPrice
## 4    504          Neha Female       None  Grocery        8        60
##   SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 4           80       50        Store          640       480        590    110
##    ProfitCategory RiskFlag ProfitMargin
## 4 Moderate Profit     Safe     18.64407
# 4. Detect risky transactions where profit is negative OR discount ratio > 25%.
subset(retail,
       Profit < 0 |
         Discount > 0.25 * GrossRevenue)
##  [1] BillID         CustomertName  Gender         Membership     Category      
##  [6] Quantity       CostPrice      SellingPrice   Discount       DeliveryType  
## [11] GrossRevenue   TotalCost      NetRevenue     Profit         ProfitCategory
## [16] RiskFlag       ProfitMargin  
## <0 rows> (or 0-length row.names)
# 5. Identify top 3 most profitable transactions.
head(retail[order(-retail$Profit), ], 3)
##   BillID CustomertName Gender Membership    Category Quantity CostPrice
## 5    505         Rohit   Male     Silver Electronics        2     18000
## 2    502          Riya Female     Silver Electronics        1     20000
## 6    506        Simran Female       Gold    Clothing        5       700
##   SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 5        22000      500         Home        44000     36000      43500   7500
## 2        25000     2000        Store        25000     20000      23000   3000
## 6          900      300         Home         4500      3500       4200    700
##    ProfitCategory RiskFlag ProfitMargin
## 5 Moderate Profit     Safe     17.24138
## 2 Moderate Profit     Safe     13.04348
## 6 Moderate Profit     Safe     16.66667
# 6. Compare average profit of Gold vs Silver members.
aggregate(Profit ~ Membership,
          data = retail,
          mean)
##   Membership    Profit
## 1       Gold  416.6667
## 2       None  110.0000
## 3     Silver 5250.0000
# 7. Find transactions where TotalCost exceeds NetRevenue.
subset(retail,
       TotalCost > NetRevenue)
##  [1] BillID         CustomertName  Gender         Membership     Category      
##  [6] Quantity       CostPrice      SellingPrice   Discount       DeliveryType  
## [11] GrossRevenue   TotalCost      NetRevenue     Profit         ProfitCategory
## [16] RiskFlag       ProfitMargin  
## <0 rows> (or 0-length row.names)
# 8. Determine membership type contributing highest total profit.
membership_profit <- aggregate(Profit ~ Membership,
                               data = retail,
                               sum)
membership_profit[which.max(membership_profit$Profit), ]
##   Membership Profit
## 3     Silver  10500
# 9. Find delivery type having lowest average profit.
delivery_profit <- aggregate(Profit ~ DeliveryType,
                             data = retail,
                             mean)
delivery_profit[which.min(delivery_profit$Profit), ]
##   DeliveryType Profit
## 2        Store   1555
# 10. Identify high revenue but low margin transactions (NetRevenue > 20000 and ProfitMargin < 20).
subset(retail,
       NetRevenue > 20000 &
         ProfitMargin < 20)
##   BillID CustomertName Gender Membership    Category Quantity CostPrice
## 2    502          Riya Female     Silver Electronics        1     20000
## 5    505         Rohit   Male     Silver Electronics        2     18000
##   SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 2        25000     2000        Store        25000     20000      23000   3000
## 5        22000      500         Home        44000     36000      43500   7500
##    ProfitCategory RiskFlag ProfitMargin
## 2 Moderate Profit     Safe     13.04348
## 5 Moderate Profit     Safe     17.24138