Retail Data Analysis

# Sample retail bills: one row per bill. CostPrice and SellingPrice are
# per-unit amounts (they are multiplied by Quantity further down), while
# Discount is a single amount per bill.
retail <- data.frame(
  # Bill and customer attributes
  BillID = c(501, 502, 503, 504, 505, 506),
  CustomerName = c("Aman", "Riya", "Karan", "Neha", "Rohit", "Simran"),
  Gender = c("Male", "Female", "Male", "Female", "Male", "Female"),
  Membership = c("Gold", "Silver", "Gold", "None", "Silver", "Gold"),
  # What was bought and at what price
  Category = c(
    "Grocery", "Electronics", "Clothing",
    "Grocery", "Electronics", "Clothing"
  ),
  Quantity = c(10, 1, 3, 8, 2, 5),
  CostPrice = c(50, 20000, 800, 60, 18000, 700),
  SellingPrice = c(70, 25000, 1000, 80, 22000, 900),
  Discount = c(100, 2000, 150, 50, 500, 300),
  # Fulfilment
  DeliveryType = c("Home", "Store", "Home", "Store", "Home", "Home"),
  # Keep text columns as character vectors rather than factors.
  stringsAsFactors = FALSE
)
print(retail)
##   BillID CustomerName Gender Membership    Category Quantity CostPrice
## 1    501         Aman   Male       Gold     Grocery       10        50
## 2    502         Riya Female     Silver Electronics        1     20000
## 3    503        Karan   Male       Gold    Clothing        3       800
## 4    504         Neha Female       None     Grocery        8        60
## 5    505        Rohit   Male     Silver Electronics        2     18000
## 6    506       Simran Female       Gold    Clothing        5       700
##   SellingPrice Discount DeliveryType
## 1           70      100         Home
## 2        25000     2000        Store
## 3         1000      150         Home
## 4           80       50        Store
## 5        22000      500         Home
## 6          900      300         Home

Summary of Data

# Summarise every column of `retail` in one call.
summary(retail)
##      BillID      CustomerName          Gender           Membership       
##  Min.   :501.0   Length:6           Length:6           Length:6          
##  1st Qu.:502.2   Class :character   Class :character   Class :character  
##  Median :503.5   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :503.5                                                           
##  3rd Qu.:504.8                                                           
##  Max.   :506.0                                                           
##    Category            Quantity        CostPrice      SellingPrice  
##  Length:6           Min.   : 1.000   Min.   :   50   Min.   :   70  
##  Class :character   1st Qu.: 2.250   1st Qu.:  220   1st Qu.:  285  
##  Mode  :character   Median : 4.000   Median :  750   Median :  950  
##                     Mean   : 4.833   Mean   : 6602   Mean   : 8175  
##                     3rd Qu.: 7.250   3rd Qu.:13700   3rd Qu.:16750  
##                     Max.   :10.000   Max.   :20000   Max.   :25000  
##     Discount      DeliveryType      
##  Min.   :  50.0   Length:6          
##  1st Qu.: 112.5   Class :character  
##  Median : 225.0   Mode  :character  
##  Mean   : 516.7                     
##  3rd Qu.: 450.0                     
##  Max.   :2000.0

Revenue and Profit Calculations

# Derived money columns, added in dependency order:
#   GrossRevenue = Quantity * SellingPrice
#   TotalCost    = Quantity * CostPrice
#   NetRevenue   = GrossRevenue - Discount (Discount is not scaled by Quantity)
#   Profit       = NetRevenue - TotalCost
retail[["GrossRevenue"]] <- retail[["Quantity"]] * retail[["SellingPrice"]]
retail[["TotalCost"]] <- retail[["Quantity"]] * retail[["CostPrice"]]
retail[["NetRevenue"]] <- retail[["GrossRevenue"]] - retail[["Discount"]]
retail[["Profit"]] <- retail[["NetRevenue"]] - retail[["TotalCost"]]
print(retail)
##   BillID CustomerName Gender Membership    Category Quantity CostPrice
## 1    501         Aman   Male       Gold     Grocery       10        50
## 2    502         Riya Female     Silver Electronics        1     20000
## 3    503        Karan   Male       Gold    Clothing        3       800
## 4    504         Neha Female       None     Grocery        8        60
## 5    505        Rohit   Male     Silver Electronics        2     18000
## 6    506       Simran Female       Gold    Clothing        5       700
##   SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1           70      100         Home          700       500        600    100
## 2        25000     2000        Store        25000     20000      23000   3000
## 3         1000      150         Home         3000      2400       2850    450
## 4           80       50        Store          640       480        590    110
## 5        22000      500         Home        44000     36000      43500   7500
## 6          900      300         Home         4500      3500       4200    700

Filtering Data

# Bills from Gold members with Profit above 5000 that were delivered to Home.
subset(retail, Membership == "Gold" & Profit > 5000 & DeliveryType == "Home")
##  [1] BillID       CustomerName Gender       Membership   Category    
##  [6] Quantity     CostPrice    SellingPrice Discount     DeliveryType
## [11] GrossRevenue TotalCost    NetRevenue   Profit      
## <0 rows> (or 0-length row.names)
# Bills that lost money (negative Profit).
subset(retail, Profit < 0)
##  [1] BillID       CustomerName Gender       Membership   Category    
##  [6] Quantity     CostPrice    SellingPrice Discount     DeliveryType
## [11] GrossRevenue TotalCost    NetRevenue   Profit      
## <0 rows> (or 0-length row.names)
# Electronics bills with at least 2 units, a Discount above 1000 and a
# positive Profit; all four conditions must hold.
subset(retail, Category == "Electronics" & Quantity >= 2 & Discount > 1000 & Profit > 0)
##  [1] BillID       CustomerName Gender       Membership   Category    
##  [6] Quantity     CostPrice    SellingPrice Discount     DeliveryType
## [11] GrossRevenue TotalCost    NetRevenue   Profit      
## <0 rows> (or 0-length row.names)
# Bills with NetRevenue above 20000 OR from a Gold member (either is enough).
subset(retail, NetRevenue > 20000 | Membership == "Gold")
##   BillID CustomerName Gender Membership    Category Quantity CostPrice
## 1    501         Aman   Male       Gold     Grocery       10        50
## 2    502         Riya Female     Silver Electronics        1     20000
## 3    503        Karan   Male       Gold    Clothing        3       800
## 5    505        Rohit   Male     Silver Electronics        2     18000
## 6    506       Simran Female       Gold    Clothing        5       700
##   SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1           70      100         Home          700       500        600    100
## 2        25000     2000        Store        25000     20000      23000   3000
## 3         1000      150         Home         3000      2400       2850    450
## 5        22000      500         Home        44000     36000      43500   7500
## 6          900      300         Home         4500      3500       4200    700

Classification

# ProfitCategory: "High Profit" above 10000, "Moderate Profit" for any other
# positive Profit, otherwise "Loss" (a Profit of exactly 0 also lands here).
retail$ProfitCategory <- with(
  retail,
  ifelse(
    Profit > 10000,
    "High Profit",
    ifelse(Profit > 0, "Moderate Profit", "Loss")
  )
)

# RiskFlag: "Risky" when the Discount exceeds 20% of GrossRevenue or the bill
# has a negative Profit; "Safe" otherwise.
retail$RiskFlag <- with(
  retail,
  ifelse(Discount > 0.20 * GrossRevenue | Profit < 0, "Risky", "Safe")
)
print(retail)
##   BillID CustomerName Gender Membership    Category Quantity CostPrice
## 1    501         Aman   Male       Gold     Grocery       10        50
## 2    502         Riya Female     Silver Electronics        1     20000
## 3    503        Karan   Male       Gold    Clothing        3       800
## 4    504         Neha Female       None     Grocery        8        60
## 5    505        Rohit   Male     Silver Electronics        2     18000
## 6    506       Simran Female       Gold    Clothing        5       700
##   SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1           70      100         Home          700       500        600    100
## 2        25000     2000        Store        25000     20000      23000   3000
## 3         1000      150         Home         3000      2400       2850    450
## 4           80       50        Store          640       480        590    110
## 5        22000      500         Home        44000     36000      43500   7500
## 6          900      300         Home         4500      3500       4200    700
##    ProfitCategory RiskFlag
## 1 Moderate Profit     Safe
## 2 Moderate Profit     Safe
## 3 Moderate Profit     Safe
## 4 Moderate Profit     Safe
## 5 Moderate Profit     Safe
## 6 Moderate Profit     Safe

Aggregation Analysis

# Total Profit for each Membership/Category combination present in the data.
aggregate(
  Profit ~ Membership + Category,
  data = retail,
  FUN = sum
)
##   Membership    Category Profit
## 1       Gold    Clothing   1150
## 2     Silver Electronics  10500
## 3       Gold     Grocery    100
## 4       None     Grocery    110
# Average Profit for each Gender/DeliveryType combination present in the data.
aggregate(
  Profit ~ Gender + DeliveryType,
  data = retail,
  FUN = mean
)
##   Gender DeliveryType   Profit
## 1 Female         Home  700.000
## 2   Male         Home 2683.333
## 3 Female        Store 1555.000

Final Dataset Preview

# Preview the first rows of the final data frame, including the derived and
# classification columns added above (head() returns up to six rows by default).
head(retail)
##   BillID CustomerName Gender Membership    Category Quantity CostPrice
## 1    501         Aman   Male       Gold     Grocery       10        50
## 2    502         Riya Female     Silver Electronics        1     20000
## 3    503        Karan   Male       Gold    Clothing        3       800
## 4    504         Neha Female       None     Grocery        8        60
## 5    505        Rohit   Male     Silver Electronics        2     18000
## 6    506       Simran Female       Gold    Clothing        5       700
##   SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1           70      100         Home          700       500        600    100
## 2        25000     2000        Store        25000     20000      23000   3000
## 3         1000      150         Home         3000      2400       2850    450
## 4           80       50        Store          640       480        590    110
## 5        22000      500         Home        44000     36000      43500   7500
## 6          900      300         Home         4500      3500       4200    700
##    ProfitCategory RiskFlag
## 1 Moderate Profit     Safe
## 2 Moderate Profit     Safe
## 3 Moderate Profit     Safe
## 4 Moderate Profit     Safe
## 5 Moderate Profit     Safe
## 6 Moderate Profit     Safe