# Retail Data Analysis ----
# Build the six-bill sample data set, one vector per column, and echo it.
# Text columns are kept as character vectors rather than factors.
retail <- data.frame(
  BillID = c(501, 502, 503, 504, 505, 506),
  CustomerName = c("Aman", "Riya", "Karan", "Neha", "Rohit", "Simran"),
  # Gender alternates Male/Female across the six bills.
  Gender = rep(c("Male", "Female"), times = 3),
  Membership = c("Gold", "Silver", "Gold", "None", "Silver", "Gold"),
  # The three categories repeat in the same order for bills 504-506.
  Category = rep(c("Grocery", "Electronics", "Clothing"), times = 2),
  Quantity = c(10, 1, 3, 8, 2, 5),
  CostPrice = c(50, 20000, 800, 60, 18000, 700),
  SellingPrice = c(70, 25000, 1000, 80, 22000, 900),
  Discount = c(100, 2000, 150, 50, 500, 300),
  DeliveryType = c("Home", "Store", "Home", "Store", "Home", "Home"),
  stringsAsFactors = FALSE
)
print(retail)
## BillID CustomerName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 4 504 Neha Female None Grocery 8 60
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType
## 1 70 100 Home
## 2 25000 2000 Store
## 3 1000 150 Home
## 4 80 50 Store
## 5 22000 500 Home
## 6 900 300 Home
# Summary of Data ----
# Per-column overview: quartiles and mean for the numeric columns,
# length/class/mode for the character columns.
summary(object = retail)
## BillID CustomerName Gender Membership
## Min. :501.0 Length:6 Length:6 Length:6
## 1st Qu.:502.2 Class :character Class :character Class :character
## Median :503.5 Mode :character Mode :character Mode :character
## Mean :503.5
## 3rd Qu.:504.8
## Max. :506.0
## Category Quantity CostPrice SellingPrice
## Length:6 Min. : 1.000 Min. : 50 Min. : 70
## Class :character 1st Qu.: 2.250 1st Qu.: 220 1st Qu.: 285
## Mode :character Median : 4.000 Median : 750 Median : 950
## Mean : 4.833 Mean : 6602 Mean : 8175
## 3rd Qu.: 7.250 3rd Qu.:13700 3rd Qu.:16750
## Max. :10.000 Max. :20000 Max. :25000
## Discount DeliveryType
## Min. : 50.0 Length:6
## 1st Qu.: 112.5 Class :character
## Median : 225.0 Mode :character
## Mean : 516.7
## 3rd Qu.: 450.0
## Max. :2000.0
# Revenue and Profit Calculations ----
# Derive the money columns for every bill and echo the widened table.
# Order matters: NetRevenue reads GrossRevenue, and Profit reads both
# NetRevenue and TotalCost.
retail[["GrossRevenue"]] <- with(retail, Quantity * SellingPrice)
retail[["TotalCost"]] <- with(retail, Quantity * CostPrice)
# Discount is subtracted once per bill; it is not scaled by Quantity.
retail[["NetRevenue"]] <- with(retail, GrossRevenue - Discount)
retail[["Profit"]] <- with(retail, NetRevenue - TotalCost)
print(retail)
## BillID CustomerName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 4 504 Neha Female None Grocery 8 60
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 Home 700 500 600 100
## 2 25000 2000 Store 25000 20000 23000 3000
## 3 1000 150 Home 3000 2400 2850 450
## 4 80 50 Store 640 480 590 110
## 5 22000 500 Home 44000 36000 43500 7500
## 6 900 300 Home 4500 3500 4200 700
# Filtering Data ----
# Gold-member bills delivered to the home with profit above 5000.
# which() drops rows whose condition is NA, matching subset().
retail[
  which(
    retail$Membership == "Gold" &
      retail$Profit > 5000 &
      retail$DeliveryType == "Home"
  ),
]
## [1] BillID CustomerName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit
## <0 rows> (or 0-length row.names)
# Bills that lost money (strictly negative profit).
# which() drops rows whose condition is NA, matching subset().
retail[which(retail$Profit < 0), ]
## [1] BillID CustomerName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit
## <0 rows> (or 0-length row.names)
# Profitable Electronics bills with at least two units and a discount
# above 1000. which() drops rows whose condition is NA, matching subset().
retail[
  which(
    retail$Category == "Electronics" &
      retail$Quantity >= 2 &
      retail$Discount > 1000 &
      retail$Profit > 0
  ),
]
## [1] BillID CustomerName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit
## <0 rows> (or 0-length row.names)
# Bills with net revenue above 20000, plus every Gold-member bill.
# which() drops rows whose condition is NA, matching subset().
retail[
  which(retail$NetRevenue > 20000 | retail$Membership == "Gold"),
]
## BillID CustomerName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 Home 700 500 600 100
## 2 25000 2000 Store 25000 20000 23000 3000
## 3 1000 150 Home 3000 2400 2850 450
## 5 22000 500 Home 44000 36000 43500 7500
## 6 900 300 Home 4500 3500 4200 700
# Classification ----
# Label each bill by profit: above 10000 is "High Profit", any other
# positive value is "Moderate Profit", and everything else is "Loss".
# Note that a profit of exactly zero therefore lands in "Loss".
retail[["ProfitCategory"]] <- with(
  retail,
  ifelse(
    Profit > 10000,
    "High Profit",
    ifelse(Profit > 0, "Moderate Profit", "Loss")
  )
)
# Flag a bill "Risky" when its discount exceeds 20% of gross revenue or its
# profit is strictly negative; every other bill is "Safe".
retail[["RiskFlag"]] <- with(
  retail,
  ifelse(Discount > 0.20 * GrossRevenue | Profit < 0, "Risky", "Safe")
)
print(retail)
## BillID CustomerName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 4 504 Neha Female None Grocery 8 60
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 Home 700 500 600 100
## 2 25000 2000 Store 25000 20000 23000 3000
## 3 1000 150 Home 3000 2400 2850 450
## 4 80 50 Store 640 480 590 110
## 5 22000 500 Home 44000 36000 43500 7500
## 6 900 300 Home 4500 3500 4200 700
## ProfitCategory RiskFlag
## 1 Moderate Profit Safe
## 2 Moderate Profit Safe
## 3 Moderate Profit Safe
## 4 Moderate Profit Safe
## 5 Moderate Profit Safe
## 6 Moderate Profit Safe
# Aggregation Analysis ----
# Total profit for each Membership x Category combination present in the
# data.
aggregate(
  Profit ~ Membership + Category,
  data = retail,
  FUN = sum
)
## Membership Category Profit
## 1 Gold Clothing 1150
## 2 Silver Electronics 10500
## 3 Gold Grocery 100
## 4 None Grocery 110
# Average profit for each Gender x DeliveryType combination present in the
# data.
aggregate(
  Profit ~ Gender + DeliveryType,
  data = retail,
  FUN = mean
)
## Gender DeliveryType Profit
## 1 Female Home 700.000
## 2 Male Home 2683.333
## 3 Female Store 1555.000
# Final Dataset Preview ----
# Show the first six rows of the enriched table (the whole table here,
# since it has six rows).
head(retail, n = 6L)
## BillID CustomerName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 4 504 Neha Female None Grocery 8 60
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType GrossRevenue TotalCost NetRevenue Profit
## 1 70 100 Home 700 500 600 100
## 2 25000 2000 Store 25000 20000 23000 3000
## 3 1000 150 Home 3000 2400 2850 450
## 4 80 50 Store 640 480 590 110
## 5 22000 500 Home 44000 36000 43500 7500
## 6 900 300 Home 4500 3500 4200 700
## ProfitCategory RiskFlag
## 1 Moderate Profit Safe
## 2 Moderate Profit Safe
## 3 Moderate Profit Safe
## 4 Moderate Profit Safe
## 5 Moderate Profit Safe
## 6 Moderate Profit Safe