# Step 1: Build the retail billing data frame (mixed column types).
# One row per bill. Gender, Membership and Category are stored as factors;
# CustomerName and DeliveryType stay plain character vectors.
retail_data <- data.frame(
  BillID = c(501, 502, 503, 504, 505, 506),
  CustomerName = c("Aman", "Riya", "Karan", "Neha", "Rohit", "Simran"),
  Gender = factor(
    c("Male", "Female", "Male", "Female", "Male", "Female")
  ),
  Membership = factor(
    c("Gold", "Silver", "Gold", "None", "Silver", "Gold")
  ),
  Category = factor(
    c(
      "Grocery", "Electronics", "Clothing",
      "Grocery", "Electronics", "Clothing"
    )
  ),
  Quantity = c(10, 1, 3, 8, 2, 5),
  CostPrice = c(50, 20000, 800, 60, 18000, 700),
  SellingPrice = c(70, 25000, 1000, 80, 22000, 900),
  Discount = c(100, 2000, 150, 50, 500, 300),
  DeliveryType = c("Home", "Store", "Home", "Store", "Home", "Home"),
  # Keep the character columns as character on every R version.
  stringsAsFactors = FALSE
)

# Inspect the column types, then show the full table.
str(retail_data)
## 'data.frame': 6 obs. of 10 variables:
## $ BillID : num 501 502 503 504 505 506
## $ CustomerName: chr "Aman" "Riya" "Karan" "Neha" ...
## $ Gender : Factor w/ 2 levels "Female","Male": 2 1 2 1 2 1
## $ Membership : Factor w/ 3 levels "Gold","None",..: 1 3 1 2 3 1
## $ Category : Factor w/ 3 levels "Clothing","Electronics",..: 3 2 1 3 2 1
## $ Quantity : num 10 1 3 8 2 5
## $ CostPrice : num 50 20000 800 60 18000 700
## $ SellingPrice: num 70 25000 1000 80 22000 900
## $ Discount : num 100 2000 150 50 500 300
## $ DeliveryType: chr "Home" "Store" "Home" "Store" ...
print(retail_data)
## BillID CustomerName Gender Membership Category Quantity CostPrice
## 1 501 Aman Male Gold Grocery 10 50
## 2 502 Riya Female Silver Electronics 1 20000
## 3 503 Karan Male Gold Clothing 3 800
## 4 504 Neha Female None Grocery 8 60
## 5 505 Rohit Male Silver Electronics 2 18000
## 6 506 Simran Female Gold Clothing 5 700
## SellingPrice Discount DeliveryType
## 1 70 100 Home
## 2 25000 2000 Store
## 3 1000 150 Home
## 4 80 50 Store
## 5 22000 500 Home
## 6 900 300 Home
# Tasks 1-4: derive the per-bill money columns. Order matters: each later
# column is computed from one added just before it.

# 1. GrossRevenue = Quantity * SellingPrice
retail_data[["GrossRevenue"]] <- with(retail_data, Quantity * SellingPrice)

# 2. TotalCost = Quantity * CostPrice
retail_data[["TotalCost"]] <- with(retail_data, Quantity * CostPrice)

# 3. NetRevenue = GrossRevenue - Discount
retail_data[["NetRevenue"]] <- with(retail_data, GrossRevenue - Discount)

# 4. Profit = NetRevenue - TotalCost
retail_data[["Profit"]] <- with(retail_data, NetRevenue - TotalCost)
# 5. Find Gold members who have Profit > 5000 and chose Home delivery.
# The filter uses the 5000 threshold from the task statement. In this data
# set the Gold members with Home delivery have profits of 100, 450 and 700,
# so no bill qualifies and the result is an empty data frame.
ProGoldCard <- subset(
  retail_data,
  Membership == "Gold" & Profit > 5000 & DeliveryType == "Home"
)
ProGoldCard
## [1] BillID CustomerName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit
## <0 rows> (or 0-length row.names)
# 6. Find all loss-making transactions (Profit < 0)
# Rows are kept only where Profit is known and negative; every column is
# retained. No bill in this data runs at a loss, so the result is empty.
loss_making <- retail_data[
  !is.na(retail_data$Profit) & retail_data$Profit < 0,
  ,
  drop = FALSE
]
loss_making
## [1] BillID CustomerName Gender Membership Category
## [6] Quantity CostPrice SellingPrice Discount DeliveryType
## [11] GrossRevenue TotalCost NetRevenue Profit
## <0 rows> (or 0-length row.names)