library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
bike_orderlines <- readRDS("bike_orderlines.rds")
# Unique categories for category_1, category_2, and frame_material
category_1_unique <- bike_orderlines %>%
distinct(category_1)
category_2_unique <- bike_orderlines %>%
distinct(category_2)
frame_material_unique <- bike_orderlines %>%
distinct(frame_material)
# Display results
category_1_unique
## # A tibble: 2 × 1
## category_1
## <chr>
## 1 Mountain
## 2 Road
category_2_unique
## # A tibble: 9 × 1
## category_2
## <chr>
## 1 Over Mountain
## 2 Trail
## 3 Elite Road
## 4 Endurance Road
## 5 Sport
## 6 Cross Country Race
## 7 Cyclocross
## 8 Triathalon
## 9 Fat Bike
frame_material_unique
## # A tibble: 2 × 1
## frame_material
## <chr>
## 1 Carbon
## 2 Aluminum
# Convert price to numeric for accurate calculation
bike_orderlines <- bike_orderlines %>%
mutate(total_price = as.numeric(gsub("[$,]", "", total_price)))
# Primary Category Sales (renamed)
primary_category_sales <- bike_orderlines %>%
group_by(category_1) %>%
summarise(total_price = sum(total_price, na.rm = TRUE)) %>%
rename('Primary Category' = category_1) %>%
arrange(desc(total_price))
# Secondary Category Sales
secondary_category_sales <- bike_orderlines %>%
group_by(category_2) %>%
summarise(total_price = sum(total_price, na.rm = TRUE)) %>%
rename('Secondary Category' = category_2) %>%
arrange(desc(total_price))
# Frame Material Sales (renamed)
frame_material_sales <- bike_orderlines %>%
group_by(frame_material) %>%
summarise(total_price = sum(total_price, na.rm = TRUE)) %>%
rename('Frame Material' = frame_material) %>%
arrange(desc(total_price))
# Display results
primary_category_sales
## # A tibble: 2 × 2
## `Primary Category` total_price
## <chr> <dbl>
## 1 Mountain 39154735
## 2 Road 31877595
secondary_category_sales
## # A tibble: 9 × 2
## `Secondary Category` total_price
## <chr> <dbl>
## 1 Cross Country Race 19224630
## 2 Elite Road 15334665
## 3 Endurance Road 10381060
## 4 Trail 9373460
## 5 Over Mountain 7571270
## 6 Triathalon 4053750
## 7 Cyclocross 2108120
## 8 Sport 1932755
## 9 Fat Bike 1052620
frame_material_sales
## # A tibble: 2 × 2
## `Frame Material` total_price
## <chr> <dbl>
## 1 Carbon 52940540
## 2 Aluminum 18091790
# Group by primary and secondary categories and check for Aluminum and Carbon sales
material_combinations <- bike_orderlines %>%
group_by(category_1, category_2) %>%
summarise(
Aluminum = sum(total_price[frame_material == "Aluminum"], na.rm = TRUE),
Carbon = sum(total_price[frame_material == "Carbon"], na.rm = TRUE),
`Total Sales` = sum(total_price, na.rm = TRUE),
.groups = 'drop'
) %>%
rename('Primary Category' = category_1, 'Secondary Category' = category_2)
# Display result
material_combinations
## # A tibble: 9 × 5
## `Primary Category` `Secondary Category` Aluminum Carbon `Total Sales`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Mountain Cross Country Race 3318560 15906070 19224630
## 2 Mountain Fat Bike 1052620 0 1052620
## 3 Mountain Over Mountain 0 7571270 7571270
## 4 Mountain Sport 1932755 0 1932755
## 5 Mountain Trail 4537610 4835850 9373460
## 6 Road Cyclocross 0 2108120 2108120
## 7 Road Elite Road 5637795 9696870 15334665
## 8 Road Endurance Road 1612450 8768610 10381060
## 9 Road Triathalon 0 4053750 4053750