# Load required libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(scales)
# Load the data
filename <- "path/to/your/file.rds" # Replace with the actual path to your file
bike_orderlines <- readRDS("/cloud/project/bike_orderlines (1).rds")
# Perform operations on the data
bike_orderlines %>% distinct(category_1)
## # A tibble: 2 × 1
## category_1
## <chr>
## 1 Mountain
## 2 Road
bike_orderlines %>% distinct(category_2)
## # A tibble: 9 × 1
## category_2
## <chr>
## 1 Over Mountain
## 2 Trail
## 3 Elite Road
## 4 Endurance Road
## 5 Sport
## 6 Cross Country Race
## 7 Cyclocross
## 8 Triathalon
## 9 Fat Bike
bike_orderlines %>% distinct(frame_material)
## # A tibble: 2 × 1
## frame_material
## <chr>
## 1 Carbon
## 2 Aluminum
# Rename columns for clarity
bike_orderlines_rename <- bike_orderlines %>%
rename(`Primary Category` = category_1,
`Secondary Category` = category_2,
`Frame Material` = frame_material)
# Summarize sales by primary category
primary_category_sales <- bike_orderlines_rename %>%
select(`Primary Category`, total_price) %>%
group_by(`Primary Category`) %>%
summarise(Sales = sum(total_price, na.rm = TRUE)) %>%
arrange(desc(Sales)) %>%
ungroup() %>%
mutate(Sales = dollar(Sales))
primary_category_sales
## # A tibble: 2 × 2
## `Primary Category` Sales
## <chr> <chr>
## 1 Mountain $39,154,735
## 2 Road $31,877,595
# Summarize sales by secondary category
secondary_category_sales <- bike_orderlines_rename %>%
select(`Secondary Category`, total_price) %>%
group_by(`Secondary Category`) %>%
summarise(Sales = sum(total_price, na.rm = TRUE)) %>%
arrange(desc(Sales)) %>%
ungroup() %>%
mutate(Sales = dollar(Sales))
secondary_category_sales
## # A tibble: 9 × 2
## `Secondary Category` Sales
## <chr> <chr>
## 1 Cross Country Race $19,224,630
## 2 Elite Road $15,334,665
## 3 Endurance Road $10,381,060
## 4 Trail $9,373,460
## 5 Over Mountain $7,571,270
## 6 Triathalon $4,053,750
## 7 Cyclocross $2,108,120
## 8 Sport $1,932,755
## 9 Fat Bike $1,052,620
# Summarize sales by frame material
frame_material_sales <- bike_orderlines_rename %>%
select(`Frame Material`, total_price) %>%
group_by(`Frame Material`) %>%
summarise(Sales = sum(total_price, na.rm = TRUE)) %>%
arrange(desc(Sales)) %>%
ungroup() %>%
mutate(Sales = dollar(Sales))
frame_material_sales
## # A tibble: 2 × 2
## `Frame Material` Sales
## <chr> <chr>
## 1 Carbon $52,940,540
## 2 Aluminum $18,091,790
# Generate a sales summary for aluminum and carbon frame materials
frame_material_summary <- bike_orderlines_rename %>%
mutate(Aluminum = ifelse(`Frame Material` == "Aluminum", total_price, 0),
Carbon = ifelse(`Frame Material` == "Carbon", total_price, 0)) %>%
select(`Primary Category`, `Secondary Category`, Aluminum, Carbon, total_price) %>%
group_by(`Primary Category`, `Secondary Category`) %>%
summarise(Aluminum = sum(Aluminum, na.rm = TRUE),
Carbon = sum(Carbon, na.rm = TRUE),
Sales = sum(total_price, na.rm = TRUE)) %>%
arrange(desc(Sales)) %>%
ungroup() %>%
mutate(Aluminum = dollar(Aluminum),
Carbon = dollar(Carbon),
Sales = dollar(Sales))
## `summarise()` has grouped output by 'Primary Category'. You can override using
## the `.groups` argument.
frame_material_summary
## # A tibble: 9 × 5
## `Primary Category` `Secondary Category` Aluminum Carbon Sales
## <chr> <chr> <chr> <chr> <chr>
## 1 Mountain Cross Country Race $3,318,560 $15,906,070 $19,224,630
## 2 Road Elite Road $5,637,795 $9,696,870 $15,334,665
## 3 Road Endurance Road $1,612,450 $8,768,610 $10,381,060
## 4 Mountain Trail $4,537,610 $4,835,850 $9,373,460
## 5 Mountain Over Mountain $0 $7,571,270 $7,571,270
## 6 Road Triathalon $0 $4,053,750 $4,053,750
## 7 Road Cyclocross $0 $2,108,120 $2,108,120
## 8 Mountain Sport $1,932,755 $0 $1,932,755
## 9 Mountain Fat Bike $1,052,620 $0 $1,052,620