# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(scales)
library(readxl)
# Step 1: Load the dataset
file_path <- "/cloud/project/bike_orderlines.xlsx" # Update the file path
bike_orderlines <- read_excel(file_path)
# Step 2: Inspect the data structure (optional, remove for knitting)
# glimpse(bike_orderlines)
# colnames(bike_orderlines)
# Step 3: Rename columns for readability
bike_orderlines_rename <- bike_orderlines %>%
rename(
`Primary Category` = category_1,
`Secondary Category` = category_2,
`Frame Material` = frame_material
)
# Step 4: Sales by Primary Category
primary_category_sales <- bike_orderlines_rename %>%
select(`Primary Category`, total_price) %>%
group_by(`Primary Category`) %>%
summarise(Sales = sum(total_price, na.rm = TRUE)) %>%
arrange(desc(Sales)) %>%
ungroup() %>%
mutate(Sales = dollar(Sales))
# Display results
primary_category_sales
## # A tibble: 2 × 2
## `Primary Category` Sales
## <chr> <chr>
## 1 Mountain $39,154,735
## 2 Road $31,877,595
# Step 5: Sales by Secondary Category
secondary_category_sales <- bike_orderlines_rename %>%
select(`Secondary Category`, total_price) %>%
group_by(`Secondary Category`) %>%
summarise(Sales = sum(total_price, na.rm = TRUE)) %>%
arrange(desc(Sales)) %>%
ungroup() %>%
mutate(Sales = dollar(Sales))
# Display results
secondary_category_sales
## # A tibble: 9 × 2
## `Secondary Category` Sales
## <chr> <chr>
## 1 Cross Country Race $19,224,630
## 2 Elite Road $15,334,665
## 3 Endurance Road $10,381,060
## 4 Trail $9,373,460
## 5 Over Mountain $7,571,270
## 6 Triathalon $4,053,750
## 7 Cyclocross $2,108,120
## 8 Sport $1,932,755
## 9 Fat Bike $1,052,620
# Step 6: Sales by Frame Material
frame_material_sales <- bike_orderlines_rename %>%
select(`Frame Material`, total_price) %>%
group_by(`Frame Material`) %>%
summarise(Sales = sum(total_price, na.rm = TRUE)) %>%
arrange(desc(Sales)) %>%
ungroup() %>%
mutate(Sales = dollar(Sales))
# Display results
frame_material_sales
## # A tibble: 2 × 2
## `Frame Material` Sales
## <chr> <chr>
## 1 Carbon $52,940,540
## 2 Aluminum $18,091,790
# Step 7: Detailed Analysis by Frame Material
frame_material_analysis <- bike_orderlines_rename %>%
mutate(Aluminum = ifelse(`Frame Material` == 'Aluminum', total_price, 0)) %>%
mutate(Carbon = ifelse(`Frame Material` == 'Carbon', total_price, 0)) %>%
select(`Primary Category`, `Secondary Category`, Aluminum, Carbon, total_price) %>%
group_by(`Primary Category`, `Secondary Category`) %>%
summarise(
Aluminum = sum(Aluminum, na.rm = TRUE),
Carbon = sum(Carbon, na.rm = TRUE),
Sales = sum(total_price, na.rm = TRUE)
) %>%
arrange(desc(Sales)) %>%
ungroup() %>%
mutate(
Aluminum = dollar(Aluminum),
Carbon = dollar(Carbon),
Sales = dollar(Sales)
)
## `summarise()` has grouped output by 'Primary Category'. You can override using
## the `.groups` argument.
# Display results
frame_material_analysis
## # A tibble: 9 × 5
## `Primary Category` `Secondary Category` Aluminum Carbon Sales
## <chr> <chr> <chr> <chr> <chr>
## 1 Mountain Cross Country Race $3,318,560 $15,906,070 $19,224,630
## 2 Road Elite Road $5,637,795 $9,696,870 $15,334,665
## 3 Road Endurance Road $1,612,450 $8,768,610 $10,381,060
## 4 Mountain Trail $4,537,610 $4,835,850 $9,373,460
## 5 Mountain Over Mountain $0 $7,571,270 $7,571,270
## 6 Road Triathalon $0 $4,053,750 $4,053,750
## 7 Road Cyclocross $0 $2,108,120 $2,108,120
## 8 Mountain Sport $1,932,755 $0 $1,932,755
## 9 Mountain Fat Bike $1,052,620 $0 $1,052,620