library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the order-line data; the relative path is resolved against the
# current working directory.
bike_data <- readRDS(file = "bike_orderlines (1).rds")

# One-column tables holding the distinct values of each categorical field.
category_1_unique <- distinct(bike_data, category_1)
category_2_unique <- distinct(bike_data, category_2)
frame_material_unique <- distinct(bike_data, frame_material)
# Print a heading line, then the table of distinct category_1 values.
print("Unique values in category_1:")
## [1] "Unique values in category_1:"
print(category_1_unique)
## # A tibble: 2 × 1
## category_1
## <chr>
## 1 Mountain
## 2 Road
# Print a heading line, then the table of distinct category_2 values.
print("Unique values in category_2:")
## [1] "Unique values in category_2:"
print(category_2_unique)
## # A tibble: 9 × 1
## category_2
## <chr>
## 1 Over Mountain
## 2 Trail
## 3 Elite Road
## 4 Endurance Road
## 5 Sport
## 6 Cross Country Race
## 7 Cyclocross
## 8 Triathalon
## 9 Fat Bike
# Print a heading line, then the table of distinct frame_material values.
print("Unique values in frame_material:")
## [1] "Unique values in frame_material:"
print(frame_material_unique)
## # A tibble: 2 × 1
## frame_material
## <chr>
## 1 Carbon
## 2 Aluminum
# Sum of total_price for each category_1 value, largest total first.
# The grouping column is given its display name at grouping time, and
# missing prices are skipped when summing.
primary_sales <- bike_data %>%
  group_by(`Primary Category` = category_1) %>%
  summarise(Sales = sum(total_price, na.rm = TRUE)) %>%
  arrange(desc(Sales))

print(primary_sales)
## # A tibble: 2 × 2
## `Primary Category` Sales
## <chr> <dbl>
## 1 Mountain 39154735
## 2 Road 31877595
# Sum of total_price for each category_2 value, largest total first.
# The grouping column is given its display name at grouping time, and
# missing prices are skipped when summing.
secondary_sales <- bike_data %>%
  group_by(`Secondary Category` = category_2) %>%
  summarise(Sales = sum(total_price, na.rm = TRUE)) %>%
  arrange(desc(Sales))

print(secondary_sales)
## # A tibble: 9 × 2
## `Secondary Category` Sales
## <chr> <dbl>
## 1 Cross Country Race 19224630
## 2 Elite Road 15334665
## 3 Endurance Road 10381060
## 4 Trail 9373460
## 5 Over Mountain 7571270
## 6 Triathalon 4053750
## 7 Cyclocross 2108120
## 8 Sport 1932755
## 9 Fat Bike 1052620
# Sum of total_price for each frame_material value, largest total first.
# The grouping column is given its display name at grouping time, and
# missing prices are skipped when summing.
frame_material_sales <- bike_data %>%
  group_by(`Frame Material` = frame_material) %>%
  summarise(Sales = sum(total_price, na.rm = TRUE)) %>%
  arrange(desc(Sales))

print(frame_material_sales)
## # A tibble: 2 × 2
## `Frame Material` Sales
## <chr> <dbl>
## 1 Carbon 52940540
## 2 Aluminum 18091790
# Sum of total_price per (category_1, category_2) pair, split into one
# column per frame material ("Aluminum", "Carbon") plus the overall total.
# A material with no rows in a group shows up as 0.
category_frame_check <- bike_data %>%
  group_by(category_1, category_2) %>%
  summarise(
    # sum() of an empty selection is already 0, so no ifelse() guard is
    # needed. %in% never yields NA, so a missing frame_material counts as
    # "no match" instead of turning the whole material total into NA.
    Aluminum = sum(total_price[frame_material %in% "Aluminum"], na.rm = TRUE),
    Carbon = sum(total_price[frame_material %in% "Carbon"], na.rm = TRUE),
    `Total Sales` = sum(total_price, na.rm = TRUE),
    # Return an ungrouped result so later verbs are not applied per group.
    .groups = "drop"
  ) %>%
  rename(
    "Primary Category" = category_1,
    "Secondary Category" = category_2
  )

print(category_frame_check)
## # A tibble: 9 × 5
## `Primary Category` `Secondary Category` Aluminum Carbon `Total Sales`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Mountain Cross Country Race 3318560 15906070 19224630
## 2 Mountain Fat Bike 1052620 0 1052620
## 3 Mountain Over Mountain 0 7571270 7571270
## 4 Mountain Sport 1932755 0 1932755
## 5 Mountain Trail 4537610 4835850 9373460
## 6 Road Cyclocross 0 2108120 2108120
## 7 Road Elite Road 5637795 9696870 15334665
## 8 Road Endurance Road 1612450 8768610 10381060
## 9 Road Triathalon 0 4053750 4053750