library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the data set stored in "bike_orderlines (1).rds" (path is relative to
# the current working directory).
bike_data <- readRDS("bike_orderlines (1).rds")

# For each categorical column, collect its distinct values into a one-column
# table and print it under a short heading.

# category_1 ----
category_1_unique <- distinct(bike_data, category_1)
print("Unique values in category_1:")
## [1] "Unique values in category_1:"
print(category_1_unique)
## # A tibble: 2 × 1
##   category_1
##   <chr>     
## 1 Mountain  
## 2 Road

# category_2 ----
category_2_unique <- distinct(bike_data, category_2)
print("Unique values in category_2:")
## [1] "Unique values in category_2:"
print(category_2_unique)
## # A tibble: 9 × 1
##   category_2        
##   <chr>             
## 1 Over Mountain     
## 2 Trail             
## 3 Elite Road        
## 4 Endurance Road    
## 5 Sport             
## 6 Cross Country Race
## 7 Cyclocross        
## 8 Triathalon        
## 9 Fat Bike

# frame_material ----
frame_material_unique <- distinct(bike_data, frame_material)
print("Unique values in frame_material:")
## [1] "Unique values in frame_material:"
print(frame_material_unique)
## # A tibble: 2 × 1
##   frame_material
##   <chr>         
## 1 Carbon        
## 2 Aluminum
# Total of total_price (NAs ignored) per category_1 value, largest total first.
# Naming the grouping expression inside group_by() gives the output column its
# display name directly, so no separate rename() step is needed.
primary_sales <- bike_data %>%
  group_by(`Primary Category` = category_1) %>%
  summarise(Sales = sum(total_price, na.rm = TRUE)) %>%
  arrange(desc(Sales))

print(primary_sales)
## # A tibble: 2 × 2
##   `Primary Category`    Sales
##   <chr>                 <dbl>
## 1 Mountain           39154735
## 2 Road               31877595
# Total of total_price (NAs ignored) per category_2 value, largest total first.
# Naming the grouping expression inside group_by() gives the output column its
# display name directly, so no separate rename() step is needed.
secondary_sales <- bike_data %>%
  group_by(`Secondary Category` = category_2) %>%
  summarise(Sales = sum(total_price, na.rm = TRUE)) %>%
  arrange(desc(Sales))

print(secondary_sales)
## # A tibble: 9 × 2
##   `Secondary Category`    Sales
##   <chr>                   <dbl>
## 1 Cross Country Race   19224630
## 2 Elite Road           15334665
## 3 Endurance Road       10381060
## 4 Trail                 9373460
## 5 Over Mountain         7571270
## 6 Triathalon            4053750
## 7 Cyclocross            2108120
## 8 Sport                 1932755
## 9 Fat Bike              1052620
# Total of total_price (NAs ignored) per frame_material value, largest total
# first. Naming the grouping expression inside group_by() gives the output
# column its display name directly, so no separate rename() step is needed.
frame_material_sales <- bike_data %>%
  group_by(`Frame Material` = frame_material) %>%
  summarise(Sales = sum(total_price, na.rm = TRUE)) %>%
  arrange(desc(Sales))

print(frame_material_sales)
## # A tibble: 2 × 2
##   `Frame Material`    Sales
##   <chr>               <dbl>
## 1 Carbon           52940540
## 2 Aluminum         18091790
# Sales per (category_1, category_2) pair, split into one column per frame
# material plus the overall total for the pair.
#
# sum() over an empty selection is already 0, so a pair with no rows of a
# given material needs no scalar ifelse(any(...), ..., 0) guard. The mask uses
# %in% rather than ==, because %in% never yields NA: rows with a missing
# frame_material count toward neither material instead of turning the whole
# material column into NA for that pair.
category_frame_check <- bike_data %>%
  group_by(category_1, category_2) %>%
  summarise(
    Aluminum = sum(total_price[frame_material %in% "Aluminum"], na.rm = TRUE),
    Carbon = sum(total_price[frame_material %in% "Carbon"], na.rm = TRUE),
    `Total Sales` = sum(total_price, na.rm = TRUE),
    # Drop all grouping so the result is a plain, ungrouped tibble.
    .groups = "drop"
  ) %>%
  rename("Primary Category" = category_1, "Secondary Category" = category_2)

print(category_frame_check)
## # A tibble: 9 × 5
##   `Primary Category` `Secondary Category` Aluminum   Carbon `Total Sales`
##   <chr>              <chr>                   <dbl>    <dbl>         <dbl>
## 1 Mountain           Cross Country Race    3318560 15906070      19224630
## 2 Mountain           Fat Bike              1052620        0       1052620
## 3 Mountain           Over Mountain               0  7571270       7571270
## 4 Mountain           Sport                 1932755        0       1932755
## 5 Mountain           Trail                 4537610  4835850       9373460
## 6 Road               Cyclocross                  0  2108120       2108120
## 7 Road               Elite Road            5637795  9696870      15334665
## 8 Road               Endurance Road        1612450  8768610      10381060
## 9 Road               Triathalon                  0  4053750       4053750