library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
#input
bike_orderlines <- readRDS("/cloud/project/bike_orderlines (1).rds")
#rename
bike_orderlines <- bike_orderlines %>%
  rename(`Primary Category` = category_1,
         `Secondary Category` = category_2,
         `Frame Material` = frame_material)
#categorization
category_1 <- bike_orderlines %>% select(`Primary Category`) %>% distinct()
category_2 <- bike_orderlines %>% select(`Secondary Category`) %>% distinct()
frame_material <- bike_orderlines %>% select(`Frame Material`) %>% distinct()
#sum of sales
sales_by_category_1 <- bike_orderlines %>%
  group_by(`Primary Category`) %>%
  summarize(total_price = sum(total_price)) %>%
  arrange(desc(total_price))
head(sales_by_category_1)
## # A tibble: 2 × 2
##   `Primary Category` total_price
##   <chr>                    <dbl>
## 1 Mountain              39154735
## 2 Road                  31877595
sales_by_category_2 <- bike_orderlines %>%
  group_by(`Secondary Category`) %>%
  summarize(total_price = sum(total_price)) %>%
  arrange(desc(total_price))
head(sales_by_category_2)
## # A tibble: 6 × 2
##   `Secondary Category` total_price
##   <chr>                      <dbl>
## 1 Cross Country Race      19224630
## 2 Elite Road              15334665
## 3 Endurance Road          10381060
## 4 Trail                    9373460
## 5 Over Mountain            7571270
## 6 Triathalon               4053750
sales_by_frame_material <- bike_orderlines %>%
  group_by(`Frame Material`) %>%
  summarize(total_price = sum(total_price)) %>%
  arrange(desc(total_price))
head(sales_by_frame_material)
## # A tibble: 2 × 2
##   `Frame Material` total_price
##   <chr>                  <dbl>
## 1 Carbon              52940540
## 2 Aluminum            18091790
#combining the result
combinations <- bike_orderlines %>%
  group_by(`Primary Category`, `Secondary Category`) %>%
  summarize(
    Aluminum = sum(total_price[`Frame Material` == "Aluminum"]),
    Carbon = sum(total_price[`Frame Material` == "Carbon"]),
    `Total Sales` = sum(total_price)
  ) %>%
  mutate(
    Contains_Aluminum = Aluminum > 0,
    Contains_Carbon = Carbon > 0,
    Both_Aluminum_and_Carbon = Contains_Aluminum & Contains_Carbon
  )
## `summarise()` has grouped output by 'Primary Category'. You can override using
## the `.groups` argument.
head(combinations) 
## # A tibble: 6 × 8
## # Groups:   Primary Category [2]
##   `Primary Category` `Secondary Category` Aluminum   Carbon `Total Sales`
##   <chr>              <chr>                   <dbl>    <dbl>         <dbl>
## 1 Mountain           Cross Country Race    3318560 15906070      19224630
## 2 Mountain           Fat Bike              1052620        0       1052620
## 3 Mountain           Over Mountain               0  7571270       7571270
## 4 Mountain           Sport                 1932755        0       1932755
## 5 Mountain           Trail                 4537610  4835850       9373460
## 6 Road               Cyclocross                  0  2108120       2108120
## # ℹ 3 more variables: Contains_Aluminum <lgl>, Contains_Carbon <lgl>,
## #   Both_Aluminum_and_Carbon <lgl>