library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
#input
bike_orderlines <- readRDS("/cloud/project/bike_orderlines (1).rds")
#rename
bike_orderlines <- bike_orderlines %>%
rename(`Primary Category` = category_1,
`Secondary Category` = category_2,
`Frame Material` = frame_material)
#categorization
category_1 <- bike_orderlines %>% select(`Primary Category`) %>% distinct()
category_2 <- bike_orderlines %>% select(`Secondary Category`) %>% distinct()
frame_material <- bike_orderlines %>% select(`Frame Material`) %>% distinct()
#sum of sales
sales_by_category_1 <- bike_orderlines %>%
group_by(`Primary Category`) %>%
summarize(total_price = sum(total_price)) %>%
arrange(desc(total_price))
head(sales_by_category_1)
## # A tibble: 2 × 2
## `Primary Category` total_price
## <chr> <dbl>
## 1 Mountain 39154735
## 2 Road 31877595
sales_by_category_2 <- bike_orderlines %>%
group_by(`Secondary Category`) %>%
summarize(total_price = sum(total_price)) %>%
arrange(desc(total_price))
head(sales_by_category_2)
## # A tibble: 6 × 2
## `Secondary Category` total_price
## <chr> <dbl>
## 1 Cross Country Race 19224630
## 2 Elite Road 15334665
## 3 Endurance Road 10381060
## 4 Trail 9373460
## 5 Over Mountain 7571270
## 6 Triathalon 4053750
sales_by_frame_material <- bike_orderlines %>%
group_by(`Frame Material`) %>%
summarize(total_price = sum(total_price)) %>%
arrange(desc(total_price))
head(sales_by_frame_material)
## # A tibble: 2 × 2
## `Frame Material` total_price
## <chr> <dbl>
## 1 Carbon 52940540
## 2 Aluminum 18091790
#combining the result
combinations <- bike_orderlines %>%
group_by(`Primary Category`, `Secondary Category`) %>%
summarize(
Aluminum = sum(total_price[`Frame Material` == "Aluminum"]),
Carbon = sum(total_price[`Frame Material` == "Carbon"]),
`Total Sales` = sum(total_price)
) %>%
mutate(
Contains_Aluminum = Aluminum > 0,
Contains_Carbon = Carbon > 0,
Both_Aluminum_and_Carbon = Contains_Aluminum & Contains_Carbon
)
## `summarise()` has grouped output by 'Primary Category'. You can override using
## the `.groups` argument.
head(combinations)
## # A tibble: 6 × 8
## # Groups: Primary Category [2]
## `Primary Category` `Secondary Category` Aluminum Carbon `Total Sales`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Mountain Cross Country Race 3318560 15906070 19224630
## 2 Mountain Fat Bike 1052620 0 1052620
## 3 Mountain Over Mountain 0 7571270 7571270
## 4 Mountain Sport 1932755 0 1932755
## 5 Mountain Trail 4537610 4835850 9373460
## 6 Road Cyclocross 0 2108120 2108120
## # ℹ 3 more variables: Contains_Aluminum <lgl>, Contains_Carbon <lgl>,
## # Both_Aluminum_and_Carbon <lgl>