Loading libraries and dataset
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(scales)
filename <- file.choose()
bike_orderlines <- readRDS(filename)
View(bike_orderlines)
Q1: What are the unique categories of products in category_1,
cataergory_2, and frame_material?
bike_orderlines %>% distinct(category_1)
## # A tibble: 2 × 1
## category_1
## <chr>
## 1 Mountain
## 2 Road
bike_orderlines %>% distinct(category_2)
## # A tibble: 9 × 1
## category_2
## <chr>
## 1 Over Mountain
## 2 Trail
## 3 Elite Road
## 4 Endurance Road
## 5 Sport
## 6 Cross Country Race
## 7 Cyclocross
## 8 Triathalon
## 9 Fat Bike
bike_orderlines %>% distinct(frame_material)
## # A tibble: 2 × 1
## frame_material
## <chr>
## 1 Carbon
## 2 Aluminum
Q2: Which product catergories have the most sales?
bike_orderlines_rename <- bike_orderlines %>%
rename(`Primary Category` = category_1) %>%
rename(`Secondary Category` = category_2) %>%
rename(`Frame Material` = frame_material)
primary_category_sales <- bike_orderlines_rename %>%
select(`Primary Category`, total_price) %>%
group_by(`Primary Category`) %>%
summarise(Sales = sum(total_price)) %>%
arrange(desc(Sales)) %>%
ungroup() %>%
mutate(Sales = dollar(Sales))
primary_category_sales
## # A tibble: 2 × 2
## `Primary Category` Sales
## <chr> <chr>
## 1 Mountain $39,154,735
## 2 Road $31,877,595
secondary_category_sales <- bike_orderlines_rename %>%
select(`Secondary Category`, total_price) %>%
group_by(`Secondary Category`) %>%
summarise(Sales = sum(total_price)) %>%
arrange(desc(Sales)) %>%
ungroup() %>%
mutate(Sales = dollar(Sales))
secondary_category_sales
## # A tibble: 9 × 2
## `Secondary Category` Sales
## <chr> <chr>
## 1 Cross Country Race $19,224,630
## 2 Elite Road $15,334,665
## 3 Endurance Road $10,381,060
## 4 Trail $9,373,460
## 5 Over Mountain $7,571,270
## 6 Triathalon $4,053,750
## 7 Cyclocross $2,108,120
## 8 Sport $1,932,755
## 9 Fat Bike $1,052,620
frame_material_sales <- bike_orderlines_rename %>%
select(`Frame Material`, total_price) %>%
group_by(`Frame Material`) %>%
summarise(Sales = sum(total_price)) %>%
arrange(desc(Sales)) %>%
ungroup() %>%
mutate(Sales = dollar(Sales))
frame_material_sales
## # A tibble: 2 × 2
## `Frame Material` Sales
## <chr> <chr>
## 1 Carbon $52,940,540
## 2 Aluminum $18,091,790
Q3: Do all combinations primary and secondary bike categories
contain both Aluminium and Carbon frame materials?
bike_orderlines_rename %>%
mutate(Aluminum = ifelse(`Frame Material` == 'Aluminum', total_price, 0)) %>%
mutate(Carbon = ifelse(`Frame Material` == 'Carbon', total_price, 0)) %>%
select(`Primary Category`, `Secondary Category`, Aluminum, Carbon, total_price) %>%
group_by(`Primary Category`, `Secondary Category`) %>%
summarise(Aluminum = sum(Aluminum), Carbon = sum(Carbon), Sales = sum(total_price)) %>%
arrange(desc(Sales)) %>%
ungroup() %>%
mutate(Aluminum = dollar(Aluminum), Carbon = dollar(Carbon), Sales = dollar(Sales))
## `summarise()` has grouped output by 'Primary Category'. You can override using
## the `.groups` argument.
## # A tibble: 9 × 5
## `Primary Category` `Secondary Category` Aluminum Carbon Sales
## <chr> <chr> <chr> <chr> <chr>
## 1 Mountain Cross Country Race $3,318,560 $15,906,070 $19,224,630
## 2 Road Elite Road $5,637,795 $9,696,870 $15,334,665
## 3 Road Endurance Road $1,612,450 $8,768,610 $10,381,060
## 4 Mountain Trail $4,537,610 $4,835,850 $9,373,460
## 5 Mountain Over Mountain $0 $7,571,270 $7,571,270
## 6 Road Triathalon $0 $4,053,750 $4,053,750
## 7 Road Cyclocross $0 $2,108,120 $2,108,120
## 8 Mountain Sport $1,932,755 $0 $1,932,755
## 9 Mountain Fat Bike $1,052,620 $0 $1,052,620