# Load necessary libraries
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
##
## extract
library(stringr)
library(scales)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Question 1
path_bike_orderlines <- "bike_orderlines.rds"
bike_orderlines_tbl <- readRDS(path_bike_orderlines) %>%
mutate(model = case_when(
model == "CAAD Disc Ultegra" ~ "CAAD12 Disc Ultegra",
model == "Syapse Carbon Tiagra" ~ "Synapse Carbon Tiagra",
model == "Supersix Evo Hi-Mod Utegra" ~ "Supersix Evo Hi-Mod Ultegra",
TRUE ~ model
))
glimpse(bike_orderlines_tbl)
## Rows: 15,644
## Columns: 13
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…
# Question 2
bike_orderlines_tbl %>%
select(order_date, total_price) %>%
mutate(month = order_date %>% floor_date("month") %>% month(label = TRUE)) %>%
group_by(month) %>%
summarise(sales = sum(total_price)) %>%
ungroup() %>%
mutate(sales = scales::dollar(sales)) %>%
arrange(month) %>%
rename(
"Month" = month,
"Sales" = sales
)
## # A tibble: 12 × 2
## Month Sales
## <ord> <chr>
## 1 Jan $4,089,460
## 2 Feb $5,343,295
## 3 Mar $7,282,280
## 4 Apr $8,386,170
## 5 May $7,935,055
## 6 Jun $7,813,105
## 7 Jul $7,602,005
## 8 Aug $5,346,125
## 9 Sep $5,556,055
## 10 Oct $4,394,300
## 11 Nov $4,169,755
## 12 Dec $3,114,725
# Question 3
bike_orderlines_tbl %>%
select(model, total_price) %>%
mutate(Black = model %>% str_detect("Black Inc")) %>%
group_by(Black) %>%
summarise(Median_Orderline = median(total_price)) %>%
ungroup() %>%
mutate(Median_Orderline = scales::dollar(Median_Orderline))
## # A tibble: 2 × 2
## Black Median_Orderline
## <lgl> <chr>
## 1 FALSE $2,880
## 2 TRUE $12,250
bike_orderlines_tbl %>%
select(model, total_price) %>%
mutate(Ultegra = model %>% str_detect("Ultegra")) %>%
group_by(Ultegra) %>%
summarise(Median_Orderline = median(total_price)) %>%
ungroup() %>%
mutate(Median_Orderline = scales::dollar(Median_Orderline))
## # A tibble: 2 × 2
## Ultegra Median_Orderline
## <lgl> <chr>
## 1 FALSE $3,200
## 2 TRUE $3,200
bike_orderlines_tbl %>%
select(model, total_price) %>%
mutate(Disc = model %>% str_detect("Disc")) %>%
group_by(Disc) %>%
summarise(Median_Orderline = median(total_price)) %>%
ungroup() %>%
mutate(Median_Orderline = scales::dollar(Median_Orderline))
## # A tibble: 2 × 2
## Disc Median_Orderline
## <lgl> <chr>
## 1 FALSE $3,200
## 2 TRUE $2,660
# Question 4
bike_orderlines_tbl %>%
distinct(category_1, category_2, model, price) %>%
separate(
col = model,
into = str_c("model_", 1:9),
sep = " ",
remove = FALSE,
fill = "right",
extra = "drop"
) %>%
mutate(model_base = case_when(
str_detect(str_to_lower(model_1), "scalpel") ~ str_c(model_1, model_2, sep = " "),
str_detect(str_to_lower(model_1), "beast") ~ str_c(model_1, model_2, model_3, model_4, sep = " "),
TRUE ~ model_1
)) %>%
select(-(model_1:model_9)) %>%
group_by(category_1, category_2, model_base) %>%
summarise(
mean_price = mean(price),
min_price = min(price),
max_price = max(price)
) %>%
ungroup() %>%
mutate(
mean_price = scales::dollar(mean_price, largest_with_cents = 1e+4),
min_price = scales::dollar(min_price, largest_with_cents = 1e+4),
max_price = scales::dollar(max_price, largest_with_cents = 1e+4)
) %>%
arrange(desc(mean_price))
## `summarise()` has grouped output by 'category_1', 'category_2'. You can
## override using the `.groups` argument.
## # A tibble: 21 × 6
## category_1 category_2 model_base mean_price min_price max_price
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Mountain Cross Country Race Scalpel-Si Race $9,060 $9,060 $9,060
## 2 Mountain Cross Country Race Scalpel-Si Hi-M… $7,460 $7,460 $7,460
## 3 Mountain Sport Catalyst $546 $415 $705
## 4 Road Elite Road Supersix $5,491 $1,840 $12,790
## 5 Mountain Cross Country Race Scalpel-Si Carb… $5,327 $4,260 $6,390
## 6 Mountain Over Mountain Jekyll $5,275 $3,200 $7,990
## 7 Mountain Over Mountain Trigger $5,275 $3,200 $8,200
## 8 Mountain Cross Country Race F-Si $5,070 $1,840 $11,190
## 9 Mountain Trail Habit $5,052 $1,950 $12,250
## 10 Mountain Cross Country Race Scalpel 29 $4,795 $3,200 $6,390
## # ℹ 11 more rows