This is a short challenge to begin applying what you are learning to the problem at hand. You will go through a series of questions related to the course project goals:
Coming up with a new product idea, and
Segmenting the customer-base
Apply lubridate and stringr functions to answer questions related to the course projects.
Gain exposure to rmarkdown.
To read the data, make sure that the paths point to the appropriate data sets. Saving the file in the “challenges folder” should enable the paths to be detected correctly.
# Load libraries
library(tidyverse)
library(lubridate)# Read bike orderlines data
# Location of the wrangled order-line data, relative to this file.
path_bike_orderlines <- "../00_data/bike_sales/data_wrangled/bike_orderlines.rds"

# Load the order lines and repair the model-name typos found in Feature
# Engineering. `model` is a character column, so each name is looked up in a
# small correction table; names without a correction are kept unchanged.
bike_orderlines_tbl <- path_bike_orderlines %>%
  read_rds() %>%
  mutate(
    model = coalesce(
      unname(
        c(
          "CAAD Disc Ultegra" = "CAAD12 Disc Ultegra",
          "Syapse Carbon Tiagra" = "Synapse Carbon Tiagra",
          "Supersix Evo Hi-Mod Utegra" = "Supersix Evo Hi-Mod Ultegra"
        )[model]
      ),
      model
    )
  )

# Inspect the column names and types of the cleaned table.
glimpse(bike_orderlines_tbl)
## Rows: 15,644
## Columns: 13
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0~
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7~
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1~
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1~
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,~
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,~
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea~
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",~
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta~
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca~
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",~
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui~
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "~
Start with bike_orderlines_tbl, select order_date and total_price, add a month column, summarise sales by month, and format the result with dollar(). What does this tell us about a time of year to focus marketing efforts?
# Total sales per calendar month, formatted as dollar strings.
bike_orderlines_tbl %>%
  # month() reads the month straight from order_date, so flooring the date
  # to the start of its month first would not change the label.
  transmute(
    Month = month(order_date, label = TRUE),
    total_price
  ) %>%
  group_by(Month) %>%
  summarise(Sales = sum(total_price)) %>%
  ungroup() %>%
  # Format the summed column in a single call, after aggregation.
  mutate(Sales = scales::dollar(Sales)) %>%
  arrange(Month)
Start with bike_orderlines, select model and total_price, compute the median() orderline, and format with dollar() (Hint: investigate largest_with_cents argument).
Evaluate “Black Inc”. What does this tell us about the “Black Inc” feature?
# Median order-line value for models with and without "Black Inc" in the name.
bike_orderlines_tbl %>%
  transmute(
    # TRUE when the model name contains "Black Inc"; stored as a factor.
    Black = as_factor(str_detect(model, "Black Inc")),
    total_price
  ) %>%
  group_by(Black) %>%
  summarise(Median_Orderline = median(total_price)) %>%
  ungroup() %>%
  mutate(Median_Orderline = scales::dollar(Median_Orderline))
Evaluate “Ultegra”. What does this tell us about the “Ultegra” feature?
# Median order-line value for models with and without "Ultegra" in the name.
bike_orderlines_tbl %>%
  transmute(
    # TRUE when the model name contains "Ultegra"; stored as a factor.
    Ultegra = as_factor(str_detect(model, "Ultegra")),
    total_price
  ) %>%
  group_by(Ultegra) %>%
  summarise(Median_Orderline = median(total_price)) %>%
  ungroup() %>%
  mutate(Median_Orderline = scales::dollar(Median_Orderline))
Evaluate “Disc” option. What does this tell us about the “Disc” feature?
# Median order-line value for models with and without "Disc" in the name.
bike_orderlines_tbl %>%
  transmute(
    # TRUE when the model name contains "Disc"; stored as a factor.
    Disc = as_factor(str_detect(model, "Disc")),
    total_price
  ) %>%
  group_by(Disc) %>%
  summarise(Median_Orderline = median(total_price)) %>%
  ungroup() %>%
  mutate(Median_Orderline = scales::dollar(Median_Orderline))
Start with bike_orderlines_tbl and create model_base (Hint: Use the Feature Engineering code), dropping the helper columns that match "model_[0-9]". Summarise price with mean(), min(), and max(), then format with dollar() (Hint: Check out largest_with_cents). What does this tell us about how bikes are priced?
# Mean, minimum and maximum list price per base model within each category
# pair, ordered from the highest mean price to the lowest and formatted as
# dollar strings.
bike_orderlines_tbl %>%
  # One row per distinct model/price combination, so that the number of
  # order lines a model appears on does not weight the statistics.
  distinct(category_1, category_2, model, price) %>%
  # Split the model name into up to nine words (model_1 ... model_9); short
  # names are padded on the right and extra words are dropped.
  separate(
    col = model,
    into = str_c("model_", 1:9),
    sep = " ",
    remove = FALSE,
    fill = "right",
    extra = "drop"
  ) %>%
  # Create the base model feature from the words of the model name.
  mutate(model_base = case_when(
    # Fix Scalpel 29: the base name spans the first two words.
    str_detect(str_to_lower(model_1), "scalpel") ~
      str_c(model_1, model_2, sep = " "),
    # Fix Beast of the East: the base name spans the first four words.
    str_detect(str_to_lower(model_1), "beast") ~
      str_c(model_1, model_2, model_3, model_4, sep = " "),
    # Every other model: the first word is the base name.
    TRUE ~ model_1
  )) %>%
  # mutate(model_tier = model %>% str_replace(model_base) %>%
  # str_trim) %>%
  # Remove the helper word columns.
  select(-(model_1:model_9)) %>%
  # select(-matches("[0-9]")) %>%
  group_by(category_1, category_2, model_base) %>%
  summarise(
    mean_price = mean(price),
    min_price = min(price),
    max_price = max(price)
  ) %>%
  ungroup() %>%
  # Sort while mean_price is still numeric. Sorting after dollar() has turned
  # it into text would order the rows as strings, not by price.
  arrange(desc(mean_price)) %>%
  mutate(
    mean_price = scales::dollar(mean_price, largest_with_cents = 1e+4),
    min_price = scales::dollar(min_price, largest_with_cents = 1e+4),
    max_price = scales::dollar(max_price, largest_with_cents = 1e+4)
  )
## `summarise()` has grouped output by 'category_1', 'category_2'. You can override using the `.groups` argument.
#set_names(names(.) %>% str_replace ("_", " ") %>% str_to_title()