This is a short challenge to begin applying what you are learning to the problem at hand. You will go through a series of questions related to the course project goals:
Coming up with a new product idea, and
Segmenting the customer-base
Apply lubridate and stringr functions to answer questions related to the course projects.
Gain exposure to rmarkdown.
To read the data, make sure that the paths point to the appropriate data sets. Saving the file in the “challenges folder” should enable the paths to be detected correctly.
# Location of the wrangled bike orderlines data, relative to this file
path_bike_orderlines <- "../00_data/bike_sales/data_wrangled/bike_orderlines.rds"
# Load the orderlines and correct the model-name typos found in Feature
# Engineering. Misspelled names are looked up in a named vector
# (typo = correction); names with no entry come back NA from the lookup and
# fall through to the original value via coalesce().
bike_orderlines_tbl <- path_bike_orderlines %>%
  read_rds() %>%
  mutate(
    model = coalesce(
      unname(c(
        "CAAD Disc Ultegra"          = "CAAD12 Disc Ultegra",
        "Syapse Carbon Tiagra"       = "Synapse Carbon Tiagra",
        "Supersix Evo Hi-Mod Utegra" = "Supersix Evo Hi-Mod Ultegra"
      )[model]),
      model
    )
  )
# Inspect column names, types, and the leading values of each column
glimpse(bike_orderlines_tbl)
## Rows: 15,644
## Columns: 13
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-…
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, …
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, …
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, …
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570…
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570…
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the E…
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road"…
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mount…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "C…
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers"…
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Lou…
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", …
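Start with bike_orderlines_tbl. Select the columns order_date and total_price. Add a column called month. Group by month and summarize the sales. Format the sales values with dollar(). What does this tell us about a time of year to focus marketing efforts?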
# Total sales for each calendar month across all orderlines.
bike_orderlines_tbl %>%
  select(order_date, total_price) %>%
  # order_date is already a date-time, so month() can read it directly; there
  # is no need to round-trip it through ymd(). With label = TRUE the result is
  # an ordered factor (January < ... < December), which keeps the rows in
  # calendar order without any as_factor() / str_to_title() post-processing
  # (the latter would silently turn the factor into plain character).
  mutate(month = month(order_date, label = TRUE, abbr = FALSE)) %>%
  group_by(month) %>%
  summarize(sales = sum(total_price)) %>%
  ungroup() %>%
  # Format only after aggregating, so the sum is computed on numbers
  mutate(sales = scales::dollar(sales)) %>%
  # Title-case the column headers, matching the display-name convention used
  # by the other summary tables in this report
  rename(
    Month = month,
    Sales = sales
  )
bike_orderlinesmodel and total_pricemedian() orderlinedollar() (Hint: investigate largest_with_cents argument)
Evaluate “Black Inc”. What does this tell us about the “Black Inc” feature?
# Median orderline value for models with vs. without "Black Inc" in the name.
bike_orderlines_tbl %>%
  # Case-insensitive flag: lower-case the model name before matching
  mutate(`Black Inc` = str_detect(str_to_lower(model), "black inc")) %>%
  select(`Black Inc`, total_price) %>%
  group_by(`Black Inc`) %>%
  summarize(`Median Orderline` = median(total_price)) %>%
  ungroup() %>%
  # Format the whole column at once, after aggregation
  mutate(`Median Orderline` = scales::dollar(`Median Orderline`))
Evaluate “Ultegra”. What does this tell us about the “Ultegra” feature?
# Median orderline value for models with vs. without "Ultegra" in the name.
bike_orderlines_tbl %>%
  # Case-insensitive flag: lower-case the model name before matching
  mutate(`Ultegra` = str_detect(str_to_lower(model), "ultegra")) %>%
  select(`Ultegra`, total_price) %>%
  group_by(`Ultegra`) %>%
  summarize(`Median Orderline` = median(total_price)) %>%
  ungroup() %>%
  # Format the whole column at once, after aggregation
  mutate(`Median Orderline` = scales::dollar(`Median Orderline`))
Evaluate “Disc” option. What does this tell us about the “Disc” feature?
# Median orderline value for models with vs. without "Disc" in the name.
bike_orderlines_tbl %>%
  # Case-insensitive flag: lower-case the model name before matching
  mutate(`Disc` = str_detect(str_to_lower(model), "disc")) %>%
  select(`Disc`, total_price) %>%
  group_by(`Disc`) %>%
  summarize(`Median Orderline` = median(total_price)) %>%
  ungroup() %>%
  # Format the whole column at once, after aggregation
  mutate(`Median Orderline` = scales::dollar(`Median Orderline`))
bike_orderlines_tblmodel_base (Hint: Use the Feature Engineering code)
"model_[0-9]")mean(), min(), and max())dollar() (Hint: Check out largest_with_cents)What does this tell us about how bikes are priced?
# Mean, minimum and maximum price for each base model within its categories,
# ordered from the highest to the lowest mean price.
bike_orderlines_tbl %>%
  select(model, category_1, category_2, price) %>%
  # Split the model name into up to seven space-separated words; shorter names
  # are padded with NA on the right and any extra words are dropped
  separate(model,
           into   = str_c("model_", 1:7),
           sep    = " ",
           remove = TRUE,
           fill   = "right",
           extra  = "drop") %>%
  # The base model is normally the first word; the cases below cover names
  # whose base spans several words
  mutate(model_base = case_when(
    # fix - supersix evo
    str_detect(str_to_lower(model_1), "supersix") ~ str_c(model_1, model_2, sep = " "),
    # fix - beast of the east
    str_detect(str_to_lower(model_1), "beast") ~ str_c(model_1, model_2, model_3, model_4, sep = " "),
    # fix - fat CAAD
    str_detect(str_to_lower(model_1), "fat") ~ str_c(model_1, model_2, sep = " "),
    # fix - bad habit
    str_detect(str_to_lower(model_1), "bad") ~ str_c(model_1, model_2, sep = " "),
    # fix - scalpel 29
    str_detect(str_to_lower(model_2), "29") ~ str_c(model_1, model_2, sep = " "),
    # catch-all
    TRUE ~ model_1
  )) %>%
  # Keep only what the summary needs; this drops every model_[0-9] helper
  select(category_1, category_2, model_base, price) %>%
  group_by(category_1, category_2, model_base) %>%
  # Keep the statistics numeric for now; .groups = "drop" returns an ungrouped
  # table and silences the "grouped output" message
  summarize(
    mean_price = round(mean(price), 0),
    min_price  = min(price),
    max_price  = max(price),
    .groups    = "drop"
  ) %>%
  # Sort while the mean is still a number: sorting the dollar-formatted text
  # would be lexicographic (e.g. "$9,000" would rank above "$12,790")
  arrange(desc(mean_price)) %>%
  # Format for display only after all numeric work is finished
  mutate(
    mean_price = scales::dollar(mean_price),
    min_price  = scales::dollar(min_price),
    max_price  = scales::dollar(max_price)
  ) %>%
  rename(
    `Category 1` = category_1,
    `Category 2` = category_2,
    `Model Base` = model_base,
    `Mean Price` = mean_price,
    `Min Price`  = min_price,
    `Max Price`  = max_price
  )