Challenge Summary

This is a short challenge to begin applying what you are learning to the problem at hand. You will go through a series of questions related to the course project goals:

  1. Coming up with a new product idea, and

  2. Segmenting the customer base

Objectives

  1. Apply lubridate and stringr functions to answer questions related to the course projects.

  2. Gain exposure to rmarkdown.

Data

To read the data, make sure that the paths point to the appropriate data sets. The path used below is relative (it starts with `../`), so saving the file in the “challenges folder” should enable the paths to be detected correctly.

# Load libraries
library(tidyverse)
library(lubridate)
# Location of the wrangled bike orderlines data (relative path)
path_bike_orderlines <- "../00_data/bike_sales/data_wrangled/bike_orderlines.rds"

# Load the orderlines and correct the model-name typos found in Feature
# Engineering: a misspelled name is looked up in the named vector; any name
# without an entry comes back as NA and falls through to the original value.
bike_orderlines_tbl <- read_rds(path_bike_orderlines) %>%
    mutate(model = coalesce(
        unname(c(
            "CAAD Disc Ultegra"          = "CAAD12 Disc Ultegra",
            "Syapse Carbon Tiagra"       = "Synapse Carbon Tiagra",
            "Supersix Evo Hi-Mod Utegra" = "Supersix Evo Hi-Mod Ultegra"
        )[model]),
        model
    ))

# Quick structural overview of the cleaned table
glimpse(bike_orderlines_tbl)
## Rows: 15,644
## Columns: 13
## $ order_date     <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0~
## $ order_id       <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7~
## $ order_line     <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1~
## $ quantity       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1~
## $ price          <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,~
## $ total_price    <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,~
## $ model          <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea~
## $ category_1     <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",~
## $ category_2     <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta~
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca~
## $ bikeshop_name  <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",~
## $ city           <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui~
## $ state          <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "~

Questions

lubridate: Which month has the highest bike sales? (Difficulty = Medium)

What does this tell us about a time of year to focus marketing efforts?

# Total sales per calendar month (all years pooled), listed in calendar order
bike_orderlines_tbl %>%
    transmute(
        Month = month(order_date, label = TRUE),
        total_price
    ) %>%
    group_by(Month) %>%
    summarise(Sales = sum(total_price)) %>%
    ungroup() %>%
    arrange(Month) %>%
    # Format the whole column at once so every row gets the same precision
    mutate(Sales = scales::dollar(Sales))

stringr: What is the median orderline sales value by Bike Attribute? (Difficulty = Medium)

Evaluate “Black Inc”. What does this tell us about the “Black Inc” feature?

# Median orderline value for models with vs. without "Black Inc" in the name
bike_orderlines_tbl %>%
    group_by(Black = as_factor(str_detect(model, "Black Inc"))) %>%
    summarise(Median_Orderline = median(total_price)) %>%
    ungroup() %>%
    # Format after summarising so both medians share one formatting pass
    mutate(Median_Orderline = scales::dollar(Median_Orderline))

Evaluate “Ultegra”. What does this tell us about the “Ultegra” feature?

# Median orderline value for models with vs. without "Ultegra" in the name
bike_orderlines_tbl %>%
    group_by(Ultegra = as_factor(str_detect(model, "Ultegra"))) %>%
    summarise(Median_Orderline = median(total_price)) %>%
    ungroup() %>%
    # Format after summarising so both medians share one formatting pass
    mutate(Median_Orderline = scales::dollar(Median_Orderline))

Evaluate “Disc” option. What does this tell us about the “Disc” feature?

# Median orderline value for models with vs. without "Disc" in the name
bike_orderlines_tbl %>%
    group_by(Disc = as_factor(str_detect(model, "Disc"))) %>%
    summarise(Median_Orderline = median(total_price)) %>%
    ungroup() %>%
    # Format after summarising so both medians share one formatting pass
    mutate(Median_Orderline = scales::dollar(Median_Orderline))

stringr: What are the average, min, and max prices by Base Model? (Difficulty = High)

What does this tell us about how bikes are priced?

# Average, minimum and maximum price per base model, highest average first.
#
# Each distinct (category_1, category_2, model, price) combination is counted
# once, so the statistics are taken over distinct models, not over orderlines.
bike_orderlines_tbl %>%
    distinct(category_1, category_2, model, price) %>%

    # Split the model name into words; the base model is rebuilt from them
    separate(
        col    = model,
        into   = str_c("model_", 1:9),
        sep    = " ",
        remove = FALSE,
        fill   = "right",
        extra  = "drop"
    ) %>%

    # Base model is the first word, except for multi-word base names
    mutate(model_base = case_when(
        # Fix "Scalpel 29": keep the first two words.
        # NOTE(review): this matches any first word containing "scalpel",
        # not only "Scalpel 29" - confirm that is intended.
        str_detect(str_to_lower(model_1), "scalpel") ~
            str_c(model_1, model_2, sep = " "),
        # Fix "Beast of the East": keep the first four words
        str_detect(str_to_lower(model_1), "beast") ~
            str_c(model_1, model_2, model_3, model_4, sep = " "),
        TRUE ~ model_1
    )) %>%

    # Remove the helper word columns
    select(-(model_1:model_9)) %>%

    group_by(category_1, category_2, model_base) %>%
    summarise(
        mean_price = mean(price),
        min_price  = min(price),
        max_price  = max(price)
    ) %>%
    ungroup() %>%

    # Sort while mean_price is still numeric: scales::dollar() returns text,
    # and sorting the formatted strings would rank "$9,..." above "$10,...".
    arrange(desc(mean_price)) %>%

    # Format for display only after the numeric ordering is fixed
    mutate(
        mean_price = scales::dollar(mean_price, largest_with_cents = 1e+4),
        min_price  = scales::dollar(min_price, largest_with_cents = 1e+4),
        max_price  = scales::dollar(max_price, largest_with_cents = 1e+4)
    )
## `summarise()` has grouped output by 'category_1', 'category_2'. You can override using the `.groups` argument.
    #set_names(names(.) %>% str_replace ("_", " ") %>% str_to_title()