install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
install.packages("magrittr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
library(stringr)
library(scales)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
path_bike_orderlines <- "bike_orderlines.rds"
  1. Fix typos found in Feature Engineering

•ConvertCAAD Disk UltegratoCAAD12 Disc Ultegra.

•ConvertSyapse Carbon TiagratoSynapse Carbon Tiagra.

•ConvertSupersix Evo Hi-Mod UtegratoSupersix Evo Hi-Mod Ultegra

bike_orderlines_tbl <- readRDS(path_bike_orderlines) %>%
  mutate(model = case_when(
    model == "CAAD Disc Ultegra" ~ "CAAD12 Disc Ultegra",
    model == "Syapse Carbon Tiagra" ~ "Synapse Carbon Tiagra",
    model == "Supersix Evo Hi-Mod Utegra" ~ "Supersix Evo Hi-Mod Ultegra",
    TRUE ~ model
  ))

glimpse(bike_orderlines_tbl)
## Rows: 15,644
## Columns: 13
## $ order_date     <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id       <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ order_line     <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price          <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price    <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model          <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1     <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2     <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name  <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city           <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state          <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…
  1. Which month has the highest bike sales?

What does this tell us about a time of year to focus marketing efforts?

bike_orderlines_tbl %>%
  select(order_date, total_price) %>%
  mutate(month = order_date %>% floor_date("month") %>% month(label = TRUE)) %>%
  group_by(month) %>%
  summarise(sales = sum(total_price)) %>%
  ungroup() %>%
  mutate(sales = scales::dollar(sales)) %>%
  arrange(month) %>%
  rename(
    "Month" = month,
    "Sales" = sales
  )
## # A tibble: 12 × 2
##    Month Sales     
##    <ord> <chr>     
##  1 Jan   $4,089,460
##  2 Feb   $5,343,295
##  3 Mar   $7,282,280
##  4 Apr   $8,386,170
##  5 May   $7,935,055
##  6 Jun   $7,813,105
##  7 Jul   $7,602,005
##  8 Aug   $5,346,125
##  9 Sep   $5,556,055
## 10 Oct   $4,394,300
## 11 Nov   $4,169,755
## 12 Dec   $3,114,725
  1. What is the median orderline sales value by Bike Attribute?

Evaluate “Black Inc”.What does this tell us about the “Black Inc” feature?

bike_orderlines_tbl %>%
  select(model, total_price) %>%
  mutate(Black = model %>% str_detect("Black Inc")) %>%
  group_by(Black) %>%
  summarise(Median_Orderline = median(total_price)) %>%
  ungroup() %>%
  mutate(Median_Orderline = scales::dollar(Median_Orderline))
## # A tibble: 2 × 2
##   Black Median_Orderline
##   <lgl> <chr>           
## 1 FALSE $2,880          
## 2 TRUE  $12,250

Evaluate “Ultegra”.What does this tell us about the “Ultegra” feature?

bike_orderlines_tbl %>%
  select(model, total_price) %>%
  mutate(Ultegra = model %>% str_detect("Ultegra")) %>%
  group_by(Ultegra) %>%
  summarise(Median_Orderline = median(total_price)) %>%
  ungroup() %>%
  mutate(Median_Orderline = scales::dollar(Median_Orderline))
## # A tibble: 2 × 2
##   Ultegra Median_Orderline
##   <lgl>   <chr>           
## 1 FALSE   $3,200          
## 2 TRUE    $3,200

Evaluate “Disc” option.What does this tell us about the “Disc” feature?

bike_orderlines_tbl %>%
  select(model, total_price) %>%
  mutate(Disc = model %>% str_detect("Disc")) %>%
  group_by(Disc) %>%
  summarise(Median_Orderline = median(total_price)) %>%
  ungroup() %>%
  mutate(Median_Orderline = scales::dollar(Median_Orderline))
## # A tibble: 2 × 2
##   Disc  Median_Orderline
##   <lgl> <chr>           
## 1 FALSE $3,200          
## 2 TRUE  $2,660
  1. What are the average, min, and max prices by Base Model?

What does this tell us about how bikes are priced?

‘summarise()’ has grouped output by ‘category_1’, ‘category_2’. You can override using the ‘.groups’ argument.

bike_orderlines_tbl %>%
  distinct(category_1, category_2, model, price) %>%
  separate(
    col = model,
    into = str_c("model_", 1:9),
    sep = " ",
    remove = FALSE,
    fill = "right",
    extra = "drop"
  ) %>%
  mutate(model_base = case_when(
    str_detect(str_to_lower(model_1), "scalpel") ~ str_c(model_1, model_2, sep = " "),
    str_detect(str_to_lower(model_1), "beast") ~ str_c(model_1, model_2, model_3, model_4, sep = " "),
    TRUE ~ model_1
  )) %>%
  select(-(model_1:model_9)) %>%
  group_by(category_1, category_2, model_base) %>%
  summarise(
    mean_price = mean(price),
    min_price = min(price),
    max_price = max(price)
  ) %>%
  ungroup() %>%
  mutate(
    mean_price = scales::dollar(mean_price, largest_with_cents = 1e+4),
    min_price = scales::dollar(min_price, largest_with_cents = 1e+4),
    max_price = scales::dollar(max_price, largest_with_cents = 1e+4)
  ) %>%
  arrange(desc(mean_price))
## `summarise()` has grouped output by 'category_1', 'category_2'. You can
## override using the `.groups` argument.
## # A tibble: 21 × 6
##    category_1 category_2         model_base       mean_price min_price max_price
##    <chr>      <chr>              <chr>            <chr>      <chr>     <chr>    
##  1 Mountain   Cross Country Race Scalpel-Si Race  $9,060     $9,060    $9,060   
##  2 Mountain   Cross Country Race Scalpel-Si Hi-M… $7,460     $7,460    $7,460   
##  3 Mountain   Sport              Catalyst         $546       $415      $705     
##  4 Road       Elite Road         Supersix         $5,491     $1,840    $12,790  
##  5 Mountain   Cross Country Race Scalpel-Si Carb… $5,327     $4,260    $6,390   
##  6 Mountain   Over Mountain      Jekyll           $5,275     $3,200    $7,990   
##  7 Mountain   Over Mountain      Trigger          $5,275     $3,200    $8,200   
##  8 Mountain   Cross Country Race F-Si             $5,070     $1,840    $11,190  
##  9 Mountain   Trail              Habit            $5,052     $1,950    $12,250  
## 10 Mountain   Cross Country Race Scalpel 29       $4,795     $3,200    $6,390   
## # ℹ 11 more rows