library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(readxl)
library(dplyr)
library(formattable)
library(scales)
## 
## Attaching package: 'scales'
## The following objects are masked from 'package:formattable':
## 
##     comma, percent, scientific
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
bike_orderlines_tbl <- read_excel("bike_orderlines.xlsx")
#lubridate: Which month has the highest bike sales? (Difficulty = Medium)
for(i in length(bike_orderlines_tbl$order_date))
  bike_orderlines_tbl$month1 = c(factor(format(bike_orderlines_tbl$order_date, '%B')))
month2 <- unique(bike_orderlines_tbl$month1)
Sales <- c()
Month <- c()
Month <- as.factor(Month)
for(i in 1:length(month2))
{
  result <- 0
  bike_orderlines_tbl2 <- dplyr::filter(bike_orderlines_tbl, month1==month2[i])
  for (j in 1:length(bike_orderlines_tbl2$total_price))
      result <- result+bike_orderlines_tbl2$total_price[j]
  Sales <- c(Sales, result)
  Month <- c(Month, month2[i])
}
bike_orderline <- data.frame(Month, Sales)
bike_orderline$Sales <- currency(bike_orderline$Sales, digits = 0L)
arrange(bike_orderline, desc(Sales))
##        Month      Sales
## 1      April $8,386,170
## 2        May $7,935,055
## 3       June $7,813,105
## 4       July $7,602,005
## 5      March $7,282,280
## 6  September $5,556,055
## 7     August $5,346,125
## 8   February $5,343,295
## 9    October $4,394,300
## 10  November $4,169,755
## 11   January $4,089,460
## 12  December $3,114,725
"=>Month has the highest bike sales is April"
## [1] "=>Month has the highest bike sales is April"
#stringr: What is the median orderline sales value by Bike Attribute? (Difficulty = Medium)
#Evaluate "Black Inc". What does this tell us about the "Black Inc" feature?
bike_orderlines_tbl %>%
  select(model, total_price) %>%
  mutate(black = model %>% str_to_lower() %>% str_detect("black inc")) %>%
  group_by(black) %>%
  summarize(median = median(total_price)) %>%
  ungroup() %>%
  mutate(median = median %>% scales::dollar()) %>%
  rename(`Black Inc` = black, `Median Orderline` = median)
## # A tibble: 2 × 2
##   `Black Inc` `Median Orderline`
##   <lgl>       <chr>             
## 1 FALSE       $2,880            
## 2 TRUE        $12,250
#Evaluate "Ultegra". What does this tell us about the "Ultegra" feature?
bike_orderlines_tbl %>%
  select(model, total_price) %>%
  mutate(ultegra = model %>% str_to_lower() %>% str_detect("ultegra")) %>%
  group_by(ultegra) %>%
  summarize(median = median(total_price)) %>%
  ungroup() %>%
  mutate(median = median %>% scales::dollar()) %>%
  rename(`Ultegra` = ultegra, `Median Orderline` = median)
## # A tibble: 2 × 2
##   Ultegra `Median Orderline`
##   <lgl>   <chr>             
## 1 FALSE   $3,200            
## 2 TRUE    $3,200
#Evaluate "Disc" option. *What does this tell us about the "Disc" feature?
bike_orderlines_tbl %>%
  select(model, total_price) %>%
  mutate(disc = model %>% str_to_lower() %>% str_detect("disc")) %>%
  group_by(disc) %>%
  summarize(median = median(total_price)) %>%
  ungroup() %>%
  mutate(median = median %>% scales::dollar()) %>%
  rename(`Disc` = disc, `Median Orderline` = median)
## # A tibble: 2 × 2
##   Disc  `Median Orderline`
##   <lgl> <chr>             
## 1 FALSE $3,200            
## 2 TRUE  $2,660
#stringr: What are the average, min, and max prices by Base Model? (Difficulty = High)
bike_orderlines_tbl %>% 
  select(category_1, category_2, model, price) %>%
  separate(model,
           into = str_c("model_", 1:7),
           sep = " ",
           remove = T,
           fill = "right",
           extra = "drop") %>% 
  mutate(model_base = case_when(
    # fix - supersix evo
        str_detect(str_to_lower(model_1), "supersix") ~ str_c(model_1, model_2, sep = " "),
    # fix - beast of the east
        str_detect(str_to_lower(model_1), "beast") ~ str_c(model_1, model_2, model_3, model_4, sep = " "),
    # fix - fat CAAD
        str_detect(str_to_lower(model_1), "fat") ~ str_c(model_1, model_2, sep = " "),
    # fix - bad habit
        str_detect(str_to_lower(model_1), "bad") ~ str_c(model_1, model_2, sep = " "),
    # fix - scalpel 29
        str_detect(str_to_lower(model_2), "29") ~ str_c(model_1, model_2, sep = " "),
     # catch-all
        TRUE ~ model_1)) %>% 
  mutate(model_1 = model_1 %>% str_trim()) %>%
    select(category_1, category_2, model_base, price) %>%
    group_by(category_1, category_2,model_base) %>%
    summarize(mean = round(mean(price),0) %>% scales::dollar(),
              min = min(price) %>% scales::dollar(),
              max = max(price) %>% scales::dollar()) %>%
    ungroup() %>%
    arrange(desc(mean)) %>%
    rename(
        `Category 1` = category_1,
        `Category 2` = category_2,
        `Model Base` = model_base,
        `Average Price` = mean,
        `Min Price` = min, 
        `Max Price` = max)
## `summarise()` has grouped output by 'category_1', 'category_2'. You can override using the `.groups` argument.
## # A tibble: 20 × 6
##    `Category 1` `Category 2`       `Model Base`      `Average Price` `Min Price`
##    <chr>        <chr>              <chr>             <chr>           <chr>      
##  1 Mountain     Cross Country Race Scalpel-Si        $6,695          $3,200     
##  2 Mountain     Sport              Catalyst          $541            $415       
##  3 Mountain     Over Mountain      Jekyll            $5,042          $3,200     
##  4 Road         Elite Road         Supersix Evo      $4,978          $1,840     
##  5 Mountain     Over Mountain      Trigger           $4,970          $3,200     
##  6 Mountain     Trail              Habit             $4,611          $1,950     
##  7 Mountain     Cross Country Race F-Si              $4,504          $1,840     
##  8 Mountain     Cross Country Race Scalpel 29        $4,499          $3,200     
##  9 Mountain     Fat Bike           Fat CAAD1         $3,730          $3,730     
## 10 Road         Triathalon         Slice             $3,527          $1,950     
## 11 Road         Endurance Road     Synapse           $3,168          $870       
## 12 Road         Elite Road         CAAD12            $2,978          $1,680     
## 13 Mountain     Trail              Bad Habit         $2,954          $2,660     
## 14 Road         Elite Road         CAAD              $2,660          $2,660     
## 15 Road         Cyclocross         SuperX            $2,339          $1,750     
## 16 Mountain     Trail              Beast of the East $2,194          $1,620     
## 17 Mountain     Fat Bike           Fat CAAD2         $2,130          $2,130     
## 18 Road         Endurance Road     Syapse            $1,840          $1,840     
## 19 Mountain     Sport              Trail             $1,153          $815       
## 20 Road         Elite Road         CAAD8             $1,136          $815       
## # … with 1 more variable: Max Price <chr>