library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
#input
bike_orderlines <- readRDS("/cloud/project/bike_orderlines (1).rds")
#1 Fix Typos
bike_orderlines <- bike_orderlines %>%
  mutate(
    model = str_replace_all(model, "CAAD Disk Ultegra", "CAAD12 Disc Ultegra"),
    model = str_replace_all(model, "Syapse Carbon Tiagra", "Synapse Carbon Tiagra"),
    model = str_replace_all(model, "Supersix Evo Hi-Mod Utegra", "Supersix Evo Hi-Mod Ultegra")
  )

#2 Sales of each month.

bike_orderlines <- bike_orderlines %>% 
  mutate(order_month = month(order_date, label = TRUE))

monthly_sales <- bike_orderlines %>%
  group_by(order_month) %>%
  summarize(total_sales = sum(total_price))

monthly_sales <- monthly_sales %>%
    mutate(total_sales = (total_sales))

month_with_highest_sales <- monthly_sales %>%
  arrange(desc(total_sales)) %>%
  head(1)
print(monthly_sales)
## # A tibble: 12 × 2
##    order_month total_sales
##    <ord>             <dbl>
##  1 Jan             4089460
##  2 Feb             5343295
##  3 Mar             7282280
##  4 Apr             8386170
##  5 May             7935055
##  6 Jun             7813105
##  7 Jul             7602005
##  8 Aug             5346125
##  9 Sep             5556055
## 10 Oct             4394300
## 11 Nov             4169755
## 12 Dec             3114725
#3 Median of sales by bike attribute
  # Black Inc
black_inc_median <- bike_orderlines %>%
  filter(str_detect(model, "Black Inc")) %>%
  summarize('Black Inc' = median(total_price, na.rm = TRUE))

non_black_inc_median <- bike_orderlines %>%
  filter(!str_detect(model, "Black Inc")) %>%
  summarize('Black Inc' = median(total_price, na.rm = TRUE))

result_black_inc <- data.frame('Black Inc' = c(FALSE, TRUE), 'Median Orderline' = c(non_black_inc_median$`Black Inc`, black_inc_median$`Black Inc`))
print(result_black_inc)
##   Black.Inc Median.Orderline
## 1     FALSE             2880
## 2      TRUE            12250
  # Ultegra
ultegra_median <- bike_orderlines %>%
  filter(str_detect(model, "Ultegra")) %>%
  summarize('Ultegra' = median(total_price, na.rm = TRUE))

non_ultegra_median <- bike_orderlines %>%
  filter(!str_detect(model, "Ultegra")) %>%
  summarize('Ultegra' = median(total_price, na.rm = TRUE))

result_ultegra <- data.frame('Ultegra' = c(FALSE, TRUE), 'Median Orderline' = c(non_ultegra_median$`Ultegra`, ultegra_median$`Ultegra`))
print(result_ultegra)
##   Ultegra Median.Orderline
## 1   FALSE             3200
## 2    TRUE             3200
  # Disc
disc_median <- bike_orderlines %>%
  filter(str_detect(model, "Disc")) %>%
  summarize('Disc' = median(total_price, na.rm = TRUE))

non_disc_median <- bike_orderlines %>%
  filter(!str_detect(model, "Disc")) %>%
  summarize('Disc' = median(total_price, na.rm = TRUE))

result_disc <- data.frame('Disc' = c(FALSE, TRUE), 'Median Orderline' = c(non_disc_median$`Disc`, disc_median$`Disc`))
print(result_disc)
##    Disc Median.Orderline
## 1 FALSE             3200
## 2  TRUE             2660
#4 Avg, min, max by base model
#rename
bike_orderlines %>%
  distinct(category_1, category_2, model, price) %>%
  separate(
    col = model,
    into = str_c("model_", 1:9),
    sep = " ",
    remove = FALSE,
    fill = "right",
    extra = "drop"
  ) %>%
  mutate(model_base = case_when(
    str_detect(str_to_lower(model_1), "scalpel") ~ str_c(model_1, model_2, sep = " "),
    str_detect(str_to_lower(model_1), "beast") ~ str_c(model_1, model_2, model_3, model_4, sep = " "),
    TRUE ~ model_1
  )) %>%
  select(-(model_1:model_9)) %>%
  group_by(category_1, category_2, model_base) %>%
  summarise(
    mean_price = mean(price),
    min_price = min(price),
    max_price = max(price)
  , .groups = 'drop') %>%
  ungroup() %>%
  mutate(
    mean_price = scales::dollar(mean_price, largest_with_cents = 1e+4),
    min_price = scales::dollar(min_price, largest_with_cents = 1e+4),
    max_price = scales::dollar(max_price, largest_with_cents = 1e+4)
  ) %>%
  arrange(desc(mean_price))
## # A tibble: 22 × 6
##    category_1 category_2         model_base       mean_price min_price max_price
##    <chr>      <chr>              <chr>            <chr>      <chr>     <chr>    
##  1 Mountain   Cross Country Race Scalpel-Si Race  $9,060     $9,060    $9,060   
##  2 Mountain   Cross Country Race Scalpel-Si Hi-M… $7,460     $7,460    $7,460   
##  3 Mountain   Sport              Catalyst         $546       $415      $705     
##  4 Road       Elite Road         Supersix         $5,491     $1,840    $12,790  
##  5 Mountain   Cross Country Race Scalpel-Si Carb… $5,327     $4,260    $6,390   
##  6 Mountain   Over Mountain      Jekyll           $5,275     $3,200    $7,990   
##  7 Mountain   Over Mountain      Trigger          $5,275     $3,200    $8,200   
##  8 Mountain   Cross Country Race F-Si             $5,070     $1,840    $11,190  
##  9 Mountain   Trail              Habit            $5,052     $1,950    $12,250  
## 10 Mountain   Cross Country Race Scalpel 29       $4,795     $3,200    $6,390   
## # ℹ 12 more rows