library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
#input
bike_orderlines <- readRDS("/cloud/project/bike_orderlines (1).rds")
#1 Fix Typos
bike_orderlines <- bike_orderlines %>%
mutate(
model = str_replace_all(model, "CAAD Disk Ultegra", "CAAD12 Disc Ultegra"),
model = str_replace_all(model, "Syapse Carbon Tiagra", "Synapse Carbon Tiagra"),
model = str_replace_all(model, "Supersix Evo Hi-Mod Utegra", "Supersix Evo Hi-Mod Ultegra")
)
#2 Sales of each month.
bike_orderlines <- bike_orderlines %>%
mutate(order_month = month(order_date, label = TRUE))
monthly_sales <- bike_orderlines %>%
group_by(order_month) %>%
summarize(total_sales = sum(total_price))
monthly_sales <- monthly_sales %>%
mutate(total_sales = (total_sales))
month_with_highest_sales <- monthly_sales %>%
arrange(desc(total_sales)) %>%
head(1)
print(monthly_sales)
## # A tibble: 12 × 2
## order_month total_sales
## <ord> <dbl>
## 1 Jan 4089460
## 2 Feb 5343295
## 3 Mar 7282280
## 4 Apr 8386170
## 5 May 7935055
## 6 Jun 7813105
## 7 Jul 7602005
## 8 Aug 5346125
## 9 Sep 5556055
## 10 Oct 4394300
## 11 Nov 4169755
## 12 Dec 3114725
#3 Median of sales by bike attribute
# Black Inc
black_inc_median <- bike_orderlines %>%
filter(str_detect(model, "Black Inc")) %>%
summarize('Black Inc' = median(total_price, na.rm = TRUE))
non_black_inc_median <- bike_orderlines %>%
filter(!str_detect(model, "Black Inc")) %>%
summarize('Black Inc' = median(total_price, na.rm = TRUE))
result_black_inc <- data.frame('Black Inc' = c(FALSE, TRUE), 'Median Orderline' = c(non_black_inc_median$`Black Inc`, black_inc_median$`Black Inc`))
print(result_black_inc)
## Black.Inc Median.Orderline
## 1 FALSE 2880
## 2 TRUE 12250
# Ultegra
ultegra_median <- bike_orderlines %>%
filter(str_detect(model, "Ultegra")) %>%
summarize('Ultegra' = median(total_price, na.rm = TRUE))
non_ultegra_median <- bike_orderlines %>%
filter(!str_detect(model, "Ultegra")) %>%
summarize('Ultegra' = median(total_price, na.rm = TRUE))
result_ultegra <- data.frame('Ultegra' = c(FALSE, TRUE), 'Median Orderline' = c(non_ultegra_median$`Ultegra`, ultegra_median$`Ultegra`))
print(result_ultegra)
## Ultegra Median.Orderline
## 1 FALSE 3200
## 2 TRUE 3200
# Disc
disc_median <- bike_orderlines %>%
filter(str_detect(model, "Disc")) %>%
summarize('Disc' = median(total_price, na.rm = TRUE))
non_disc_median <- bike_orderlines %>%
filter(!str_detect(model, "Disc")) %>%
summarize('Disc' = median(total_price, na.rm = TRUE))
result_disc <- data.frame('Disc' = c(FALSE, TRUE), 'Median Orderline' = c(non_disc_median$`Disc`, disc_median$`Disc`))
print(result_disc)
## Disc Median.Orderline
## 1 FALSE 3200
## 2 TRUE 2660
#4 Avg, min, max by base model
#rename
bike_orderlines %>%
distinct(category_1, category_2, model, price) %>%
separate(
col = model,
into = str_c("model_", 1:9),
sep = " ",
remove = FALSE,
fill = "right",
extra = "drop"
) %>%
mutate(model_base = case_when(
str_detect(str_to_lower(model_1), "scalpel") ~ str_c(model_1, model_2, sep = " "),
str_detect(str_to_lower(model_1), "beast") ~ str_c(model_1, model_2, model_3, model_4, sep = " "),
TRUE ~ model_1
)) %>%
select(-(model_1:model_9)) %>%
group_by(category_1, category_2, model_base) %>%
summarise(
mean_price = mean(price),
min_price = min(price),
max_price = max(price)
, .groups = 'drop') %>%
ungroup() %>%
mutate(
mean_price = scales::dollar(mean_price, largest_with_cents = 1e+4),
min_price = scales::dollar(min_price, largest_with_cents = 1e+4),
max_price = scales::dollar(max_price, largest_with_cents = 1e+4)
) %>%
arrange(desc(mean_price))
## # A tibble: 22 × 6
## category_1 category_2 model_base mean_price min_price max_price
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Mountain Cross Country Race Scalpel-Si Race $9,060 $9,060 $9,060
## 2 Mountain Cross Country Race Scalpel-Si Hi-M… $7,460 $7,460 $7,460
## 3 Mountain Sport Catalyst $546 $415 $705
## 4 Road Elite Road Supersix $5,491 $1,840 $12,790
## 5 Mountain Cross Country Race Scalpel-Si Carb… $5,327 $4,260 $6,390
## 6 Mountain Over Mountain Jekyll $5,275 $3,200 $7,990
## 7 Mountain Over Mountain Trigger $5,275 $3,200 $8,200
## 8 Mountain Cross Country Race F-Si $5,070 $1,840 $11,190
## 9 Mountain Trail Habit $5,052 $1,950 $12,250
## 10 Mountain Cross Country Race Scalpel 29 $4,795 $3,200 $6,390
## # ℹ 12 more rows