filename <- file.choose()
bike_orderlines <- readRDS(filename)
View(bike_orderlines)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
bike_orderlines_fixed <- bike_orderlines %>% mutate(
model = case_when(
str_detect(str_trim(model), "CAAD Disc Ultegra") ~ "CAAD12 Disc Ultegra",
str_detect(str_trim(model), "Syapse Carbon Tiagra") ~ "Synapse Carbon Tiagra",
str_detect(str_trim(model), "Supersix Evo Hi-Mod Utegra") ~ "Supersix Evo Hi-Mod Ultegra",
TRUE ~ model)) %>%
glimpse()
## Rows: 15,644
## Columns: 13
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…
sales_by_month <- bike_orderlines_fixed %>%
select(order_date, total_price) %>%
mutate(Month = month(order_date, label = TRUE, abbr = FALSE)) %>%
group_by(Month) %>%
summarise(Sales = sum(total_price)) %>%
ungroup() %>%
mutate(Sales = scales::dollar(Sales))
sales_by_month
## # A tibble: 12 × 2
## Month Sales
## <ord> <chr>
## 1 January $4,089,460
## 2 February $5,343,295
## 3 March $7,282,280
## 4 April $8,386,170
## 5 May $7,935,055
## 6 June $7,813,105
## 7 July $7,602,005
## 8 August $5,346,125
## 9 September $5,556,055
## 10 October $4,394,300
## 11 November $4,169,755
## 12 December $3,114,725
The shop should be focusing on the summer season, from March to July
bike_orderlines_fixed %>%
mutate(`Black Inc` = str_detect(
str_to_lower(model), "black inc")) %>%
select(`Black Inc`, total_price) %>%
group_by(`Black Inc`) %>%
summarise(`Median Orderline` = median(total_price)) %>%
ungroup() %>%
mutate(`Median Orderline` = scales::dollar(`Median Orderline`))
## # A tibble: 2 × 2
## `Black Inc` `Median Orderline`
## <lgl> <chr>
## 1 FALSE $2,880
## 2 TRUE $12,250
The Black Inc feature generates more sales than the other features
bike_orderlines_fixed %>%
mutate(`Ultegra` = str_detect(
str_to_lower(model), "ultegra")) %>%
select(`Ultegra`, total_price) %>%
group_by(`Ultegra`) %>%
summarise(`Median Orderline` = median(total_price)) %>%
ungroup() %>%
mutate(`Median Orderline` = scales::dollar(`Median Orderline`))
## # A tibble: 2 × 2
## Ultegra `Median Orderline`
## <lgl> <chr>
## 1 FALSE $3,200
## 2 TRUE $3,200
The Ultegra feature generates the same amount of sales as the other features
bike_orderlines_fixed %>%
mutate(`Disc` = str_detect(
str_to_lower(model), "disc")) %>%
select(`Disc`, total_price) %>%
group_by(`Disc`) %>%
summarise(`Median Orderline` = median(total_price)) %>%
ungroup() %>%
mutate(`Median Orderline` = scales::dollar(`Median Orderline`))
## # A tibble: 2 × 2
## Disc `Median Orderline`
## <lgl> <chr>
## 1 FALSE $3,200
## 2 TRUE $2,660
The Disc feature generates less sales as the other features
model_base <- bike_orderlines_fixed %>% select(model, category_1, category_2, price) %>%
separate(col = model,
into = str_c("model",1:6),
sep = " ",
remove = FALSE,
fill = "right",
extra = "drop") %>%
mutate(model_base = case_when(
str_detect(str_to_lower(model1), "supersix") ~ str_c(model1, model2, sep = " "),
str_detect(str_to_lower(model1), "fat") ~ str_c(model1, model2, sep = " "),
str_detect(str_to_lower(model2), "29") ~ str_c(model1, model2, sep = " "),
str_detect(str_to_lower(model1), "beast") ~ str_c(model1, model2, model3, model4, sep = " "),
str_detect(str_to_lower(model1), "bad") ~ str_c(model1, model2, sep = " "),
TRUE ~ model1)) %>%
select(category_1, category_2, model_base, price)
model_base <- model_base %>%
rename(`Category 1` = category_1) %>%
rename(`Category 2` = category_2) %>%
rename(`Model Base`= model_base)
model_base %>% group_by(`Category 1`, `Category 2`, `Model Base`) %>%
summarise(`Mean Price` = round(mean(price)),
`Min Price` = min(price),
`Max Price` = max(price)) %>%
arrange(desc(`Mean Price`)) %>%
ungroup() %>%
mutate(`Mean Price` = scales::dollar(`Mean Price`),
`Min Price` = scales::dollar(`Min Price`),
`Max Price` = scales::dollar(`Max Price`))
## `summarise()` has grouped output by 'Category 1', 'Category 2'. You can
## override using the `.groups` argument.
## # A tibble: 18 × 6
## `Category 1` `Category 2` `Model Base` `Mean Price` `Min Price` `Max Price`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Mountain Cross Country… Scalpel-Si $6,695 $3,200 $12,790
## 2 Mountain Over Mountain Jekyll $5,042 $3,200 $7,990
## 3 Road Elite Road Supersix Evo $4,978 $1,840 $12,790
## 4 Mountain Over Mountain Trigger $4,970 $3,200 $8,200
## 5 Mountain Trail Habit $4,611 $1,950 $12,250
## 6 Mountain Cross Country… F-Si $4,504 $1,840 $11,190
## 7 Mountain Cross Country… Scalpel 29 $4,499 $3,200 $6,390
## 8 Mountain Fat Bike Fat CAAD1 $3,730 $3,730 $3,730
## 9 Road Triathalon Slice $3,527 $1,950 $7,000
## 10 Road Endurance Road Synapse $3,080 $870 $9,590
## 11 Mountain Trail Bad Habit $2,954 $2,660 $3,200
## 12 Road Elite Road CAAD12 $2,926 $1,680 $5,860
## 13 Road Cyclocross SuperX $2,339 $1,750 $3,500
## 14 Mountain Trail Beast of th… $2,194 $1,620 $2,770
## 15 Mountain Fat Bike Fat CAAD2 $2,130 $2,130 $2,130
## 16 Mountain Sport Trail $1,153 $815 $1,520
## 17 Road Elite Road CAAD8 $1,136 $815 $1,410
## 18 Mountain Sport Catalyst $541 $415 $705
The bikes are priced based on the Model Base, each one have a different price range