Loading libraries, read the files
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
##
## Attaching package: 'lubridate'
##
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Correction 1
bikes_corrected <- bike_orderlines %>%
mutate(model = case_when(
model == "CAAD Disc Ultegra" ~"CAAD12 Disc Ultegra",
TRUE ~ model))
# Correction 2
bikes_corrected <- bikes_corrected %>%
mutate(model = case_when(
model == "Syapse Carbon Tiagra" ~"Synapse Carbon Tiagra",
TRUE ~ model))
# Correction 3
bikes_corrected <- bikes_corrected %>%
mutate(model = case_when(
model == "Supersix Evo Hi-Mod Utegra" ~"Supersix Evo Hi-Mod Ultegra",
TRUE ~ model))
glimpse(bikes_corrected)
## Rows: 15,644
## Columns: 13
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…
bike_sales_m_tbl <- bikes_corrected %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(Month = month(order_date, label = TRUE)) %>%
group_by(Month) %>%
summarize(Sales = sum(total_price)) %>%
mutate(Sales = scales::dollar(Sales))
bike_sales_m_tbl %>% View()
bike_sales_blackInc <- bikes_corrected %>%
mutate(BlackInc = str_detect(model, "Black Inc")) %>%
group_by(BlackInc) %>%
summarize(MeanOrderline = mean(total_price)) %>%
mutate(MeanOrderline = scales::dollar(MeanOrderline))
bike_sales_blackInc
## # A tibble: 2 × 2
## BlackInc MeanOrderline
## <lgl> <chr>
## 1 FALSE $4,037.61
## 2 TRUE $13,154.58
#Bikes that contain the 'Black Inc' feature tend to have a higher price when compared to the median price of the other products.
## Median sales by 'Ultegra' attribute
bike_sales_ultegra <- bikes_corrected %>%
mutate(Ultegra = str_detect(model, "Ultegra")) %>%
group_by(Ultegra) %>%
summarize(MeanOrderline = mean(total_price)) %>%
mutate(MeanOrderline = scales::dollar(MeanOrderline))
bike_sales_ultegra
## # A tibble: 2 × 2
## Ultegra MeanOrderline
## <lgl> <chr>
## 1 FALSE $4,601.17
## 2 TRUE $4,175.90
# Bikes that contain the 'Ultegra' feature tend to be similarly priced to the median of the other products.
# Those these bikes have a slightly lower median by comparison, with about a a 425$ difference.
## Median sales by 'Disc' attribute
bike_sales_disc <- bikes_corrected %>%
mutate(Disc = str_detect(model, "Disc")) %>%
group_by(Disc) %>%
summarize(MeanOrderline = mean(total_price)) %>%
mutate(MeanOrderline = scales::dollar(MeanOrderline))
bike_sales_disc
## # A tibble: 2 × 2
## Disc MeanOrderline
## <lgl> <chr>
## 1 FALSE $4,544.44
## 2 TRUE $4,511.86
# Bikes that contain the 'Disc' feature tend to have a similar median when compared to the other products offered.
# The difference between medians as almost insignificant.
bikes_corrected %>% select(category_1, category_2, model, total_price) %>%
mutate(Model_Base = str_extract(model, "^[a-zA-Z\\s-]+")) %>%
group_by(category_1, category_2, Model_Base) %>%
summarize(
Mean_Price = scales::dollar(mean(total_price)),
Max_Price = scales::dollar(max(total_price)),
Min_Price = scales::dollar(min(total_price))
) %>% View()
## `summarise()` has grouped output by 'category_1', 'category_2'. You can
## override using the `.groups` argument.
#As displayed, the bikes with the highest price don't necessary tend to the highest means.
#Also some models present a very large spread between the min and max price. For example the "Scapel" base model who has a spread of almost 60k between the min and max price.