library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.1.8
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(lubridate)
bike_orderline_table <- readRDS("C:\\Users\\emman\\Downloads\\bike_orderlines(1).rds")
bike_orderline_table
## # A tibble: 15,644 × 13
## order_date order_id order_line quantity price total_…¹ model categ…²
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 2011-01-07 00:00:00 1 1 1 6070 6070 Jeky… Mounta…
## 2 2011-01-07 00:00:00 1 2 1 5970 5970 Trig… Mounta…
## 3 2011-01-10 00:00:00 2 1 1 2770 2770 Beas… Mounta…
## 4 2011-01-10 00:00:00 2 2 1 5970 5970 Trig… Mounta…
## 5 2011-01-10 00:00:00 3 1 1 10660 10660 Supe… Road
## 6 2011-01-10 00:00:00 3 2 1 3200 3200 Jeky… Mounta…
## 7 2011-01-10 00:00:00 3 3 1 12790 12790 Supe… Road
## 8 2011-01-10 00:00:00 3 4 1 5330 5330 Supe… Road
## 9 2011-01-10 00:00:00 3 5 1 1570 1570 Syna… Road
## 10 2011-01-11 00:00:00 4 1 1 4800 4800 Syna… Road
## # … with 15,634 more rows, 5 more variables: category_2 <chr>,
## # frame_material <chr>, bikeshop_name <chr>, city <chr>, state <chr>, and
## # abbreviated variable names ¹total_price, ²category_1
#Question 1
bike_orderline_table <- bike_orderline_table %>%
mutate(model = case_when(
model == "CAAD Disk Ultegra" ~ "CAAD12 Disc Ultegra",
model == "Syapse Carbon Tiagra" ~ "Synapse Carbon Tiagra",
model == "Supersix Evo Hi-Mod Utegra" ~ "Supersix Evo Hi-Mod Ultegra",
TRUE ~ model
))
glimpse(bike_orderline_table)
## Rows: 15,644
## Columns: 13
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…
#Question 2
bike_orderline_table <- bike_orderline_table %>%
mutate(order_date = as.Date(order_date),
month = month(order_date, label = TRUE, abbr = FALSE))
monthly_sales <- bike_orderline_table %>%
group_by(month) %>%
summarize(total_sales = sum(total_price)) %>%
mutate(total_sales = total_sales %>% scales::dollar())
month_with_highest_sales <- monthly_sales %>%
filter(total_sales == max(total_sales)) %>%
select(month, total_sales)
result <- paste("Month with the highest sales:", month_with_highest_sales$month, "with total sales:", month_with_highest_sales$total_sales)
result
## [1] "Month with the highest sales: April with total sales: $8,386,170"
#Question 3
bike_orderline_table %>%
select(model, total_price) %>%
mutate(black = model %>% str_detect("Black Inc")) %>%
group_by(black) %>%
summarize(median = median(total_price)) %>%
ungroup() %>%
mutate(median = median %>% scales::dollar()) %>%
rename('Black Inc' = black,
'Median Orderline' = median)
## # A tibble: 2 × 2
## `Black Inc` `Median Orderline`
## <lgl> <chr>
## 1 FALSE $2,880
## 2 TRUE $12,250
bike_orderline_table %>%
select(model, total_price) %>%
mutate(ultegra = model %>% str_detect("Ultegra")) %>%
group_by(ultegra) %>%
summarize(median = median(total_price)) %>%
ungroup() %>%
mutate(median = median %>% scales::dollar()) %>%
rename('Ultegra' = ultegra,
'Median Orderline' = median)
## # A tibble: 2 × 2
## Ultegra `Median Orderline`
## <lgl> <chr>
## 1 FALSE $3,200
## 2 TRUE $3,200
bike_orderline_table %>%
select(model, total_price) %>%
mutate(disc = model %>% str_detect("Disc")) %>%
group_by(disc) %>%
summarize(median = median(total_price)) %>%
ungroup() %>%
mutate(median = median %>% scales::dollar()) %>%
rename('Disc' = disc,
'Median Orderline' = median)
## # A tibble: 2 × 2
## Disc `Median Orderline`
## <lgl> <chr>
## 1 FALSE $3,200
## 2 TRUE $2,660
#Question 4
bike_orderline_table %>%
select(model, category_1, category_2, price) %>%
separate(model,
into = str_c("model_", 1:7),
sep = " ",
extra = 'drop') %>%
mutate(model_base = case_when(
str_detect(str_to_lower(model_1), "supersix") ~ str_c(model_1, model_2, sep=" "),
str_detect(str_to_lower(model_1), "fat") ~ str_c(model_1, model_2, sep=" "),
str_detect(str_to_lower(model_1), "bad") ~ str_c(model_1, model_2, sep=" "),
str_detect(str_to_lower(model_1), "beast") ~ str_c(model_1, model_2, model_3, model_4, sep=" "),
str_detect(str_to_lower(model_2), "29") ~ str_c(model_1, model_2, sep=" "),
TRUE ~ model_1)
) %>%
mutate(model_1 = model_1 %>% str_trim()) %>%
select(category_1, category_2, model_base, price) %>%
group_by(category_1, category_2,model_base) %>%
summarize(mean = round(mean(price),0) %>% scales::dollar(),
min = min(price) %>% scales::dollar(),
max = max(price) %>% scales::dollar()) %>%
ungroup() %>%
arrange(desc(mean)) %>%
rename(
`Category 1` = category_1,
`Category 2` = category_2,
`Model Base` = model_base,
`Mean Price` = mean,
`Min Price` = min,
`Max Price` = max
)
## Warning: Expected 7 pieces. Missing pieces filled with `NA` in 15644 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## `summarise()` has grouped output by 'category_1', 'category_2'. You can
## override using the `.groups` argument.
## # A tibble: 19 × 6
## `Category 1` `Category 2` `Model Base` Mean Pric…¹ Min P…² Max P…³
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Mountain Cross Country Race Scalpel-Si $6,695 $3,200 $12,790
## 2 Mountain Sport Catalyst $541 $415 $705
## 3 Mountain Over Mountain Jekyll $5,042 $3,200 $7,990
## 4 Road Elite Road Supersix Evo $4,978 $1,840 $12,790
## 5 Mountain Over Mountain Trigger $4,970 $3,200 $8,200
## 6 Mountain Trail Habit $4,611 $1,950 $12,250
## 7 Mountain Cross Country Race F-Si $4,504 $1,840 $11,190
## 8 Mountain Cross Country Race Scalpel 29 $4,499 $3,200 $6,390
## 9 Mountain Fat Bike Fat CAAD1 $3,730 $3,730 $3,730
## 10 Road Triathalon Slice $3,527 $1,950 $7,000
## 11 Road Endurance Road Synapse $3,080 $870 $9,590
## 12 Road Elite Road CAAD12 $2,978 $1,680 $5,860
## 13 Mountain Trail Bad Habit $2,954 $2,660 $3,200
## 14 Road Elite Road CAAD $2,660 $2,660 $2,660
## 15 Road Cyclocross SuperX $2,339 $1,750 $3,500
## 16 Mountain Trail Beast of the East $2,194 $1,620 $2,770
## 17 Mountain Fat Bike Fat CAAD2 $2,130 $2,130 $2,130
## 18 Mountain Sport Trail $1,153 $815 $1,520
## 19 Road Elite Road CAAD8 $1,136 $815 $1,410
## # … with abbreviated variable names ¹`Mean Price`, ²`Min Price`, ³`Max Price`
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.