library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(readxl)
library(dplyr)
library(formattable)
library(scales)
##
## Attaching package: 'scales'
## The following objects are masked from 'package:formattable':
##
## comma, percent, scientific
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
bike_orderlines_tbl <- read_excel("bike_orderlines.xlsx")
#lubridate: Which month has the highest bike sales? (Difficulty = Medium)
for(i in length(bike_orderlines_tbl$order_date))
bike_orderlines_tbl$month1 = c(factor(format(bike_orderlines_tbl$order_date, '%B')))
month2 <- unique(bike_orderlines_tbl$month1)
Sales <- c()
Month <- c()
Month <- as.factor(Month)
for(i in 1:length(month2))
{
result <- 0
bike_orderlines_tbl2 <- dplyr::filter(bike_orderlines_tbl, month1==month2[i])
for (j in 1:length(bike_orderlines_tbl2$total_price))
result <- result+bike_orderlines_tbl2$total_price[j]
Sales <- c(Sales, result)
Month <- c(Month, month2[i])
}
bike_orderline <- data.frame(Month, Sales)
bike_orderline$Sales <- currency(bike_orderline$Sales, digits = 0L)
arrange(bike_orderline, desc(Sales))
## Month Sales
## 1 April $8,386,170
## 2 May $7,935,055
## 3 June $7,813,105
## 4 July $7,602,005
## 5 March $7,282,280
## 6 September $5,556,055
## 7 August $5,346,125
## 8 February $5,343,295
## 9 October $4,394,300
## 10 November $4,169,755
## 11 January $4,089,460
## 12 December $3,114,725
"=>Month has the highest bike sales is April"
## [1] "=>Month has the highest bike sales is April"
#stringr: What is the median orderline sales value by Bike Attribute? (Difficulty = Medium)
#Evaluate "Black Inc". What does this tell us about the "Black Inc" feature?
bike_orderlines_tbl %>%
select(model, total_price) %>%
mutate(black = model %>% str_to_lower() %>% str_detect("black inc")) %>%
group_by(black) %>%
summarize(median = median(total_price)) %>%
ungroup() %>%
mutate(median = median %>% scales::dollar()) %>%
rename(`Black Inc` = black, `Median Orderline` = median)
## # A tibble: 2 × 2
## `Black Inc` `Median Orderline`
## <lgl> <chr>
## 1 FALSE $2,880
## 2 TRUE $12,250
#Evaluate "Ultegra". What does this tell us about the "Ultegra" feature?
bike_orderlines_tbl %>%
select(model, total_price) %>%
mutate(ultegra = model %>% str_to_lower() %>% str_detect("ultegra")) %>%
group_by(ultegra) %>%
summarize(median = median(total_price)) %>%
ungroup() %>%
mutate(median = median %>% scales::dollar()) %>%
rename(`Ultegra` = ultegra, `Median Orderline` = median)
## # A tibble: 2 × 2
## Ultegra `Median Orderline`
## <lgl> <chr>
## 1 FALSE $3,200
## 2 TRUE $3,200
#Evaluate "Disc" option. *What does this tell us about the "Disc" feature?
bike_orderlines_tbl %>%
select(model, total_price) %>%
mutate(disc = model %>% str_to_lower() %>% str_detect("disc")) %>%
group_by(disc) %>%
summarize(median = median(total_price)) %>%
ungroup() %>%
mutate(median = median %>% scales::dollar()) %>%
rename(`Disc` = disc, `Median Orderline` = median)
## # A tibble: 2 × 2
## Disc `Median Orderline`
## <lgl> <chr>
## 1 FALSE $3,200
## 2 TRUE $2,660
#stringr: What are the average, min, and max prices by Base Model? (Difficulty = High)
bike_orderlines_tbl %>%
select(category_1, category_2, model, price) %>%
separate(model,
into = str_c("model_", 1:7),
sep = " ",
remove = T,
fill = "right",
extra = "drop") %>%
mutate(model_base = case_when(
# fix - supersix evo
str_detect(str_to_lower(model_1), "supersix") ~ str_c(model_1, model_2, sep = " "),
# fix - beast of the east
str_detect(str_to_lower(model_1), "beast") ~ str_c(model_1, model_2, model_3, model_4, sep = " "),
# fix - fat CAAD
str_detect(str_to_lower(model_1), "fat") ~ str_c(model_1, model_2, sep = " "),
# fix - bad habit
str_detect(str_to_lower(model_1), "bad") ~ str_c(model_1, model_2, sep = " "),
# fix - scalpel 29
str_detect(str_to_lower(model_2), "29") ~ str_c(model_1, model_2, sep = " "),
# catch-all
TRUE ~ model_1)) %>%
mutate(model_1 = model_1 %>% str_trim()) %>%
select(category_1, category_2, model_base, price) %>%
group_by(category_1, category_2,model_base) %>%
summarize(mean = round(mean(price),0) %>% scales::dollar(),
min = min(price) %>% scales::dollar(),
max = max(price) %>% scales::dollar()) %>%
ungroup() %>%
arrange(desc(mean)) %>%
rename(
`Category 1` = category_1,
`Category 2` = category_2,
`Model Base` = model_base,
`Average Price` = mean,
`Min Price` = min,
`Max Price` = max)
## `summarise()` has grouped output by 'category_1', 'category_2'. You can override using the `.groups` argument.
## # A tibble: 20 × 6
## `Category 1` `Category 2` `Model Base` `Average Price` `Min Price`
## <chr> <chr> <chr> <chr> <chr>
## 1 Mountain Cross Country Race Scalpel-Si $6,695 $3,200
## 2 Mountain Sport Catalyst $541 $415
## 3 Mountain Over Mountain Jekyll $5,042 $3,200
## 4 Road Elite Road Supersix Evo $4,978 $1,840
## 5 Mountain Over Mountain Trigger $4,970 $3,200
## 6 Mountain Trail Habit $4,611 $1,950
## 7 Mountain Cross Country Race F-Si $4,504 $1,840
## 8 Mountain Cross Country Race Scalpel 29 $4,499 $3,200
## 9 Mountain Fat Bike Fat CAAD1 $3,730 $3,730
## 10 Road Triathalon Slice $3,527 $1,950
## 11 Road Endurance Road Synapse $3,168 $870
## 12 Road Elite Road CAAD12 $2,978 $1,680
## 13 Mountain Trail Bad Habit $2,954 $2,660
## 14 Road Elite Road CAAD $2,660 $2,660
## 15 Road Cyclocross SuperX $2,339 $1,750
## 16 Mountain Trail Beast of the East $2,194 $1,620
## 17 Mountain Fat Bike Fat CAAD2 $2,130 $2,130
## 18 Road Endurance Road Syapse $1,840 $1,840
## 19 Mountain Sport Trail $1,153 $815
## 20 Road Elite Road CAAD8 $1,136 $815
## # … with 1 more variable: Max Price <chr>