library(tidyverse)
library(lubridate)
library(dplyr)
library(readxl)
library('scales')
#path_bike_orderlines <- "bike_orderlines.xlsx" # change to your path
#import dataset from the table bikeorderlines and fix the errors
bike_orderlines <- read_excel("bike_orderlines.xlsx")
bike_orderlines <- bike_orderlines %>%
mutate(model = case_when(
model == "CAAD Disc Ultegra" ~ "CAAD12 Disc Ultegra",
model == "Syapse Carbon Tiagra" ~ "Synapse Carbon Tiagra",
model == "Supersix Evo Hi-Mod Utegra" ~ "Supersix Evo Hi-Mod Ultegra",
TRUE ~ model
))
glimpse(bike_orderlines)
## Rows: 15,644
## Columns: 13
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…
sales_by_month <- bike_orderlines %>%
transmute(order_date, total_price,month = month(as.Date(bike_orderlines$order_date, format="%d/%m/%Y"))) %>%
group_by(month)%>%
summarise(total_price) %>%
group_by(month) %>%
summarise_at(vars(total_price),funs(sum(.,na.rm=TRUE))) %>%
rename(sale_revenue= total_price)%>%ungroup()
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
sales <- sales_by_month %>%
filter(sale_revenue == max(sale_revenue))
sales_by_month$sale_revenue <- dollar(sales_by_month$sale_revenue)
sales_by_month$month <- month.abb[sales_by_month$month]