Libraries

library(readxl)
library(tidyverse)
library(lubridate)

Data Import

bikes_tbl <- read_excel("./bikes.xlsx")
bikeshops_tbl <- read_excel("./bikeshops.xlsx")
orderlines_tbl <- read_excel("./orderlines.xlsx")

Data Joining and Wrangling

bike_orderlines_wrangled_tbl <- orderlines_tbl %>% 
  left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>% 
  left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>% 
  separate(description, 
           into = c('category.1', 'category.2', 'frame.material'), 
           sep  = ' - ') %>% 
  separate(location, 
           into = c('city', 'state'), 
           sep  = ', ',
           remove = FALSE) %>% 
  mutate(total.price = price * quantity) %>% 
  select(-...1, -location) %>% 
  select(contains('date'), contains('id'), 
         contains('order'), 
         quantity, price, total.price, 
         everything()) %>% 
  rename(order_date = order.date) %>%
  set_names(names(.) %>% str_replace_all("\\.", "_"))

# Display first few rows
head(bike_orderlines_wrangled_tbl)
## # A tibble: 6 × 15
##   order_date          order_id customer_id product_id order_line quantity price
##   <dttm>                 <dbl>       <dbl>      <dbl>      <dbl>    <dbl> <dbl>
## 1 2011-01-07 00:00:00        1           2         48          1        1  6070
## 2 2011-01-07 00:00:00        1           2         52          2        1  5970
## 3 2011-01-10 00:00:00        2          10         76          1        1  2770
## 4 2011-01-10 00:00:00        2          10         52          2        1  5970
## 5 2011-01-10 00:00:00        3           6          2          1        1 10660
## 6 2011-01-10 00:00:00        3           6         50          2        1  3200
## # ℹ 8 more variables: total_price <dbl>, model <chr>, category_1 <chr>,
## #   category_2 <chr>, frame_material <chr>, bikeshop_name <chr>, city <chr>,
## #   state <chr>

Revenue by Category 2

# Bar plot
# Generate total sales by category 2
revenue_by_category2_tbl <- bike_orderlines_wrangled_tbl %>% 
  select(category_2, total_price) %>% 
  group_by(category_2) %>% 
  summarise(revenue = sum(total_price)) %>% 
  ungroup()

# bar plot
revenue_by_category2_tbl %>% 
  mutate(category_2 = category_2 %>% as_factor() %>% fct_reorder(revenue)) %>% 
  ggplot(aes(category_2, revenue)) +
  geom_col(fill = "darkgreen") +
  coord_flip()