Load Libraries

library(tidyverse)
library(readxl)
library(lubridate)

Data Import and Wrangling

# Import data files
bikes_tbl <- read_excel("./bikes.xlsx")
bikeshops_tbl <- read_excel("./bikeshops.xlsx")
orderlines_tbl <- read_excel("./orderlines.xlsx")

# Join and wrangle data
bike_orderlines_wrangled_tbl <- orderlines_tbl %>%
  left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>% 
  left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>%
  separate(description, 
           into = c('category_1', 'category_2', 'frame_material'), 
           sep = ' - ') %>% 
  separate(location, 
           into = c('city', 'state'), 
           sep = ', ',
           remove = TRUE) %>% 
  mutate(total_price = price * quantity) %>% 
  select(order.date, product.id, customer.id, order.id, order.line,
         quantity, price, total_price, 
         everything()) %>% 
  rename(order_date = order.date,
         product_id = product.id,
         customer_id = customer.id,
         order_id = order.id,
         order_line = order.line)

Monthly Sales

bike_sales_m <- bike_orderlines_wrangled_tbl %>% 
  mutate(order_date = ymd(order_date)) %>% 
  mutate(year_month = floor_date(order_date, unit = "month")) %>%
  group_by(year_month) %>% 
  summarise(sales = sum(total_price)) %>% 
  ungroup()
bike_sales_y <- bike_orderlines_wrangled_tbl %>%
  mutate(order_date = ymd(order_date)) %>% 
  mutate(year = year(order_date)) %>% 
  group_by(year) %>% 
  summarise(sales = sum(total_price)) %>% 
  ungroup()

bike_sales_growth <- bike_sales_y %>% 
  mutate(sales_lag_1 = lag(sales, n = 1)) %>% 
  mutate(sales_lag_1 = ifelse(is.na(sales_lag_1), sales, sales_lag_1)) %>% 
  mutate(diff_1 = sales - sales_lag_1,
         pct_diff_1 = diff_1 / sales_lag_1)

knitr::kable(bike_sales_growth %>% 
               select(year, sales, diff_1, pct_diff_1) %>%
               mutate(sales = scales::dollar(sales),
                      diff_1 = scales::dollar(diff_1),
                      pct_diff_1 = scales::percent(pct_diff_1)),
             col.names = c("Year", "Sales", "Change ($)", "Change (%)"),
             align = 'lrrr')
Year Sales Change ($) Change (%)
2011 $11,292,885 $0 0.0%
2012 $12,163,075 $870,190 7.7%
2013 $16,480,775 $4,317,700 35.5%
2014 $13,924,085 -$2,556,690 -15.5%
2015 $17,171,510 $3,247,425 23.3%

Sales by Second Category

revenue_by_category2_tbl <- bike_orderlines_wrangled_tbl %>% 
  group_by(category_2) %>% 
  summarise(revenue = sum(total_price)) %>% 
  ungroup()

Category Revenue Comparison

revenue_by_category2_tbl %>% 
  mutate(category_2 = fct_reorder(category_2, revenue)) %>% 
  ggplot(aes(x = category_2, y = revenue)) +
  geom_col(fill = "#2C3E50", alpha = 0.9) +
  coord_flip() +
  scale_y_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Revenue by Bike Category",
    subtitle = "Total revenue across all bike shops",
    x = "category_2",
    y = "Revenue",
    caption = "Data from 2011-2015"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold", color = "#2C3E50"),
    plot.subtitle = element_text(size = 12, color = "#666666"),
    axis.title.x = element_text(size = 11, face = "bold"),
    axis.title.y = element_text(size = 11, face = "bold"),
    axis.text.y = element_text(size = 10),
    axis.text.x = element_text(size = 10),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(color = "#E5E5E5")
  )