Load Libraries

library(tidyverse)
library(tidyquant)
library(readxl)
library(lubridate)

Data Import and Wrangling

# Import data files
bikes_tbl <- read_excel("bikes.xlsx")
bikeshops_tbl <- read_excel("bikeshops.xlsx")
orderlines_tbl <- read_excel("orderlines.xlsx")

# Join and wrangle data
bike_orderlines_wrangled_tbl <- left_join(orderlines_tbl, bikes_tbl, by = c("product.id" = "bike.id")) %>% 
  left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>%
  
  # Wrangling data: decompose description into three columns
  separate(description, 
           into = c('category.1', 'category.2', 'frame.material'), 
           sep  = ' - ') %>% 
  separate(location, 
           into = c('city', 'state'), 
           sep  = ', ',
           remove = FALSE) %>% 
  
  # Create calculated columns 
  mutate(total.price = price * quantity) %>% 
  
  # Reorganize columns                                  
  select(-...1, -location) %>% 
  
  # Reorder columns                                  
  select(contains('date'), contains('id'), 
         contains('order'), 
         quantity, price, total.price, 
         everything()) %>% 
  
  # Rename columns
  rename(order_date = order.date) %>%
  set_names(names(.) %>% str_replace_all("\\.", "_"))

Sales Analysis by Category

Time Series by Category 2 (Monthly)

bike_sales_category_m <- bike_orderlines_wrangled_tbl %>% 
  select(order_date, category_2, total_price) %>% 
  mutate(order_date = ymd(order_date)) %>% 
  mutate(year_month = floor_date(order_date, unit = "month")) %>%
  group_by(year_month, category_2) %>% 
  summarise(sales = sum(total_price), .groups = 'drop')

Additional Analysis

Monthly Sales (Overall)

bike_sales_m <- bike_orderlines_wrangled_tbl %>% 
  select(order_date, total_price) %>% 
  mutate(order_date = ymd(order_date)) %>% 
  mutate(year_month = floor_date(order_date, unit = "month")) %>%
  group_by(year_month) %>% 
  summarise(sales = sum(total_price), .groups = 'drop')

Yearly Sales and Growth

bike_sales_y <- bike_orderlines_wrangled_tbl %>%
  select(order_date, total_price) %>% 
  mutate(order_date = ymd(order_date)) %>% 
  mutate(year = year(order_date)) %>% 
  group_by(year) %>% 
  summarise(sales = sum(total_price)) %>% 
  ungroup()

# Calculate percentage change
calculate_pct_diff <- function(data){
  data %>% 
    mutate(sales_lag_1 = lag(sales, n = 1)) %>% 
    mutate(sales_lag_1 = case_when(
      is.na(sales_lag_1) ~ sales,
      TRUE ~ sales_lag_1
    )) %>% 
    mutate(diff_1 = sales - sales_lag_1) %>% 
    mutate(pct_diff_1 = diff_1 / sales_lag_1) %>% 
    mutate(pct_diff_1_chr = scales::percent(pct_diff_1))
}

bike_sales_growth <- calculate_pct_diff(bike_sales_y)

knitr::kable(bike_sales_growth %>% 
               select(year, sales, diff_1, pct_diff_1_chr) %>%
               mutate(sales = scales::dollar(sales),
                      diff_1 = scales::dollar(diff_1)),
             col.names = c("Year", "Sales", "Change ($)", "Change (%)"),
             align = 'lrrr',
             caption = "Year-over-Year Sales Growth")
Year-over-Year Sales Growth
Year Sales Change ($) Change (%)
2011 $11,292,885 $0 0.0%
2012 $12,163,075 $870,190 7.7%
2013 $16,480,775 $4,317,700 35.5%
2014 $13,924,085 -$2,556,690 -15.5%
2015 $17,171,510 $3,247,425 23.3%

Revenue by Category 2

revenue_by_category2_tbl <- bike_orderlines_wrangled_tbl %>% 
  select(category_2, total_price) %>% 
  group_by(category_2) %>% 
  summarise(revenue = sum(total_price), .groups = 'drop')

Revenue Comparison (Bar Chart)

revenue_by_category2_tbl %>% 
  mutate(category_2 = category_2 %>% as_factor() %>% fct_reorder(revenue)) %>% 
  ggplot(aes(category_2, revenue)) +
  geom_col(fill = "#2C3E50", alpha = 0.9) +
  coord_flip() +
  scale_y_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Revenue by Bike Category",
    subtitle = "Total revenue across all periods",
    x = "Category",
    y = "Revenue"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    axis.text = element_text(size = 10)
  )

Yearly Revenue Trend

bike_sales_y %>% 
  ggplot(aes(x = year, y = sales, color = sales)) +
  geom_point(size = 5) + 
  geom_line(linewidth = 2) + 
  geom_smooth(method = "lm", formula = 'y ~ x', se = FALSE) +
  expand_limits(y = c(0, 20e6)) + 
  scale_colour_continuous(low = "red", high = "black", 
                          labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) +
  scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) + 
  labs(
    title = "Revenue Trend Over Time",
    subtitle = "Sales are trending up and to the right!",
    x = "Year", 
    y = "Sales (Millions)",
    color = "Rev ($M)",
    caption = "Total sales from 2011 to 2015"
  ) +
  theme_tq()

Summary

This analysis reveals distinct sales patterns across bike categories from 2011-2015:

  • Cross Country Race and Elite Road are the top-performing categories with significant revenue
  • Most categories show seasonal fluctuations in sales
  • Overall revenue trend is positive across the analysis period