Bike Sales Analysis - Time Series by Category

Temuulen Sukhbat

2025-12-02

Sales Trends Analysis

This report analyzes monthly sales trends across different bike categories from 2011-2015.

Load Libraries

# Load required libraries
library(tidyverse)
library(tidyquant)
library(lubridate)
library(readxl)
library(readr)

Import Data

# Import bikes data from Excel
bikes_tbl <- read_excel("bikes.xlsx")

# Import bikeshops data from Excel
bikeshops_tbl <- read_excel("bikeshops.xlsx")

# Import orderlines data from Excel
orderlines_tbl <- read_excel("orderlines.xlsx")

Data Wrangling

# Join tables together
bike_orderlines_joined_tbl <- orderlines_tbl %>%
  left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>% 
  left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))

# Clean and transform the data
bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>% 
  # Split description column
  separate(col = description, 
           into = c('cat_1', 'cat_2', 'frame_mat'), 
           sep = ' - ') %>% 
  # Split location column
  separate(col = location, 
           into = c('city', 'state'), 
           sep = ', ',
           remove = FALSE) %>% 
  # Calculate total price
  mutate(total_price = price * quantity) %>% 
  # Drop unnecessary columns
  select(-...1, -location) %>% 
  # Reorganize columns
  select(contains('date'), contains('id'), 
         contains('order'), 
         quantity, price, total_price, 
         everything()) %>% 
  # Rename order date column
  rename(order_date = order.date) %>%
  # Replace periods with underscores in all column names
  set_names(names(.) %>% str_replace_all("\\.", "_"))

Prepare Data

# Aggregate sales by category and month
sales_by_cat_month <- bike_orderlines_wrangled_tbl %>%
  select(order_date, cat_2, total_price) %>%
  # Parse dates
  mutate(order_date = ymd(order_date)) %>%
  # Floor to beginning of month
  mutate(date_month = floor_date(order_date, unit = "month")) %>%
  # Aggregate by category and month
  group_by(cat_2, date_month) %>%
  summarise(revenue = sum(total_price), .groups = 'drop')

# Display first few rows
head(sales_by_cat_month)
## # A tibble: 6 × 3
##   cat_2              date_month revenue
##   <chr>              <date>       <dbl>
## 1 Cross Country Race 2011-01-01  143660
## 2 Cross Country Race 2011-02-01  324400
## 3 Cross Country Race 2011-03-01  142000
## 4 Cross Country Race 2011-04-01  498580
## 5 Cross Country Race 2011-05-01  220310
## 6 Cross Country Race 2011-06-01  364420

Sales Trends by Bike Category

# Create multi-panel time series plot
sales_by_cat_month %>%
  ggplot(aes(x = date_month, y = revenue, color = cat_2)) +
  
  # Add data points and connecting lines
  geom_point(size = 2) +
  geom_line(linewidth = 0.8) +
  
  # Split into separate facets by category
  facet_wrap(~ cat_2, scales = "free_y", ncol = 3) +
  
  # Scale y-axis to millions of dollars
  scale_y_continuous(labels = scales::dollar_format(scale = 1e-6, suffix = "M")) +
  
  # Format x-axis with yearly labels
  scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
  
  # Use tidyquant color scheme
  scale_color_tq() +
  
  # Apply clean theme
  theme_tq() +
  
  # Additional theme customizations
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1),
    strip.text = element_text(face = "bold", size = 11),
    plot.title = element_text(face = "bold", size = 15, hjust = 0.5)
  ) +
  
  # Add descriptive labels
  labs(
    title = "Monthly Sales Trends Across Bike Categories (2011-2015)",
    x = "Month",
    y = "Revenue (Millions)",
    caption = "Data aggregated by secondary bike category"
  )