1. Introduction

This report analyzes bike sales data across different categories, frame materials, and bikeshops. We’ll explore sales patterns and identify top-performing product categories.

2. Setup and Package Installation

# Install packages (run once if needed)
# install.packages(c("tidyverse", "readxl", "writexl", "scales", "knitr"))

# Load required libraries
library(tidyverse)
library(readxl)
library(writexl)
library(scales)
library(knitr)

3. Data Import

# Import Excel files
bikes_tbl      <- read_excel("./bikes.xlsx")
bikeshops_tbl  <- read_excel("./bikeshops.xlsx")
orderlines_tbl <- read_excel("./orderlines.xlsx")

# Display structure of imported data
glimpse(bikes_tbl)
## Rows: 97
## Columns: 4
## $ bike.id     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ model       <chr> "Supersix Evo Black Inc.", "Supersix Evo Hi-Mod Team", "Su…
## $ description <chr> "Road - Elite Road - Carbon", "Road - Elite Road - Carbon"…
## $ price       <dbl> 12790, 10660, 7990, 5330, 4260, 3940, 3200, 2660, 2240, 18…
glimpse(bikeshops_tbl)
## Rows: 30
## Columns: 3
## $ bikeshop.id   <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ bikeshop.name <chr> "Pittsburgh Mountain Machines", "Ithaca Mountain Climber…
## $ location      <chr> "Pittsburgh, PA", "Ithaca, NY", "Columbus, OH", "Detroit…
glimpse(orderlines_tbl)
## Rows: 15,644
## Columns: 7
## $ ...1        <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "…
## $ order.id    <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7…
## $ order.line  <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2…
## $ order.date  <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-01-1…
## $ customer.id <dbl> 2, 2, 10, 10, 6, 6, 6, 6, 6, 22, 8, 8, 8, 8, 16, 16, 16, 1…
## $ product.id  <dbl> 48, 52, 76, 52, 2, 50, 1, 4, 34, 26, 96, 66, 35, 72, 45, 3…
## $ quantity    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1…

4. Data Joining

# Join all three datasets using pipes
bike_orderlines_joined_tbl <- orderlines_tbl %>% 
  left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>% 
  left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))

# Preview joined data
head(bike_orderlines_joined_tbl) %>% kable()
…1 order.id order.line order.date customer.id product.id quantity model description price bikeshop.name location
1 1 1 2011-01-07 2 48 1 Jekyll Carbon 2 Mountain - Over Mountain - Carbon 6070 Ithaca Mountain Climbers Ithaca, NY
2 1 2 2011-01-07 2 52 1 Trigger Carbon 2 Mountain - Over Mountain - Carbon 5970 Ithaca Mountain Climbers Ithaca, NY
3 2 1 2011-01-10 10 76 1 Beast of the East 1 Mountain - Trail - Aluminum 2770 Kansas City 29ers Kansas City, KS
4 2 2 2011-01-10 10 52 1 Trigger Carbon 2 Mountain - Over Mountain - Carbon 5970 Kansas City 29ers Kansas City, KS
5 3 1 2011-01-10 6 2 1 Supersix Evo Hi-Mod Team Road - Elite Road - Carbon 10660 Louisville Race Equipment Louisville, KY
6 3 2 2011-01-10 6 50 1 Jekyll Carbon 4 Mountain - Over Mountain - Carbon 3200 Louisville Race Equipment Louisville, KY

5. Data Wrangling

# Clean and transform the data
bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>% 
  # Separate description into components
  separate(description, 
           into = c('category_1', 'category_2', 'frame_material'), 
           sep = ' - ') %>% 
  # Separate location into city and state
  separate(location, 
           into = c('city', 'state'), 
           sep = ', ',
           remove = FALSE) %>% 
  # Calculate total price
  mutate(total_price = price * quantity) %>% 
  # Remove unnecessary columns
  select(-...1, -location) %>% 
  # Reorder columns for better readability
  select(contains('date'), contains('id'), contains('order'),
         quantity, price, total_price, everything()) %>% 
  # Rename columns to use underscores
  rename(order_date = order.date) %>%
  set_names(names(.) %>% str_replace_all("\\.", "_"))

# Save wrangled data for future use
saveRDS(bike_orderlines_wrangled_tbl, './bike_orderlines.rds')

# Preview wrangled data
glimpse(bike_orderlines_wrangled_tbl)
## Rows: 15,644
## Columns: 15
## $ order_date     <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id       <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ customer_id    <dbl> 2, 2, 10, 10, 6, 6, 6, 6, 6, 22, 8, 8, 8, 8, 16, 16, 16…
## $ product_id     <dbl> 48, 52, 76, 52, 2, 50, 1, 4, 34, 26, 96, 66, 35, 72, 45…
## $ order_line     <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price          <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price    <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model          <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1     <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2     <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name  <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city           <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state          <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…

6. Exploratory Data Analysis

6.1 Summary Statistics

# Calculate key metrics
summary_stats <- bike_orderlines_wrangled_tbl %>% 
  summarise(
    Total_Orders = n(),
    Total_Revenue = sum(total_price),
    Average_Order_Value = mean(total_price),
    Median_Order_Value = median(total_price),
    Min_Order_Value = min(total_price),
    Max_Order_Value = max(total_price)
  ) %>% 
  mutate(across(where(is.numeric), ~dollar(.)))

summary_stats %>% 
  pivot_longer(everything(), names_to = "Metric", values_to = "Value") %>% 
  kable(caption = "Overall Sales Summary")
Overall Sales Summary
Metric Value
Total_Orders $15,644
Total_Revenue $71,032,330
Average_Order_Value $4,540.55
Median_Order_Value $3,200
Min_Order_Value $415
Max_Order_Value $106,600

6.2 Unique Product Categories

# Display unique categories
cat("Primary Categories:\n")
## Primary Categories:
bike_orderlines_wrangled_tbl %>% distinct(category_1) %>% pull() %>% cat(sep = ", ")
## Mountain, Road
cat("\n\nSecondary Categories:\n")
## 
## 
## Secondary Categories:
bike_orderlines_wrangled_tbl %>% distinct(category_2) %>% pull() %>% cat(sep = ", ")
## Over Mountain, Trail, Elite Road, Endurance Road, Sport, Cross Country Race, Cyclocross, Triathalon, Fat Bike
cat("\n\nFrame Materials:\n")
## 
## 
## Frame Materials:
bike_orderlines_wrangled_tbl %>% distinct(frame_material) %>% pull() %>% cat(sep = ", ")
## Carbon, Aluminum

7. Sales Analysis by Category

7.1 Sales by Primary Category

sales_by_category_1 <- bike_orderlines_wrangled_tbl %>% 
  group_by(category_1) %>% 
  summarise(
    Total_Sales = sum(total_price),
    Number_of_Orders = n(),
    Avg_Order_Value = mean(total_price)
  ) %>% 
  arrange(desc(Total_Sales)) %>% 
  mutate(
    Total_Sales = dollar(Total_Sales),
    Avg_Order_Value = dollar(Avg_Order_Value)
  )

sales_by_category_1 %>% 
  kable(caption = "Sales Performance by Primary Category")
Sales Performance by Primary Category
category_1 Total_Sales Number_of_Orders Avg_Order_Value
Mountain $39,154,735 8051 $4,863.34
Road $31,877,595 7593 $4,198.29

7.2 Detailed Sales Breakdown

detailed_sales <- bike_orderlines_wrangled_tbl %>%  
  group_by(category_1, category_2, frame_material) %>% 
  summarise(
    Total_Sales = sum(total_price),
    Number_of_Orders = n(),
    .groups = "drop"
  ) %>% 
  arrange(desc(Total_Sales)) %>% 
  mutate(Total_Sales = dollar(Total_Sales))

detailed_sales %>% 
  head(15) %>% 
  kable(caption = "Top 15 Product Combinations by Sales")
Top 15 Product Combinations by Sales
category_1 category_2 frame_material Total_Sales Number_of_Orders
Mountain Cross Country Race Carbon $15,906,070 1854
Road Elite Road Carbon $9,696,870 1519
Road Endurance Road Carbon $8,768,610 1644
Mountain Over Mountain Carbon $7,571,270 1169
Road Elite Road Aluminum $5,637,795 1953
Mountain Trail Carbon $4,835,850 583
Mountain Trail Aluminum $4,537,610 1437
Road Triathalon Carbon $4,053,750 867
Mountain Cross Country Race Aluminum $3,318,560 998
Road Cyclocross Carbon $2,108,120 668
Mountain Sport Aluminum $1,932,755 1716
Road Endurance Road Aluminum $1,612,450 942
Mountain Fat Bike Aluminum $1,052,620 294

8. Visualizations

8.1 Total Sales by Primary Category

bike_orderlines_wrangled_tbl %>% 
  group_by(category_1) %>% 
  summarise(Sales = sum(total_price)) %>% 
  ggplot(aes(x = reorder(category_1, Sales), y = Sales, fill = category_1)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Total Sales by Primary Category",
    subtitle = "Mountain bikes lead in total revenue",
    x = "Primary Category",
    y = "Sales"
  ) +
  scale_y_continuous(labels = dollar_format()) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.text = element_text(size = 11)
  )

8.2 Sales by Secondary Category

bike_orderlines_wrangled_tbl %>% 
  group_by(category_2) %>% 
  summarise(Sales = sum(total_price)) %>% 
  arrange(desc(Sales)) %>% 
  head(10) %>% 
  ggplot(aes(x = reorder(category_2, Sales), y = Sales, fill = category_2)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Top 10 Secondary Categories by Sales",
    x = "Secondary Category",
    y = "Sales"
  ) +
  scale_y_continuous(labels = dollar_format()) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.text = element_text(size = 11)
  )

8.3 Sales by Frame Material

bike_orderlines_wrangled_tbl %>% 
  group_by(frame_material) %>% 
  summarise(Sales = sum(total_price)) %>% 
  ggplot(aes(x = reorder(frame_material, Sales), y = Sales, fill = frame_material)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Sales by Frame Material",
    x = "Frame Material",
    y = "Sales"
  ) +
  scale_y_continuous(labels = dollar_format()) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.text = element_text(size = 11)
  )

8.4 Sales Over Time

bike_orderlines_wrangled_tbl %>% 
  mutate(order_month = floor_date(order_date, "month")) %>% 
  group_by(order_month, category_1) %>% 
  summarise(Sales = sum(total_price), .groups = "drop") %>% 
  ggplot(aes(x = order_month, y = Sales, color = category_1)) +
  geom_line(size = 1) +
  geom_smooth(se = FALSE, linetype = "dashed", size = 0.5) +
  labs(
    title = "Sales Trends Over Time by Category",
    x = "Month",
    y = "Sales",
    color = "Category"
  ) +
  scale_y_continuous(labels = dollar_format()) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    legend.position = "bottom"
  )

9. Top Performing Products

top_models <- bike_orderlines_wrangled_tbl %>% 
  group_by(model) %>% 
  summarise(
    Total_Sales = sum(total_price),
    Units_Sold = sum(quantity)
  ) %>% 
  arrange(desc(Total_Sales)) %>% 
  head(10) %>% 
  mutate(Total_Sales = dollar(Total_Sales))

top_models %>% 
  kable(caption = "Top 10 Best-Selling Bike Models")
Top 10 Best-Selling Bike Models
model Total_Sales Units_Sold
Scalpel-Si Black Inc. $2,148,720 168
Supersix Evo Black Inc. $2,059,190 161
F-Si Black Inc. $1,879,920 168
Supersix Evo Hi-Mod Team $1,822,860 171
Habit Hi-Mod Black Inc. $1,813,000 148
Scalpel-Si Race $1,657,980 183
Synapse Hi-Mod Disc Black Inc. $1,390,550 145
F-Si Hi-Mod Team $1,377,120 152
Jekyll Carbon 1 $1,358,300 170
Trigger Carbon 1 $1,344,800 164

10. Conclusions

Key Findings:

  1. Category Performance: Mountain bikes dominate sales across all categories
  2. Frame Materials: Carbon frames command premium prices and strong sales
  3. Seasonal Trends: Sales patterns show clear seasonal variations
  4. Top Products: Premium models generate the highest revenue

Recommendations:

  • Focus marketing efforts on top-performing categories
  • Maintain inventory for best-selling models
  • Consider expanding carbon frame offerings
  • Plan promotions around seasonal trends

Report generated on 2025-10-07