1. Introduction
This report analyzes bike sales data across different categories,
frame materials, and bikeshops. We’ll explore sales patterns and
identify top-performing product categories.
2. Setup and Package Installation
# Install packages (run once if needed)
# install.packages(c("tidyverse", "readxl", "writexl", "scales", "knitr"))
# Load required libraries
library(tidyverse)
library(readxl)
library(writexl)
library(scales)
library(knitr)
3. Data Import
# Import Excel files
bikes_tbl <- read_excel("./bikes.xlsx")
bikeshops_tbl <- read_excel("./bikeshops.xlsx")
orderlines_tbl <- read_excel("./orderlines.xlsx")
# Display structure of imported data
glimpse(bikes_tbl)
## Rows: 97
## Columns: 4
## $ bike.id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ model <chr> "Supersix Evo Black Inc.", "Supersix Evo Hi-Mod Team", "Su…
## $ description <chr> "Road - Elite Road - Carbon", "Road - Elite Road - Carbon"…
## $ price <dbl> 12790, 10660, 7990, 5330, 4260, 3940, 3200, 2660, 2240, 18…
## Rows: 30
## Columns: 3
## $ bikeshop.id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ bikeshop.name <chr> "Pittsburgh Mountain Machines", "Ithaca Mountain Climber…
## $ location <chr> "Pittsburgh, PA", "Ithaca, NY", "Columbus, OH", "Detroit…
## Rows: 15,644
## Columns: 7
## $ ...1 <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "…
## $ order.id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7…
## $ order.line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2…
## $ order.date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-01-1…
## $ customer.id <dbl> 2, 2, 10, 10, 6, 6, 6, 6, 6, 22, 8, 8, 8, 8, 16, 16, 16, 1…
## $ product.id <dbl> 48, 52, 76, 52, 2, 50, 1, 4, 34, 26, 96, 66, 35, 72, 45, 3…
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1…
4. Data Joining
# Join all three datasets using pipes
bike_orderlines_joined_tbl <- orderlines_tbl %>%
left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))
# Preview joined data
head(bike_orderlines_joined_tbl) %>% kable()
| 1 |
1 |
1 |
2011-01-07 |
2 |
48 |
1 |
Jekyll Carbon 2 |
Mountain - Over Mountain - Carbon |
6070 |
Ithaca Mountain Climbers |
Ithaca, NY |
| 2 |
1 |
2 |
2011-01-07 |
2 |
52 |
1 |
Trigger Carbon 2 |
Mountain - Over Mountain - Carbon |
5970 |
Ithaca Mountain Climbers |
Ithaca, NY |
| 3 |
2 |
1 |
2011-01-10 |
10 |
76 |
1 |
Beast of the East 1 |
Mountain - Trail - Aluminum |
2770 |
Kansas City 29ers |
Kansas City, KS |
| 4 |
2 |
2 |
2011-01-10 |
10 |
52 |
1 |
Trigger Carbon 2 |
Mountain - Over Mountain - Carbon |
5970 |
Kansas City 29ers |
Kansas City, KS |
| 5 |
3 |
1 |
2011-01-10 |
6 |
2 |
1 |
Supersix Evo Hi-Mod Team |
Road - Elite Road - Carbon |
10660 |
Louisville Race Equipment |
Louisville, KY |
| 6 |
3 |
2 |
2011-01-10 |
6 |
50 |
1 |
Jekyll Carbon 4 |
Mountain - Over Mountain - Carbon |
3200 |
Louisville Race Equipment |
Louisville, KY |
5. Data Wrangling
# Clean and transform the data
bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>%
# Separate description into components
separate(description,
into = c('category_1', 'category_2', 'frame_material'),
sep = ' - ') %>%
# Separate location into city and state
separate(location,
into = c('city', 'state'),
sep = ', ',
remove = FALSE) %>%
# Calculate total price
mutate(total_price = price * quantity) %>%
# Remove unnecessary columns
select(-...1, -location) %>%
# Reorder columns for better readability
select(contains('date'), contains('id'), contains('order'),
quantity, price, total_price, everything()) %>%
# Rename columns to use underscores
rename(order_date = order.date) %>%
set_names(names(.) %>% str_replace_all("\\.", "_"))
# Save wrangled data for future use
saveRDS(bike_orderlines_wrangled_tbl, './bike_orderlines.rds')
# Preview wrangled data
glimpse(bike_orderlines_wrangled_tbl)
## Rows: 15,644
## Columns: 15
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ customer_id <dbl> 2, 2, 10, 10, 6, 6, 6, 6, 6, 22, 8, 8, 8, 8, 16, 16, 16…
## $ product_id <dbl> 48, 52, 76, 52, 2, 50, 1, 4, 34, 26, 96, 66, 35, 72, 45…
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…
6. Exploratory Data Analysis
6.1 Summary Statistics
# Calculate key metrics
summary_stats <- bike_orderlines_wrangled_tbl %>%
summarise(
Total_Orders = n(),
Total_Revenue = sum(total_price),
Average_Order_Value = mean(total_price),
Median_Order_Value = median(total_price),
Min_Order_Value = min(total_price),
Max_Order_Value = max(total_price)
) %>%
mutate(across(where(is.numeric), ~dollar(.)))
summary_stats %>%
pivot_longer(everything(), names_to = "Metric", values_to = "Value") %>%
kable(caption = "Overall Sales Summary")
Overall Sales Summary
| Total_Orders |
$15,644 |
| Total_Revenue |
$71,032,330 |
| Average_Order_Value |
$4,540.55 |
| Median_Order_Value |
$3,200 |
| Min_Order_Value |
$415 |
| Max_Order_Value |
$106,600 |
6.2 Unique Product Categories
# Display unique categories
cat("Primary Categories:\n")
## Primary Categories:
bike_orderlines_wrangled_tbl %>% distinct(category_1) %>% pull() %>% cat(sep = ", ")
## Mountain, Road
cat("\n\nSecondary Categories:\n")
##
##
## Secondary Categories:
bike_orderlines_wrangled_tbl %>% distinct(category_2) %>% pull() %>% cat(sep = ", ")
## Over Mountain, Trail, Elite Road, Endurance Road, Sport, Cross Country Race, Cyclocross, Triathalon, Fat Bike
cat("\n\nFrame Materials:\n")
##
##
## Frame Materials:
bike_orderlines_wrangled_tbl %>% distinct(frame_material) %>% pull() %>% cat(sep = ", ")
## Carbon, Aluminum
7. Sales Analysis by Category
7.1 Sales by Primary Category
sales_by_category_1 <- bike_orderlines_wrangled_tbl %>%
group_by(category_1) %>%
summarise(
Total_Sales = sum(total_price),
Number_of_Orders = n(),
Avg_Order_Value = mean(total_price)
) %>%
arrange(desc(Total_Sales)) %>%
mutate(
Total_Sales = dollar(Total_Sales),
Avg_Order_Value = dollar(Avg_Order_Value)
)
sales_by_category_1 %>%
kable(caption = "Sales Performance by Primary Category")
Sales Performance by Primary Category
| Mountain |
$39,154,735 |
8051 |
$4,863.34 |
| Road |
$31,877,595 |
7593 |
$4,198.29 |
7.2 Detailed Sales Breakdown
detailed_sales <- bike_orderlines_wrangled_tbl %>%
group_by(category_1, category_2, frame_material) %>%
summarise(
Total_Sales = sum(total_price),
Number_of_Orders = n(),
.groups = "drop"
) %>%
arrange(desc(Total_Sales)) %>%
mutate(Total_Sales = dollar(Total_Sales))
detailed_sales %>%
head(15) %>%
kable(caption = "Top 15 Product Combinations by Sales")
Top 15 Product Combinations by Sales
| Mountain |
Cross Country Race |
Carbon |
$15,906,070 |
1854 |
| Road |
Elite Road |
Carbon |
$9,696,870 |
1519 |
| Road |
Endurance Road |
Carbon |
$8,768,610 |
1644 |
| Mountain |
Over Mountain |
Carbon |
$7,571,270 |
1169 |
| Road |
Elite Road |
Aluminum |
$5,637,795 |
1953 |
| Mountain |
Trail |
Carbon |
$4,835,850 |
583 |
| Mountain |
Trail |
Aluminum |
$4,537,610 |
1437 |
| Road |
Triathalon |
Carbon |
$4,053,750 |
867 |
| Mountain |
Cross Country Race |
Aluminum |
$3,318,560 |
998 |
| Road |
Cyclocross |
Carbon |
$2,108,120 |
668 |
| Mountain |
Sport |
Aluminum |
$1,932,755 |
1716 |
| Road |
Endurance Road |
Aluminum |
$1,612,450 |
942 |
| Mountain |
Fat Bike |
Aluminum |
$1,052,620 |
294 |
8. Visualizations
8.1 Total Sales by Primary Category
bike_orderlines_wrangled_tbl %>%
group_by(category_1) %>%
summarise(Sales = sum(total_price)) %>%
ggplot(aes(x = reorder(category_1, Sales), y = Sales, fill = category_1)) +
geom_col(show.legend = FALSE) +
coord_flip() +
labs(
title = "Total Sales by Primary Category",
subtitle = "Mountain bikes lead in total revenue",
x = "Primary Category",
y = "Sales"
) +
scale_y_continuous(labels = dollar_format()) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16),
axis.text = element_text(size = 11)
)

8.2 Sales by Secondary Category
bike_orderlines_wrangled_tbl %>%
group_by(category_2) %>%
summarise(Sales = sum(total_price)) %>%
arrange(desc(Sales)) %>%
head(10) %>%
ggplot(aes(x = reorder(category_2, Sales), y = Sales, fill = category_2)) +
geom_col(show.legend = FALSE) +
coord_flip() +
labs(
title = "Top 10 Secondary Categories by Sales",
x = "Secondary Category",
y = "Sales"
) +
scale_y_continuous(labels = dollar_format()) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16),
axis.text = element_text(size = 11)
)

8.3 Sales by Frame Material
bike_orderlines_wrangled_tbl %>%
group_by(frame_material) %>%
summarise(Sales = sum(total_price)) %>%
ggplot(aes(x = reorder(frame_material, Sales), y = Sales, fill = frame_material)) +
geom_col(show.legend = FALSE) +
coord_flip() +
labs(
title = "Sales by Frame Material",
x = "Frame Material",
y = "Sales"
) +
scale_y_continuous(labels = dollar_format()) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16),
axis.text = element_text(size = 11)
)

8.4 Sales Over Time
bike_orderlines_wrangled_tbl %>%
mutate(order_month = floor_date(order_date, "month")) %>%
group_by(order_month, category_1) %>%
summarise(Sales = sum(total_price), .groups = "drop") %>%
ggplot(aes(x = order_month, y = Sales, color = category_1)) +
geom_line(size = 1) +
geom_smooth(se = FALSE, linetype = "dashed", size = 0.5) +
labs(
title = "Sales Trends Over Time by Category",
x = "Month",
y = "Sales",
color = "Category"
) +
scale_y_continuous(labels = dollar_format()) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16),
legend.position = "bottom"
)

10. Conclusions
Key Findings:
- Category Performance: Mountain bikes dominate sales
across all categories
- Frame Materials: Carbon frames command premium
prices and strong sales
- Seasonal Trends: Sales patterns show clear seasonal
variations
- Top Products: Premium models generate the highest
revenue
Recommendations:
- Focus marketing efforts on top-performing categories
- Maintain inventory for best-selling models
- Consider expanding carbon frame offerings
- Plan promotions around seasonal trends
Report generated on 2025-10-07