## Executive Summary
This analysis examines bike sales trends across nine distinct product categories from 2011 to 2015. The data reveals monthly sales patterns, seasonal variations, and category-specific performance metrics that provide insights into market dynamics and customer preferences.
Key Findings:
## Load Libraries and Data
library(tidyverse)
library(tidyquant)
library(lubridate)
library(readxl)
library(scales)
# Load data - adjust path if needed
# Option 1: If RDS file exists
if (file.exists('bike_orderlines.rds')) {
bike_orderlines_wrangled_tbl <- readRDS('bike_orderlines.rds')
} else {
# Option 2: Create from Excel files
bikes_tbl <- read_excel("bikes.xlsx")
bikeshops_tbl <- read_excel("bikeshops.xlsx")
orderlines_tbl <- read_excel("orderlines.xlsx")
bike_orderlines_wrangled_tbl <- left_join(orderlines_tbl, bikes_tbl,
by = c("product.id" = "bike.id")) %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>%
separate(description,
into = c('category.1', 'category.2', 'frame.material'),
sep = ' - ') %>%
separate(location,
into = c('city', 'state'),
sep = ', ',
remove = FALSE) %>%
mutate(total.price = price * quantity) %>%
select(-...1, -location) %>%
select(contains('date'), contains('id'),
contains('order'),
quantity, price, total.price,
everything()) %>%
rename(order_date = order.date) %>%
set_names(names(.) %>% str_replace_all("\\.", "_"))
}
# Display data summary
cat("Total Records:", nrow(bike_orderlines_wrangled_tbl), "\n")## Total Records: 15644
cat("Date Range:", format(min(ymd(bike_orderlines_wrangled_tbl$order_date)), "%b %Y"),
"to", format(max(ymd(bike_orderlines_wrangled_tbl$order_date)), "%b %Y"), "\n")## Date Range: Jan 2011 to Dec 2015
## Total Revenue: $71,032,330
## Sales Overview by Category
# Summary statistics by category
category_summary <- bike_orderlines_wrangled_tbl %>%
group_by(category_2) %>%
summarise(
Total_Revenue = sum(total_price),
Avg_Monthly_Sales = mean(total_price),
Total_Units = sum(quantity),
.groups = 'drop'
) %>%
arrange(desc(Total_Revenue)) %>%
mutate(
Revenue_Pct = Total_Revenue / sum(Total_Revenue),
Total_Revenue = dollar(Total_Revenue),
Avg_Monthly_Sales = dollar(Avg_Monthly_Sales)
)
category_summary %>%
mutate(Revenue_Pct = percent(Revenue_Pct, accuracy = 0.1)) %>%
rename(
`Category` = category_2,
`Total Revenue` = Total_Revenue,
`Avg Monthly Sales` = Avg_Monthly_Sales,
`Total Units Sold` = Total_Units,
`% of Total Revenue` = Revenue_Pct
)## Monthly Sales Trends by Bike Category
The following visualization displays monthly sales patterns for each bike category. Each panel represents a distinct product category, allowing for easy comparison of trends, seasonality, and overall performance.
# Prepare data for time series by category_2
sales_by_category2_time <- bike_orderlines_wrangled_tbl %>%
select(order_date, category_2, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_month = floor_date(order_date, unit = "month")) %>%
group_by(year_month, category_2) %>%
summarise(sales = sum(total_price), .groups = 'drop') %>%
ungroup()
# Create the faceted time series plot
sales_by_category2_time %>%
ggplot(aes(x = year_month, y = sales, color = category_2)) +
# Add line and points
geom_line(linewidth = 1.2) +
geom_point(size = 2.5) +
# Facet by category_2
facet_wrap(~ category_2, scales = "free_y", ncol = 3) +
# Format y-axis as currency in millions
scale_y_continuous(labels = scales::dollar_format(scale = 1e-6, suffix = "M")) +
# Use color palette
scale_color_tq() +
# Labels
labs(
x = "Order Date",
y = "Monthly Sales",
caption = "Data Source: Bike Sales Orders (2011-2015) | Each panel shows independent y-axis scale"
) +
# Theme
theme_tq() +
theme(
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 10),
strip.background = element_rect(fill = "#2c3e50", color = "#2c3e50"),
strip.text = element_text(color = "white", face = "bold", size = 12),
axis.text.x = element_text(angle = 0, hjust = 0.5, size = 9),
axis.title = element_text(size = 11, face = "bold"),
panel.spacing = unit(1.2, "lines"),
plot.caption = element_text(hjust = 0, face = "italic", size = 9, color = "gray40")
)## Category Performance Comparison
# Total sales by category
bike_orderlines_wrangled_tbl %>%
group_by(category_2) %>%
summarise(total_revenue = sum(total_price), .groups = 'drop') %>%
mutate(category_2 = fct_reorder(category_2, total_revenue)) %>%
ggplot(aes(x = category_2, y = total_revenue, fill = category_2)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label = dollar(total_revenue, scale = 1e-6, suffix = "M")),
hjust = -0.1, size = 4, fontface = "bold") +
coord_flip() +
scale_y_continuous(
labels = dollar_format(scale = 1e-6, suffix = "M"),
expand = expansion(mult = c(0, 0.15))
) +
scale_fill_tq() +
labs(
title = "Total Revenue by Bike Category (2011-2015)",
x = NULL,
y = "Total Revenue (Millions)"
) +
theme_tq() +
theme(
plot.title = element_text(face = "bold", size = 14),
axis.text.y = element_text(size = 11),
axis.title.x = element_text(face = "bold")
)## Conclusion
This analysis provides a comprehensive view of bike sales performance across multiple product categories from 2011 to 2015. The data reveals distinct market segments with varying demand patterns, offering valuable insights for inventory management, marketing strategy, and product development decisions.
Strategic Recommendations:
Report Generated: r format(Sys.time(), ā%B %d, %Y at %I:%M
%pā)
Analysis Period: 2011 - 2015
Tool: R with tidyverse, tidyquant, and lubridate packages