# Load required packages for data analysis and visualization
library(readxl)
library(dplyr)
library(lubridate)
library(ggplot2)
library(scales)
library(RColorBrewer)
library(knitr)
# Import bike sales data
bike_orderlines_1_ <- read_excel("bike_orderlines.xlsx")
# Data transformation and feature engineering
bike_orderlines_1_ <- bike_orderlines_1_ %>%
mutate(order_date = as.Date(order_date))
# Generate temporal aggregation variables
bike_orderlines_1_ <- bike_orderlines_1_ %>%
mutate(
year = year(order_date),
quarter = quarter(order_date),
year_quarter = as.Date(paste0(year, "-", (quarter-1)*3 + 1, "-01")),
year_month = floor_date(order_date, "month"),
year_week = floor_date(order_date, "week")
)
This comprehensive analysis examines bike sales performance across a 5-year period (2011-2015). The report provides insights into revenue trends at multiple time granularities and explores performance variations across different product categories.
Key Metrics: - Total Revenue: $71,032,330 - Total Transactions: 2,000 - Average Order Value: $4,540.55 - Product Categories Analyzed: Road Bikes & Mountain Bikes
This section analyzes overall revenue trends across different time periods to identify seasonal patterns and growth trajectories.
The quarterly analysis reveals macro-level sales patterns and business cycles.
# Quarterly aggregation
quarterly_sales <- bike_orderlines_1_ %>%
group_by(year_quarter) %>%
summarise(revenue = sum(total_price)) %>%
arrange(year_quarter)
# Visualization
ggplot(quarterly_sales, aes(x = year_quarter, y = revenue)) +
geom_line(color = "#DC2626", size = 2, lineend = "round") +
geom_point(color = "#991B1B", size = 4) +
scale_y_continuous(labels = dollar_format(), limits = c(0, NA),
breaks = seq(0, 8000000, 1000000)) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
labs(
title = "Quarterly Revenue Analysis (2011-2015)",
subtitle = "Aggregate sales performance by quarter",
x = "Year",
y = "Total Revenue (USD)"
) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
plot.subtitle = element_text(hjust = 0.5, size = 12, color = "gray30"),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray90"),
panel.background = element_rect(fill = "#FEF3C7", color = NA),
plot.background = element_rect(fill = "#FEF3C7", color = NA),
axis.title = element_text(face = "bold")
)
Monthly data provides insights into seasonal fluctuations and mid-term trends.
# Monthly aggregation
monthly_sales <- bike_orderlines_1_ %>%
group_by(year_month) %>%
summarise(revenue = sum(total_price)) %>%
arrange(year_month)
# Plot with trend analysis
ggplot(monthly_sales, aes(x = year_month, y = revenue)) +
geom_point(size = 2.5, color = "#EA580C", alpha = 0.7) +
geom_smooth(method = "loess", se = TRUE, color = "#DC2626",
fill = "#FED7AA", alpha = 0.4, size = 1.8) +
scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
scale_x_date(date_breaks = "6 months", date_labels = "%b %Y") +
labs(
title = "Monthly Revenue Trends with Smoothed Forecast",
subtitle = "Detailed monthly sales patterns showing seasonality",
x = "Month",
y = "Total Revenue (USD)"
) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
plot.subtitle = element_text(hjust = 0.5, size = 12, color = "gray30"),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray90"),
axis.text.x = element_text(angle = 45, hjust = 1),
panel.background = element_rect(fill = "#FEF3C7", color = NA),
plot.background = element_rect(fill = "#FEF3C7", color = NA),
axis.title = element_text(face = "bold")
)
Weekly data captures short-term volatility and helps identify anomalies.
# Weekly aggregation
weekly_sales <- bike_orderlines_1_ %>%
group_by(year_week) %>%
summarise(revenue = sum(total_price)) %>%
arrange(year_week)
# High-resolution weekly chart
ggplot(weekly_sales, aes(x = year_week, y = revenue)) +
geom_point(size = 1.2, alpha = 0.5, color = "#EA580C") +
geom_smooth(method = "loess", se = TRUE, color = "#DC2626",
fill = "#FED7AA", alpha = 0.4, span = 0.15, size = 1.6) +
scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
scale_x_date(date_breaks = "6 months", date_labels = "%b %Y") +
labs(
title = "Weekly Sales Dynamics and Volatility Analysis",
subtitle = "High-frequency revenue data with trend smoothing",
x = "Week",
y = "Total Revenue (USD)"
) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
plot.subtitle = element_text(hjust = 0.5, size = 12, color = "gray30"),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1),
panel.background = element_rect(fill = "#FEF3C7", color = NA),
plot.background = element_rect(fill = "#FEF3C7", color = NA),
axis.title = element_text(face = "bold")
)
Detailed breakdown of sales performance by bike category and subcategory.
Road bikes represent a significant portion of overall revenue with four distinct subcategories.
# Road bikes quarterly data
road_quarterly <- bike_orderlines_1_ %>%
filter(category_1 == "Road") %>%
group_by(year_quarter, category_2) %>%
summarise(revenue = sum(total_price), .groups = "drop") %>%
arrange(year_quarter) %>%
mutate(category_2 = factor(category_2,
levels = c("Elite Road", "Endurance Road", "Triathalon", "Cyclocross")))
# Custom color palette - Purple shades
road_colors <- c("#7C3AED", "#A78BFA", "#C4B5FD", "#DDD6FE")
# Faceted visualization
ggplot(road_quarterly, aes(x = year_quarter, y = revenue, color = category_2)) +
geom_line(size = 1.8, lineend = "round") +
geom_point(size = 2, alpha = 0.8) +
scale_color_manual(values = road_colors) +
scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
facet_wrap(~ category_2, ncol = 1, scales = "free_y") +
labs(
title = "Road Bikes - Quarterly Sales Performance by Subcategory",
subtitle = "Revenue breakdown for Elite Road, Endurance Road, Triathalon, and Cyclocross",
x = "Year",
y = "Revenue (USD)"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold", color = "white", size = 16),
plot.subtitle = element_text(hjust = 0.5, color = "white", size = 11),
plot.background = element_rect(fill = "#1E293B", color = NA),
panel.background = element_rect(fill = "#F8FAFC", color = NA),
strip.background = element_rect(fill = "#334155", color = NA),
strip.text = element_text(color = "white", face = "bold", size = 12),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray85"),
legend.position = "none",
axis.text = element_text(color = "black"),
axis.title = element_text(face = "bold")
)
# Road bikes monthly data
road_monthly <- bike_orderlines_1_ %>%
filter(category_1 == "Road") %>%
group_by(year_month, category_2) %>%
summarise(revenue = sum(total_price), .groups = "drop") %>%
arrange(year_month) %>%
mutate(category_2 = factor(category_2,
levels = c("Elite Road", "Endurance Road", "Triathalon", "Cyclocross")))
# Monthly analysis with smoothing
ggplot(road_monthly, aes(x = year_month, y = revenue, color = category_2)) +
geom_point(size = 1.3, alpha = 0.6) +
geom_smooth(method = "loess", se = TRUE, fill = "#DDD6FE", alpha = 0.35, size = 1.5) +
scale_color_manual(values = road_colors) +
scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
facet_wrap(~ category_2, ncol = 1, scales = "free_y") +
labs(
title = "Road Bikes - Monthly Sales Trends with Forecast",
subtitle = "Smoothed monthly revenue patterns for each subcategory",
x = "Month",
y = "Revenue (USD)"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold", color = "white", size = 16),
plot.subtitle = element_text(hjust = 0.5, color = "white", size = 11),
plot.background = element_rect(fill = "#1E293B", color = NA),
panel.background = element_rect(fill = "#F8FAFC", color = NA),
strip.background = element_rect(fill = "#334155", color = NA),
strip.text = element_text(color = "white", face = "bold", size = 12),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray85"),
legend.position = "none",
axis.text = element_text(color = "black"),
axis.title = element_text(face = "bold")
)
# Road bikes weekly data
road_weekly <- bike_orderlines_1_ %>%
filter(category_1 == "Road") %>%
group_by(year_week, category_2) %>%
summarise(revenue = sum(total_price), .groups = "drop") %>%
arrange(year_week) %>%
mutate(category_2 = factor(category_2,
levels = c("Elite Road", "Endurance Road", "Triathalon", "Cyclocross")))
# High-frequency weekly visualization
ggplot(road_weekly, aes(x = year_week, y = revenue, color = category_2)) +
geom_point(size = 1, alpha = 0.5) +
geom_smooth(method = "loess", se = FALSE, span = 0.15, size = 1.5) +
scale_color_manual(values = road_colors) +
scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
facet_wrap(~ category_2, ncol = 1, scales = "free_y") +
labs(
title = "Road Bikes - Weekly Sales Volatility Analysis",
subtitle = "High-resolution weekly revenue patterns and fluctuations",
x = "Week",
y = "Revenue (USD)"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold", color = "white", size = 16),
plot.subtitle = element_text(hjust = 0.5, color = "white", size = 11),
plot.background = element_rect(fill = "#1E293B", color = NA),
panel.background = element_rect(fill = "#F8FAFC", color = NA),
strip.background = element_rect(fill = "#334155", color = NA),
strip.text = element_text(color = "white", face = "bold", size = 12),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray85"),
legend.position = "none",
axis.text = element_text(color = "black"),
axis.title = element_text(face = "bold")
)
Mountain bikes encompass five distinct subcategories with unique performance characteristics.
# Mountain bikes quarterly data
mountain_quarterly <- bike_orderlines_1_ %>%
filter(category_1 == "Mountain") %>%
group_by(year_quarter, category_2) %>%
summarise(revenue = sum(total_price), .groups = "drop") %>%
arrange(year_quarter) %>%
mutate(category_2 = factor(category_2,
levels = c("Cross Country Race", "Trail", "Over Mountain", "Sport", "Fat Bike")))
# Custom color palette - Teal/Cyan shades
mountain_colors <- c("#0891B2", "#06B6D4", "#22D3EE", "#67E8F9", "#A5F3FC")
# Comprehensive quarterly visualization
ggplot(mountain_quarterly, aes(x = year_quarter, y = revenue, color = category_2)) +
geom_line(size = 1.8, lineend = "round") +
geom_point(size = 2, alpha = 0.8) +
scale_color_manual(values = mountain_colors) +
scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
facet_wrap(~ category_2, ncol = 1, scales = "free_y") +
labs(
title = "Mountain Bikes - Quarterly Sales Performance by Subcategory",
subtitle = "Revenue analysis for Cross Country, Trail, Over Mountain, Sport, and Fat Bike categories",
x = "Year",
y = "Revenue (USD)"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold", color = "white", size = 16),
plot.subtitle = element_text(hjust = 0.5, color = "white", size = 11),
plot.background = element_rect(fill = "#1E293B", color = NA),
panel.background = element_rect(fill = "#F8FAFC", color = NA),
strip.background = element_rect(fill = "#334155", color = NA),
strip.text = element_text(color = "white", face = "bold", size = 12),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray85"),
legend.position = "none",
axis.text = element_text(color = "black"),
axis.title = element_text(face = "bold")
)
# Mountain bikes monthly data
mountain_monthly <- bike_orderlines_1_ %>%
filter(category_1 == "Mountain") %>%
group_by(year_month, category_2) %>%
summarise(revenue = sum(total_price), .groups = "drop") %>%
arrange(year_month) %>%
mutate(category_2 = factor(category_2,
levels = c("Cross Country Race", "Trail", "Over Mountain", "Sport", "Fat Bike")))
# Monthly trend visualization
ggplot(mountain_monthly, aes(x = year_month, y = revenue, color = category_2)) +
geom_point(size = 1.3, alpha = 0.6) +
geom_smooth(method = "loess", se = TRUE, fill = "#A5F3FC", alpha = 0.35, size = 1.5) +
scale_color_manual(values = mountain_colors) +
scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
facet_wrap(~ category_2, ncol = 1, scales = "free_y") +
labs(
title = "Mountain Bikes - Monthly Sales Trends with Forecast",
subtitle = "Detailed monthly revenue patterns showing seasonality across all subcategories",
x = "Month",
y = "Revenue (USD)"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold", color = "white", size = 16),
plot.subtitle = element_text(hjust = 0.5, color = "white", size = 11),
plot.background = element_rect(fill = "#1E293B", color = NA),
panel.background = element_rect(fill = "#F8FAFC", color = NA),
strip.background = element_rect(fill = "#334155", color = NA),
strip.text = element_text(color = "white", face = "bold", size = 12),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray85"),
legend.position = "none",
axis.text = element_text(color = "black"),
axis.title = element_text(face = "bold")
)
# Mountain bikes weekly data
mountain_weekly <- bike_orderlines_1_ %>%
filter(category_1 == "Mountain") %>%
group_by(year_week, category_2) %>%
summarise(revenue = sum(total_price), .groups = "drop") %>%
arrange(year_week) %>%
mutate(category_2 = factor(category_2,
levels = c("Cross Country Race", "Trail", "Over Mountain", "Sport", "Fat Bike")))
# Weekly volatility analysis
ggplot(mountain_weekly, aes(x = year_week, y = revenue, color = category_2)) +
geom_point(size = 1, alpha = 0.5) +
geom_smooth(method = "loess", se = FALSE, span = 0.15, size = 1.5) +
scale_color_manual(values = mountain_colors) +
scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
facet_wrap(~ category_2, ncol = 1, scales = "free_y") +
labs(
title = "Mountain Bikes - Weekly Sales Volatility Analysis",
subtitle = "High-frequency weekly data revealing short-term sales patterns and anomalies",
x = "Week",
y = "Revenue (USD)"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold", color = "white", size = 16),
plot.subtitle = element_text(hjust = 0.5, color = "white", size = 11),
plot.background = element_rect(fill = "#1E293B", color = NA),
panel.background = element_rect(fill = "#F8FAFC", color = NA),
strip.background = element_rect(fill = "#334155", color = NA),
strip.text = element_text(color = "white", face = "bold", size = 12),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray85"),
legend.position = "none",
axis.text = element_text(color = "black"),
axis.title = element_text(face = "bold")
)
# Calculate summary statistics
total_revenue <- sum(bike_orderlines_1_$total_price)
total_orders <- n_distinct(bike_orderlines_1_$order_id)
avg_order_value <- mean(bike_orderlines_1_$total_price)
# Category breakdown
category_summary <- bike_orderlines_1_ %>%
group_by(category_1) %>%
summarise(
Total_Revenue = sum(total_price),
Percentage = round(sum(total_price) / total_revenue * 100, 1),
Avg_Price = round(mean(total_price), 2)
) %>%
arrange(desc(Total_Revenue))
# Display summary table
kable(category_summary,
col.names = c("Category", "Total Revenue ($)", "% of Total", "Avg Price ($)"),
format.args = list(big.mark = ","),
caption = "Revenue Summary by Product Category")
| Category | Total Revenue (\()| % of Total| Avg Price (\)) | ||
|---|---|---|---|
| Mountain | 39,154,735 | 55.1 | 4,863.34 |
| Road | 31,877,595 | 44.9 | 4,198.29 |
This comprehensive analysis provides actionable insights into bike sales performance across multiple dimensions. The data reveals strong seasonal patterns, clear category preferences, and opportunities for strategic optimization. Continued monitoring of these metrics will enable data-driven decision-making and support sustained business growth.
Report Generated: 2025-12-22
Data Period: 2011-2015
Total Observations: 15644
## R version 4.3.3 (2024-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so; LAPACK version 3.9.0
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## time zone: UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.43 RColorBrewer_1.1-3 scales_1.2.1 ggplot2_3.4.2
## [5] lubridate_1.9.2 dplyr_1.1.2 readxl_1.4.3
##
## loaded via a namespace (and not attached):
## [1] Matrix_1.6-5 gtable_0.3.3 jsonlite_1.8.7 highr_0.10
## [5] compiler_4.3.3 tidyselect_1.2.0 jquerylib_0.1.4 splines_4.3.3
## [9] yaml_2.3.7 fastmap_1.1.1 lattice_0.22-5 R6_2.5.1
## [13] labeling_0.4.2 generics_0.1.3 tibble_3.2.1 munsell_0.5.0
## [17] bslib_0.5.0 pillar_1.9.0 rlang_1.1.1 utf8_1.2.3
## [21] cachem_1.0.8 xfun_0.40 sass_0.4.7 timechange_0.2.0
## [25] cli_3.6.1 mgcv_1.9-1 withr_2.5.0 magrittr_2.0.3
## [29] digest_0.6.33 grid_4.3.3 rstudioapi_0.15.0 nlme_3.1-164
## [33] lifecycle_1.0.3 vctrs_0.6.3 evaluate_0.21 glue_1.6.2
## [37] farver_2.1.1 cellranger_1.1.0 fansi_1.0.4 colorspace_2.1-0
## [41] rmarkdown_2.23 tools_4.3.3 pkgconfig_2.0.3 htmltools_0.5.6