# Load required packages for data analysis and visualization
library(readxl)
library(dplyr)
library(lubridate)
library(ggplot2)
library(scales)
library(RColorBrewer)
library(knitr)
# Import bike sales data
bike_orderlines_1_ <- read_excel("bike_orderlines.xlsx")

# Data transformation and feature engineering
bike_orderlines_1_ <- bike_orderlines_1_ %>%
  mutate(order_date = as.Date(order_date))

# Generate temporal aggregation variables
bike_orderlines_1_ <- bike_orderlines_1_ %>%
  mutate(
    year = year(order_date),
    quarter = quarter(order_date),
    year_quarter = as.Date(paste0(year, "-", (quarter-1)*3 + 1, "-01")),
    year_month = floor_date(order_date, "month"),
    year_week = floor_date(order_date, "week")
  )

Executive Summary

This comprehensive analysis examines bike sales performance across a 5-year period (2011-2015). The report provides insights into revenue trends at multiple time granularities and explores performance variations across different product categories.

Key Metrics: - Total Revenue: $71,032,330 - Total Transactions: 2,000 - Average Order Value: $4,540.55 - Product Categories Analyzed: Road Bikes & Mountain Bikes


Section 1: Aggregate Sales Performance

This section analyzes overall revenue trends across different time periods to identify seasonal patterns and growth trajectories.

1.2 Monthly Revenue Patterns

Monthly data provides insights into seasonal fluctuations and mid-term trends.

# Monthly aggregation
monthly_sales <- bike_orderlines_1_ %>%
  group_by(year_month) %>%
  summarise(revenue = sum(total_price)) %>%
  arrange(year_month)

# Plot with trend analysis
ggplot(monthly_sales, aes(x = year_month, y = revenue)) +
  geom_point(size = 2.5, color = "#EA580C", alpha = 0.7) +
  geom_smooth(method = "loess", se = TRUE, color = "#DC2626", 
              fill = "#FED7AA", alpha = 0.4, size = 1.8) +
  scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
  scale_x_date(date_breaks = "6 months", date_labels = "%b %Y") +
  labs(
    title = "Monthly Revenue Trends with Smoothed Forecast",
    subtitle = "Detailed monthly sales patterns showing seasonality",
    x = "Month",
    y = "Total Revenue (USD)"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
    plot.subtitle = element_text(hjust = 0.5, size = 12, color = "gray30"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray90"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.background = element_rect(fill = "#FEF3C7", color = NA),
    plot.background = element_rect(fill = "#FEF3C7", color = NA),
    axis.title = element_text(face = "bold")
  )

1.3 Weekly Revenue Distribution

Weekly data captures short-term volatility and helps identify anomalies.

# Weekly aggregation
weekly_sales <- bike_orderlines_1_ %>%
  group_by(year_week) %>%
  summarise(revenue = sum(total_price)) %>%
  arrange(year_week)

# High-resolution weekly chart
ggplot(weekly_sales, aes(x = year_week, y = revenue)) +
  geom_point(size = 1.2, alpha = 0.5, color = "#EA580C") +
  geom_smooth(method = "loess", se = TRUE, color = "#DC2626", 
              fill = "#FED7AA", alpha = 0.4, span = 0.15, size = 1.6) +
  scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
  scale_x_date(date_breaks = "6 months", date_labels = "%b %Y") +
  labs(
    title = "Weekly Sales Dynamics and Volatility Analysis",
    subtitle = "High-frequency revenue data with trend smoothing",
    x = "Week",
    y = "Total Revenue (USD)"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
    plot.subtitle = element_text(hjust = 0.5, size = 12, color = "gray30"),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.background = element_rect(fill = "#FEF3C7", color = NA),
    plot.background = element_rect(fill = "#FEF3C7", color = NA),
    axis.title = element_text(face = "bold")
  )


Section 2: Product Category Performance

Detailed breakdown of sales performance by bike category and subcategory.

2.1 Road Bikes Category Analysis

Road bikes represent a significant portion of overall revenue with four distinct subcategories.

Quarterly Performance by Subcategory

# Road bikes quarterly data
road_quarterly <- bike_orderlines_1_ %>%
  filter(category_1 == "Road") %>%
  group_by(year_quarter, category_2) %>%
  summarise(revenue = sum(total_price), .groups = "drop") %>%
  arrange(year_quarter) %>%
  mutate(category_2 = factor(category_2, 
    levels = c("Elite Road", "Endurance Road", "Triathalon", "Cyclocross")))

# Custom color palette - Purple shades
road_colors <- c("#7C3AED", "#A78BFA", "#C4B5FD", "#DDD6FE")

# Faceted visualization
ggplot(road_quarterly, aes(x = year_quarter, y = revenue, color = category_2)) +
  geom_line(size = 1.8, lineend = "round") +
  geom_point(size = 2, alpha = 0.8) +
  scale_color_manual(values = road_colors) +
  scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  facet_wrap(~ category_2, ncol = 1, scales = "free_y") +
  labs(
    title = "Road Bikes - Quarterly Sales Performance by Subcategory",
    subtitle = "Revenue breakdown for Elite Road, Endurance Road, Triathalon, and Cyclocross",
    x = "Year",
    y = "Revenue (USD)"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white", size = 16),
    plot.subtitle = element_text(hjust = 0.5, color = "white", size = 11),
    plot.background = element_rect(fill = "#1E293B", color = NA),
    panel.background = element_rect(fill = "#F8FAFC", color = NA),
    strip.background = element_rect(fill = "#334155", color = NA),
    strip.text = element_text(color = "white", face = "bold", size = 12),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray85"),
    legend.position = "none",
    axis.text = element_text(color = "black"),
    axis.title = element_text(face = "bold")
  )

Weekly Sales Dynamics

# Road bikes weekly data
road_weekly <- bike_orderlines_1_ %>%
  filter(category_1 == "Road") %>%
  group_by(year_week, category_2) %>%
  summarise(revenue = sum(total_price), .groups = "drop") %>%
  arrange(year_week) %>%
  mutate(category_2 = factor(category_2, 
    levels = c("Elite Road", "Endurance Road", "Triathalon", "Cyclocross")))

# High-frequency weekly visualization
ggplot(road_weekly, aes(x = year_week, y = revenue, color = category_2)) +
  geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE, span = 0.15, size = 1.5) +
  scale_color_manual(values = road_colors) +
  scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  facet_wrap(~ category_2, ncol = 1, scales = "free_y") +
  labs(
    title = "Road Bikes - Weekly Sales Volatility Analysis",
    subtitle = "High-resolution weekly revenue patterns and fluctuations",
    x = "Week",
    y = "Revenue (USD)"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white", size = 16),
    plot.subtitle = element_text(hjust = 0.5, color = "white", size = 11),
    plot.background = element_rect(fill = "#1E293B", color = NA),
    panel.background = element_rect(fill = "#F8FAFC", color = NA),
    strip.background = element_rect(fill = "#334155", color = NA),
    strip.text = element_text(color = "white", face = "bold", size = 12),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray85"),
    legend.position = "none",
    axis.text = element_text(color = "black"),
    axis.title = element_text(face = "bold")
  )


2.2 Mountain Bikes Category Analysis

Mountain bikes encompass five distinct subcategories with unique performance characteristics.

Quarterly Performance by Subcategory

# Mountain bikes quarterly data
mountain_quarterly <- bike_orderlines_1_ %>%
  filter(category_1 == "Mountain") %>%
  group_by(year_quarter, category_2) %>%
  summarise(revenue = sum(total_price), .groups = "drop") %>%
  arrange(year_quarter) %>%
  mutate(category_2 = factor(category_2, 
    levels = c("Cross Country Race", "Trail", "Over Mountain", "Sport", "Fat Bike")))

# Custom color palette - Teal/Cyan shades
mountain_colors <- c("#0891B2", "#06B6D4", "#22D3EE", "#67E8F9", "#A5F3FC")

# Comprehensive quarterly visualization
ggplot(mountain_quarterly, aes(x = year_quarter, y = revenue, color = category_2)) +
  geom_line(size = 1.8, lineend = "round") +
  geom_point(size = 2, alpha = 0.8) +
  scale_color_manual(values = mountain_colors) +
  scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  facet_wrap(~ category_2, ncol = 1, scales = "free_y") +
  labs(
    title = "Mountain Bikes - Quarterly Sales Performance by Subcategory",
    subtitle = "Revenue analysis for Cross Country, Trail, Over Mountain, Sport, and Fat Bike categories",
    x = "Year",
    y = "Revenue (USD)"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white", size = 16),
    plot.subtitle = element_text(hjust = 0.5, color = "white", size = 11),
    plot.background = element_rect(fill = "#1E293B", color = NA),
    panel.background = element_rect(fill = "#F8FAFC", color = NA),
    strip.background = element_rect(fill = "#334155", color = NA),
    strip.text = element_text(color = "white", face = "bold", size = 12),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray85"),
    legend.position = "none",
    axis.text = element_text(color = "black"),
    axis.title = element_text(face = "bold")
  )

Weekly Sales Dynamics

# Mountain bikes weekly data
mountain_weekly <- bike_orderlines_1_ %>%
  filter(category_1 == "Mountain") %>%
  group_by(year_week, category_2) %>%
  summarise(revenue = sum(total_price), .groups = "drop") %>%
  arrange(year_week) %>%
  mutate(category_2 = factor(category_2, 
    levels = c("Cross Country Race", "Trail", "Over Mountain", "Sport", "Fat Bike")))

# Weekly volatility analysis
ggplot(mountain_weekly, aes(x = year_week, y = revenue, color = category_2)) +
  geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE, span = 0.15, size = 1.5) +
  scale_color_manual(values = mountain_colors) +
  scale_y_continuous(labels = dollar_format(), limits = c(0, NA)) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  facet_wrap(~ category_2, ncol = 1, scales = "free_y") +
  labs(
    title = "Mountain Bikes - Weekly Sales Volatility Analysis",
    subtitle = "High-frequency weekly data revealing short-term sales patterns and anomalies",
    x = "Week",
    y = "Revenue (USD)"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white", size = 16),
    plot.subtitle = element_text(hjust = 0.5, color = "white", size = 11),
    plot.background = element_rect(fill = "#1E293B", color = NA),
    panel.background = element_rect(fill = "#F8FAFC", color = NA),
    strip.background = element_rect(fill = "#334155", color = NA),
    strip.text = element_text(color = "white", face = "bold", size = 12),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray85"),
    legend.position = "none",
    axis.text = element_text(color = "black"),
    axis.title = element_text(face = "bold")
  )


Section 3: Comprehensive Summary & Key Insights

3.1 Overall Performance Summary

# Calculate summary statistics
total_revenue <- sum(bike_orderlines_1_$total_price)
total_orders <- n_distinct(bike_orderlines_1_$order_id)
avg_order_value <- mean(bike_orderlines_1_$total_price)

# Category breakdown
category_summary <- bike_orderlines_1_ %>%
  group_by(category_1) %>%
  summarise(
    Total_Revenue = sum(total_price),
    Percentage = round(sum(total_price) / total_revenue * 100, 1),
    Avg_Price = round(mean(total_price), 2)
  ) %>%
  arrange(desc(Total_Revenue))

# Display summary table
kable(category_summary, 
      col.names = c("Category", "Total Revenue ($)", "% of Total", "Avg Price ($)"),
      format.args = list(big.mark = ","),
      caption = "Revenue Summary by Product Category")
Revenue Summary by Product Category
Category Total Revenue (\()| % of Total| Avg Price (\))
Mountain 39,154,735 55.1 4,863.34
Road 31,877,595 44.9 4,198.29

3.2 Key Findings and Insights

Performance Highlights

  1. Revenue Growth Trajectory
    • Total sales over 5-year period: $71,032,330
    • Clear seasonal patterns observed with peaks in Q2 and Q4
    • Overall upward trend despite periodic fluctuations
  2. Category Performance
    • Mountain bikes dominate with 55.1% market share
    • Road bikes contribute 44.9% of total revenue
    • Significant price differentiation between categories
  3. Temporal Patterns
    • Monthly analysis reveals strong seasonality with summer peaks
    • Weekly volatility indicates promotional activities and demand spikes
    • Quarterly trends show consistent year-end performance
  4. Subcategory Insights
    • Road: Elite Road and Endurance Road are top performers
    • Mountain: Cross Country Race and Trail bikes show consistent demand
    • Emerging category: Fat Bike showing growth potential in recent years

Strategic Recommendations

  1. Inventory Management
    • Increase stock levels during peak seasons (Q2, Q4)
    • Maintain optimal inventory for high-performing subcategories
    • Consider Just-In-Time strategies for slow-moving items
  2. Marketing Focus
    • Leverage seasonal demand patterns for promotional campaigns
    • Target marketing efforts on high-margin categories
    • Develop category-specific marketing strategies
  3. Product Development
    • Invest in expanding successful subcategories
    • Monitor emerging trends (e.g., Fat Bike growth)
    • Consider product line optimization based on performance data
  4. Sales Forecasting
    • Utilize historical patterns for demand forecasting
    • Account for seasonality in annual planning
    • Monitor weekly volatility for tactical adjustments

Conclusion

This comprehensive analysis provides actionable insights into bike sales performance across multiple dimensions. The data reveals strong seasonal patterns, clear category preferences, and opportunities for strategic optimization. Continued monitoring of these metrics will enable data-driven decision-making and support sustained business growth.

Report Generated: 2025-12-22

Data Period: 2011-2015

Total Observations: 15644


## R version 4.3.3 (2024-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so;  LAPACK version 3.9.0
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## time zone: UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] knitr_1.43         RColorBrewer_1.1-3 scales_1.2.1       ggplot2_3.4.2     
## [5] lubridate_1.9.2    dplyr_1.1.2        readxl_1.4.3      
## 
## loaded via a namespace (and not attached):
##  [1] Matrix_1.6-5      gtable_0.3.3      jsonlite_1.8.7    highr_0.10       
##  [5] compiler_4.3.3    tidyselect_1.2.0  jquerylib_0.1.4   splines_4.3.3    
##  [9] yaml_2.3.7        fastmap_1.1.1     lattice_0.22-5    R6_2.5.1         
## [13] labeling_0.4.2    generics_0.1.3    tibble_3.2.1      munsell_0.5.0    
## [17] bslib_0.5.0       pillar_1.9.0      rlang_1.1.1       utf8_1.2.3       
## [21] cachem_1.0.8      xfun_0.40         sass_0.4.7        timechange_0.2.0 
## [25] cli_3.6.1         mgcv_1.9-1        withr_2.5.0       magrittr_2.0.3   
## [29] digest_0.6.33     grid_4.3.3        rstudioapi_0.15.0 nlme_3.1-164     
## [33] lifecycle_1.0.3   vctrs_0.6.3       evaluate_0.21     glue_1.6.2       
## [37] farver_2.1.1      cellranger_1.1.0  fansi_1.0.4       colorspace_2.1-0 
## [41] rmarkdown_2.23    tools_4.3.3       pkgconfig_2.0.3   htmltools_0.5.6