Setup

Load Libraries

library(tidyverse)
library(readxl)
library(lubridate)

Data Import and Wrangling

# Import data files
bikes_tbl <- read_excel("./bikes.xlsx")
bikeshops_tbl <- read_excel("./bikeshops.xlsx")
orderlines_tbl <- read_excel("./orderlines.xlsx")

# Join and wrangle data
bike_orderlines_wrangled_tbl <- orderlines_tbl %>%
  left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>% 
  left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>%
  separate(description, 
           into = c('category_1', 'category_2', 'frame_material'), 
           sep = ' - ') %>% 
  separate(location, 
           into = c('city', 'state'), 
           sep = ', ',
           remove = TRUE) %>% 
  mutate(total_price = price * quantity) %>% 
  select(order.date, product.id, customer.id, order.id, order.line,
         quantity, price, total_price, 
         everything()) %>% 
  rename(order_date = order.date,
         product_id = product.id,
         customer_id = customer.id,
         order_id = order.id,
         order_line = order.line)

Exploratory Analysis

Product Categories

cat("Primary Categories:\n")
## Primary Categories:
print(unique(bike_orderlines_wrangled_tbl$category_1))
## [1] "Mountain" "Road"
cat("\nSecondary Categories:\n")
## 
## Secondary Categories:
print(unique(bike_orderlines_wrangled_tbl$category_2))
## [1] "Over Mountain"      "Trail"              "Elite Road"        
## [4] "Endurance Road"     "Sport"              "Cross Country Race"
## [7] "Cyclocross"         "Triathalon"         "Fat Bike"

Sales by Primary Category

sales_by_cat1 <- bike_orderlines_wrangled_tbl %>% 
  group_by(category_1) %>% 
  summarise(Sales = sum(total_price)) %>%
  arrange(desc(Sales)) %>% 
  mutate(Sales_Formatted = scales::dollar(Sales))

knitr::kable(sales_by_cat1 %>% select(category_1, Sales_Formatted),
             col.names = c("Primary Category", "Sales"),
             align = 'lr')
Primary Category Sales
Mountain $39,154,735
Road $31,877,595

Time Series Analysis

Annual Sales

bike_sales_y <- bike_orderlines_wrangled_tbl %>%
  mutate(order_date = ymd(order_date)) %>% 
  mutate(year = year(order_date)) %>% 
  group_by(year) %>% 
  summarise(sales = sum(total_price)) %>% 
  ungroup()

knitr::kable(bike_sales_y %>% 
               mutate(sales_fmt = scales::dollar(sales)) %>%
               select(year, sales_fmt),
             col.names = c("Year", "Sales"),
             align = 'lr')
Year Sales
2011 $11,292,885
2012 $12,163,075
2013 $16,480,775
2014 $13,924,085
2015 $17,171,510

Annual Revenue Trend

ggplot(bike_sales_y, aes(x = year, y = sales)) +
  geom_point(size = 5, color = "#FF1493") + 
  geom_line(linewidth = 2, color = "#FF69B4") + 
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, 
              color = "#C71585", linewidth = 1.5) +
  expand_limits(y = c(0, 20e6)) + 
  scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) + 
  labs(
    title = "Annual Revenue Trend (2011-2015)",
    subtitle = "Sales are trending up and to the right!",
    x = "Year", 
    y = "Sales (Millions)",
    caption = "Total sales from 2011 to 2015"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", color = "#C71585"),
    plot.subtitle = element_text(size = 12, color = "#FF69B4"),
    axis.title = element_text(size = 12, face = "bold"),
    panel.grid.minor = element_blank()
  )

Monthly Sales

bike_sales_m <- bike_orderlines_wrangled_tbl %>% 
  mutate(order_date = ymd(order_date)) %>% 
  mutate(year_month = floor_date(order_date, unit = "month")) %>%
  group_by(year_month) %>% 
  summarise(sales = sum(total_price)) %>% 
  ungroup()

Year-over-Year Growth

bike_sales_growth <- bike_sales_y %>% 
  mutate(sales_lag_1 = lag(sales, n = 1)) %>% 
  mutate(sales_lag_1 = ifelse(is.na(sales_lag_1), sales, sales_lag_1)) %>% 
  mutate(diff_1 = sales - sales_lag_1,
         pct_diff_1 = diff_1 / sales_lag_1)

knitr::kable(bike_sales_growth %>% 
               select(year, sales, diff_1, pct_diff_1) %>%
               mutate(sales = scales::dollar(sales),
                      diff_1 = scales::dollar(diff_1),
                      pct_diff_1 = scales::percent(pct_diff_1)),
             col.names = c("Year", "Sales", "Change ($)", "Change (%)"),
             align = 'lrrr')
Year Sales Change ($) Change (%)
2011 $11,292,885 $0 0.0%
2012 $12,163,075 $870,190 7.7%
2013 $16,480,775 $4,317,700 35.5%
2014 $13,924,085 -$2,556,690 -15.5%
2015 $17,171,510 $3,247,425 23.3%

Sales by Secondary Category

revenue_by_category2_tbl <- bike_orderlines_wrangled_tbl %>% 
  group_by(category_2) %>% 
  summarise(revenue = sum(total_price)) %>% 
  ungroup()

Category Revenue Comparison

revenue_by_category2_tbl %>% 
  mutate(category_2 = fct_reorder(category_2, revenue)) %>% 
  ggplot(aes(x = category_2, y = revenue)) +
  geom_col(fill = "#FF69B4", alpha = 0.8) +
  coord_flip() +
  scale_y_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Revenue by Bike Category",
    subtitle = "Total revenue across all bike shops",
    x = "category_2",
    y = "Revenue",
    caption = "Data from 2011-2015"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold", color = "#4A90A4"),
    plot.subtitle = element_text(size = 12, color = "#666666"),
    axis.title.x = element_text(size = 11, face = "bold"),
    axis.title.y = element_text(size = 11, face = "bold"),
    axis.text.y = element_text(size = 10),
    axis.text.x = element_text(size = 10),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(color = "#E5E5E5")
  )

Summary

This analysis reveals:

  • Strong Growth Trend: Annual sales show consistent upward trajectory
  • Top Categories: Mountain and Road bikes dominate the product mix
  • Seasonal Patterns: Monthly analysis shows cyclical patterns

Analysis completed on 2025-10-21

sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: Asia/Taipei
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] readxl_1.4.5    lubridate_1.9.4 forcats_1.0.1   stringr_1.5.2  
##  [5] dplyr_1.1.4     purrr_1.1.0     readr_2.1.5     tidyr_1.3.1    
##  [9] tibble_3.3.0    ggplot2_4.0.0   tidyverse_2.0.0
## 
## loaded via a namespace (and not attached):
##  [1] Matrix_1.7-3       gtable_0.3.6       jsonlite_2.0.0     compiler_4.5.1    
##  [5] tidyselect_1.2.1   jquerylib_0.1.4    splines_4.5.1      scales_1.4.0      
##  [9] yaml_2.3.10        fastmap_1.2.0      lattice_0.22-7     R6_2.6.1          
## [13] labeling_0.4.3     generics_0.1.4     knitr_1.50         bslib_0.9.0       
## [17] pillar_1.11.1      RColorBrewer_1.1-3 tzdb_0.5.0         rlang_1.1.6       
## [21] stringi_1.8.7      cachem_1.1.0       xfun_0.53          sass_0.4.10       
## [25] S7_0.2.0           timechange_0.3.0   cli_3.6.5          mgcv_1.9-3        
## [29] withr_3.0.2        magrittr_2.0.4     digest_0.6.37      grid_4.5.1        
## [33] rstudioapi_0.17.1  hms_1.1.3          nlme_3.1-168       lifecycle_1.0.4   
## [37] vctrs_0.6.5        evaluate_1.0.5     glue_1.8.0         cellranger_1.1.0  
## [41] farver_2.1.2       rmarkdown_2.30     tools_4.5.1        pkgconfig_2.0.3   
## [45] htmltools_0.5.8.1