Setup
Load Libraries
library(tidyverse)
library(readxl)
library(lubridate)
Data Import and Wrangling
# Import data files
bikes_tbl <- read_excel("./bikes.xlsx")
bikeshops_tbl <- read_excel("./bikeshops.xlsx")
orderlines_tbl <- read_excel("./orderlines.xlsx")
# Join and wrangle data
bike_orderlines_wrangled_tbl <- orderlines_tbl %>%
left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>%
separate(description,
into = c('category_1', 'category_2', 'frame_material'),
sep = ' - ') %>%
separate(location,
into = c('city', 'state'),
sep = ', ',
remove = TRUE) %>%
mutate(total_price = price * quantity) %>%
select(order.date, product.id, customer.id, order.id, order.line,
quantity, price, total_price,
everything()) %>%
rename(order_date = order.date,
product_id = product.id,
customer_id = customer.id,
order_id = order.id,
order_line = order.line)
Exploratory Analysis
Product Categories
cat("Primary Categories:\n")
## Primary Categories:
print(unique(bike_orderlines_wrangled_tbl$category_1))
## [1] "Mountain" "Road"
cat("\nSecondary Categories:\n")
##
## Secondary Categories:
print(unique(bike_orderlines_wrangled_tbl$category_2))
## [1] "Over Mountain" "Trail" "Elite Road"
## [4] "Endurance Road" "Sport" "Cross Country Race"
## [7] "Cyclocross" "Triathalon" "Fat Bike"
Sales by Primary Category
sales_by_cat1 <- bike_orderlines_wrangled_tbl %>%
group_by(category_1) %>%
summarise(Sales = sum(total_price)) %>%
arrange(desc(Sales)) %>%
mutate(Sales_Formatted = scales::dollar(Sales))
knitr::kable(sales_by_cat1 %>% select(category_1, Sales_Formatted),
col.names = c("Primary Category", "Sales"),
align = 'lr')
| Mountain |
$39,154,735 |
| Road |
$31,877,595 |
Time Series Analysis
Annual Sales
bike_sales_y <- bike_orderlines_wrangled_tbl %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year = year(order_date)) %>%
group_by(year) %>%
summarise(sales = sum(total_price)) %>%
ungroup()
knitr::kable(bike_sales_y %>%
mutate(sales_fmt = scales::dollar(sales)) %>%
select(year, sales_fmt),
col.names = c("Year", "Sales"),
align = 'lr')
| 2011 |
$11,292,885 |
| 2012 |
$12,163,075 |
| 2013 |
$16,480,775 |
| 2014 |
$13,924,085 |
| 2015 |
$17,171,510 |
Annual Revenue Trend
ggplot(bike_sales_y, aes(x = year, y = sales)) +
geom_point(size = 5, color = "#FF1493") +
geom_line(linewidth = 2, color = "#FF69B4") +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE,
color = "#C71585", linewidth = 1.5) +
expand_limits(y = c(0, 20e6)) +
scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) +
labs(
title = "Annual Revenue Trend (2011-2015)",
subtitle = "Sales are trending up and to the right!",
x = "Year",
y = "Sales (Millions)",
caption = "Total sales from 2011 to 2015"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 16, face = "bold", color = "#C71585"),
plot.subtitle = element_text(size = 12, color = "#FF69B4"),
axis.title = element_text(size = 12, face = "bold"),
panel.grid.minor = element_blank()
)

Monthly Sales
bike_sales_m <- bike_orderlines_wrangled_tbl %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_month = floor_date(order_date, unit = "month")) %>%
group_by(year_month) %>%
summarise(sales = sum(total_price)) %>%
ungroup()
Year-over-Year Growth
bike_sales_growth <- bike_sales_y %>%
mutate(sales_lag_1 = lag(sales, n = 1)) %>%
mutate(sales_lag_1 = ifelse(is.na(sales_lag_1), sales, sales_lag_1)) %>%
mutate(diff_1 = sales - sales_lag_1,
pct_diff_1 = diff_1 / sales_lag_1)
knitr::kable(bike_sales_growth %>%
select(year, sales, diff_1, pct_diff_1) %>%
mutate(sales = scales::dollar(sales),
diff_1 = scales::dollar(diff_1),
pct_diff_1 = scales::percent(pct_diff_1)),
col.names = c("Year", "Sales", "Change ($)", "Change (%)"),
align = 'lrrr')
| 2011 |
$11,292,885 |
$0 |
0.0% |
| 2012 |
$12,163,075 |
$870,190 |
7.7% |
| 2013 |
$16,480,775 |
$4,317,700 |
35.5% |
| 2014 |
$13,924,085 |
-$2,556,690 |
-15.5% |
| 2015 |
$17,171,510 |
$3,247,425 |
23.3% |
Sales by Secondary Category
revenue_by_category2_tbl <- bike_orderlines_wrangled_tbl %>%
group_by(category_2) %>%
summarise(revenue = sum(total_price)) %>%
ungroup()
Category Revenue Comparison
revenue_by_category2_tbl %>%
mutate(category_2 = fct_reorder(category_2, revenue)) %>%
ggplot(aes(x = category_2, y = revenue)) +
geom_col(fill = "#FF69B4", alpha = 0.8) +
coord_flip() +
scale_y_continuous(labels = scales::dollar_format()) +
labs(
title = "Revenue by Bike Category",
subtitle = "Total revenue across all bike shops",
x = "category_2",
y = "Revenue",
caption = "Data from 2011-2015"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 18, face = "bold", color = "#4A90A4"),
plot.subtitle = element_text(size = 12, color = "#666666"),
axis.title.x = element_text(size = 11, face = "bold"),
axis.title.y = element_text(size = 11, face = "bold"),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(size = 10),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_line(color = "#E5E5E5")
)

Summary
This analysis reveals:
- Strong Growth Trend: Annual sales show consistent
upward trajectory
- Top Categories: Mountain and Road bikes dominate
the product mix
- Seasonal Patterns: Monthly analysis shows cyclical
patterns
Analysis completed on 2025-10-21
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: Asia/Taipei
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] readxl_1.4.5 lubridate_1.9.4 forcats_1.0.1 stringr_1.5.2
## [5] dplyr_1.1.4 purrr_1.1.0 readr_2.1.5 tidyr_1.3.1
## [9] tibble_3.3.0 ggplot2_4.0.0 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] Matrix_1.7-3 gtable_0.3.6 jsonlite_2.0.0 compiler_4.5.1
## [5] tidyselect_1.2.1 jquerylib_0.1.4 splines_4.5.1 scales_1.4.0
## [9] yaml_2.3.10 fastmap_1.2.0 lattice_0.22-7 R6_2.6.1
## [13] labeling_0.4.3 generics_0.1.4 knitr_1.50 bslib_0.9.0
## [17] pillar_1.11.1 RColorBrewer_1.1-3 tzdb_0.5.0 rlang_1.1.6
## [21] stringi_1.8.7 cachem_1.1.0 xfun_0.53 sass_0.4.10
## [25] S7_0.2.0 timechange_0.3.0 cli_3.6.5 mgcv_1.9-3
## [29] withr_3.0.2 magrittr_2.0.4 digest_0.6.37 grid_4.5.1
## [33] rstudioapi_0.17.1 hms_1.1.3 nlme_3.1-168 lifecycle_1.0.4
## [37] vctrs_0.6.5 evaluate_1.0.5 glue_1.8.0 cellranger_1.1.0
## [41] farver_2.1.2 rmarkdown_2.30 tools_4.5.1 pkgconfig_2.0.3
## [45] htmltools_0.5.8.1