# ============================================================================
# Bike Sales Analysis by Product Category
# Author: Temuulen Sukhbat
# Date: October 7, 2025
# ============================================================================
# Load required libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
# ============================================================================
# STEP 1: Import data files
# ============================================================================
bikes_tbl <- read_excel("C:/Users/dell/OneDrive - University of Finance and Economics/Documents/datas_rstudio/bikes.xlsx")
bikeshops_tbl <- read_excel("C:/Users/dell/OneDrive - University of Finance and Economics/Documents/datas_rstudio/bikeshops.xlsx")
orderlines_tbl <- read_excel("C:/Users/dell/OneDrive - University of Finance and Economics/Documents/datas_rstudio/orderlines.xlsx")
## New names:
## • `` -> `...1`
# ============================================================================
# STEP 2: Join and wrangle the data
# ============================================================================
bike_orderlines_wrangled_tbl <- orderlines_tbl %>%
left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>%
separate(description,
into = c('category.1', 'category.2', 'frame.material'),
sep = ' - ') %>%
separate(location,
into = c('city', 'state'),
sep = ', ',
remove = FALSE) %>%
mutate(total.price = price * quantity) %>%
select(-...1, -location) %>%
select(contains('date'), contains('id'),
contains('order'),
quantity, price, total.price,
everything()) %>%
rename(order_date = order.date) %>%
set_names(names(.) %>% str_replace_all("\\.", "_"))
# ============================================================================
# STEP 3: Calculate sales by product categories
# ============================================================================
category_sales_tbl <- bike_orderlines_wrangled_tbl %>%
group_by(category_1, category_2, frame_material) %>%
summarise(Sales = sum(total_price), .groups = 'drop') %>%
arrange(desc(Sales)) %>%
rename(`Prime category` = category_1,
`Secondary category` = category_2,
`Frame Material` = frame_material) %>%
mutate(Sales = dollar(Sales))
# Display results
category_sales_tbl
## # A tibble: 13 × 4
## `Prime category` `Secondary category` `Frame Material` Sales
## <chr> <chr> <chr> <chr>
## 1 Mountain Cross Country Race Carbon $15,906,070
## 2 Road Elite Road Carbon $9,696,870
## 3 Road Endurance Road Carbon $8,768,610
## 4 Mountain Over Mountain Carbon $7,571,270
## 5 Road Elite Road Aluminum $5,637,795
## 6 Mountain Trail Carbon $4,835,850
## 7 Mountain Trail Aluminum $4,537,610
## 8 Road Triathalon Carbon $4,053,750
## 9 Mountain Cross Country Race Aluminum $3,318,560
## 10 Road Cyclocross Carbon $2,108,120
## 11 Mountain Sport Aluminum $1,932,755
## 12 Road Endurance Road Aluminum $1,612,450
## 13 Mountain Fat Bike Aluminum $1,052,620