# ============================================================================
# Bike Sales Analysis by Product Category
# Author: Temuulen Nyambayar

# Date: October 7, 2025
# ============================================================================

# Load required libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
# ============================================================================
# STEP 1: Import data files
# ============================================================================
bikes_tbl <- read_excel("C:/Users/dell/OneDrive - University of Finance and Economics/Documents/datas_rstudio/bikes.xlsx")
bikeshops_tbl <- read_excel("C:/Users/dell/OneDrive - University of Finance and Economics/Documents/datas_rstudio/bikeshops.xlsx")
orderlines_tbl <- read_excel("C:/Users/dell/OneDrive - University of Finance and Economics/Documents/datas_rstudio/orderlines.xlsx")
## New names:
## • `` -> `...1`
# ============================================================================
# STEP 2: Join and wrangle the data
# ============================================================================
bike_orderlines_wrangled_tbl <- orderlines_tbl %>%
  left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>% 
  left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>%
  separate(description, 
           into = c('category.1', 'category.2', 'frame.material'), 
           sep  = ' - ') %>% 
  separate(location, 
           into = c('city', 'state'), 
           sep  = ', ',
           remove = FALSE) %>% 
  mutate(total.price = price * quantity) %>% 
  select(-...1, -location) %>% 
  select(contains('date'), contains('id'), 
         contains('order'), 
         quantity, price, total.price, 
         everything()) %>% 
  rename(order_date = order.date) %>%
  set_names(names(.) %>% str_replace_all("\\.", "_"))

# ============================================================================
# STEP 3: Calculate sales by product categories
# ============================================================================
category_sales_tbl <- bike_orderlines_wrangled_tbl %>%
  group_by(category_1, category_2, frame_material) %>%
  summarise(Sales = sum(total_price), .groups = 'drop') %>%
  arrange(desc(Sales)) %>%
  rename(`Prime category` = category_1,
         `Secondary category` = category_2,
         `Frame Material` = frame_material) %>%
  mutate(Sales = dollar(Sales))

# Display results
category_sales_tbl
## # A tibble: 13 × 4
##    `Prime category` `Secondary category` `Frame Material` Sales      
##    <chr>            <chr>                <chr>            <chr>      
##  1 Mountain         Cross Country Race   Carbon           $15,906,070
##  2 Road             Elite Road           Carbon           $9,696,870 
##  3 Road             Endurance Road       Carbon           $8,768,610 
##  4 Mountain         Over Mountain        Carbon           $7,571,270 
##  5 Road             Elite Road           Aluminum         $5,637,795 
##  6 Mountain         Trail                Carbon           $4,835,850 
##  7 Mountain         Trail                Aluminum         $4,537,610 
##  8 Road             Triathalon           Carbon           $4,053,750 
##  9 Mountain         Cross Country Race   Aluminum         $3,318,560 
## 10 Road             Cyclocross           Carbon           $2,108,120 
## 11 Mountain         Sport                Aluminum         $1,932,755 
## 12 Road             Endurance Road       Aluminum         $1,612,450 
## 13 Mountain         Fat Bike             Aluminum         $1,052,620