# Load required libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
# Import data
bikes_tbl <- read_excel("C:/Users/dell/OneDrive - University of Finance and Economics/Documents/datas_rstudio/bikes.xlsx")
bikeshops_tbl <- read_excel("C:/Users/dell/OneDrive - University of Finance and Economics/Documents/datas_rstudio/bikeshops.xlsx")
orderlines_tbl <- read_excel("C:/Users/dell/OneDrive - University of Finance and Economics/Documents/datas_rstudio/orderlines.xlsx")
## New names:
## • `` -> `...1`
# Join and wrangle data
bike_orderlines_wrangled_tbl <- left_join(orderlines_tbl, bikes_tbl, by = c("product.id" = "bike.id")) %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>%
separate(description,
into = c('category.1', 'category.2', 'frame.material'),
sep = ' - ') %>%
separate(location,
into = c('city', 'state'),
sep = ', ',
remove = FALSE) %>%
mutate(total.price = price * quantity) %>%
select(-...1, -location) %>%
select(contains('date'), contains('id'),
contains('order'),
quantity, price, total.price,
everything()) %>%
rename(order_date = order.date) %>%
set_names(names(.) %>% str_replace_all("\\.", "_"))
# Analyze sales by product categories
category_sales_tbl <- bike_orderlines_wrangled_tbl %>%
group_by(category_1, category_2, frame_material) %>%
summarise(Sales = sum(total_price), .groups = 'drop') %>%
arrange(desc(Sales)) %>%
rename(`Prime category` = category_1,
`Secondary category` = category_2,
`Frame Material` = frame_material) %>%
mutate(Sales = dollar(Sales))
# Display the results
print("Product Categories by Sales (Highest to Lowest):")
## [1] "Product Categories by Sales (Highest to Lowest):"
print(category_sales_tbl)
## # A tibble: 13 × 4
## `Prime category` `Secondary category` `Frame Material` Sales
## <chr> <chr> <chr> <chr>
## 1 Mountain Cross Country Race Carbon $15,906,070
## 2 Road Elite Road Carbon $9,696,870
## 3 Road Endurance Road Carbon $8,768,610
## 4 Mountain Over Mountain Carbon $7,571,270
## 5 Road Elite Road Aluminum $5,637,795
## 6 Mountain Trail Carbon $4,835,850
## 7 Mountain Trail Aluminum $4,537,610
## 8 Road Triathalon Carbon $4,053,750
## 9 Mountain Cross Country Race Aluminum $3,318,560
## 10 Road Cyclocross Carbon $2,108,120
## 11 Mountain Sport Aluminum $1,932,755
## 12 Road Endurance Road Aluminum $1,612,450
## 13 Mountain Fat Bike Aluminum $1,052,620
# Create a visualization
viz_tbl <- bike_orderlines_wrangled_tbl %>%
group_by(category_1, category_2, frame_material) %>%
summarise(Sales = sum(total_price), .groups = 'drop') %>%
arrange(desc(Sales))
# Top 15 categories plot
viz_tbl %>%
head(15) %>%
mutate(category = paste(category_1, category_2, frame_material, sep = " - "),
category = fct_reorder(category, Sales)) %>%
ggplot(aes(x = Sales, y = category)) +
geom_col(fill = "#2c3e50") +
scale_x_continuous(labels = dollar_format()) +
labs(
title = "Top 15 Product Categories by Sales",
subtitle = "Mountain bikes lead in total revenue",
x = "Total Sales",
y = "Product Category"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 12, color = "gray40"),
axis.title = element_text(face = "bold")
)

# Summary statistics
cat("\n\nSummary Statistics:\n")
##
##
## Summary Statistics:
cat("Total Revenue:", dollar(sum(bike_orderlines_wrangled_tbl$total_price)), "\n")
## Total Revenue: $71,032,330
cat("Number of Product Categories:", nrow(category_sales_tbl), "\n")
## Number of Product Categories: 13
cat("Average Sales per Category:", dollar(mean(viz_tbl$Sales)), "\n")
## Average Sales per Category: $5,464,025