Load Libraries
# Load required libraries
library(tidyverse)
library(tidyquant)
library(lubridate)
library(readxl)
library(readr)
Import Data
# Import bikes data from Excel - ӨӨРИЙН ЗАМААР СОЛИХ!
bikes_tbl <- read_excel("bikes.xlsx")
# Import bikeshops data from Excel
bikeshops_tbl <- read_excel("bikeshops.xlsx")
# Import orderlines data from Excel
orderlines_tbl <- read_excel("orderlines.xlsx")
Data Wrangling
# Join all three tables
bike_orderlines_joined_tbl <-
left_join(orderlines_tbl, bikes_tbl, by = c("product.id" = "bike.id")) %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))
# Wrangle data: separate columns and clean
bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>%
# Separate description into category columns
separate(description,
into = c('category.1', 'category.2', 'frame.material'),
sep = ' - ') %>%
# Separate location into city and state
separate(location,
into = c('city', 'state'),
sep = ', ',
remove = FALSE) %>%
# Create calculated column for total price
mutate(total.price = price * quantity) %>%
# Remove unnecessary columns
select(-...1, -location) %>%
# Reorder columns
select(contains('date'), contains('id'),
contains('order'),
quantity, price, total.price,
everything()) %>%
# Rename columns
rename(order_date = order.date) %>%
# Replace dots with underscores
set_names(names(.) %>% str_replace_all("\\.", "_"))
Prepare Data
# Calculate monthly sales by category_2
bike_sales_category2_monthly <- bike_orderlines_wrangled_tbl %>%
select(order_date, category_2, total_price) %>%
# Convert to date format
mutate(order_date = ymd(order_date)) %>%
# Round down to month
mutate(year_month = floor_date(order_date, unit = "month")) %>%
# Group by category and month
group_by(category_2, year_month) %>%
summarise(sales = sum(total_price)) %>%
ungroup()
# View the prepared data
head(bike_sales_category2_monthly)
## # A tibble: 6 × 3
## category_2 year_month sales
## <chr> <date> <dbl>
## 1 Cross Country Race 2011-01-01 143660
## 2 Cross Country Race 2011-02-01 324400
## 3 Cross Country Race 2011-03-01 142000
## 4 Cross Country Race 2011-04-01 498580
## 5 Cross Country Race 2011-05-01 220310
## 6 Cross Country Race 2011-06-01 364420
Sales Trends by Bike Category
# Generate the plot with separate panels for each bike category
bike_sales_category2_monthly %>%
ggplot(aes(x = year_month, y = sales, color = category_2)) +
# Add points and lines
geom_point() +
geom_line(linewidth = 1) +
# Create separate panel for each category
facet_wrap(~ category_2, scales = "free_y", ncol = 3) +
# Format y-axis to show millions
scale_y_continuous(labels = scales::dollar_format(scale = 1e-6, suffix = "M")) +
# Format x-axis to show years
scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
# Apply tidyquant color palette
scale_color_tq() +
# Apply tidyquant theme
theme_tq() +
# Customize theme
theme(
legend.position = "none",
axis.text.x = element_text(angle = 0, hjust = 0.5),
strip.text = element_text(face = "bold", size = 10),
plot.title = element_text(face = "bold", size = 14)
) +
# Add labels
labs(
title = "Sales Trends by Bike Category (2011-2015)",
x = "Order Date",
y = "Sales",
caption = "Monthly sales data by secondary bike category"
)
