Load Libraries

# Load required libraries
library(tidyverse)
library(tidyquant)
library(lubridate)
library(readxl)
library(readr)

Import Data

# Import bikes data from Excel - ӨӨРИЙН ЗАМААР СОЛИХ!
bikes_tbl <- read_excel("bikes.xlsx")

# Import bikeshops data from Excel
bikeshops_tbl <- read_excel("bikeshops.xlsx")

# Import orderlines data from Excel
orderlines_tbl <- read_excel("orderlines.xlsx")

Data Wrangling

# Join all three tables
bike_orderlines_joined_tbl <- 
  left_join(orderlines_tbl, bikes_tbl, by = c("product.id" = "bike.id")) %>% 
  left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))

# Wrangle data: separate columns and clean
bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>% 
  # Separate description into category columns
  separate(description, 
           into = c('category.1', 'category.2', 'frame.material'), 
           sep  = ' - ') %>% 
  # Separate location into city and state
  separate(location, 
           into = c('city', 'state'), 
           sep  = ', ',
           remove = FALSE) %>% 
  # Create calculated column for total price
  mutate(total.price = price * quantity) %>% 
  # Remove unnecessary columns
  select(-...1, -location) %>% 
  # Reorder columns
  select(contains('date'), contains('id'), 
         contains('order'), 
         quantity, price, total.price, 
         everything()) %>% 
  # Rename columns
  rename(order_date = order.date) %>%
  # Replace dots with underscores
  set_names(names(.) %>% str_replace_all("\\.", "_"))

Prepare Data

# Calculate monthly sales by category_2
bike_sales_category2_monthly <- bike_orderlines_wrangled_tbl %>%
  select(order_date, category_2, total_price) %>%
  # Convert to date format
  mutate(order_date = ymd(order_date)) %>%
  # Round down to month
  mutate(year_month = floor_date(order_date, unit = "month")) %>%
  # Group by category and month
  group_by(category_2, year_month) %>%
  summarise(sales = sum(total_price)) %>%
  ungroup()

# View the prepared data
head(bike_sales_category2_monthly)
## # A tibble: 6 × 3
##   category_2         year_month  sales
##   <chr>              <date>      <dbl>
## 1 Cross Country Race 2011-01-01 143660
## 2 Cross Country Race 2011-02-01 324400
## 3 Cross Country Race 2011-03-01 142000
## 4 Cross Country Race 2011-04-01 498580
## 5 Cross Country Race 2011-05-01 220310
## 6 Cross Country Race 2011-06-01 364420