library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(writexl)

# Import Excel files
bikes_tbl <- read_excel("bikes.xlsx")
bikeshops_tbl <- read_excel("bikeshops.xlsx")
orderlines_tbl <- read_excel("orderlines.xlsx")
## New names:
## • `` -> `...1`
# Import CSV file
bike_orderlines_tbl <- read_csv("bike_orderlines.csv")
## Rows: 15644 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): model, category_1, category_2, frame_material, bikeshop_name, city...
## dbl  (5): order_id, order_line, quantity, price, total_price
## dttm (1): order_date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Join data
bike_orderlines_bikeshops_joined <- left_join(orderlines_tbl, bikes_tbl, by = c("product.id" = "bike.id")) %>%
  left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))

# Wrangle data
bike_orderlines_wrangled_tbl <- bike_orderlines_bikeshops_joined %>%
  separate(description, into = c("category_1", "category_2", "frame_material"), sep = " - ") %>%
  separate(location, into = c("city", "state"), sep = ", ", remove = FALSE) %>%
  mutate(total_price = price * quantity) %>%
  select(-...1, -location) %>%
  rename(order_date = order.date) %>%
  set_names(names(.) %>% str_replace_all("\\.", "_"))

# Rename columns
bike_orderlines_wrangled_tbl <- bike_orderlines_wrangled_tbl %>%
  rename(`Prime Category` = category_1,
         `Secondary Category` = category_2,
         `Frame Material` = frame_material)

# Create summary table
category_sales_tbl <- bike_orderlines_wrangled_tbl %>%
  distinct(`Prime Category`, `Secondary Category`, `Frame Material`, .keep_all = TRUE) %>%
  mutate(Sales = scales::dollar(total_price)) %>%
  select(`Prime Category`, `Secondary Category`, `Frame Material`, Sales)

print(category_sales_tbl)
## # A tibble: 13 × 4
##    `Prime Category` `Secondary Category` `Frame Material` Sales  
##    <chr>            <chr>                <chr>            <chr>  
##  1 Mountain         Over Mountain        Carbon           $6,070 
##  2 Mountain         Trail                Aluminum         $2,770 
##  3 Road             Elite Road           Carbon           $10,660
##  4 Road             Endurance Road       Aluminum         $1,570 
##  5 Road             Endurance Road       Carbon           $4,800 
##  6 Mountain         Sport                Aluminum         $480   
##  7 Mountain         Cross Country Race   Carbon           $22,380
##  8 Mountain         Cross Country Race   Aluminum         $2,060 
##  9 Road             Cyclocross           Carbon           $1,960 
## 10 Road             Triathalon           Carbon           $4,500 
## 11 Road             Elite Road           Aluminum         $2,240 
## 12 Mountain         Trail                Carbon           $8,960 
## 13 Mountain         Fat Bike             Aluminum         $3,730