1. Load Required Libraries

library(tidyverse)
library(lubridate)
library(readxl)
library(writexl)

2. Import Data Files

# Set working directory and import data
bikes_tbl <- read_excel("bikes.xlsx")
bikeshops_tbl <- read_excel("bikeshops.xlsx")
orderlines_tbl <- read_excel("orderlines.xlsx")

# Display structure of bikes data
glimpse(bikes_tbl)
## Rows: 97
## Columns: 4
## $ bike.id     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ model       <chr> "Supersix Evo Black Inc.", "Supersix Evo Hi-Mod Team", "Su…
## $ description <chr> "Road - Elite Road - Carbon", "Road - Elite Road - Carbon"…
## $ price       <dbl> 12790, 10660, 7990, 5330, 4260, 3940, 3200, 2660, 2240, 18…

3. Show Model and Price (Descending Order)

Display “model” and “price” columns with prices sorted in descending order:

bikes_tbl %>% 
  select(model, price) %>% 
  arrange(desc(price))
## # A tibble: 97 × 2
##    model                          price
##    <chr>                          <dbl>
##  1 Supersix Evo Black Inc.        12790
##  2 Scalpel-Si Black Inc.          12790
##  3 Habit Hi-Mod Black Inc.        12250
##  4 F-Si Black Inc.                11190
##  5 Supersix Evo Hi-Mod Team       10660
##  6 Synapse Hi-Mod Disc Black Inc.  9590
##  7 Scalpel-Si Race                 9060
##  8 F-Si Hi-Mod Team                9060
##  9 Trigger Carbon 1                8200
## 10 Supersix Evo Hi-Mod Dura Ace 1  7990
## # ℹ 87 more rows

4. Show Model and Price (Above Mean Price)

Display “model” and “price” columns where price is greater than the mean price:

bikes_tbl %>% 
  select(model, price) %>% 
  filter(price > mean(price))
## # A tibble: 35 × 2
##    model                          price
##    <chr>                          <dbl>
##  1 Supersix Evo Black Inc.        12790
##  2 Supersix Evo Hi-Mod Team       10660
##  3 Supersix Evo Hi-Mod Dura Ace 1  7990
##  4 Supersix Evo Hi-Mod Dura Ace 2  5330
##  5 Supersix Evo Hi-Mod Utegra      4260
##  6 CAAD12 Black Inc                5860
##  7 CAAD12 Disc Dura Ace            4260
##  8 Synapse Hi-Mod Disc Black Inc.  9590
##  9 Synapse Hi-Mod Disc Red         7460
## 10 Synapse Hi-Mod Dura Ace         5860
## # ℹ 25 more rows

Additional Analysis: Summary Statistics

# Calculate mean price for reference
mean_price <- mean(bikes_tbl$price)
cat("Mean Price: $", round(mean_price, 2), "\n")
## Mean Price: $ 3953.76
# Count of bikes above and below mean
bikes_tbl %>% 
  summarise(
    total_bikes = n(),
    above_mean = sum(price > mean(price)),
    below_mean = sum(price <= mean(price)),
    mean_price = round(mean(price), 2),
    max_price = max(price),
    min_price = min(price)
  )
## # A tibble: 1 × 6
##   total_bikes above_mean below_mean mean_price max_price min_price
##         <int>      <int>      <int>      <dbl>     <dbl>     <dbl>
## 1          97         35         62      3954.     12790       415

Data Joining and Wrangling

# Join orderlines with bikes
orderlines_bikes_tbl <- left_join(orderlines_tbl, bikes_tbl, 
                                   by = c("product.id" = "bike.id"))

# Join with bikeshops
bike_orderlines_joined_tbl <- orderlines_bikes_tbl %>% 
  left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))

# Wrangle data: separate description and location
bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>% 
  separate(description, 
           into = c("category.1", "category.2", "frame.material"), 
           sep  = " - ") %>% 
  separate(location, 
           into = c("city", "state"), 
           sep  = ", ",
           remove = FALSE)

# Display first few rows
head(bike_orderlines_wrangled_tbl)
## # A tibble: 6 × 16
##   ...1  order.id order.line order.date          customer.id product.id quantity
##   <chr>    <dbl>      <dbl> <dttm>                    <dbl>      <dbl>    <dbl>
## 1 1            1          1 2011-01-07 00:00:00           2         48        1
## 2 2            1          2 2011-01-07 00:00:00           2         52        1
## 3 3            2          1 2011-01-10 00:00:00          10         76        1
## 4 4            2          2 2011-01-10 00:00:00          10         52        1
## 5 5            3          1 2011-01-10 00:00:00           6          2        1
## 6 6            3          2 2011-01-10 00:00:00           6         50        1
## # ℹ 9 more variables: model <chr>, category.1 <chr>, category.2 <chr>,
## #   frame.material <chr>, price <dbl>, bikeshop.name <chr>, location <chr>,
## #   city <chr>, state <chr>