library(tidyverse)
library(lubridate)
library(readxl)
library(writexl)
# Set working directory and import data
bikes_tbl <- read_excel("bikes.xlsx")
bikeshops_tbl <- read_excel("bikeshops.xlsx")
orderlines_tbl <- read_excel("orderlines.xlsx")
# Display structure of bikes data
glimpse(bikes_tbl)
## Rows: 97
## Columns: 4
## $ bike.id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ model <chr> "Supersix Evo Black Inc.", "Supersix Evo Hi-Mod Team", "Su…
## $ description <chr> "Road - Elite Road - Carbon", "Road - Elite Road - Carbon"…
## $ price <dbl> 12790, 10660, 7990, 5330, 4260, 3940, 3200, 2660, 2240, 18…
Display “model” and “price” columns with prices sorted in descending order:
bikes_tbl %>%
select(model, price) %>%
arrange(desc(price))
## # A tibble: 97 × 2
## model price
## <chr> <dbl>
## 1 Supersix Evo Black Inc. 12790
## 2 Scalpel-Si Black Inc. 12790
## 3 Habit Hi-Mod Black Inc. 12250
## 4 F-Si Black Inc. 11190
## 5 Supersix Evo Hi-Mod Team 10660
## 6 Synapse Hi-Mod Disc Black Inc. 9590
## 7 Scalpel-Si Race 9060
## 8 F-Si Hi-Mod Team 9060
## 9 Trigger Carbon 1 8200
## 10 Supersix Evo Hi-Mod Dura Ace 1 7990
## # ℹ 87 more rows
Display “model” and “price” columns where price is greater than the mean price:
bikes_tbl %>%
select(model, price) %>%
filter(price > mean(price))
## # A tibble: 35 × 2
## model price
## <chr> <dbl>
## 1 Supersix Evo Black Inc. 12790
## 2 Supersix Evo Hi-Mod Team 10660
## 3 Supersix Evo Hi-Mod Dura Ace 1 7990
## 4 Supersix Evo Hi-Mod Dura Ace 2 5330
## 5 Supersix Evo Hi-Mod Utegra 4260
## 6 CAAD12 Black Inc 5860
## 7 CAAD12 Disc Dura Ace 4260
## 8 Synapse Hi-Mod Disc Black Inc. 9590
## 9 Synapse Hi-Mod Disc Red 7460
## 10 Synapse Hi-Mod Dura Ace 5860
## # ℹ 25 more rows
# Calculate mean price for reference
mean_price <- mean(bikes_tbl$price)
cat("Mean Price: $", round(mean_price, 2), "\n")
## Mean Price: $ 3953.76
# Count of bikes above and below mean
bikes_tbl %>%
summarise(
total_bikes = n(),
above_mean = sum(price > mean(price)),
below_mean = sum(price <= mean(price)),
mean_price = round(mean(price), 2),
max_price = max(price),
min_price = min(price)
)
## # A tibble: 1 × 6
## total_bikes above_mean below_mean mean_price max_price min_price
## <int> <int> <int> <dbl> <dbl> <dbl>
## 1 97 35 62 3954. 12790 415
# Join orderlines with bikes
orderlines_bikes_tbl <- left_join(orderlines_tbl, bikes_tbl,
by = c("product.id" = "bike.id"))
# Join with bikeshops
bike_orderlines_joined_tbl <- orderlines_bikes_tbl %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))
# Wrangle data: separate description and location
bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>%
separate(description,
into = c("category.1", "category.2", "frame.material"),
sep = " - ") %>%
separate(location,
into = c("city", "state"),
sep = ", ",
remove = FALSE)
# Display first few rows
head(bike_orderlines_wrangled_tbl)
## # A tibble: 6 × 16
## ...1 order.id order.line order.date customer.id product.id quantity
## <chr> <dbl> <dbl> <dttm> <dbl> <dbl> <dbl>
## 1 1 1 1 2011-01-07 00:00:00 2 48 1
## 2 2 1 2 2011-01-07 00:00:00 2 52 1
## 3 3 2 1 2011-01-10 00:00:00 10 76 1
## 4 4 2 2 2011-01-10 00:00:00 10 52 1
## 5 5 3 1 2011-01-10 00:00:00 6 2 1
## 6 6 3 2 2011-01-10 00:00:00 6 50 1
## # ℹ 9 more variables: model <chr>, category.1 <chr>, category.2 <chr>,
## # frame.material <chr>, price <dbl>, bikeshop.name <chr>, location <chr>,
## # city <chr>, state <chr>