Bike Shop Revenue and Category Analysis
Sam DePalma
8/2/2020
Read In “Bike Shop” Data
library(tidyverse)
library(lubridate)
library(tidyquant)
library(readxl)
library(knitr)
library(writexl)
bike_tbl <- read_excel(path = "C:/Users/open/Documents/R PROJECTS/Bike Shop Sales Analysis/bikes.xlsx")
bikeshops_tbl <- read_excel(path = "C:/Users/open/Documents/R PROJECTS/Bike Shop Sales Analysis/bikeshops.xlsx")
orderlines_tbl <- read_excel(path = "C:/Users/open/Documents/R PROJECTS/Bike Shop Sales Analysis/orderlines.xlsx")
Bikes Tibble: Bike ID, Model, Description of Bike, Price
| 1 |
Supersix Evo Black Inc. |
Road - Elite Road - Carbon |
12790 |
| 2 |
Supersix Evo Hi-Mod Team |
Road - Elite Road - Carbon |
10660 |
| 3 |
Supersix Evo Hi-Mod Dura Ace 1 |
Road - Elite Road - Carbon |
7990 |
| 4 |
Supersix Evo Hi-Mod Dura Ace 2 |
Road - Elite Road - Carbon |
5330 |
| 5 |
Supersix Evo Hi-Mod Utegra |
Road - Elite Road - Carbon |
4260 |
## Rows: 97
## Columns: 4
## $ bike.id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ...
## $ model <chr> "Supersix Evo Black Inc.", "Supersix Evo Hi-Mod Team", ...
## $ description <chr> "Road - Elite Road - Carbon", "Road - Elite Road - Carb...
## $ price <dbl> 12790, 10660, 7990, 5330, 4260, 3940, 3200, 2660, 2240,...
Bike Shops Tibble: Bike Shop ID, Bike Shop Name, Location
kable(head(bikeshops_tbl,5))
| 1 |
Pittsburgh Mountain Machines |
Pittsburgh, PA |
| 2 |
Ithaca Mountain Climbers |
Ithaca, NY |
| 3 |
Columbus Race Equipment |
Columbus, OH |
| 4 |
Detroit Cycles |
Detroit, MI |
| 5 |
Cincinnati Speed |
Cincinnati, OH |
## Rows: 30
## Columns: 3
## $ bikeshop.id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16...
## $ bikeshop.name <chr> "Pittsburgh Mountain Machines", "Ithaca Mountain Clim...
## $ location <chr> "Pittsburgh, PA", "Ithaca, NY", "Columbus, OH", "Detr...
Order Lines Tibble: Order ID, Order Line, Order Date, Customer ID, Product ID, Quantity
kable(head(orderlines_tbl,5))
| 1 |
1 |
1 |
2011-01-07 |
2 |
48 |
1 |
| 2 |
1 |
2 |
2011-01-07 |
2 |
52 |
1 |
| 3 |
2 |
1 |
2011-01-10 |
10 |
76 |
1 |
| 4 |
2 |
2 |
2011-01-10 |
10 |
52 |
1 |
| 5 |
3 |
1 |
2011-01-10 |
6 |
2 |
1 |
## Rows: 15,644
## Columns: 7
## $ ...1 <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"...
## $ order.id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7...
## $ order.line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1...
## $ order.date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0...
## $ customer.id <dbl> 2, 2, 10, 10, 6, 6, 6, 6, 6, 22, 8, 8, 8, 8, 16, 16, 16...
## $ product.id <dbl> 48, 52, 76, 52, 2, 50, 1, 4, 34, 26, 96, 66, 35, 72, 45...
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1...
Joining the Data for Analysis
bike_orderlines_joined_tbl <- orderlines_tbl %>%
left_join(bike_tbl, by = c("product.id" = "bike.id")) %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))
Wrangling/Cleansing
## Separating Description
bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>%
separate(description,
into = c("category.1", "category.2", "frame.material"),
sep = " - ",
remove = TRUE) %>%
## Separating City and State
separate(location,
into = c("city", "state"),
sep = ", ",
remove = FALSE) %>%
## Adding Total Price Column
mutate(total.price = price * quantity) %>%
## Reorganizing/Removing/Reordering/Renaming Columns
select(-"...1", -"location") %>%
select(-ends_with(".id")) %>%
bind_cols(bike_orderlines_joined_tbl %>% select(order.id)) %>%
select(contains("date"),contains("id"), contains("order"),
quantity, price, total.price, everything()) %>%
rename(order_date = order.date) %>%
set_names(names(.) %>% str_replace_all("\\.", "_"))
bike_orderlines_wrangled_tbl %>% glimpse()
## Rows: 15,644
## Columns: 13
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 201...
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6...
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4...
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1...
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 15...
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 15...
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the...
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Roa...
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mou...
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", ...
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climber...
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "L...
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY"...
Analysis/Insights
sales_by_year_tbl <- bike_orderlines_wrangled_tbl %>%
select(order_date, total_price) %>%
mutate(year = year(order_date)) %>%
group_by(year) %>%
summarize(sales = sum(total_price)) %>%
ungroup() %>%
mutate(sales_text = scales::dollar(sales))
## `summarise()` ungrouping output (override with `.groups` argument)
Plot 1: Revenue By Year - Revenue steadily increased through 2013, slight decline in 2014, rebounded in 2015 - and surpassed all prior years in terms of sales. Overall, we’re seeing a positive trend in annual revenue.
sales_by_year_tbl %>%
ggplot(aes(x = year, y = sales))+
geom_col(fill = "cornflowerblue")+
geom_label(aes(label = sales_text))+
geom_smooth(method = "lm", se = FALSE)+
theme_tq()+
scale_y_continuous(labels = scales::dollar)+
labs(
title = "Bike Shop: Annual Revenue",
subtitle = "Upward Trend",
x = "",
y = "Revenue")
## `geom_smooth()` using formula 'y ~ x'

Preparation for Second Plot
sales_by_year_cat_2 <- bike_orderlines_wrangled_tbl %>%
select(order_date, total_price, category_2) %>%
mutate(year = year(order_date)) %>%
group_by(year, category_2) %>%
summarize(sales = sum(total_price)) %>%
ungroup() %>%
mutate(sales_text = scales::dollar(sales))
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
Plot 2: Revenue By Year and Category - All categories are trending up. Cross Country, Elite Road and Trail are biggest revenue drivers.
sales_by_year_cat_2 %>%
ggplot(aes(x = year, y = sales, fill = category_2))+
geom_col()+
facet_wrap(~ category_2, ncol = 3, scales = "free_y")+
geom_smooth(method = "lm", se = FALSE)+
theme_tq()+
scale_fill_tq()+
scale_y_continuous(labels = scales::dollar)+
labs(
title = "Bike Shop: Annual Revenue by Category",
subtitle = "All Categories are Trending Up",
x = "",
y = "Revenue")+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
## `geom_smooth()` using formula 'y ~ x'

End of Initial Analysis - Directory Creation/File Saving
fs::dir_create("R PROJECTS/Bike Shop Sales Analysis/data_wrangled_bikeshop")
bike_orderlines_wrangled_tbl %>%
write_xlsx("R PROJECTS/Bike Shop Sales Analysis/data_wrangled_bikeshop/bike_orderlines_wrangled_tbl.xlsx")
bike_orderlines_wrangled_tbl %>%
write_csv("R PROJECTS/Bike Shop Sales Analysis/data_wrangled_bikeshop/bike_orderlines_wrangled_tbl.csv")
bike_orderlines_wrangled_tbl %>%
write_rds("R PROJECTS/Bike Shop Sales Analysis/data_wrangled_bikeshop/bike_orderlines_wrangled_tbl.rds")