Bike Shop Revenue and Category Analysis

Sam DePalma

8/2/2020

Read In “Bike Shop” Data

library(tidyverse)
library(lubridate)
library(tidyquant)
library(readxl)
library(knitr)
library(writexl)

bike_tbl <- read_excel(path = "C:/Users/open/Documents/R PROJECTS/Bike Shop Sales Analysis/bikes.xlsx")
bikeshops_tbl <- read_excel(path = "C:/Users/open/Documents/R PROJECTS/Bike Shop Sales Analysis/bikeshops.xlsx")
orderlines_tbl <- read_excel(path = "C:/Users/open/Documents/R PROJECTS/Bike Shop Sales Analysis/orderlines.xlsx")

Bikes Tibble: Bike ID, Model, Description of Bike, Price

kable(head(bike_tbl,5))
bike.id model description price
1 Supersix Evo Black Inc. Road - Elite Road - Carbon 12790
2 Supersix Evo Hi-Mod Team Road - Elite Road - Carbon 10660
3 Supersix Evo Hi-Mod Dura Ace 1 Road - Elite Road - Carbon 7990
4 Supersix Evo Hi-Mod Dura Ace 2 Road - Elite Road - Carbon 5330
5 Supersix Evo Hi-Mod Utegra Road - Elite Road - Carbon 4260
glimpse(bike_tbl)
## Rows: 97
## Columns: 4
## $ bike.id     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ...
## $ model       <chr> "Supersix Evo Black Inc.", "Supersix Evo Hi-Mod Team", ...
## $ description <chr> "Road - Elite Road - Carbon", "Road - Elite Road - Carb...
## $ price       <dbl> 12790, 10660, 7990, 5330, 4260, 3940, 3200, 2660, 2240,...

Bike Shops Tibble: Bike Shop ID, Bike Shop Name, Location

kable(head(bikeshops_tbl,5))
bikeshop.id bikeshop.name location
1 Pittsburgh Mountain Machines Pittsburgh, PA
2 Ithaca Mountain Climbers Ithaca, NY
3 Columbus Race Equipment Columbus, OH
4 Detroit Cycles Detroit, MI
5 Cincinnati Speed Cincinnati, OH
glimpse(bikeshops_tbl)
## Rows: 30
## Columns: 3
## $ bikeshop.id   <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16...
## $ bikeshop.name <chr> "Pittsburgh Mountain Machines", "Ithaca Mountain Clim...
## $ location      <chr> "Pittsburgh, PA", "Ithaca, NY", "Columbus, OH", "Detr...

Order Lines Tibble: Order ID, Order Line, Order Date, Customer ID, Product ID, Quantity

kable(head(orderlines_tbl,5))
…1 order.id order.line order.date customer.id product.id quantity
1 1 1 2011-01-07 2 48 1
2 1 2 2011-01-07 2 52 1
3 2 1 2011-01-10 10 76 1
4 2 2 2011-01-10 10 52 1
5 3 1 2011-01-10 6 2 1
glimpse(orderlines_tbl)
## Rows: 15,644
## Columns: 7
## $ ...1        <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"...
## $ order.id    <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7...
## $ order.line  <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1...
## $ order.date  <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0...
## $ customer.id <dbl> 2, 2, 10, 10, 6, 6, 6, 6, 6, 22, 8, 8, 8, 8, 16, 16, 16...
## $ product.id  <dbl> 48, 52, 76, 52, 2, 50, 1, 4, 34, 26, 96, 66, 35, 72, 45...
## $ quantity    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1...

Joining the Data for Analysis

bike_orderlines_joined_tbl <- orderlines_tbl %>% 
    left_join(bike_tbl, by = c("product.id" = "bike.id")) %>% 
    left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))

Wrangling/Cleansing

## Separating Description
bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>% 
    separate(description,
             into = c("category.1", "category.2", "frame.material"),
             sep = " - ",
             remove = TRUE) %>% 
    
## Separating City and State
    
    separate(location,
             into = c("city", "state"),
             sep = ", ",
             remove = FALSE) %>% 
        
## Adding Total Price Column
        
    mutate(total.price = price * quantity) %>%
    
## Reorganizing/Removing/Reordering/Renaming Columns

    select(-"...1", -"location") %>% 
    select(-ends_with(".id")) %>% 
    bind_cols(bike_orderlines_joined_tbl %>% select(order.id)) %>% 
    
    select(contains("date"),contains("id"), contains("order"),
           quantity, price, total.price, everything()) %>% 
    
    rename(order_date = order.date) %>%
    set_names(names(.) %>% str_replace_all("\\.", "_")) 

    bike_orderlines_wrangled_tbl %>% glimpse()
## Rows: 15,644
## Columns: 13
## $ order_date     <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 201...
## $ order_id       <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6...
## $ order_line     <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4...
## $ quantity       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1...
## $ price          <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 15...
## $ total_price    <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 15...
## $ model          <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the...
## $ category_1     <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Roa...
## $ category_2     <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mou...
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", ...
## $ bikeshop_name  <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climber...
## $ city           <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "L...
## $ state          <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY"...

Analysis/Insights

sales_by_year_tbl <- bike_orderlines_wrangled_tbl %>% 
    select(order_date, total_price) %>% 
    mutate(year = year(order_date)) %>% 
    group_by(year) %>% 
    summarize(sales = sum(total_price)) %>%
    ungroup() %>%
    mutate(sales_text = scales::dollar(sales))
## `summarise()` ungrouping output (override with `.groups` argument)

Plot 1: Revenue By Year - Revenue steadily increased through 2013, slight decline in 2014, rebounded in 2015 - and surpassed all prior years in terms of sales. Overall, we’re seeing a positive trend in annual revenue.

sales_by_year_tbl %>% 
    ggplot(aes(x = year, y = sales))+
    geom_col(fill = "cornflowerblue")+
    geom_label(aes(label = sales_text))+
    geom_smooth(method = "lm", se = FALSE)+
    theme_tq()+
    scale_y_continuous(labels = scales::dollar)+
    labs(
        title = "Bike Shop: Annual Revenue",
        subtitle = "Upward Trend",
        x = "",
        y = "Revenue")
## `geom_smooth()` using formula 'y ~ x'

Preparation for Second Plot

sales_by_year_cat_2 <- bike_orderlines_wrangled_tbl %>% 
    select(order_date, total_price, category_2) %>% 
    mutate(year = year(order_date)) %>% 
    group_by(year, category_2) %>% 
    summarize(sales = sum(total_price)) %>%
    ungroup() %>%
    mutate(sales_text = scales::dollar(sales))
## `summarise()` regrouping output by 'year' (override with `.groups` argument)

End of Initial Analysis - Directory Creation/File Saving

fs::dir_create("R PROJECTS/Bike Shop Sales Analysis/data_wrangled_bikeshop")

bike_orderlines_wrangled_tbl %>% 
    write_xlsx("R PROJECTS/Bike Shop Sales Analysis/data_wrangled_bikeshop/bike_orderlines_wrangled_tbl.xlsx")

bike_orderlines_wrangled_tbl %>% 
    write_csv("R PROJECTS/Bike Shop Sales Analysis/data_wrangled_bikeshop/bike_orderlines_wrangled_tbl.csv")

bike_orderlines_wrangled_tbl %>% 
    write_rds("R PROJECTS/Bike Shop Sales Analysis/data_wrangled_bikeshop/bike_orderlines_wrangled_tbl.rds")