Bike Models and Sales

An Analysis of Revenue Trends by Category Using R

Author

Jesus Ambriz-Sanchez

Published

October 10, 2025


1 Introduction

Summary: This report analyzes bike sales performance across models, categories, and years using R. The data includes transaction records, product details, and pricing information, allowing us to identify revenue trends, top-performing models, and seasonal patterns.

Navigation: Use the Table of Contents to navigate sections and click the 🞂 Code buttons throughout the document to view or hide R code for each example.

2 Datasets

In this section, I show what files are being used.

Code
# Resolve paths
bikes_path      <- file.path(params$data_dir, "bikes.xlsx")
bikeshops_path  <- file.path(params$data_dir, "bikeshops.xlsx")
orderlines_path <- file.path(params$data_dir, "orderlines.xlsx")

stopifnot(file.exists(bikes_path), file.exists(bikeshops_path), file.exists(orderlines_path))

bike_tbl       <- read_excel(path = bikes_path)
bikeshops_tbl  <- read_excel(path = bikeshops_path)
orderlines_tbl <- read_excel(path = orderlines_path)

2.1 Data Structure

Here is a quick peek of the raw data structure.

  • Dataset 1: Composed of 4 columns and 97 rows
  • Dataset 2: Composed of 3 columns and 30 rows
  • Dataset 3: Composed of 7 columns and 15,644 rows
NoteFunctions Used

glimpse()

Code
glimpse(bike_tbl)
Rows: 97
Columns: 4
$ bike.id     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
$ model       <chr> "Supersix Evo Black Inc.", "Supersix Evo Hi-Mod Team", "Su…
$ description <chr> "Road - Elite Road - Carbon", "Road - Elite Road - Carbon"…
$ price       <dbl> 12790, 10660, 7990, 5330, 4260, 3940, 3200, 2660, 2240, 18…
Code
glimpse(bikeshops_tbl)
Rows: 30
Columns: 3
$ bikeshop.id   <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
$ bikeshop.name <chr> "Pittsburgh Mountain Machines", "Ithaca Mountain Climber…
$ location      <chr> "Pittsburgh, PA", "Ithaca, NY", "Columbus, OH", "Detroit…
Code
glimpse(orderlines_tbl)
Rows: 15,644
Columns: 7
$ X__1        <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "…
$ order.id    <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7…
$ order.line  <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2…
$ order.date  <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-01-1…
$ customer.id <dbl> 2, 2, 10, 10, 6, 6, 6, 6, 6, 22, 8, 8, 8, 8, 16, 16, 16, 1…
$ product.id  <dbl> 48, 52, 76, 52, 2, 50, 1, 4, 34, 26, 96, 66, 35, 72, 45, 3…
$ quantity    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1…

2.2 Data Visual Sample

NoteFunctions Used

head()

Code
bike_tbl %>% head(5)
Code
bikeshops_tbl %>% head(5)
Code
orderlines_tbl %>% head(5)

2.3 Interactive Tables

Creating an interactive table with renamed variables.

NoteFunctions

datatable(), list(), rename()

Code
tagList(
  h3("Table 1: Bike Models and Prices"),
bike_tbl %>%   
  rename("Bike ID" = bike.id) %>%
  rename("Model" = model) %>%
  rename("Description" = description) %>%
  rename("Price" = price) %>%
  datatable(options = list(pageLength = 8, scrollX = TRUE))) 

Table 1: Bike Models and Prices

Code
tagList(
  h3("Table 2: Bike Shops and Locations"),
bikeshops_tbl %>%
  rename("Bike Shop ID" = bikeshop.id) %>%
  rename("Bike Shop Name" = bikeshop.name) %>%
  rename("Location" = location) %>%
  datatable(options = list(pageLength = 8, scrollX = TRUE)))

Table 2: Bike Shops and Locations

Code
tagList(
  h3("Table 3: Orderlines, Dates, and Sales"),
orderlines_tbl %>%
  rename("N" = X__1) %>%
  rename("Order ID" = order.id) %>%
  rename("Order " = order.line) %>%
  rename("Order Date" = order.date) %>%
  rename("Bike Shop ID" = customer.id) %>%
  rename("Bike ID" = product.id) %>%
  rename("Quantity" = quantity) %>%
  datatable(options = list(pageLength = 6, scrollX = TRUE)))

Table 3: Orderlines, Dates, and Sales



3 Left Joining

Joining the orderline dataset to bikes and shops using ID keys.

NoteFunctions

left_join(), rename(), head()

Code
bike_orderlines_joined_tbl <- orderlines_tbl %>%
  left_join(bike_tbl,      by = c("product.id"  = "bike.id")) %>%
  left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) 

bike_orderlines_joined_tbl %>%
    rename("N" = X__1) %>%
    rename("Order ID" = order.id) %>%
    rename("Order " = order.line) %>%
    rename("Order Date" = order.date) %>%
    rename("Bike Shop ID" = customer.id) %>%
    rename("Bike ID" = product.id) %>%
    rename("Quantity" = quantity) %>%
    rename("Model" = model) %>%
    rename("Description" = description) %>%
    rename("Price" = price) %>%
    rename("Bike Shop Name" = bikeshop.name) %>%
    rename("Location" = location) %>% head(10)
WarningAdvice

Click top right arrow of the table to view all columns.



4 Wrangling

Selecting, creating, splitting, renaming, and standardizing column names efficiently. Below is the before and after.

NoteFunctions

colnames(), mutate(), select(), bind_cols(), rename(), tibble(), seq_len(), min(), length(), head(), glimpse()

Code
# Before
before_cols <- colnames(bike_orderlines_joined_tbl)

bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>%
  separate(description,
           into = c("category.1","category.2","frame.material"),
           sep = " - ",
           remove = TRUE) %>%
  separate(location, into = c("city", "state"), sep = ", ", remove = FALSE) %>%
  mutate(total.sale = price * quantity) %>%
  select(-X__1, -location) %>%
  select(-ends_with(".id")) %>%
  bind_cols(bike_orderlines_joined_tbl %>% select(order.id)) %>%
  select(contains("date"), everything()) %>%
  select(contains("date"), contains("id"), contains("order"),
         quantity, price, total.sale, everything()) %>%
  rename(order_date = order.date) %>%
  rename_with(tolower) %>%
  rename_with(~ stringr::str_replace_all(.x, "[.]", "_"))

after_cols <- colnames(bike_orderlines_wrangled_tbl)

tibble(
  before = before_cols,
  after  = after_cols[seq_len(min(length(after_cols), length(before_cols)))]
) %>% 
  rename("Old Names" = before) %>%
  rename("New Names" = after) %>% head(12)
Code
glimpse(bike_orderlines_wrangled_tbl)
Rows: 15,644
Columns: 13
$ order_date     <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
$ order_id       <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
$ order_line     <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
$ quantity       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
$ price          <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
$ total_sale     <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
$ model          <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
$ category_1     <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
$ category_2     <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
$ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
$ bikeshop_name  <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
$ city           <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
$ state          <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…



5 Revenue

5.1 Revenue by Year

Using ggplot functions to graph an overview of total revenue by year.

NoteFunctions

select(), mutate(), year(), group_by(), summarize(), sum(), dollar(), rename(), head(), ggplot(), aes(), geom_col(), geom_label(), geom_smooth(), theme_tq(), scale_y_continuous(), labs()

Code
sales_by_year_tbl <- bike_orderlines_wrangled_tbl %>% 
  select(order_date, total_sale) %>%
  mutate(year = year(order_date)) %>%
  group_by(year) %>%
  summarize(annual_sale = sum(total_sale), .groups = "drop") %>%
  mutate(annual_sales_dtxt = scales::dollar(annual_sale)) 

sales_by_year_tbl %>% 
  select(year, annual_sales_dtxt) %>%
  rename("Year" = year) %>%
  rename("Total Annual Revenue" = annual_sales_dtxt) %>%
  head(5)
Code
sales_by_year_tbl %>%
  ggplot(aes(x = year, y = annual_sale)) +
  geom_col(fill = "#4492eb") +
  geom_label(aes(label = annual_sales_dtxt), size = 3) +
 # geom_smooth(method = "lm", se = FALSE, color = "#ff7119") +
  theme_tq() +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "Revenue by Year", subtitle = "Upward Trend", x = "", y = "Revenue")

5.2 Revenue by Year and Category

Breaking down total sales by year and bike category.

NoteFunctions

select(), mutate(), year(), group_by(), summarize(), sum(), dollar(), rename()

Code
sales_by_year_cat_tbl <- bike_orderlines_wrangled_tbl %>% 
  select(order_date, total_sale, category_2) %>%
  mutate(year = year(order_date)) %>%
  group_by(year, category_2) %>%
  summarize(cat_annual_sales = sum(total_sale), .groups = "drop") %>%
  mutate(cat_annual_dtxt = scales::dollar(cat_annual_sales))

sales_by_year_cat_tbl %>%
  select(-cat_annual_sales) %>% 
  rename("Year" = year) %>%
  rename("Category" = category_2) %>%
  rename("Total Revenue" = cat_annual_dtxt) 

5.3 Average Revenue by Category

Total revenue and average sales based on bike category.

NoteFunctions

group_by(), summarize(), sum(), mean(), n_distinct(), arrange(), desc(), mutate(), dollar(), select(), rename()

Code
category_summary_tbl <- sales_by_year_cat_tbl %>%
  group_by(category_2) %>%
  summarize(
    total_sales = sum(cat_annual_sales, na.rm = TRUE),
    avg_sales   = mean(cat_annual_sales, na.rm = TRUE),
    years       = n_distinct(year)
  ) %>%
  arrange(desc(total_sales)) %>%
  mutate(
    total_sales_dtxt = scales::dollar(total_sales),
    avg_sales_dtxt   = scales::dollar(avg_sales)
  )

category_summary_tbl %>%
  select(years, category_2, -total_sales, -avg_sales, avg_sales_dtxt, total_sales_dtxt) %>% 
  rename("N Years" = years) %>%
  rename("Category" = category_2) %>%
  rename("Average Revenue" = avg_sales_dtxt) %>%
  rename("Total Revenue" = total_sales_dtxt) 

5.4 Graphing Average Sales by Year

Graphing average sales using ggplot functions.

NoteFunctions

ggplot(), aes(), reorder(), geom_col(), geom_label(), scale_x_continuous(), dollar(), seq(), labs(), theme_tq(), theme(), element_text(), coord_cartesian()

Code
category_summary_tbl %>%
  ggplot(aes(
    x = avg_sales,
    y = reorder(category_2, avg_sales)
  )) +
  geom_col(fill = "#4492eb") +
  geom_label(aes(label = avg_sales_dtxt),
             hjust = -0.1,
             size = 3.5) +
  scale_x_continuous(
    labels = scales::dollar,
    limits = c(0, 5000000),   # show up to $5,000,000
    breaks = seq(0, 5000000, by = 1000000)
  ) +
  labs(
    title = "Average Annual Revenue by Category",
    subtitle = "Ranked highest to lowest",
    x = "Average Annual Revenue",
    y = "Category"
  ) +
  theme_tq() +
  theme(
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(color = "gray30")
  ) +
  coord_cartesian(clip = "off")

5.5 Side-by-Side Comparision

Top two highest-preforming bike categories based on total annual sales.

NoteFunctions

group_by(), summarize(), sum(), slice_max(), pull(), filter(), ggplot(), aes(), geom_col(), geom_smooth(), facet_wrap(), theme_tq(), scale_fill_tq(), scale_y_continuous(), label_dollar(), cut_short_scale(), labs(), theme()

Code
# Identify top 2 categories by total sales
top2 <- sales_by_year_cat_tbl %>%
  group_by(category_2) %>%
  summarize(total_sales = sum(cat_annual_sales, na.rm = TRUE), .groups = "drop") %>%
  slice_max(total_sales, n = 2) %>%
  pull(category_2)

# Plot only top 4 categories
sales_by_year_cat_tbl %>%
  filter(category_2 %in% top2) %>% 
  ggplot(aes(x = year, y = cat_annual_sales, fill = category_2)) +
  geom_col(show.legend = FALSE) +
 # geom_smooth(method = "lm", se = FALSE, color = "#ff7119", linewidth = 1) +
  facet_wrap(~ category_2, ncol = 2, scales = "fixed") +
  theme_tq() +
  scale_fill_tq(drop = TRUE) +
  scale_y_continuous(
    labels = label_dollar(scale_cut = cut_short_scale())  # converts to $5M, $4M
  ) +
  labs(
    title = "Side-by-Side Comparision: Revenue by Year",
    subtitle = " Cross Country Race vs Elite",
    x = "Year",
    y = "Revenue"
  ) +
  theme(legend.position = "none")

5.6 Graphing Multiple Categories

Top 4 high-preforming bike categories based on total annual sales within multiple charts in an all-in-one graph.

NoteFunctions

group_by(), summarise(), sum(), slice_max(), pull(), filter(), ggplot(), aes(), geom_col(), geom_smooth(), facet_wrap(), theme_tq(), scale_fill_tq(), scale_y_continuous(), label_dollar(), cut_short_scale(), labs(), theme()

Code
# Identify top 4 categories by total sales
top4 <- sales_by_year_cat_tbl %>%
  group_by(category_2) %>%
  summarise(total_sales = sum(cat_annual_sales, na.rm = TRUE), .groups = "drop") %>%
  slice_max(total_sales, n = 4) %>%
  pull(category_2)

# Plot top 4 categories
sales_by_year_cat_tbl %>%
  filter(category_2 %in% top4) %>%
  ggplot(aes(x = year, y = cat_annual_sales, fill = category_2)) +
  geom_col(show.legend = FALSE) +
#  geom_smooth(method = "lm", se = FALSE, color = "#ff7119", linewidth = 1) +
  facet_wrap(~ category_2, ncol = 2, scales = "fixed") +
  theme_tq() +
  scale_fill_tq(drop = TRUE) +
  scale_y_continuous(
    labels = scales::label_dollar(scale_cut = scales::cut_short_scale()) 
  ) +
  labs(
    title = "Top 4 Categories: Revenue by Year",
    subtitle = "Highest-performing categories based on total sales",
    x = "Year",
    y = "Revenue"
  ) +
  theme(legend.position = "none")

5.7 Grouped Categories by Year

Bottom 4 Lowest-preforming categories based on total annual sales in an all-in-one graph.

NoteFunctions

group_by(), summarise(), sum(), slice_min(), pull(), filter(), ggplot(), aes(), geom_col(), geom_text(), dollar(), cut_short_scale(), position_dodge(), theme_tq(), scale_fill_tq(), scale_y_continuous(), label_dollar(), labs()

Code
# Identify bottom 4 categories by total sales
bottom4 <- sales_by_year_cat_tbl %>%
  group_by(category_2) %>%
  summarise(total_sales = sum(cat_annual_sales, na.rm = TRUE), .groups = "drop") %>%
  slice_min(total_sales, n = 4) %>%
  pull(category_2)

# Plot all bottom 4 categories grouped by year
sales_by_year_cat_tbl %>%
  filter(category_2 %in% bottom4) %>%
  ggplot(aes(x = factor(year), y = cat_annual_sales, fill = category_2)) +
  geom_col(position = "dodge") +  # grouped side-by-side by category per year
  geom_text(
    aes(label = scales::dollar(cat_annual_sales, scale_cut = scales::cut_short_scale())),
    position = position_dodge(width = 0.9),
    vjust = -0.3,
    size = 2.5
  ) +
  theme_tq() +
  scale_fill_tq() +
  scale_y_continuous(labels = scales::label_dollar(scale_cut = scales::cut_short_scale())) +
  labs(
  title = "Bottom 4 Categories by Annual Revenue",
  subtitle = "Grouped by year for direct category comparison",
    x = "Year",
    y = "Revenue",
    fill = "Category"
  )

5.8 Facet Wrap Reporting

Reporting performances of all categories in one.

NoteFunctions

group_by(), summarise(), sum(), arrange(), desc(), pull(), mutate(), factor(), ggplot(), aes(), geom_col(), geom_smooth(), facet_wrap(), theme_tq(), scale_fill_tq(), scale_y_continuous(), label_dollar(), cut_short_scale(), labs(), theme()

Code
# Calculate total revenue by category (descending)
ordered_cats <- sales_by_year_cat_tbl %>%
  group_by(category_2) %>%
  summarise(total_sales = sum(cat_annual_sales, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(total_sales)) %>%
  pull(category_2)

# Plot with facets ordered high → low
sales_by_year_cat_tbl %>%
  mutate(category_2 = factor(category_2, levels = ordered_cats)) %>%
  ggplot(aes(x = year, y = cat_annual_sales, fill = category_2)) +
  geom_col() +
 # geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~ category_2, ncol = 3, scales = "free_y") +
  theme_tq() +
  scale_fill_tq() +
  scale_y_continuous(
    labels = scales::label_dollar(scale_cut = scales::cut_short_scale())
  ) +
  labs(
    title = "Revenue by Year and Category",
    subtitle = "Bike categories have their own scaled revenue axis",
    x = "",
    y = "Revenue",
    fill = "Category"
  ) +
  theme(legend.position = "none")



6 Appendix

6.1 Export Data

6.2 Libraries

  • base R
  • dplyr
  • ggplot2
  • lubridate
  • scales
  • tibble
  • tidyquant
  • htmltools

6.3 Functions

Below is a list of all functions used. Learn more about the functions used by putting a “?” infront of the function in R.

  • aes()
  • arrange()
  • bind_cols()
  • colnames()
  • coord_cartesian()
  • cut_short_scale()
  • desc()
  • dollar()
  • element_text()
  • facet_wrap()
  • factor()
  • filter()
  • geom_col()
  • geom_label()
  • geom_smooth()
  • geom_text()
  • ggplot()
  • glimpse()
  • group_by()
  • h3()
  • head()
  • label_dollar()
  • labs()
  • left_join()
  • list()
  • mean()
  • min()
  • mutate()
  • n_distinct()
  • position_dodge()
  • pull()
  • reorder()
  • rename()
  • scale_fill_tq()
  • scale_x_continuous()
  • scale_y_continuous()
  • select()
  • seq()
  • seq_len()
  • slice_max()
  • slice_min()
  • sum()
  • summarise()
  • summarize()
  • tagList()
  • theme()
  • theme_tq()
  • tibble()
  • year()