Library

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr   1.1.3     ✔ readr   2.1.4
## ✔ forcats 1.0.0     ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4     ✔ tibble  3.2.1
## ✔ purrr   1.0.2     ✔ tidyr   1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggplot2)
library(tidyquant)
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
## 
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

Datset

bike_orderlines <- read_excel("C:/Users/Admin/OneDrive - 亞洲大學[Asia University]/Financial Database Mana & Application/Data/bike_orderlines.xlsx")
glimpse(bike_orderlines)
## Rows: 15,644
## Columns: 13
## $ order_date     <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id       <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ order_line     <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price          <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price    <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model          <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1     <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2     <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name  <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city           <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state          <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…

Top 10 Revenues by bikeshop

revenues <- bike_orderlines %>% select(bikeshop_name, total_price) %>% 
  group_by(bikeshop_name) %>% 
  summarise(Revenues = sum(total_price)) %>% 
  ungroup() %>% 
  arrange(desc(Revenues)) %>% 
  mutate(bikeshop_name = bikeshop_name %>% as_factor() %>% fct_rev())

revenues
## # A tibble: 30 × 2
##    bikeshop_name                Revenues
##    <fct>                           <dbl>
##  1 Kansas City 29ers            11535455
##  2 Denver Bike Shop              7697670
##  3 Ithaca Mountain Climbers      6299335
##  4 Phoenix Bi-peds               4168535
##  5 Oklahoma City Race Equipment  3450040
##  6 Las Vegas Cycles              3073615
##  7 New Orleans Velocipedes       2761825
##  8 Wichita Speed                 2380385
##  9 Miami Race Equipment          2057130
## 10 Minneapolis Bike Shop         2023220
## # ℹ 20 more rows

Create “Other Shops” category

plot_data <- revenues %>%
  mutate(bikeshop_name = bikeshop_name %>% fct_lump(
    n =10,
    w = Revenues,
    other_level = "Other Bikeshops")) %>%
  mutate(bikeshop_name = bikeshop_name %>% fct_relevel("Other Bikeshops",
                                                 after = 0)) %>% 
  group_by(bikeshop_name) %>%
  summarise(Revenues = sum(Revenues))

Plot the result

plot_data %>% ggplot(aes(y = bikeshop_name, x = Revenues))+
  geom_col(fill = "black")+
  scale_x_continuous(labels = scales::dollar_format(scale = 1e-6, suffix = "M")) +
  theme_tq() +
  scale_color_tq()