Which product categories have the most sales?

1. Use ‘bike_orderline_tbl’

Installing libraries

 library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
 library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.4     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
 library(readxl)
 library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

Importing dataset

bike_orderline_tbl <- read_excel("C:/Users/Admin/OneDrive - 亞洲大學[Asia University]/Financial Database Mana & Application/bike_orderlines.xlsx")
head(bike_orderline_tbl)
## # A tibble: 6 × 13
##   order_date          order_id order_line quantity price total_price model      
##   <dttm>                 <dbl>      <dbl>    <dbl> <dbl>       <dbl> <chr>      
## 1 2011-01-07 00:00:00        1          1        1  6070        6070 Jekyll Car…
## 2 2011-01-07 00:00:00        1          2        1  5970        5970 Trigger Ca…
## 3 2011-01-10 00:00:00        2          1        1  2770        2770 Beast of t…
## 4 2011-01-10 00:00:00        2          2        1  5970        5970 Trigger Ca…
## 5 2011-01-10 00:00:00        3          1        1 10660       10660 Supersix E…
## 6 2011-01-10 00:00:00        3          2        1  3200        3200 Jekyll Car…
## # ℹ 6 more variables: category_1 <chr>, category_2 <chr>, frame_material <chr>,
## #   bikeshop_name <chr>, city <chr>, state <chr>

2. Group and summarize the data calling the new column as ‘Sales’

categories_sales <- bike_orderline_tbl %>%
  select(category_1, category_2, total_price) %>% 
  group_by(category_1, category_2) %>% 
  summarise(Sales = sum(total_price)) %>% 
  arrange(desc(Sales)) %>%
  ungroup()
## `summarise()` has grouped output by 'category_1'. You can override using the
## `.groups` argument.
categories_sales
## # A tibble: 9 × 3
##   category_1 category_2            Sales
##   <chr>      <chr>                 <dbl>
## 1 Mountain   Cross Country Race 19224630
## 2 Road       Elite Road         15334665
## 3 Road       Endurance Road     10381060
## 4 Mountain   Trail               9373460
## 5 Mountain   Over Mountain       7571270
## 6 Road       Triathalon          4053750
## 7 Road       Cyclocross          2108120
## 8 Mountain   Sport               1932755
## 9 Mountain   Fat Bike            1052620

The Mountain Cross Country Race categories have the most sales.

3. Format the sales as ‘dollars()’

categories_sales_formatted <- categories_sales %>% 
  mutate(Sales = dollar(Sales))
categories_sales_formatted
## # A tibble: 9 × 3
##   category_1 category_2         Sales      
##   <chr>      <chr>              <chr>      
## 1 Mountain   Cross Country Race $19,224,630
## 2 Road       Elite Road         $15,334,665
## 3 Road       Endurance Road     $10,381,060
## 4 Mountain   Trail              $9,373,460 
## 5 Mountain   Over Mountain      $7,571,270 
## 6 Road       Triathalon         $4,053,750 
## 7 Road       Cyclocross         $2,108,120 
## 8 Mountain   Sport              $1,932,755 
## 9 Mountain   Fat Bike           $1,052,620

4. Rename ‘category_1’ to ‘Prime category’, ‘category_2’ to ‘Secondary category’, ‘frame_material’ to ‘Frame Material’

bike_orderline_tbl_rename <- bike_orderline_tbl %>% 
  rename(`Prime category` = category_1) %>% 
  rename(`Secondary category` = category_2) %>% 
  rename(`Frame Material` = frame_material)
bike_orderline_tbl_rename
## # A tibble: 15,644 × 13
##    order_date          order_id order_line quantity price total_price model     
##    <dttm>                 <dbl>      <dbl>    <dbl> <dbl>       <dbl> <chr>     
##  1 2011-01-07 00:00:00        1          1        1  6070        6070 Jekyll Ca…
##  2 2011-01-07 00:00:00        1          2        1  5970        5970 Trigger C…
##  3 2011-01-10 00:00:00        2          1        1  2770        2770 Beast of …
##  4 2011-01-10 00:00:00        2          2        1  5970        5970 Trigger C…
##  5 2011-01-10 00:00:00        3          1        1 10660       10660 Supersix …
##  6 2011-01-10 00:00:00        3          2        1  3200        3200 Jekyll Ca…
##  7 2011-01-10 00:00:00        3          3        1 12790       12790 Supersix …
##  8 2011-01-10 00:00:00        3          4        1  5330        5330 Supersix …
##  9 2011-01-10 00:00:00        3          5        1  1570        1570 Synapse D…
## 10 2011-01-11 00:00:00        4          1        1  4800        4800 Synapse C…
## # ℹ 15,634 more rows
## # ℹ 6 more variables: `Prime category` <chr>, `Secondary category` <chr>,
## #   `Frame Material` <chr>, bikeshop_name <chr>, city <chr>, state <chr>