library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.4 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
bike_orderline_tbl <- read_excel("C:/Users/Admin/OneDrive - 亞洲大學[Asia University]/Financial Database Mana & Application/bike_orderlines.xlsx")
head(bike_orderline_tbl)
## # A tibble: 6 × 13
## order_date order_id order_line quantity price total_price model
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011-01-07 00:00:00 1 1 1 6070 6070 Jekyll Car…
## 2 2011-01-07 00:00:00 1 2 1 5970 5970 Trigger Ca…
## 3 2011-01-10 00:00:00 2 1 1 2770 2770 Beast of t…
## 4 2011-01-10 00:00:00 2 2 1 5970 5970 Trigger Ca…
## 5 2011-01-10 00:00:00 3 1 1 10660 10660 Supersix E…
## 6 2011-01-10 00:00:00 3 2 1 3200 3200 Jekyll Car…
## # ℹ 6 more variables: category_1 <chr>, category_2 <chr>, frame_material <chr>,
## # bikeshop_name <chr>, city <chr>, state <chr>
categories_sales <- bike_orderline_tbl %>%
select(category_1, category_2, total_price) %>%
group_by(category_1, category_2) %>%
summarise(Sales = sum(total_price)) %>%
arrange(desc(Sales)) %>%
ungroup()
## `summarise()` has grouped output by 'category_1'. You can override using the
## `.groups` argument.
categories_sales
## # A tibble: 9 × 3
## category_1 category_2 Sales
## <chr> <chr> <dbl>
## 1 Mountain Cross Country Race 19224630
## 2 Road Elite Road 15334665
## 3 Road Endurance Road 10381060
## 4 Mountain Trail 9373460
## 5 Mountain Over Mountain 7571270
## 6 Road Triathalon 4053750
## 7 Road Cyclocross 2108120
## 8 Mountain Sport 1932755
## 9 Mountain Fat Bike 1052620
The Mountain Cross Country Race categories have the most sales.
categories_sales_formatted <- categories_sales %>%
mutate(Sales = dollar(Sales))
categories_sales_formatted
## # A tibble: 9 × 3
## category_1 category_2 Sales
## <chr> <chr> <chr>
## 1 Mountain Cross Country Race $19,224,630
## 2 Road Elite Road $15,334,665
## 3 Road Endurance Road $10,381,060
## 4 Mountain Trail $9,373,460
## 5 Mountain Over Mountain $7,571,270
## 6 Road Triathalon $4,053,750
## 7 Road Cyclocross $2,108,120
## 8 Mountain Sport $1,932,755
## 9 Mountain Fat Bike $1,052,620
bike_orderline_tbl_rename <- bike_orderline_tbl %>%
rename(`Prime category` = category_1) %>%
rename(`Secondary category` = category_2) %>%
rename(`Frame Material` = frame_material)
bike_orderline_tbl_rename
## # A tibble: 15,644 × 13
## order_date order_id order_line quantity price total_price model
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011-01-07 00:00:00 1 1 1 6070 6070 Jekyll Ca…
## 2 2011-01-07 00:00:00 1 2 1 5970 5970 Trigger C…
## 3 2011-01-10 00:00:00 2 1 1 2770 2770 Beast of …
## 4 2011-01-10 00:00:00 2 2 1 5970 5970 Trigger C…
## 5 2011-01-10 00:00:00 3 1 1 10660 10660 Supersix …
## 6 2011-01-10 00:00:00 3 2 1 3200 3200 Jekyll Ca…
## 7 2011-01-10 00:00:00 3 3 1 12790 12790 Supersix …
## 8 2011-01-10 00:00:00 3 4 1 5330 5330 Supersix …
## 9 2011-01-10 00:00:00 3 5 1 1570 1570 Synapse D…
## 10 2011-01-11 00:00:00 4 1 1 4800 4800 Synapse C…
## # ℹ 15,634 more rows
## # ℹ 6 more variables: `Prime category` <chr>, `Secondary category` <chr>,
## # `Frame Material` <chr>, bikeshop_name <chr>, city <chr>, state <chr>