# 1. Load up libraries ----
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(dplyr)
# 2. Import data ----
# Read ./bike_orderlines.xlsx (path is relative to the working directory).
bike_orderlines_tbl <- readxl::read_excel("./bike_orderlines.xlsx")
# 3. Analyse ----
# Total quantity per bike shop and category pair, sorted from largest to
# smallest, with each row's share of the overall quantity as a number and as
# formatted percent text. The result is printed, not assigned.
bike_orderlines_tbl %>%
  select(bikeshop_name, category_1, category_2, quantity) %>%
  group_by(bikeshop_name, category_1, category_2) %>%
  # `.groups = "drop"` returns an ungrouped tibble directly, so no separate
  # ungroup() step is needed and summarise() does not emit its regrouping
  # message.
  summarise(total_quantity = sum(quantity), .groups = "drop") %>%
  # Reorder the shop factor levels by total_quantity; each shop spans several
  # rows, which fct_reorder() collapses with its default summary function.
  mutate(
    bikeshop_name = fct_reorder(as_factor(bikeshop_name), total_quantity)
  ) %>%
  arrange(desc(total_quantity)) %>%
  # mutate() evaluates its arguments in order, so the text column can use the
  # numeric share defined just above it.
  mutate(
    total_quantity_pct = total_quantity / sum(total_quantity),
    total_quantity_pct_txt = scales::percent(total_quantity_pct)
  )
## # A tibble: 270 × 6
## bikeshop_name category_1 category_2 total_quantity total_quantity_pct
## <fct> <chr> <chr> <dbl> <dbl>
## 1 Kansas City 29ers Mountain Cross Cou… 896 0.0444
## 2 Kansas City 29ers Mountain Trail 620 0.0307
## 3 Kansas City 29ers Mountain Sport 558 0.0277
## 4 Denver Bike Shop Mountain Cross Cou… 549 0.0272
## 5 Kansas City 29ers Road Elite Road 437 0.0217
## 6 Denver Bike Shop Mountain Trail 411 0.0204
## 7 Denver Bike Shop Mountain Sport 388 0.0192
## 8 Oklahoma City Race E… Road Elite Road 382 0.0189
## 9 Ithaca Mountain Clim… Mountain Cross Cou… 379 0.0188
## 10 Kansas City 29ers Road Endurance… 328 0.0163
## # ℹ 260 more rows
## # ℹ 1 more variable: total_quantity_pct_txt <chr>
# Per-column summary statistics for the built-in `cars` data set.
summary(datasets::cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.