library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.2 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the bike order-line data from an RDS file in the working directory.
# The analysis below reads the columns category_1, category_2, frame_material
# and total_price from it.
bike_data <- readRDS(file = "bike_orderlines (1).rds")
# Distinct values of category_1, sorted ascending.
# distinct() states the intent directly instead of relying on an empty
# summarise() to collapse groups. as_tibble() guarantees an ungrouped tibble
# whatever class bike_data has, and arrange() reproduces the sorted order that
# grouping used to give (both order character columns in the C locale by
# default in dplyr >= 1.1).
category_1_unique <- bike_data %>%
  as_tibble() %>%
  distinct(category_1) %>%
  arrange(category_1)
# Distinct values of category_2, sorted ascending.
# distinct() replaces the group_by() + empty summarise() dedupe; as_tibble()
# guarantees an ungrouped tibble, and arrange() keeps the sorted order that
# grouping produced (C-locale ordering by default in dplyr >= 1.1).
category_2_unique <- bike_data %>%
  as_tibble() %>%
  distinct(category_2) %>%
  arrange(category_2)
# Distinct values of frame_material, sorted ascending.
# distinct() replaces the group_by() + empty summarise() dedupe; as_tibble()
# guarantees an ungrouped tibble, and arrange() keeps the sorted order that
# grouping produced (C-locale ordering by default in dplyr >= 1.1).
frame_material_unique <- bike_data %>%
  as_tibble() %>%
  distinct(frame_material) %>%
  arrange(frame_material)
# Print a heading, then the table of distinct category_1 values.
cat("Unique values in category_1:\n")
## Unique values in category_1:
category_1_unique %>% print()
## # A tibble: 2 × 1
## category_1
## <chr>
## 1 Mountain
## 2 Road
# Print a heading (preceded by a blank line), then the table of distinct
# category_2 values.
cat("\nUnique values in category_2:\n")
##
## Unique values in category_2:
category_2_unique %>% print()
## # A tibble: 9 × 1
## category_2
## <chr>
## 1 Cross Country Race
## 2 Cyclocross
## 3 Elite Road
## 4 Endurance Road
## 5 Fat Bike
## 6 Over Mountain
## 7 Sport
## 8 Trail
## 9 Triathalon
# Print a heading (preceded by a blank line), then the table of distinct
# frame_material values.
cat("\nUnique values in frame_material:\n")
##
## Unique values in frame_material:
frame_material_unique %>% print()
## # A tibble: 2 × 1
## frame_material
## <chr>
## 1 Aluminum
## 2 Carbon
# Total sales (sum of total_price, ignoring NA) per primary category, largest
# first. The grouping column receives its display name directly in group_by(),
# so the summarised table already carries the "Primary Category" header.
primary_sales <- bike_data %>%
  group_by(`Primary Category` = category_1) %>%
  summarise(Sales = sum(total_price, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(Sales))

# Print the table under a heading.
cat("\nSales by Primary Category:\n")
##
## Sales by Primary Category:
primary_sales %>% print()
## # A tibble: 2 × 2
## `Primary Category` Sales
## <chr> <dbl>
## 1 Mountain 39154735
## 2 Road 31877595
# Total sales (sum of total_price, ignoring NA) per secondary category,
# largest first. The grouping column receives its display name directly in
# group_by(), so the summarised table already carries the
# "Secondary Category" header.
secondary_sales <- bike_data %>%
  group_by(`Secondary Category` = category_2) %>%
  summarise(Sales = sum(total_price, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(Sales))

# Print the table under a heading.
cat("\nSales by Secondary Category:\n")
##
## Sales by Secondary Category:
secondary_sales %>% print()
## # A tibble: 9 × 2
## `Secondary Category` Sales
## <chr> <dbl>
## 1 Cross Country Race 19224630
## 2 Elite Road 15334665
## 3 Endurance Road 10381060
## 4 Trail 9373460
## 5 Over Mountain 7571270
## 6 Triathalon 4053750
## 7 Cyclocross 2108120
## 8 Sport 1932755
## 9 Fat Bike 1052620
# Total sales (sum of total_price, ignoring NA) per frame material, largest
# first. The grouping column receives its display name directly in group_by(),
# so the summarised table already carries the "Frame Material" header.
frame_material_sales <- bike_data %>%
  group_by(`Frame Material` = frame_material) %>%
  summarise(Sales = sum(total_price, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(Sales))

# Print the table under a heading.
cat("\nSales by Frame Material:\n")
##
## Sales by Frame Material:
frame_material_sales %>% print()
## # A tibble: 2 × 2
## `Frame Material` Sales
## <chr> <dbl>
## 1 Carbon 52940540
## 2 Aluminum 18091790
# Sales per (primary category, secondary category) with one column per frame
# material. The two category columns get their display names in group_by();
# frame_material keeps its name because pivot_wider() spreads its values into
# column headers. Category/material combinations that do not occur in the data
# are filled with 0 rather than NA.
category_frame_check <- bike_data %>%
  group_by(
    `Primary Category` = category_1,
    `Secondary Category` = category_2,
    frame_material
  ) %>%
  summarise(Sales = sum(total_price, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(
    names_from = frame_material,
    values_from = Sales,
    values_fill = 0
  )

# Print the table under a heading.
cat("\nSales by Primary and Secondary Category with Frame Materials:\n")
##
## Sales by Primary and Secondary Category with Frame Materials:
category_frame_check %>% print()
## # A tibble: 9 × 4
## `Primary Category` `Secondary Category` Aluminum Carbon
## <chr> <chr> <dbl> <dbl>
## 1 Mountain Cross Country Race 3318560 15906070
## 2 Mountain Fat Bike 1052620 0
## 3 Mountain Over Mountain 0 7571270
## 4 Mountain Sport 1932755 0
## 5 Mountain Trail 4537610 4835850
## 6 Road Cyclocross 0 2108120
## 7 Road Elite Road 5637795 9696870
## 8 Road Endurance Road 1612450 8768610
## 9 Road Triathalon 0 4053750