# Install only the packages that are not already available, so re-running the
# script does not reinstall (and possibly upgrade) everything each time.
required_packages <- c("readxl", "dplyr", "ggplot2")
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
## Installing packages into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Name of the column that holds the revenue figure for each order line.
revenue_column_name <- "price"

# Read the order lines from the workbook, then coerce the revenue column to
# numeric in place (values that cannot be parsed as numbers become NA).
bikeorderline <- read_excel("bike_orderlines.xlsx")
bikeorderline[revenue_column_name] <- lapply(
  bikeorderline[revenue_column_name],
  as.numeric
)
# Total revenue per bikeshop, computed once and reused for both the top-10
# table and the "Other Bikeshops" remainder. `na.rm = TRUE` keeps a single
# missing revenue value from turning a whole shop's total (and the remainder)
# into NA.
bikeshop_revenue <- bikeorderline %>%
  group_by(bikeshop_name) %>%
  summarise(revenue = sum(.data[[revenue_column_name]], na.rm = TRUE))

# The ten bikeshops with the highest revenue, largest first.
top_bikeshops <- bikeshop_revenue %>%
  arrange(desc(revenue)) %>%
  head(10)

# Every bikeshop outside the top ten, collapsed into a single row.
# With ten or fewer bikeshops nothing is left over and this row has revenue 0.
other_bikeshops <- bikeshop_revenue %>%
  anti_join(top_bikeshops, by = "bikeshop_name") %>%
  summarise(bikeshop_name = "Other Bikeshops", revenue = sum(revenue))

# Combine Top 10 and "Other Bikeshops"
final_data <- bind_rows(top_bikeshops, other_bikeshops)
# Bar chart of revenue per bikeshop. Bars are ordered from highest to lowest
# revenue (the "Other Bikeshops" bar is positioned by its revenue as well), and
# the x-axis labels are rotated 45 degrees.
ggplot(
  final_data,
  aes(x = reorder(bikeshop_name, -revenue), y = revenue)
) +
  geom_col(fill = "skyblue") +
  ggtitle("Top 10 Bikeshops by Revenue") +
  xlab("Bikeshop Name") +
  ylab("Revenue") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
