# Install and load required packages
install.packages(c("dplyr", "ggplot2"))
## Installing packages into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Sample data: one row per bike shop, holding a sequential id, the shop's
# name and its "City, ST" location. The three columns are positionally
# aligned, so entry i of each vector describes the same shop.
data <- data.frame(
  bikeshop.id = seq_len(30L),
  bikeshop.name = c(
    "Pittsburgh Mountain Machines", "Ithaca Mountain Climbers",
    "Columbus Race Equipment", "Detroit Cycles",
    "Cincinnati Speed", "Louisville Race Equipment",
    "Nashville Cruisers", "Denver Bike Shop",
    "Minneapolis Bike Shop", "Kansas City 29ers",
    "New York Cycles", "Dallas Cycles",
    "Oklahoma City Race Equipment", "Albuquerque Cycles",
    "Austin Cruisers", "Phoenix Bi-peds",
    "Las Vegas Cycles", "Los Angeles Cycles",
    "San Francisco Cruisers", "Portland Bi-peds",
    "Seattle Race Equipment", "Ann Arbor Speed",
    "Philadelphia Bike Shop", "Providence Bi-peds",
    "New Orleans Velocipedes", "Miami Race Equipment",
    "San Antonio Bike Shop", "Wichita Speed",
    "Indianapolis Velocipedes", "Tampa 29ers"
  ),
  location = c(
    "Pittsburgh, PA", "Ithaca, NY", "Columbus, OH", "Detroit, MI",
    "Cincinnati, OH",
    "Louisville, KY", "Nashville, TN", "Denver, CO", "Minneapolis, MN",
    "Kansas City, KS",
    "New York, NY", "Dallas, TX", "Oklahoma City, OK", "Albuquerque, NM",
    "Austin, TX",
    "Phoenix, AZ", "Las Vegas, NV", "Los Angeles, CA", "San Francisco, CA",
    "Portland, OR",
    "Seattle, WA", "Ann Arbor, MI", "Philadelphia, PA", "Providence, RI",
    "New Orleans, LA",
    "Miami, FL", "San Antonio, TX", "Wichita, KS", "Indianapolis, IN",
    "Tampa, FL"
  )
)
# Dummy revenues: an arithmetic ramp of 100000 per row (100000, 200000, ...),
# so the last rows of the table are the highest earners.
data[["revenue"]] <- seq(from = 1e5, by = 1e5, length.out = nrow(data))
# Select the top 10 bikeshops: sort by revenue, highest first, and keep the
# first ten rows.
top_10 <- head(arrange(data, desc(revenue)), 10)
# Total revenue of every shop that did not make the top 10. Shops are matched
# on bikeshop.id (unique per row) rather than on the display name, so two
# shops sharing a name could not be dropped from the remainder by mistake.
other_revenue <- sum(data$revenue[!(data$bikeshop.id %in% top_10$bikeshop.id)])
# Create the "Other bikeshops" entry to account for the rest. This must be
# exactly ONE row: a bar chart stacks rows that share an x value, so repeating
# the entry once per top-10 shop would inflate the "Other" bar tenfold.
other_entry <- data.frame(
  bikeshop.name = "Other bikeshops",
  revenue = other_revenue
)
# Concatenate top 10 and "Other bikeshops" entries; only the two columns the
# plot needs are kept so both pieces have matching columns for rbind().
final_data <- rbind(top_10[, c("bikeshop.name", "revenue")], other_entry)
# Plot the results.
# Category order as it appears in final_data (top shops first, the
# "Other bikeshops" entry last). Without explicit factor levels ggplot2 sorts
# character categories alphabetically, which puts "Other bikeshops" in the
# middle of the axis.
shop_levels <- unique(as.character(final_data$bikeshop.name))
plot_data <- final_data
plot_data$bikeshop.name <- factor(plot_data$bikeshop.name, levels = shop_levels)
# Named colour vector: scale_fill_manual() matches named values by category
# name. An unnamed vector is matched by position against the (alphabetical)
# scale limits, which painted the wrong bar gray.
fill_colours <- setNames(
  ifelse(shop_levels == "Other bikeshops", "gray", "steelblue"),
  shop_levels
)
ggplot(plot_data, aes(x = bikeshop.name, y = revenue, fill = bikeshop.name)) +
  # geom_col() draws bar heights straight from the y values.
  geom_col() +
  # Rotate the long shop names so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Top 10 Bikeshops with Largest Revenues",
    x = "Bikeshop",
    y = "Revenue"
  ) +
  scale_fill_manual(values = fill_colours)
