packages <- c(“readxl”, “tidyverse”, “lubridate”, “scales”)
installed <- packages %in% installed.packages() if (any(!installed)) install.packages(packages[!installed])
lapply(packages, library, character.only = TRUE)
bikes_tbl <- read_excel(“./bikes.xlsx”) bikeshops_tbl <- read_excel(“./bikeshops.xlsx”) orderlines_tbl <- read_excel(“./orderlines.xlsx”)
head(bikes_tbl) head(bikeshops_tbl) head(orderlines_tbl)
names(bikes_tbl) names(bikeshops_tbl) names(orderlines_tbl)
bike_sales_joined <- orderlines_tbl %>% left_join(bikes_tbl, by = c(“product.id” = “bike.id”)) %>% left_join(bikeshops_tbl, by = c(“customer.id” = “bikeshop.id”))
bike_sales_clean <- bike_sales_joined %>% separate(description, into = c(“category_1”, “category_2”, “frame_material”), sep = ” - “, remove = FALSE) %>% separate(location, into = c(”city”, “state”), sep = “,”, remove = FALSE) %>% mutate(total_price = price * quantity) %>% rename(order_date = any_of(c(“order.date”, “order_date”))) %>% mutate(order_date = ymd(order_date)) %>% select(order_date, quantity, price, total_price, category_1, category_2, frame_material, city, state, everything())
glimpse(bike_sales_clean)
total_revenue <- sum(bike_sales_clean$total_price, na.rm = TRUE) print(paste(“Total Revenue:”, scales::dollar(total_revenue)))
bike_sales_clean %>% group_by(category_1) %>% summarise(revenue = sum(total_price, na.rm = TRUE)) %>% arrange(desc(revenue)) %>% print(n = 10)
sales_by_year <- bike_sales_clean %>% mutate(year = year(order_date)) %>% group_by(year) %>% summarise(sales = sum(total_price, na.rm = TRUE))
ggplot(sales_by_year, aes(x = year, y = sales, color = sales)) + geom_point(size = 5) + geom_line(linewidth = 2) + geom_smooth(method = “lm”, se = FALSE) + scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, suffix = “M”)) + scale_color_continuous(low = “red”, high = “black”) + labs( title = “Revenue Over Time”, subtitle = “Yearly Total Sales”, x = “Year”, y = “Sales (Millions)”, color = “Revenue” ) + theme_minimal()
revenue_by_category <- bike_sales_clean %>% group_by(category_2) %>% summarise(revenue = sum(total_price, na.rm = TRUE)) %>% ungroup()
ggplot(revenue_by_category, aes(x = fct_reorder(category_2, revenue), y = revenue)) + geom_col(fill = “blue”) + coord_flip() + scale_y_continuous(labels = scales::dollar_format()) + labs( title = “Total Sales by Category 2”, x = “Category”, y = “Revenue” ) + theme_minimal()