======================================================

1. Install & Load Packages

======================================================

packages <- c(“readxl”, “tidyverse”, “lubridate”, “scales”)

installed <- packages %in% installed.packages() if (any(!installed)) install.packages(packages[!installed])

lapply(packages, library, character.only = TRUE)

======================================================

2. Data Import

======================================================

bikes_tbl <- read_excel(“./bikes.xlsx”) bikeshops_tbl <- read_excel(“./bikeshops.xlsx”) orderlines_tbl <- read_excel(“./orderlines.xlsx”)

Preview

head(bikes_tbl) head(bikeshops_tbl) head(orderlines_tbl)

======================================================

3. Data Joining

======================================================

Check column names if needed

names(bikes_tbl) names(bikeshops_tbl) names(orderlines_tbl)

bike_sales_joined <- orderlines_tbl %>% left_join(bikes_tbl, by = c(“product.id” = “bike.id”)) %>% left_join(bikeshops_tbl, by = c(“customer.id” = “bikeshop.id”))

======================================================

4. Data Wrangling

======================================================

bike_sales_clean <- bike_sales_joined %>% separate(description, into = c(“category_1”, “category_2”, “frame_material”), sep = ” - “, remove = FALSE) %>% separate(location, into = c(”city”, “state”), sep = “,”, remove = FALSE) %>% mutate(total_price = price * quantity) %>% rename(order_date = any_of(c(“order.date”, “order_date”))) %>% mutate(order_date = ymd(order_date)) %>% select(order_date, quantity, price, total_price, category_1, category_2, frame_material, city, state, everything())

Check cleaned data

glimpse(bike_sales_clean)

======================================================

5. Example Summaries

======================================================

Total revenue

total_revenue <- sum(bike_sales_clean$total_price, na.rm = TRUE) print(paste(“Total Revenue:”, scales::dollar(total_revenue)))

Top categories by revenue

bike_sales_clean %>% group_by(category_1) %>% summarise(revenue = sum(total_price, na.rm = TRUE)) %>% arrange(desc(revenue)) %>% print(n = 10)

======================================================

6. Time Series: Sales by Year

======================================================

sales_by_year <- bike_sales_clean %>% mutate(year = year(order_date)) %>% group_by(year) %>% summarise(sales = sum(total_price, na.rm = TRUE))

======================================================

7. Plot: Total Sales by Year

======================================================

ggplot(sales_by_year, aes(x = year, y = sales, color = sales)) + geom_point(size = 5) + geom_line(linewidth = 2) + geom_smooth(method = “lm”, se = FALSE) + scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, suffix = “M”)) + scale_color_continuous(low = “red”, high = “black”) + labs( title = “Revenue Over Time”, subtitle = “Yearly Total Sales”, x = “Year”, y = “Sales (Millions)”, color = “Revenue” ) + theme_minimal()

======================================================

8. Plot: Revenue by Category

======================================================

revenue_by_category <- bike_sales_clean %>% group_by(category_2) %>% summarise(revenue = sum(total_price, na.rm = TRUE)) %>% ungroup()

ggplot(revenue_by_category, aes(x = fct_reorder(category_2, revenue), y = revenue)) + geom_col(fill = “blue”) + coord_flip() + scale_y_continuous(labels = scales::dollar_format()) + labs( title = “Total Sales by Category 2”, x = “Category”, y = “Revenue” ) + theme_minimal()