# Import data files
bikes_tbl <- read_excel("bikes.xlsx")
bikeshops_tbl <- read_excel("bikeshops.xlsx")
orderlines_tbl <- read_excel("orderlines.xlsx")
# Join and wrangle data
bike_orderlines_wrangled_tbl <- left_join(orderlines_tbl, bikes_tbl, by = c("product.id" = "bike.id")) %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>%
# Wrangling data: decompose description into three columns
separate(description,
into = c('category.1', 'category.2', 'frame.material'),
sep = ' - ') %>%
separate(location,
into = c('city', 'state'),
sep = ', ',
remove = FALSE) %>%
# Create calculated columns
mutate(total.price = price * quantity) %>%
# Reorganize columns
select(-...1, -location) %>%
# Reorder columns
select(contains('date'), contains('id'),
contains('order'),
quantity, price, total.price,
everything()) %>%
# Rename columns
rename(order_date = order.date) %>%
set_names(names(.) %>% str_replace_all("\\.", "_"))bike_sales_category_m <- bike_orderlines_wrangled_tbl %>%
select(order_date, category_2, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_month = floor_date(order_date, unit = "month")) %>%
group_by(year_month, category_2) %>%
summarise(sales = sum(total_price), .groups = 'drop')bike_sales_category_m %>%
ggplot(aes(x = year_month, y = sales, color = category_2)) +
geom_point(size = 2) +
geom_line(size = 0.8) +
# Create facets by category_2
facet_wrap(~ category_2, scales = "free_y", ncol = 3) +
# Format y-axis as millions
scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) +
# Format x-axis to show years
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
# Apply tidyquant color palette
scale_color_tq() +
theme_tq() +
# Labels
labs(
x = "order_date",
y = "sales"
) +
# Theme customization
theme(
legend.position = "bottom",
axis.text.x = element_text(angle = 0, hjust = 0.5, size = 9),
axis.text.y = element_text(size = 9),
strip.background = element_rect(fill = "#2c3e50", color = "#2c3e50"),
strip.text = element_text(color = "white", face = "bold", size = 11),
panel.spacing = unit(1, "lines")
)bike_sales_y <- bike_orderlines_wrangled_tbl %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year = year(order_date)) %>%
group_by(year) %>%
summarise(sales = sum(total_price)) %>%
ungroup()
# Calculate percentage change
calculate_pct_diff <- function(data){
data %>%
mutate(sales_lag_1 = lag(sales, n = 1)) %>%
mutate(sales_lag_1 = case_when(
is.na(sales_lag_1) ~ sales,
TRUE ~ sales_lag_1
)) %>%
mutate(diff_1 = sales - sales_lag_1) %>%
mutate(pct_diff_1 = diff_1 / sales_lag_1) %>%
mutate(pct_diff_1_chr = scales::percent(pct_diff_1))
}
bike_sales_growth <- calculate_pct_diff(bike_sales_y)
knitr::kable(bike_sales_growth %>%
select(year, sales, diff_1, pct_diff_1_chr) %>%
mutate(sales = scales::dollar(sales),
diff_1 = scales::dollar(diff_1)),
col.names = c("Year", "Sales", "Change ($)", "Change (%)"),
align = 'lrrr',
caption = "Year-over-Year Sales Growth")| Year | Sales | Change ($) | Change (%) |
|---|---|---|---|
| 2011 | $11,292,885 | $0 | 0.0% |
| 2012 | $12,163,075 | $870,190 | 7.7% |
| 2013 | $16,480,775 | $4,317,700 | 35.5% |
| 2014 | $13,924,085 | -$2,556,690 | -15.5% |
| 2015 | $17,171,510 | $3,247,425 | 23.3% |
revenue_by_category2_tbl %>%
mutate(category_2 = category_2 %>% as_factor() %>% fct_reorder(revenue)) %>%
ggplot(aes(category_2, revenue)) +
geom_col(fill = "#2C3E50", alpha = 0.9) +
coord_flip() +
scale_y_continuous(labels = scales::dollar_format()) +
labs(
title = "Revenue by Bike Category",
subtitle = "Total revenue across all periods",
x = "Category",
y = "Revenue"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 16, face = "bold"),
axis.text = element_text(size = 10)
)bike_sales_y %>%
ggplot(aes(x = year, y = sales, color = sales)) +
geom_point(size = 5) +
geom_line(linewidth = 2) +
geom_smooth(method = "lm", formula = 'y ~ x', se = FALSE) +
expand_limits(y = c(0, 20e6)) +
scale_colour_continuous(low = "red", high = "black",
labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) +
scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) +
labs(
title = "Revenue Trend Over Time",
subtitle = "Sales are trending up and to the right!",
x = "Year",
y = "Sales (Millions)",
color = "Rev ($M)",
caption = "Total sales from 2011 to 2015"
) +
theme_tq()This analysis reveals distinct sales patterns across bike categories from 2011-2015: