# Import data files
bikes_tbl <- read_excel("./bikes.xlsx")
bikeshops_tbl <- read_excel("./bikeshops.xlsx")
orderlines_tbl <- read_excel("./orderlines.xlsx")
# Join and wrangle data
bike_orderlines_wrangled_tbl <- orderlines_tbl %>%
left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>%
separate(description,
into = c('category_1', 'category_2', 'frame_material'),
sep = ' - ') %>%
separate(location,
into = c('city', 'state'),
sep = ', ',
remove = TRUE) %>%
mutate(total_price = price * quantity) %>%
select(order.date, product.id, customer.id, order.id, order.line,
quantity, price, total_price,
everything()) %>%
rename(order_date = order.date,
product_id = product.id,
customer_id = customer.id,
order_id = order.id,
order_line = order.line)
Monthly Sales
bike_sales_m <- bike_orderlines_wrangled_tbl %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_month = floor_date(order_date, unit = "month")) %>%
group_by(year_month) %>%
summarise(sales = sum(total_price)) %>%
ungroup()
bike_sales_y <- bike_orderlines_wrangled_tbl %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year = year(order_date)) %>%
group_by(year) %>%
summarise(sales = sum(total_price)) %>%
ungroup()
bike_sales_growth <- bike_sales_y %>%
mutate(sales_lag_1 = lag(sales, n = 1)) %>%
mutate(sales_lag_1 = ifelse(is.na(sales_lag_1), sales, sales_lag_1)) %>%
mutate(diff_1 = sales - sales_lag_1,
pct_diff_1 = diff_1 / sales_lag_1)
knitr::kable(bike_sales_growth %>%
select(year, sales, diff_1, pct_diff_1) %>%
mutate(sales = scales::dollar(sales),
diff_1 = scales::dollar(diff_1),
pct_diff_1 = scales::percent(pct_diff_1)),
col.names = c("Year", "Sales", "Change ($)", "Change (%)"),
align = 'lrrr')
2011 |
$11,292,885 |
$0 |
0.0% |
2012 |
$12,163,075 |
$870,190 |
7.7% |
2013 |
$16,480,775 |
$4,317,700 |
35.5% |
2014 |
$13,924,085 |
-$2,556,690 |
-15.5% |
2015 |
$17,171,510 |
$3,247,425 |
23.3% |
Category Revenue Comparison
revenue_by_category2_tbl %>%
mutate(category_2 = fct_reorder(category_2, revenue)) %>%
ggplot(aes(x = category_2, y = revenue)) +
geom_col(fill = "#2C3E50", alpha = 0.9) +
coord_flip() +
scale_y_continuous(labels = scales::dollar_format()) +
labs(
title = "Revenue by Bike Category",
subtitle = "Total revenue across all bike shops",
x = "category_2",
y = "Revenue",
caption = "Data from 2011-2015"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 18, face = "bold", color = "#2C3E50"),
plot.subtitle = element_text(size = 12, color = "#666666"),
axis.title.x = element_text(size = 11, face = "bold"),
axis.title.y = element_text(size = 11, face = "bold"),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(size = 10),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_line(color = "#E5E5E5")
)
