# Import data files
bikes_tbl <- read_excel("bikes.xlsx")
bikeshops_tbl <- read_excel("bikeshops.xlsx")
orderlines_tbl <- read_excel("orderlines.xlsx")
# Join and wrangle data
bike_orderlines_wrangled_tbl <- left_join(orderlines_tbl, bikes_tbl, by = c("product.id" = "bike.id")) %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>%
# Wrangling data: decompose description into three columns
separate(description,
into = c('category.1', 'category.2', 'frame.material'),
sep = ' - ') %>%
separate(location,
into = c('city', 'state'),
sep = ', ',
remove = FALSE) %>%
# Create calculated columns
mutate(total.price = price * quantity) %>%
# Reorganize columns
select(-...1, -location) %>%
# Reorder columns
select(contains('date'), contains('id'),
contains('order'),
quantity, price, total.price,
everything()) %>%
# Rename columns
rename(order_date = order.date) %>%
set_names(names(.) %>% str_replace_all("\\.", "_"))bike_sales_q <- bike_orderlines_wrangled_tbl %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_quarter = floor_date(order_date, unit = "quarter")) %>%
group_by(year_quarter) %>%
summarise(sales = sum(total_price), .groups = 'drop')
bike_sales_q %>%
ggplot(aes(x = year_quarter, y = sales)) +
geom_point(color = "#2C3E50", size = 2) +
geom_line(color = "#2C3E50", linewidth = 1) +
scale_y_continuous(labels = scales::dollar_format()) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
labs(title = "Total Sales", x = NULL, y = "Revenue (USD)") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 12),
panel.grid.minor = element_blank())bike_sales_m <- bike_orderlines_wrangled_tbl %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_month = floor_date(order_date, unit = "month")) %>%
group_by(year_month) %>%
summarise(sales = sum(total_price), .groups = 'drop')
bike_sales_m %>%
ggplot(aes(x = year_month, y = sales)) +
geom_point(color = "black", size = 1.5, alpha = 0.6) +
geom_smooth(method = "loess", se = TRUE, span = 0.2, color = "#2C3E50", fill = "grey70", linewidth = 1) +
scale_y_continuous(labels = scales::dollar_format()) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
labs(title = "Total Sales", x = NULL, y = "Revenue (USD)") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 12),
panel.grid.minor = element_blank())bike_sales_w <- bike_orderlines_wrangled_tbl %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_week = floor_date(order_date, unit = "week")) %>%
group_by(year_week) %>%
summarise(sales = sum(total_price), .groups = 'drop')
bike_sales_w %>%
ggplot(aes(x = year_week, y = sales)) +
geom_point(color = "black", size = 1, alpha = 0.5) +
geom_smooth(method = "loess", se = TRUE, span = 0.1, color = "#2C3E50", fill = "grey70", linewidth = 1) +
scale_y_continuous(labels = scales::dollar_format()) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
labs(title = "Total Sales", x = NULL, y = "Revenue (USD)") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 12),
panel.grid.minor = element_blank())road_categories <- c("Elite Road", "Endurance Road", "Triathalon", "Cyclocross")
bike_sales_road_q <- bike_orderlines_wrangled_tbl %>%
filter(category_2 %in% road_categories) %>%
select(order_date, category_2, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_quarter = floor_date(order_date, unit = "quarter")) %>%
group_by(year_quarter, category_2) %>%
summarise(sales = sum(total_price), .groups = 'drop')
bike_sales_road_q %>%
ggplot(aes(x = year_quarter, y = sales)) +
geom_point(size = 1.5) +
geom_line(linewidth = 0.8) +
facet_wrap(~ category_2, scales = "free_y", ncol = 1) +
scale_y_continuous(labels = scales::dollar_format()) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
labs(title = "Sales By Category 2", x = NULL, y = NULL) +
theme_minimal() +
theme(strip.background = element_rect(fill = "#2c3e50", color = "#2c3e50"),
strip.text = element_text(color = "white", face = "bold", size = 10),
panel.spacing = unit(0.8, "lines"),
axis.text.x = element_text(size = 8),
panel.grid.minor = element_blank())bike_sales_road_m <- bike_orderlines_wrangled_tbl %>%
filter(category_2 %in% road_categories) %>%
select(order_date, category_2, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_month = floor_date(order_date, unit = "month")) %>%
group_by(year_month, category_2) %>%
summarise(sales = sum(total_price), .groups = 'drop')
bike_sales_road_m %>%
ggplot(aes(x = year_month, y = sales)) +
geom_point(size = 1, alpha = 0.6) +
geom_smooth(method = "loess", se = TRUE, span = 0.2, fill = "grey70", linewidth = 0.8) +
facet_wrap(~ category_2, scales = "free_y", ncol = 1) +
scale_y_continuous(labels = scales::dollar_format()) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
labs(title = "Sales By Category 2", x = NULL, y = NULL) +
theme_minimal() +
theme(strip.background = element_rect(fill = "#2c3e50", color = "#2c3e50"),
strip.text = element_text(color = "white", face = "bold", size = 10),
panel.spacing = unit(0.8, "lines"),
axis.text.x = element_text(size = 8),
panel.grid.minor = element_blank())bike_sales_road_w <- bike_orderlines_wrangled_tbl %>%
filter(category_2 %in% road_categories) %>%
select(order_date, category_2, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_week = floor_date(order_date, unit = "week")) %>%
group_by(year_week, category_2) %>%
summarise(sales = sum(total_price), .groups = 'drop')
bike_sales_road_w %>%
ggplot(aes(x = year_week, y = sales)) +
geom_point(size = 0.8, alpha = 0.5) +
geom_smooth(method = "loess", se = TRUE, span = 0.1, fill = "grey70", linewidth = 0.8) +
facet_wrap(~ category_2, scales = "free_y", ncol = 1) +
scale_y_continuous(labels = scales::dollar_format()) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
labs(title = "Sales By Category 2", x = NULL, y = NULL) +
theme_minimal() +
theme(strip.background = element_rect(fill = "#2c3e50", color = "#2c3e50"),
strip.text = element_text(color = "white", face = "bold", size = 10),
panel.spacing = unit(0.8, "lines"),
axis.text.x = element_text(size = 8),
panel.grid.minor = element_blank())mountain_categories <- c("Cross Country Race", "Trail", "Over Mountain", "Sport", "Fat Bike")
bike_sales_mountain_q <- bike_orderlines_wrangled_tbl %>%
filter(category_2 %in% mountain_categories) %>%
select(order_date, category_2, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_quarter = floor_date(order_date, unit = "quarter")) %>%
group_by(year_quarter, category_2) %>%
summarise(sales = sum(total_price), .groups = 'drop')
bike_sales_mountain_q %>%
ggplot(aes(x = year_quarter, y = sales)) +
geom_point(size = 1.5) +
geom_line(linewidth = 0.8) +
facet_wrap(~ category_2, scales = "free_y", ncol = 1) +
scale_y_continuous(labels = scales::dollar_format()) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
labs(title = "Sales By Category 2", x = NULL, y = NULL) +
theme_minimal() +
theme(strip.background = element_rect(fill = "#2c3e50", color = "#2c3e50"),
strip.text = element_text(color = "white", face = "bold", size = 10),
panel.spacing = unit(0.8, "lines"),
axis.text.x = element_text(size = 8),
panel.grid.minor = element_blank())bike_sales_mountain_m <- bike_orderlines_wrangled_tbl %>%
filter(category_2 %in% mountain_categories) %>%
select(order_date, category_2, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_month = floor_date(order_date, unit = "month")) %>%
group_by(year_month, category_2) %>%
summarise(sales = sum(total_price), .groups = 'drop')
bike_sales_mountain_m %>%
ggplot(aes(x = year_month, y = sales)) +
geom_point(size = 1, alpha = 0.6) +
geom_smooth(method = "loess", se = TRUE, span = 0.2, fill = "grey70", linewidth = 0.8) +
facet_wrap(~ category_2, scales = "free_y", ncol = 1) +
scale_y_continuous(labels = scales::dollar_format()) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
labs(title = "Sales By Category 2", x = NULL, y = NULL) +
theme_minimal() +
theme(strip.background = element_rect(fill = "#2c3e50", color = "#2c3e50"),
strip.text = element_text(color = "white", face = "bold", size = 10),
panel.spacing = unit(0.8, "lines"),
axis.text.x = element_text(size = 8),
panel.grid.minor = element_blank())bike_sales_mountain_w <- bike_orderlines_wrangled_tbl %>%
filter(category_2 %in% mountain_categories) %>%
select(order_date, category_2, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_week = floor_date(order_date, unit = "week")) %>%
group_by(year_week, category_2) %>%
summarise(sales = sum(total_price), .groups = 'drop')
bike_sales_mountain_w %>%
ggplot(aes(x = year_week, y = sales)) +
geom_point(size = 0.8, alpha = 0.5) +
geom_smooth(method = "loess", se = TRUE, span = 0.1, fill = "grey70", linewidth = 0.8) +
facet_wrap(~ category_2, scales = "free_y", ncol = 1) +
scale_y_continuous(labels = scales::dollar_format()) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
labs(title = "Sales By Category 2", x = NULL, y = NULL) +
theme_minimal() +
theme(strip.background = element_rect(fill = "#2c3e50", color = "#2c3e50"),
strip.text = element_text(color = "white", face = "bold", size = 10),
panel.spacing = unit(0.8, "lines"),
axis.text.x = element_text(size = 8),
panel.grid.minor = element_blank())