library(tidyverse)
library(tidyquant)
library(lubridate)
library(ggplot2)

# Load the wrangled bike order-line data from the local RDS file.
bike_orderlines_tbl <- "C:/Users/open/Documents/R PROJECTS/00_data/bike_sales/data_wrangled/bike_orderlines.rds" %>%
    read_rds()

# Business ask: showcase the top 'N' customers by revenue together with their
# cumulative share of revenue, in order to determine how much purchasing power
# the top 5 customers hold.

# Number of customers to keep as individual rows; change this single value to
# update the summary table and the visualization below.
n <- 10

# Build the Top-N customer summary table that feeds the chart below.
#
# Inputs (defined earlier in the script):
#   bike_orderlines_tbl - data frame with at least `bikeshop_name` and
#                         `total_price` columns.
#   n                   - number of customers to keep as individual rows.
#
# Output columns: bikeshop_name (factor), revenue, revenue_text, cum_pct,
# cum_pct_text, rank, label_text. Rows are sorted by descending revenue with
# the lumped "Other" row (when one exists) placed last.
top_N_customers <- bike_orderlines_tbl %>%
    select(bikeshop_name, total_price) %>%
    # Keep the n shops with the largest total_price sums; lump the rest into "Other".
    mutate(bikeshop_name = as.factor(bikeshop_name) %>% fct_lump(n = n, w = total_price)) %>%
    group_by(bikeshop_name) %>%
    summarize(revenue = sum(total_price)) %>%
    ungroup() %>%
    # Order factor levels by revenue, then force "Other" to the lowest level so
    # that the descending sort below always puts it at the bottom.
    mutate(bikeshop_name = bikeshop_name %>% fct_reorder(revenue)) %>%
    mutate(bikeshop_name = bikeshop_name %>% fct_relevel("Other", after = 0)) %>%
    arrange(desc(bikeshop_name)) %>%
    mutate(
        # Scale dollars to millions so the text agrees with the "M" suffix
        # (the previous `revenue - 1e-6` subtracted instead of scaling).
        revenue_text = scales::dollar(revenue, scale = 1e-6, suffix = "M"),
        # Running share of total revenue, accumulated from the largest customer down.
        cum_pct      = cumsum(revenue) / sum(revenue),
        cum_pct_text = scales::percent(cum_pct),
        rank         = row_number(),
        # Only the lumped "Other" row is unranked. Keying on the level rather
        # than on the last row keeps real customers ranked when nothing was lumped.
        rank         = if_else(bikeshop_name == "Other", NA_integer_, rank)
    ) %>%
    mutate(label_text = str_glue("Rank: {rank}\nRev: {revenue_text}\nCumPct: {cum_pct_text}"))
## Console message recorded when running the pipeline above:
## `summarise()` ungrouping output (override with `.groups` argument)
# Lollipop chart of revenue per customer: a segment from 0 to the revenue value,
# a point sized by revenue, and a label showing rank, revenue and cumulative share.
top_N_customers %>%
    ggplot(aes(x = revenue, y = bikeshop_name)) +
    geom_segment(aes(xend = 0, yend = bikeshop_name)) +
    geom_point(aes(size = revenue)) +
    geom_label(
        aes(label = label_text),
        hjust = "inward",
        size  = 2,
        color = palette_light()[1]
    ) +
    # Axis shown in millions of dollars. `labels` is spelled out in full rather
    # than relying on partial matching of `label`.
    # NOTE(review): breaks and limits are hard-coded to 0-30M; revenue values
    # above the upper limit would fall outside the scale - confirm against data.
    scale_x_continuous(
        labels = scales::dollar_format(scale = 1e-6, suffix = "M"),
        breaks = seq(0, 30000000, 5000000),
        limits = c(0, 30000000)
    ) +
    labs(
        title    = str_glue("Top {n} Customers"),
        subtitle = str_glue("Start Date: {year(min(bike_orderlines_tbl$order_date))}
                            End Date: {year(max(bike_orderlines_tbl$order_date))}"),
        x        = "Revenue ($M)",
        y        = "Customer",
        # NOTE(review): this caption is a fixed string and is not recomputed
        # from the data or from `n` - verify it whenever either changes.
        caption  = "The top 6 customers comprise 51% of overall purchasing power."
    ) +
    theme_tq() +
    theme(
        legend.position = "none",
        plot.title      = element_text(face = "bold.italic"),
        axis.text.y     = element_text(angle = 30)
    )