suppressPackageStartupMessages(library(dplyr))
library(haven)
library(dplyr)
library(ggplot2)

# Load the 2025 wellbeing survey from its Stata (.dta) file.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# script only runs where this exact location exists.
df <- haven::read_dta(
  file = "C:\\Users\\JH\\Kitces.com\\DV - Research\\Anonymized Data and Codebooks - All Projects\\Wellbeing Studies\\2025\\Wellbeing 2025.dta"
)


# Stack the data twice, once per experience measure: every respondent
# contributes one "Industry" row (years taken from rspfsexp) and one
# "Client-facing" row (years taken from rspclexp), each carrying the same
# cantril_prsnt value.
df_long <- bind_rows(
  transmute(
    df,
    role_type = "Industry",
    years_exp = as.numeric(rspfsexp),
    cantril_prsnt
  ),
  transmute(
    df,
    role_type = "Client-facing",
    years_exp = as.numeric(rspclexp),
    cantril_prsnt
  )
)

# Sample-size check for the role-type chart: the smaller of the two role
# types' counts of respondents with at least 20 years of experience decides
# whether the 20+ group is split into "20-29" and "30+".
#
# Only rows that can contribute to the plotted means are counted (non-missing
# cantril_prsnt; rows with missing years_exp already fail the >= 20 test).
# role_type is converted to a factor with both levels so that
# count(.drop = FALSE) reports an explicit 0 for a role with no qualifying
# rows. Without that, an empty role is silently left out of the minimum, and
# when no rows qualify at all min() of an empty vector is Inf, which would
# wrongly switch the split on.
n_20plus <- df_long %>%
  filter(years_exp >= 20, !is.na(cantril_prsnt)) %>%
  mutate(
    role_type = factor(role_type, levels = c("Industry", "Client-facing"))
  ) %>%
  count(role_type, name = "n", .drop = FALSE) %>%
  pull(n) %>%
  min()

# TRUE only when both role types have at least 100 qualifying respondents.
use_split <- n_20plus >= 100

# Average Cantril rating per experience bin and role type, rounded to one
# decimal so it can double as the bar label. The 20+ group is split into
# 20-29 / 30+ only when use_split is TRUE.
df_plot <- df_long %>%
  filter(!(is.na(years_exp) | is.na(cantril_prsnt))) %>%
  mutate(
    role_type = factor(role_type, levels = c("Industry", "Client-facing")),
    # years_exp is guaranteed non-missing here, so the bins can be tested
    # from the top down with the same boundaries (<5, 5-9, 10-19, 20-29, 30+).
    exp_bin = case_when(
      years_exp > 29 & use_split ~ "30+ years",
      years_exp > 19 & use_split ~ "20-29 years",
      years_exp > 19             ~ "20+ years",
      years_exp > 9              ~ "10-19",
      years_exp >= 5             ~ "5-9",
      TRUE                       ~ "<5 years"
    )
  ) %>%
  group_by(exp_bin, role_type) %>%
  summarise(
    avg_cantril = round(mean(cantril_prsnt), digits = 1),
    .groups = "drop"
  ) %>%
  mutate(
    # Fix the x-axis order; levels that do not occur are simply unused.
    exp_bin = factor(
      exp_bin,
      levels = c(
        "<5 years", "5-9", "10-19", "20-29 years", "30+ years", "20+ years"
      )
    )
  )

# Dodged bar chart of the average Cantril rating by experience bin, one bar
# per role type, with the rounded mean printed above each bar.
ggplot(df_plot, aes(x = exp_bin, y = avg_cantril, fill = role_type)) +
  geom_col(position = position_dodge(0.85), width = 0.8) +
  geom_text(aes(label = avg_cantril), position = position_dodge(0.85), vjust = -0.4, size = 4.5, fontface = "bold") +
  scale_fill_manual(values = c("Industry" = "#A6CEE3", "Client-facing" = "#1F78B4"), name = NULL) +
  # A continuous scale censors values outside its limits, so a fixed ceiling
  # of 8.2 would silently drop any bar whose mean exceeds it. The ceiling
  # stays at 8.2 whenever every mean is <= 8 and otherwise rises to the next
  # whole number plus the same 0.2 headroom. Breaks above the limit are not
  # drawn, so listing 0:10 leaves the default axis unchanged.
  scale_y_continuous(
    limits = c(0, max(8.2, ceiling(df_plot$avg_cantril) + 0.2, na.rm = TRUE)),
    breaks = 0:10,
    expand = expansion(mult = c(0, 0.05))
  ) +
  labs(title = "Wellbeing By Years of Experience", x = NULL, y = "Average Cantril Rating") +
  theme_minimal(base_size = 15) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 16),
        legend.position = "bottom",
        panel.grid.major.x = element_blank())

# Sample-size check for the career-entry chart: the smallest number of
# respondents with 20+ years of industry experience across the three career
# types decides whether the 20+ bin is split into "20-29" and "30+".
#
# Respondents whose carswitchers value is missing or matches none of the
# three labels end up with career_type = NA. They are dropped before
# counting: count() would otherwise return an extra NA row whose n takes part
# in min() (na.rm = TRUE does not help, because the count itself is never
# NA), so a small unclassified group could wrongly switch the split off.
# Rows without a cantril_prsnt value are excluded too, since they cannot
# contribute to the plotted means. career_type is a factor with all three
# levels so count(.drop = FALSE) reports an explicit 0 for an empty type.
n_20plus <- df %>%
  mutate(
    years_exp = as.numeric(rspfsexp),
    # Value labels as plain text, then mapped to the chart's category names.
    career_type = as.character(haven::as_factor(carswitchers)),
    career_type = case_when(
      career_type == "Career starters" ~ "Start of Career",
      career_type == "Early career switchers" ~ "Early Career Switch",
      career_type == "Second career switchers" ~ "Late Career Switch",
      TRUE ~ NA_character_
    ),
    career_type = factor(
      career_type,
      levels = c("Start of Career", "Early Career Switch", "Late Career Switch")
    )
  ) %>%
  filter(years_exp >= 20, !is.na(cantril_prsnt), !is.na(career_type)) %>%
  count(career_type, name = "n", .drop = FALSE) %>%
  pull(n) %>%
  min()

# TRUE only when every career type has at least 100 qualifying respondents.
use_split <- n_20plus >= 100

# Average Cantril rating per industry-experience bin and career-entry type,
# rounded to one decimal for use as the bar label. The 20+ bin is split into
# 20-29 / 30+ only when use_split is TRUE.
df_plot <- df %>%
  filter(!(is.na(rspfsexp) | is.na(cantril_prsnt) | is.na(carswitchers))) %>%
  mutate(
    # Start from the value labels, then map them to the chart's categories;
    # any other label becomes NA.
    career_type = haven::as_factor(carswitchers),
    career_type = factor(
      case_when(
        career_type == "Career starters" ~ "Start of Career",
        career_type == "Early career switchers" ~ "Early Career Switch",
        career_type == "Second career switchers" ~ "Late Career Switch",
        TRUE ~ NA_character_
      ),
      levels = c("Start of Career", "Early Career Switch", "Late Career Switch")
    ),
    years_exp = as.numeric(rspfsexp),
    # Levels fix the x-axis order; levels that do not occur stay unused.
    exp_bin = factor(
      case_when(
        years_exp < 5               ~ "<5",
        years_exp <= 9              ~ "5-9",
        years_exp <= 19             ~ "10-19",
        use_split & years_exp <= 29 ~ "20-29",
        use_split                   ~ "30+",
        TRUE                        ~ "20+"
      ),
      levels = c("<5", "5-9", "10-19", "20-29", "30+", "20+")
    )
  ) %>%
  group_by(exp_bin, career_type) %>%
  summarise(
    avg_cantril = round(mean(cantril_prsnt), digits = 1),
    .groups = "drop"
  )

# Dodged bar chart of the average Cantril rating by years of financial
# services experience, one bar per career-entry type, with the rounded mean
# printed above each bar.
ggplot(df_plot, aes(x = exp_bin, y = avg_cantril, fill = career_type)) +
  geom_col(position = position_dodge(0.85), width = 0.8) +
  geom_text(aes(label = avg_cantril), position = position_dodge(0.85), vjust = -0.4, size = 4.5, fontface = "bold") +
  scale_fill_manual(values = c("Start of Career" = "#A6CEE3", 
                               "Early Career Switch" = "#1F78B4", 
                               "Late Career Switch" = "#08306B"), name = NULL) +
  # A continuous scale censors values outside its limits, so a fixed ceiling
  # of 8.2 would silently drop any bar whose mean exceeds it. The ceiling
  # stays at 8.2 whenever every mean is <= 8 and otherwise rises to the next
  # whole number plus the same 0.2 headroom. Breaks above the limit are not
  # drawn, so listing 0:10 leaves the default axis unchanged.
  scale_y_continuous(
    limits = c(0, max(8.2, ceiling(df_plot$avg_cantril) + 0.2, na.rm = TRUE)),
    breaks = 0:10,
    expand = expansion(mult = c(0, 0.05))
  ) +
  labs(title = "Wellbeing By When Financial Services Work Began And Industry Exp.",
       x = "Years of Financial Services Experience",
       y = "Average Cantril Rating") +
  theme_minimal(base_size = 15) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 16),
        legend.position = "bottom",
        panel.grid.major.x = element_blank())