library(haven)
library(tidyverse)

file_path <- "C:\\Users\\JH\\OneDrive - Kitces.com\\Desktop\\JH\\Research_Ticket\\MK CFP Designation and Rev per Client\\Marketing_2026.dta"
df <- read_dta(file_path)

# Sum advanced designations 
df <- df %>%
  mutate(
    post_cfp_count = rowSums(select(., 
      desigaif, desigcaia, desigcap, desigcasl, desigcdfa, desigcepa, 
      desigcfa, desigcft, desigchfc, desigcima, desigcka, desigclu, 
      desigcpa, desigcpwa, desigcrpc, desigckp, desigea, desigpfs, 
      desigricp, desigrlp, desigrma, desigtpcp
    ), na.rm = TRUE)
  )

# Generate Post-CFP Designation Groups
df <- df %>%
  mutate(
    post_cfp_tier = case_when(
      post_cfp_count == 0 ~ "0 Designations",
      post_cfp_count >= 1 & post_cfp_count <= 2 ~ "1-2 Designations",
      post_cfp_count >= 3 ~ "3+ Designations"
    ),
    post_cfp_tier = factor(post_cfp_tier, levels = c(
      "0 Designations", "1-2 Designations", "3+ Designations"
    ))
  )

### Summary Table
summary_table <- df %>%
  group_by(`Post-CFP Designation` = post_cfp_tier) %>%
  summarise(
    Mean = mean(rvclient, na.rm = TRUE),
    Median = median(rvclient, na.rm = TRUE),
    Std_Dev = sd(rvclient, na.rm = TRUE),
    Freq = n(),
    .groups = "drop"
  )

print(knitr::kable(summary_table, digits = 0, format = "markdown", caption = "Summary of Practice Revenue Per Client by Post-CFP Designation"))
Summary of Practice Revenue Per Client by Post-CFP Designation
Post-CFP Designation Mean Median Std_Dev Freq
0 Designations 7588 5278 8357 271
1-2 Designations 7222 5533 6289 203
3+ Designations 5865 5231 4437 32
### graph for Median Revenue per Client
ggplot(summary_table, aes(x = `Post-CFP Designation`, y = Median, fill = `Post-CFP Designation`)) +
  geom_col(width = 0.5, color = "black", show.legend = FALSE) +
  geom_text(aes(label = scales::dollar(Median, accuracy = 1)), vjust = -0.5, fontface = "bold", size = 4) +
  theme_minimal(base_size = 12) +
  scale_fill_brewer(palette = "Blues") +
  scale_y_continuous(labels = scales::dollar_format(), expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "Median Practice Revenue Per Client By Post-CFP Designation",
    subtitle = "",
    x = "Post-CFP Designation Tier",
    y = "Median Revenue Per Client"
  ) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

#  space
cat("\n\n### Interpretation of Findings\n")

Interpretation of Findings

cat("The analysis shows that earning additional post-CFP designations does not drive higher practice revenue per client. Moving from zero designations to an intermediate level (1-2) yields a negligible median revenue increase, while advanced advisors with 3 or more designations actually underperform compared to both lower tiers.\n")

The analysis shows that earning additional post-CFP designations does not drive higher practice revenue per client. Moving from zero designations to an intermediate level (1-2) yields a negligible median revenue increase, while advanced advisors with 3 or more designations actually underperform compared to both lower tiers.