suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(kableExtra))
library(haven)
library(tidyverse)
library(kableExtra)

# Data and categories
df <- read_dta("C:/Users/JH/Kitces.com/DV - Research/Anonymized Data and Codebooks - All Projects/Wellbeing Studies/2025/Wellbeing 2025.dta")
  
#
p25 <- quantile(df$rspinc, 0.25, na.rm = TRUE)
p75 <- quantile(df$rspinc, 0.75, na.rm = TRUE)

df_clean <- df %>%
  mutate(
    low_inc = ifelse(!is.na(rspinc) & rspinc <= p25, 1, 0),
    high_inc = ifelse(!is.na(rspinc) & rspinc >= p75, 1, 0),
    support_share = (replace_na(ftepara, 0) + replace_na(ftecsa, 0)) / fteptot,
    clients_per_staff = cltot / fteptot
  )

# Summary 
get_summary_stats <- function(data, group_label) {
  data %>%
    summarise(
      `Median annual income` = median(rspinc, na.rm = TRUE),
      `Wellbeing 5 years prior (Cantril Ave.)` = mean(cantril_past, na.rm = TRUE),
      `Median hours worked per week` = median(rsphrsadj, na.rm = TRUE),
      `Share of time on admin or compliance tasks (%)` = mean(admin_burden, na.rm = TRUE) * 100,
      `Median Service Team (No. of FTEs)` = median(fteptot, na.rm = TRUE),
      `Median Clients per Staff` = median(clients_per_staff, na.rm = TRUE),
      `Years of client-facing experience` = mean(rspclexp, na.rm = TRUE),
      `Share of FTEs consisting of support staff (%)` = mean(support_share, na.rm = TRUE) * 100
    ) %>%
    pivot_longer(everything(), names_to = "Variable", values_to = group_label)
}

# Groups
low_stats <- df_clean %>% filter(low_inc == 1 & grpcantril == 1) %>% get_summary_stats("Bottom 25% Income")
high_stats <- df_clean %>% filter(high_inc == 1 & grpcantril == 1) %>% get_summary_stats("Top 25% Income")
final_table <- left_join(low_stats, high_stats, by = "Variable")

# Format 
final_table_formatted <- final_table %>%
  mutate(across(c(`Bottom 25% Income`, `Top 25% Income`), ~ case_when(
   Variable == "Median annual income" ~ paste0("$", format(round(., 0), big.mark = ",")),
    TRUE ~ format(round(., 2), nsmall = 2)
  )))

final_table_formatted %>%
  kable(caption = "Highest And Lowest Income Quartiles By Unwell", align = "lrr") %>%
  kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condensed"))
Highest And Lowest Income Quartiles By Unwell
Variable Bottom 25% Income Top 25% Income
Median annual income $68,750 $850,000
Wellbeing 5 years prior (Cantril Ave.) 3.47 4.75
Median hours worked per week 38.46 50.43
Share of time on admin or compliance tasks (%) 36.08 18.13
Median Service Team (No. of FTEs) 3.50 4.50
Median Clients per Staff 24.00 21.88
Years of client-facing experience 8.54 19.18
Share of FTEs consisting of support staff (%) 9.06 25.69