Research Question: # Is Perceived Organizational Support (POS) associated # with burnout among federal employees? # # IV: Perceived Organizational Support (POS) # DV: Burnout (reverse-coded engagement/satisfaction items) #SECTION 1: Load Packages

library(tidyverse)
library(psych)
library(carData)
library(corrplot)
library(ggplot2)

import dataset

FEVS_2024_1 <- read.csv("FEVS 2024-1.csv")
cat("Respondents loaded:", nrow(FEVS_2024_1), "\n")
## Respondents loaded: 674207
cat("Total variables:", ncol(FEVS_2024_1), "\n")
## Total variables: 96
library(readxl)

Copy_of_2024_OPM_FEVS_PRDF_Codebook_1 <- read_excel("Copy of 2024_OPM_FEVS_PRDF_Codebook-1.xlsx")

#View (Copy_of_2024_OPM_FEVS_PRDF_Codebook_1)
# Preview the 8 variables of interest
cat("Respondents loaded:", nrow(FEVS_2024_1), "\n")
## Respondents loaded: 674207
cat("Total variables:", ncol(FEVS_2024_1), "\n")
## Total variables: 96
cat("\nSample of target variables:\n")
## 
## Sample of target variables:
dim(FEVS_2024_1)
## [1] 674207     96
names(FEVS_2024_1)[5:24]
##  [1] "Q1"    "Q2"    "Q3"    "Q4"    "Q5"    "Q6"    "Q7"    "Q8"    "Q9"   
## [10] "Q10"   "Q11"   "Q12"   "Q13"   "Q14"   "Q15"   "Q16_1" "Q16_2" "Q16_3"
## [19] "Q16_4" "Q16_5"
cat("Respondents loaded:", nrow(FEVS_2024_1), "\n")
## Respondents loaded: 674207
cat("Total variables:", ncol(FEVS_2024_1), "\n")
## Total variables: 96
# print(head(FEVS_2024_1[, c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")], 5))

vars_of_interest <- c(“Q3”, “Q46”, “Q57”, “Q62”, “Q63”, “Q70”, “Q86”, “Q88”)

vars_of_interest <- c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")

FEVS_2024_1 <- FEVS_2024_1 %>%
  mutate(across(all_of(vars_of_interest), ~ {
    x <- suppressWarnings(as.numeric(as.character(.)))
    ifelse(x %in% 1:5, x, NA)
  }))

cat("\nMissing values after recode:\n")
## 
## Missing values after recode:
print(colSums(is.na(FEVS_2024_1[, vars_of_interest])))
##    Q3   Q46   Q57   Q62   Q63   Q70   Q86   Q88 
##  9648 14655 34063 28295 58657 27662 29117 27871
# Preview the 8 variables of interest
cat("\nSample of target variables:\n")
## 
## Sample of target variables:
print(head(FEVS_2024_1[, c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")], 5))
##   Q3 Q46 Q57 Q62 Q63 Q70 Q86 Q88
## 1  4   5   3   4   3   4   4   5
## 2  4   3   2   3   3   3   3   3
## 3  3   1   1   1  NA   1   2   1
## 4  5   4   4   4   5   5   5   5
## 5  4   4   3   4   3   4   5   4

SECTION 3: Recode non-response values to NA

#Per the 2024 FEVS codebook: # Q57 and Q63 include “X” = “Do Not Know” (non-substantive) # All target items use a 1–5 numeric scale # Anything outside 1–5 (including “X”, blanks, or other codes) # should be treated as missing before we build composites.

vars_of_interest <- c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")
FEVS_2024_1<- FEVS_2024_1 %>%
  mutate(across(all_of(vars_of_interest), ~ {
    x <- suppressWarnings(as.numeric(as.character(.)))
    ifelse(x %in% 1:5, x, NA)
  }))
 
cat("\nMissing values after recode:\n")
## 
## Missing values after recode:
print(colSums(is.na(FEVS_2024_1[, vars_of_interest])))
##    Q3   Q46   Q57   Q62   Q63   Q70   Q86   Q88 
##  9648 14655 34063 28295 58657 27662 29117 27871

Move Q62 to POS (aligns with codebook classification)

pos_items_extended <- c("Q57", "Q63", "Q62")
burnout_items      <- c("Q3", "Q46", "Q70", "Q86", "Q88")

SECTION 5: Reverse-code burnout items

All burnout indicators are positively worded:

High score = positive experience (accomplishment, inspiration, etc.)

Reversed them so that HIGH score = MORE burnout.

Formula for a 5-point scale: reversed = (5 + 1) - original = 6 - x

Codebook scale labels (for reference):

Q3, Q46, Q62, Q86, Q88: Strongly Agree(5) → Strongly Disagree(1)

Q70: Very Satisfied(5) → Very Dissatisfied(1)

FEVS_2024_1 <- FEVS_2024_1 %>%
  mutate(
    Q3_r  = 6 - Q3,    # "My work gives me a feeling of personal accomplishment"
    Q46_r = 6 - Q46,   # "I recommend my organization as a good place to work"
    Q62_r = 6 - Q62,   # "I have a high level of respect for my organization's senior leaders"
    Q70_r = 6 - Q70,   # "Considering everything, how satisfied are you with your job?"
    Q86_r = 6 - Q86,   # "My job inspires me"
    Q88_r = 6 - Q88    # "I feel a strong personal attachment to my organization"
  )
 
cat("\nReverse-coded items created successfully.\n")
## 
## Reverse-coded items created successfully.

SECTION 6: Build composite variables

POS Composite (average of Q57 and Q63):

Q57 — Leaders Lead Subindex: senior leaders generate motivation & commitment

Q63 — Work-Life Support: senior leaders demonstrate support for Work-Life programs

Higher score = STRONGER perceived organizational support

Burnout Composite (average of 6 reversed items):

Higher score = MORE burnout (less satisfaction, less inspiration, etc.)

FEVS_2024_1<- FEVS_2024_1 %>%
  mutate(
    POS_composite     = rowMeans(select(., Q57, Q63), na.rm = TRUE),
    Burnout_composite = rowMeans(select(., Q3_r, Q46_r, Q62_r,
                                        Q70_r, Q86_r, Q88_r), na.rm = TRUE)
  )
cat("\nComposite variables created.\n")
## 
## Composite variables created.
print(summary(FEVS_2024_1 %>% select(POS_composite, Burnout_composite)))
##  POS_composite   Burnout_composite
##  Min.   :1.000   Min.   :1.000    
##  1st Qu.:3.000   1st Qu.:1.500    
##  Median :4.000   Median :2.000    
##  Mean   :3.522   Mean   :2.206    
##  3rd Qu.:4.500   3rd Qu.:2.667    
##  Max.   :5.000   Max.   :5.000    
##  NA's   :24572   NA's   :274

SECTION 7: Reliability (Cronbach’s Alpha)

Determines whether our composite items hang together well.

α ≥ 0.70 = acceptable; α ≥ 0.80 = good; α ≥ 0.90 = excellent.

cat("\n========== INTERNAL CONSISTENCY (Cronbach's Alpha) ==========\n")
## 
## ========== INTERNAL CONSISTENCY (Cronbach's Alpha) ==========
cat("\n--- POS scale (Q57, Q63) ---\n")
## 
## --- POS scale (Q57, Q63) ---
pos_alpha <- psych::alpha(FEVS_2024_1 %>% select(Q57, Q63) %>% drop_na())
cat("Alpha =", round(pos_alpha$total$raw_alpha, 3), "\n")
## Alpha = 0.836
cat("\n--- Burnout scale (6 reversed items) ---\n")
## 
## --- Burnout scale (6 reversed items) ---
burnout_alpha <- psych::alpha(
  FEVS_2024_1 %>% select(Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r) %>% drop_na()
)
cat("Alpha =", round(burnout_alpha$total$raw_alpha, 3), "\n")
## Alpha = 0.917

NOTE: With only 2 items, POS alpha may be lower than ideal. # instead for 2-item scales. The correlation between Q57 and Q63 # is printed below as a supplement.

cat("\nCorrelation between Q57 and Q63 (POS items):\n")
## 
## Correlation between Q57 and Q63 (POS items):
print(cor.test(FEVS_2024_1$Q57, FEVS_2024_1$Q63, use = "complete.obs"))
## 
##  Pearson's product-moment correlation
## 
## data:  FEVS_2024_1$Q57 and FEVS_2024_1$Q63
## t = 808.45, df = 606057, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7191121 0.7215347
## sample estimates:
##       cor 
## 0.7203256

SECTION 8: Descriptive statistics

cat("\n========== DESCRIPTIVE STATISTICS ==========\n")
## 
## ========== DESCRIPTIVE STATISTICS ==========
desc_data <- FEVS_2024_1 %>%
  select(
    POS_composite, Burnout_composite,
    Q57, Q63,
    Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r
  ) %>%
  drop_na()
 
describe(desc_data) %>%
  select(n, mean, sd, median, min, max, skew, kurtosis) %>%
  round(3) %>%
  print()
##                        n mean   sd median min max  skew kurtosis
## POS_composite     578079 3.57 1.13      4   1   5 -0.63    -0.36
## Burnout_composite 578079 2.17 0.93      2   1   5  0.88     0.40
## Q57               578079 3.42 1.26      4   1   5 -0.51    -0.74
## Q63               578079 3.72 1.17      4   1   5 -0.82    -0.07
## Q3_r              578079 2.02 1.04      2   1   5  1.12     0.84
## Q46_r             578079 2.10 1.09      2   1   5  0.98     0.39
## Q62_r             578079 2.24 1.18      2   1   5  0.85    -0.06
## Q70_r             578079 2.15 1.08      2   1   5  0.95     0.32
## Q86_r             578079 2.24 1.10      2   1   5  0.78    -0.04
## Q88_r             578079 2.24 1.13      2   1   5  0.76    -0.15

Frequency distributions — useful for determining/ spotting response patterns

cat("\n--- Frequency: Q57 (Leaders generate motivation & commitment) ---\n")
## 
## --- Frequency: Q57 (Leaders generate motivation & commitment) ---
print(table(FEVS_2024_1$Q57, useNA = "ifany"))
## 
##      1      2      3      4      5   <NA> 
##  73285  84881 140075 212422 129481  34063
cat("\n--- Frequency: Q63 (Leaders support Work-Life programs) ---\n")
## 
## --- Frequency: Q63 (Leaders support Work-Life programs) ---
print(table(FEVS_2024_1$Q63, useNA = "ifany"))
## 
##      1      2      3      4      5   <NA> 
##  45021  47625 122673 224754 175477  58657
cat("\n--- Frequency: Q70_r (Job satisfaction — reversed to burnout) ---\n")
## 
## --- Frequency: Q70_r (Job satisfaction — reversed to burnout) ---
print(table(FEVS_2024_1$Q70_r, useNA = "ifany"))
## 
##      1      2      3      4      5   <NA> 
## 185684 273691  99326  57632  30212  27662

SECTION 9: Pearson correlation

cat("\n========== PEARSON CORRELATION ==========\n")
## 
## ========== PEARSON CORRELATION ==========
r_result <- cor.test(
  FEVS_2024_1$POS_composite,
  FEVS_2024_1$Burnout_composite,
  method = "pearson",
  use    = "complete.obs"
)
 
print(r_result)
## 
##  Pearson's product-moment correlation
## 
## data:  FEVS_2024_1$POS_composite and FEVS_2024_1$Burnout_composite
## t = -947.04, df = 649626, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7625592 -0.7605162
## sample estimates:
##        cor 
## -0.7615396
cat("\n--- Plain-language summary ---\n")
## 
## --- Plain-language summary ---
cat(sprintf(
  "r = %.3f | p = %.4f | 95%% CI [%.3f, %.3f]\n",
  r_result$estimate,
  r_result$p.value,
  r_result$conf.int[1],
  r_result$conf.int[2]
))
## r = -0.762 | p = 0.0000 | 95% CI [-0.763, -0.761]
if (r_result$p.value < 0.05) {
  direction <- ifelse(r_result$estimate < 0, "negative", "positive")
  cat(sprintf(
    "Interpretation: There is a statistically significant %s correlation.\n",
    direction
  ))
  cat("Higher POS is associated with",
      ifelse(r_result$estimate < 0, "LOWER", "HIGHER"),
      "burnout scores.\n")
} else {
  cat("Interpretation: The correlation is not statistically significant (p ≥ .05).\n")
}
## Interpretation: There is a statistically significant negative correlation.
## Higher POS is associated with LOWER burnout scores.

Full item-level correlation matrix

cat("\n--- Item-level correlation matrix ---\n")
## 
## --- Item-level correlation matrix ---
cor_matrix <- FEVS_2024_1 %>%
  select(POS_composite, Burnout_composite,
         Q57, Q63, Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r) %>%
  drop_na() %>%
  cor(method = "pearson")
print(round(cor_matrix, 3))
##                   POS_composite Burnout_composite    Q57    Q63   Q3_r  Q46_r
## POS_composite             1.000            -0.773  0.933  0.922 -0.533 -0.695
## Burnout_composite        -0.773             1.000 -0.742 -0.690  0.816  0.865
## Q57                       0.933            -0.742  1.000  0.721 -0.514 -0.665
## Q63                       0.922            -0.690  0.721  1.000 -0.473 -0.624
## Q3_r                     -0.533             0.816 -0.514 -0.473  1.000  0.623
## Q46_r                    -0.695             0.865 -0.665 -0.624  0.623  1.000
## Q62_r                    -0.835             0.779 -0.798 -0.749  0.501  0.661
## Q70_r                    -0.650             0.878 -0.619 -0.586  0.681  0.771
## Q86_r                    -0.584             0.871 -0.568 -0.513  0.720  0.656
## Q88_r                    -0.597             0.855 -0.575 -0.531  0.634  0.670
##                    Q62_r  Q70_r  Q86_r  Q88_r
## POS_composite     -0.835 -0.650 -0.584 -0.597
## Burnout_composite  0.779  0.878  0.871  0.855
## Q57               -0.798 -0.619 -0.568 -0.575
## Q63               -0.749 -0.586 -0.513 -0.531
## Q3_r               0.501  0.681  0.720  0.634
## Q46_r              0.661  0.771  0.656  0.670
## Q62_r              1.000  0.610  0.558  0.581
## Q70_r              0.610  1.000  0.720  0.675
## Q86_r              0.558  0.720  1.000  0.762
## Q88_r              0.581  0.675  0.762  1.000

# Visualization

corrplot(
  cor_matrix,
  method       = "color",
  type         = "upper",
  addCoef.col  = "black",
  number.cex   = 0.7,
  tl.col       = "black",
  tl.srt       = 45,
  col          = colorRampPalette(c("#8B3A3A", "white", "#2C5F8A"))(200),
  title        = "Correlation Matrix — POS & Burnout (FEVS 2024)",
  mar          = c(0, 0, 2, 0)
)

#SECTION 10: Simple linear regression

cat("\n========== SIMPLE LINEAR REGRESSION ==========\n")
## 
## ========== SIMPLE LINEAR REGRESSION ==========
cat("Model: Burnout_composite ~ POS_composite\n\n")
## Model: Burnout_composite ~ POS_composite
model <- lm(Burnout_composite ~ POS_composite, data = FEVS_2024_1)
print(summary(model))
## 
## Call:
## lm(formula = Burnout_composite ~ POS_composite, data = FEVS_2024_1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7732 -0.3377 -0.0689  0.3074  3.7214 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    4.3969008  0.0024377    1804   <2e-16 ***
## POS_composite -0.6236613  0.0006585    -947   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6054 on 649626 degrees of freedom
##   (24579 observations deleted due to missingness)
## Multiple R-squared:  0.5799, Adjusted R-squared:  0.5799 
## F-statistic: 8.969e+05 on 1 and 649626 DF,  p-value: < 2.2e-16

Standardized coefficients (beta) — easier to interpret in survey research

cat("\n--- Standardized coefficients (beta) ---\n")
## 
## --- Standardized coefficients (beta) ---
cat("\n--- Plain-language model summary ---\n")
## 
## --- Plain-language model summary ---
b  <- round(coef(model)[2], 3)
r2 <- round(summary(model)$r.squared, 3)
p  <- round(summary(model)$coefficients[2, 4], 4)
cat(sprintf(
  "For every 1-unit increase in POS, burnout changes by %.3f units (β = %.3f, p = %.4f).\n",
  b, b, p
))
## For every 1-unit increase in POS, burnout changes by -0.624 units (β = -0.624, p = 0.0000).
cat(sprintf("POS explains %.1f%% of the variance in burnout (R² = %.3f).\n",
            r2 * 100, r2))
## POS explains 58.0% of the variance in burnout (R² = 0.580).

#SECTION 11: Regression diagnostics

cat("\n--- Regression diagnostics ---\n")
## 
## --- Regression diagnostics ---
# Residual plots (4-panel)
par(mfrow = c(2,2))
plot(model, main = "Regression Diagnostics")

par(mfrow = c(1, 1))

Normality of residuals

resids <- residuals(model)
if (length(resids) > 5000) {
  set.seed(42)
  resids <- sample(resids, 5000)
}
cat("\nShapiro-Wilk test on residuals:\n")
## 
## Shapiro-Wilk test on residuals:
print(shapiro.test(resids))
## 
##  Shapiro-Wilk normality test
## 
## data:  resids
## W = 0.97425, p-value < 2.2e-16
cat("\n--- Reminder ---\n")
## 
## --- Reminder ---
cat("With large federal survey samples, Shapiro-Wilk almost always rejects\n")
## With large federal survey samples, Shapiro-Wilk almost always rejects
cat("normality. Visually inspect the Q-Q plot — mild deviations are fine.\n")
## normality. Visually inspect the Q-Q plot — mild deviations are fine.
cat("OLS is robust to non-normality with large n by the Central Limit Theorem.\n")
## OLS is robust to non-normality with large n by the Central Limit Theorem.

#SECTION 12: Visualizations ## Scatter plot with regression line

ggplot(FEVS_2024_1 %>% drop_na(POS_composite, Burnout_composite),
       aes(x = POS_composite, y = Burnout_composite)) +
  geom_jitter(alpha = 0.06, color = "#2C5F8A", size = 0.7, width = 0.05) +
  geom_smooth(method = "lm", se = TRUE,
              color = "#C8A951", fill = "#C8A951", alpha = 0.15,
              linewidth = 1.3) +
  scale_x_continuous(breaks = 1:5, limits = c(0.8, 5.2)) +
  scale_y_continuous(breaks = 1:5, limits = c(0.8, 5.2)) +
  labs(
    title    = "Perceived Organizational Support and Employee Burnout",
    subtitle = "2024 Federal Employee Viewpoint Survey (FEVS)",
    x        = "POS Composite Score\n(1 = low support  →  5 = high support)",
    y        = "Burnout Composite Score\n(1 = low burnout  →  5 = high burnout)",
    caption  = paste0(
      "Note: Burnout items reverse-coded (6 − original) so higher scores reflect more burnout.\n",
      "POS items: Q57 (leadership motivation) + Q63 (Work-Life support). ",
      "Burnout items: Q3, Q46, Q62, Q70, Q86, Q88."
    )
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title    = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "gray50", size = 10),
    plot.caption  = element_text(color = "gray60", size = 8, hjust = 0)
  )

# Distribution: POS composite

ggplot(FEVS_2024_1 %>% drop_na(POS_composite), aes(x = POS_composite)) +
  geom_histogram(binwidth = 0.25, fill = "#2C5F8A",
                 color = "white", alpha = 0.85) +
  geom_vline(aes(xintercept = mean(POS_composite, na.rm = TRUE)),
             color = "#C8A951", linetype = "dashed", linewidth = 1) +
  labs(
    title = "Distribution of POS Composite Scores",
    x = "POS Composite (1–5)", y = "Count",
    caption = "Dashed line = mean"
  ) +
  theme_minimal()

# Distribution: Burnout composite

ggplot(FEVS_2024_1%>% drop_na(Burnout_composite), aes(x = Burnout_composite)) +
  geom_histogram(binwidth = 0.25, fill = "#8B3A3A",
                 color = "white", alpha = 0.85) +
  geom_vline(aes(xintercept = mean(Burnout_composite, na.rm = TRUE)),
             color = "#C8A951", linetype = "dashed", linewidth = 1) +
  labs(
    title = "Distribution of Burnout Composite Scores (Reversed)",
    x = "Burnout Composite (1–5, higher = more burnout)", y = "Count",
    caption = "Dashed line = mean"
  ) +
  theme_minimal()

#SECTION 13: Export clean data (optional)

FEVS_2024_1 %>%
select(POS_composite, Burnout_composite,
Q57, Q63, Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r) %>%
write.csv("fevs_pos_burnout_composites.csv", row.names = FALSE)

#updated analysis #recode demographics control variables All four variables use letter codes (A, B, C) in the FEVS. # convert them to labeled factors so R treats them correctly in the regression (as categorical, not numeric). #1st level reference catergory

demo_vars <- c("DAGEGRP", "DSUPER", "DFEDTEN", "DSEX")
missing_vars <- demo_vars[!demo_vars %in% names(FEVS_2024_1)]
if (length(missing_vars) > 0) {
  cat("WARNING — these variables were not found:", paste(missing_vars, collapse = ", "), "\n")
  cat("Check if tenure is spelled DEFEDTEN in your dataset.\n")
  # Try alternate spelling for tenure
  if ("DEFEDTEN" %in% names(fevs)) {
    fevs$DFEDTEN <- fevs$DEFEDTEN
    cat("Found DEFEDTEN — copied to DFEDTEN for consistency.\n")
  }
} else {
  cat("All four demographic variables confirmed in dataset.\n")
}
## All four demographic variables confirmed in dataset.
# Recode each variable into a labeled factor
FEVS_2024_1 <- FEVS_2024_1 %>%
  mutate(
 
    # Age group: A = Under 40 (reference), B = 40 or Older
    age_group = factor(
      case_when(
        DAGEGRP == "A" ~ "Under 40",
        DAGEGRP == "B" ~ "40 or Older",
        TRUE ~ NA_character_
      ),
      levels = c("Under 40", "40 or Older")   # Under 40 = reference
    ),
 
    # Supervisory status: A = Non-Supervisor (reference), B = Supervisor
    supervisory = factor(
      case_when(
        DSUPER == "A" ~ "Non-Supervisor",
        DSUPER == "B" ~ "Supervisor/Manager",
        TRUE ~ NA_character_
      ),
      levels = c("Non-Supervisor", "Supervisor/Manager")   # Non-Supervisor = reference
    ),
 
    # Federal tenure: A = ≤10 yrs (reference), B = 11–20 yrs, C = 20+ yrs
    fed_tenure = factor(
      case_when(
        DFEDTEN == "A" ~ "10 yrs or fewer",
        DFEDTEN == "B" ~ "11 to 20 yrs",
        DFEDTEN == "C" ~ "More than 20 yrs",
        TRUE ~ NA_character_
      ),
      levels = c("10 yrs or fewer", "11 to 20 yrs", "More than 20 yrs")   # Newest = reference
    ),
 
    # Gender: A = Male (reference), B = Female
    gender = factor(
      case_when(
        DSEX == "A" ~ "Male",
        DSEX == "B" ~ "Female",
        TRUE ~ NA_character_
      ),
      levels = c("Male", "Female")   # Male = reference
    )
 
  )

Quick check — frequency counts for each control variable

cat("\n--- Age Group ---\n")
## 
## --- Age Group ---
print(table(FEVS_2024_1$age_group, useNA = "ifany"))
## 
##    Under 40 40 or Older        <NA> 
##      151314      441972       80921
cat("\n--- Supervisory Status ---\n")
## 
## --- Supervisory Status ---
print(table(FEVS_2024_1$supervisory, useNA = "ifany"))
## 
##     Non-Supervisor Supervisor/Manager               <NA> 
##             494780             133618              45809
cat("\n--- Federal Tenure ---\n")
## 
## --- Federal Tenure ---
print(table(FEVS_2024_1$fed_tenure, useNA = "ifany"))
## 
##  10 yrs or fewer     11 to 20 yrs More than 20 yrs             <NA> 
##           276499           199531           152612            45565
cat("\n--- Gender ---\n")
## 
## --- Gender ---
print(table(FEVS_2024_1$gender, useNA = "ifany"))
## 
##   Male Female   <NA> 
## 269549 256446 148212

#Descriptive statistics for control variables

cat("\n========== CONTROL VARIABLE DESCRIPTIVES ==========\n")
## 
## ========== CONTROL VARIABLE DESCRIPTIVES ==========
cat("\nAge Group (%):\n")
## 
## Age Group (%):
print(round(prop.table(table(FEVS_2024_1$age_group)) * 100, 1))
## 
##    Under 40 40 or Older 
##        25.5        74.5
cat("\nSupervisory Status (%):\n")
## 
## Supervisory Status (%):
print(round(prop.table(table(FEVS_2024_1$supervisory)) * 100, 1))
## 
##     Non-Supervisor Supervisor/Manager 
##               78.7               21.3
cat("\nFederal Tenure (%):\n")
## 
## Federal Tenure (%):
print(round(prop.table(table(FEVS_2024_1$fed_tenure)) * 100, 1))
## 
##  10 yrs or fewer     11 to 20 yrs More than 20 yrs 
##             44.0             31.7             24.3
cat("\nGender (%):\n")
## 
## Gender (%):
print(round(prop.table(table(FEVS_2024_1$gender)) * 100, 1))
## 
##   Male Female 
##   51.2   48.8

#Model 1 — Original (POS only, no controls)

# Keep the original model for comparison
cat("\n========== MODEL 1: POS Only (Original) ==========\n")
## 
## ========== MODEL 1: POS Only (Original) ==========
model1 <- lm(Burnout_composite ~ POS_composite, data = FEVS_2024_1)
summary(model1)
## 
## Call:
## lm(formula = Burnout_composite ~ POS_composite, data = FEVS_2024_1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7732 -0.3377 -0.0689  0.3074  3.7214 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    4.3969008  0.0024377    1804   <2e-16 ***
## POS_composite -0.6236613  0.0006585    -947   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6054 on 649626 degrees of freedom
##   (24579 observations deleted due to missingness)
## Multiple R-squared:  0.5799, Adjusted R-squared:  0.5799 
## F-statistic: 8.969e+05 on 1 and 649626 DF,  p-value: < 2.2e-16

#Model 2 — POS + Demographic Controls

cat("\n========== MODEL 2: POS + Demographic Controls ==========\n")
## 
## ========== MODEL 2: POS + Demographic Controls ==========
cat("Controls: age group, supervisory status, federal tenure, gender\n\n")
## Controls: age group, supervisory status, federal tenure, gender
model2 <- lm(
  Burnout_composite ~ POS_composite +
    age_group +
    supervisory +
    fed_tenure +
    gender,
  data = FEVS_2024_1
)
summary(model2)
## 
## Call:
## lm(formula = Burnout_composite ~ POS_composite + age_group + 
##     supervisory + fed_tenure + gender, data = FEVS_2024_1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7925 -0.3226 -0.0694  0.3085  3.3854 
## 
## Coefficients:
##                                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)                    4.4100301  0.0031786 1387.418  < 2e-16 ***
## POS_composite                 -0.6174951  0.0007358 -839.203  < 2e-16 ***
## age_group40 or Older          -0.0364928  0.0021750  -16.779  < 2e-16 ***
## supervisorySupervisor/Manager -0.1110891  0.0021556  -51.536  < 2e-16 ***
## fed_tenure11 to 20 yrs         0.0331483  0.0021071   15.732  < 2e-16 ***
## fed_tenureMore than 20 yrs     0.0074923  0.0023757    3.154  0.00161 ** 
## genderFemale                  -0.0166628  0.0016638  -10.015  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5961 on 519054 degrees of freedom
##   (155146 observations deleted due to missingness)
## Multiple R-squared:  0.5809, Adjusted R-squared:  0.5809 
## F-statistic: 1.199e+05 on 6 and 519054 DF,  p-value: < 2.2e-16

#Model comparison — did the controls matter?

cat("\n========== MODEL COMPARISON ==========\n")
## 
## ========== MODEL COMPARISON ==========
# Side-by-side R-squared comparison
cat(sprintf("Model 1 (POS only)      — R² = %.3f\n", summary(model1)$r.squared))
## Model 1 (POS only)      — R² = 0.580
cat(sprintf("Model 2 (POS + controls)— R² = %.3f\n", summary(model2)$r.squared))
## Model 2 (POS + controls)— R² = 0.581
cat(sprintf("Change in R²            — ΔR² = %.3f\n",
            summary(model2)$r.squared - summary(model1)$r.squared))
## Change in R²            — ΔR² = 0.001
# F-test: does adding controls significantly improve the model?
cat("\nF-test — does Model 2 explain significantly more variance than Model 1?\n")
## 
## F-test — does Model 2 explain significantly more variance than Model 1?
# Refit Model 1 on the SAME rows as Model 2 so sizes match
model1_matched <- lm(Burnout_composite ~ POS_composite,
                     data = na.omit(FEVS_2024_1[, c("Burnout_composite", "POS_composite",
                                             "age_group", "supervisory",
                                             "fed_tenure", "gender")]))

cat(sprintf("Model 1 (POS only)       — R² = %.3f\n", summary(model1_matched)$r.squared))
## Model 1 (POS only)       — R² = 0.578
cat(sprintf("Model 2 (POS + controls) — R² = %.3f\n", summary(model2)$r.squared))
## Model 2 (POS + controls) — R² = 0.581
cat(sprintf("Change in R²             — ΔR² = %.3f\n",
            summary(model2)$r.squared - summary(model1_matched)$r.squared))
## Change in R²             — ΔR² = 0.003
print(anova(model1_matched, model2))
## Analysis of Variance Table
## 
## Model 1: Burnout_composite ~ POS_composite
## Model 2: Burnout_composite ~ POS_composite + age_group + supervisory + 
##     fed_tenure + gender
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1 519059 185666                                  
## 2 519054 184444  5    1222.1 687.81 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#plain-language coefficient summary

cat("\n========== PLAIN-LANGUAGE COEFFICIENT SUMMARY ==========\n")
## 
## ========== PLAIN-LANGUAGE COEFFICIENT SUMMARY ==========
coefs <- summary(model2)$coefficients
cat("\nFor each predictor in Model 2:\n\n")
## 
## For each predictor in Model 2:
for (i in 1:nrow(coefs)) {
  var   <- rownames(coefs)[i]
  b     <- round(coefs[i, "Estimate"], 3)
  p     <- coefs[i, "Pr(>|t|)"]
  sig   <- ifelse(p < .001, "***", ifelse(p < .01, "**", ifelse(p < .05, "*", "ns")))
  cat(sprintf("  %-40s b = %6.3f  %s\n", var, b, sig))
}
##   (Intercept)                              b =  4.410  ***
##   POS_composite                            b = -0.617  ***
##   age_group40 or Older                     b = -0.036  ***
##   supervisorySupervisor/Manager            b = -0.111  ***
##   fed_tenure11 to 20 yrs                   b =  0.033  ***
##   fed_tenureMore than 20 yrs               b =  0.007  **
##   genderFemale                             b = -0.017  ***
cat("\n*** p < .001 | ** p < .01 | * p < .05 | ns = not significant\n")
## 
## *** p < .001 | ** p < .01 | * p < .05 | ns = not significant
cat("\nReference categories:\n")
## 
## Reference categories:
cat("  age_group   → Under 40\n")
##   age_group   → Under 40
cat("  supervisory → Non-Supervisor\n")
##   supervisory → Non-Supervisor
cat("  fed_tenure  → 10 yrs or fewer\n")
##   fed_tenure  → 10 yrs or fewer
cat("  gender      → Male\n")
##   gender      → Male

Visualization — comparing model coefficients

# Extract Model 2 coefficients for plotting (exclude intercept)
coef_df <- as.data.frame(summary(model2)$coefficients) %>%
  rownames_to_column("term") %>%
  filter(term != "(Intercept)") %>%
  rename(estimate = Estimate, se = `Std. Error`, p = `Pr(>|t|)`) %>%
  mutate(
    significant = ifelse(p < .05, "Significant", "Not Significant"),
    term = case_when(
      term == "POS_composite"                  ~ "POS Composite",
      term == "age_group40 or Older"           ~ "Age: 40 or Older\n(vs. Under 40)",
      term == "supervisorySupervisor/Manager"  ~ "Supervisor/Manager\n(vs. Non-Supervisor)",
      term == "fed_tenure11 to 20 yrs"         ~ "Tenure: 11-20 yrs\n(vs. 10 yrs or fewer)",
      term == "fed_tenureMore than 20 yrs"     ~ "Tenure: 20+ yrs\n(vs. 10 yrs or fewer)",
      term == "genderFemale"                   ~ "Female\n(vs. Male)",
      TRUE ~ term
    )
  )
 
ggplot(coef_df, aes(x = reorder(term, estimate), y = estimate, color = significant)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = estimate - 1.96 * se,
                    ymax = estimate + 1.96 * se), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  coord_flip() +
  scale_color_manual(values = c("Significant" = "#2C5F8A",
                                "Not Significant" = "#AAAAAA")) +
  labs(
    title = "Model 2 Regression Coefficients — POS + Demographic Controls",
    subtitle = "Error bars = 95% confidence intervals | FEVS 2024",
    x = NULL,
    y = "Coefficient (b)",
    color = NULL,
    caption = "Reference groups: Under 40 | Non-Supervisor | ≤10 yrs tenure | Male"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title    = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "gray50", size = 10),
    legend.position = "bottom"
  )

#Interpretation guide (read in console)