Research Question: # Is Perceived Organizational Support (POS) associated # with burnout among federal employees? # # IV: Perceived Organizational Support (POS) # DV: Burnout (reverse-coded engagement/satisfaction items) #SECTION 1: Load Packages
library(tidyverse)
library(psych)
library(carData)
library(corrplot)
library(ggplot2)
FEVS_2024_1 <- read.csv("FEVS 2024-1.csv")
cat("Respondents loaded:", nrow(FEVS_2024_1), "\n")
## Respondents loaded: 674207
cat("Total variables:", ncol(FEVS_2024_1), "\n")
## Total variables: 96
library(readxl)
Copy_of_2024_OPM_FEVS_PRDF_Codebook_1 <- read_excel("Copy of 2024_OPM_FEVS_PRDF_Codebook-1.xlsx")
#View (Copy_of_2024_OPM_FEVS_PRDF_Codebook_1)
# Preview the 8 variables of interest
cat("Respondents loaded:", nrow(FEVS_2024_1), "\n")
## Respondents loaded: 674207
cat("Total variables:", ncol(FEVS_2024_1), "\n")
## Total variables: 96
cat("\nSample of target variables:\n")
##
## Sample of target variables:
dim(FEVS_2024_1)
## [1] 674207 96
names(FEVS_2024_1)[5:24]
## [1] "Q1" "Q2" "Q3" "Q4" "Q5" "Q6" "Q7" "Q8" "Q9"
## [10] "Q10" "Q11" "Q12" "Q13" "Q14" "Q15" "Q16_1" "Q16_2" "Q16_3"
## [19] "Q16_4" "Q16_5"
cat("Respondents loaded:", nrow(FEVS_2024_1), "\n")
## Respondents loaded: 674207
cat("Total variables:", ncol(FEVS_2024_1), "\n")
## Total variables: 96
# print(head(FEVS_2024_1[, c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")], 5))
vars_of_interest <- c(“Q3”, “Q46”, “Q57”, “Q62”, “Q63”, “Q70”, “Q86”, “Q88”)
vars_of_interest <- c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")
FEVS_2024_1 <- FEVS_2024_1 %>%
mutate(across(all_of(vars_of_interest), ~ {
x <- suppressWarnings(as.numeric(as.character(.)))
ifelse(x %in% 1:5, x, NA)
}))
cat("\nMissing values after recode:\n")
##
## Missing values after recode:
print(colSums(is.na(FEVS_2024_1[, vars_of_interest])))
## Q3 Q46 Q57 Q62 Q63 Q70 Q86 Q88
## 9648 14655 34063 28295 58657 27662 29117 27871
# Preview the 8 variables of interest
cat("\nSample of target variables:\n")
##
## Sample of target variables:
print(head(FEVS_2024_1[, c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")], 5))
## Q3 Q46 Q57 Q62 Q63 Q70 Q86 Q88
## 1 4 5 3 4 3 4 4 5
## 2 4 3 2 3 3 3 3 3
## 3 3 1 1 1 NA 1 2 1
## 4 5 4 4 4 5 5 5 5
## 5 4 4 3 4 3 4 5 4
#Per the 2024 FEVS codebook: # Q57 and Q63 include “X” = “Do Not Know” (non-substantive) # All target items use a 1–5 numeric scale # Anything outside 1–5 (including “X”, blanks, or other codes) # should be treated as missing before we build composites.
vars_of_interest <- c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")
FEVS_2024_1<- FEVS_2024_1 %>%
mutate(across(all_of(vars_of_interest), ~ {
x <- suppressWarnings(as.numeric(as.character(.)))
ifelse(x %in% 1:5, x, NA)
}))
cat("\nMissing values after recode:\n")
##
## Missing values after recode:
print(colSums(is.na(FEVS_2024_1[, vars_of_interest])))
## Q3 Q46 Q57 Q62 Q63 Q70 Q86 Q88
## 9648 14655 34063 28295 58657 27662 29117 27871
pos_items_extended <- c("Q57", "Q63", "Q62")
burnout_items <- c("Q3", "Q46", "Q70", "Q86", "Q88")
SECTION 5: Reverse-code burnout items
FEVS_2024_1 <- FEVS_2024_1 %>%
mutate(
Q3_r = 6 - Q3, # "My work gives me a feeling of personal accomplishment"
Q46_r = 6 - Q46, # "I recommend my organization as a good place to work"
Q62_r = 6 - Q62, # "I have a high level of respect for my organization's senior leaders"
Q70_r = 6 - Q70, # "Considering everything, how satisfied are you with your job?"
Q86_r = 6 - Q86, # "My job inspires me"
Q88_r = 6 - Q88 # "I feel a strong personal attachment to my organization"
)
cat("\nReverse-coded items created successfully.\n")
##
## Reverse-coded items created successfully.
SECTION 6: Build composite variables
FEVS_2024_1<- FEVS_2024_1 %>%
mutate(
POS_composite = rowMeans(select(., Q57, Q63), na.rm = TRUE),
Burnout_composite = rowMeans(select(., Q3_r, Q46_r, Q62_r,
Q70_r, Q86_r, Q88_r), na.rm = TRUE)
)
cat("\nComposite variables created.\n")
##
## Composite variables created.
print(summary(FEVS_2024_1 %>% select(POS_composite, Burnout_composite)))
## POS_composite Burnout_composite
## Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:1.500
## Median :4.000 Median :2.000
## Mean :3.522 Mean :2.206
## 3rd Qu.:4.500 3rd Qu.:2.667
## Max. :5.000 Max. :5.000
## NA's :24572 NA's :274
SECTION 7: Reliability (Cronbach’s Alpha)
cat("\n========== INTERNAL CONSISTENCY (Cronbach's Alpha) ==========\n")
##
## ========== INTERNAL CONSISTENCY (Cronbach's Alpha) ==========
cat("\n--- POS scale (Q57, Q63) ---\n")
##
## --- POS scale (Q57, Q63) ---
pos_alpha <- psych::alpha(FEVS_2024_1 %>% select(Q57, Q63) %>% drop_na())
cat("Alpha =", round(pos_alpha$total$raw_alpha, 3), "\n")
## Alpha = 0.836
cat("\n--- Burnout scale (6 reversed items) ---\n")
##
## --- Burnout scale (6 reversed items) ---
burnout_alpha <- psych::alpha(
FEVS_2024_1 %>% select(Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r) %>% drop_na()
)
cat("Alpha =", round(burnout_alpha$total$raw_alpha, 3), "\n")
## Alpha = 0.917
NOTE: With only 2 items, POS alpha may be lower than ideal. # instead for 2-item scales. The correlation between Q57 and Q63 # is printed below as a supplement.
cat("\nCorrelation between Q57 and Q63 (POS items):\n")
##
## Correlation between Q57 and Q63 (POS items):
print(cor.test(FEVS_2024_1$Q57, FEVS_2024_1$Q63, use = "complete.obs"))
##
## Pearson's product-moment correlation
##
## data: FEVS_2024_1$Q57 and FEVS_2024_1$Q63
## t = 808.45, df = 606057, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7191121 0.7215347
## sample estimates:
## cor
## 0.7203256
cat("\n========== DESCRIPTIVE STATISTICS ==========\n")
##
## ========== DESCRIPTIVE STATISTICS ==========
desc_data <- FEVS_2024_1 %>%
select(
POS_composite, Burnout_composite,
Q57, Q63,
Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r
) %>%
drop_na()
describe(desc_data) %>%
select(n, mean, sd, median, min, max, skew, kurtosis) %>%
round(3) %>%
print()
## n mean sd median min max skew kurtosis
## POS_composite 578079 3.57 1.13 4 1 5 -0.63 -0.36
## Burnout_composite 578079 2.17 0.93 2 1 5 0.88 0.40
## Q57 578079 3.42 1.26 4 1 5 -0.51 -0.74
## Q63 578079 3.72 1.17 4 1 5 -0.82 -0.07
## Q3_r 578079 2.02 1.04 2 1 5 1.12 0.84
## Q46_r 578079 2.10 1.09 2 1 5 0.98 0.39
## Q62_r 578079 2.24 1.18 2 1 5 0.85 -0.06
## Q70_r 578079 2.15 1.08 2 1 5 0.95 0.32
## Q86_r 578079 2.24 1.10 2 1 5 0.78 -0.04
## Q88_r 578079 2.24 1.13 2 1 5 0.76 -0.15
cat("\n--- Frequency: Q57 (Leaders generate motivation & commitment) ---\n")
##
## --- Frequency: Q57 (Leaders generate motivation & commitment) ---
print(table(FEVS_2024_1$Q57, useNA = "ifany"))
##
## 1 2 3 4 5 <NA>
## 73285 84881 140075 212422 129481 34063
cat("\n--- Frequency: Q63 (Leaders support Work-Life programs) ---\n")
##
## --- Frequency: Q63 (Leaders support Work-Life programs) ---
print(table(FEVS_2024_1$Q63, useNA = "ifany"))
##
## 1 2 3 4 5 <NA>
## 45021 47625 122673 224754 175477 58657
cat("\n--- Frequency: Q70_r (Job satisfaction — reversed to burnout) ---\n")
##
## --- Frequency: Q70_r (Job satisfaction — reversed to burnout) ---
print(table(FEVS_2024_1$Q70_r, useNA = "ifany"))
##
## 1 2 3 4 5 <NA>
## 185684 273691 99326 57632 30212 27662
cat("\n========== PEARSON CORRELATION ==========\n")
##
## ========== PEARSON CORRELATION ==========
r_result <- cor.test(
FEVS_2024_1$POS_composite,
FEVS_2024_1$Burnout_composite,
method = "pearson",
use = "complete.obs"
)
print(r_result)
##
## Pearson's product-moment correlation
##
## data: FEVS_2024_1$POS_composite and FEVS_2024_1$Burnout_composite
## t = -947.04, df = 649626, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7625592 -0.7605162
## sample estimates:
## cor
## -0.7615396
cat("\n--- Plain-language summary ---\n")
##
## --- Plain-language summary ---
cat(sprintf(
"r = %.3f | p = %.4f | 95%% CI [%.3f, %.3f]\n",
r_result$estimate,
r_result$p.value,
r_result$conf.int[1],
r_result$conf.int[2]
))
## r = -0.762 | p = 0.0000 | 95% CI [-0.763, -0.761]
if (r_result$p.value < 0.05) {
direction <- ifelse(r_result$estimate < 0, "negative", "positive")
cat(sprintf(
"Interpretation: There is a statistically significant %s correlation.\n",
direction
))
cat("Higher POS is associated with",
ifelse(r_result$estimate < 0, "LOWER", "HIGHER"),
"burnout scores.\n")
} else {
cat("Interpretation: The correlation is not statistically significant (p ≥ .05).\n")
}
## Interpretation: There is a statistically significant negative correlation.
## Higher POS is associated with LOWER burnout scores.
cat("\n--- Item-level correlation matrix ---\n")
##
## --- Item-level correlation matrix ---
cor_matrix <- FEVS_2024_1 %>%
select(POS_composite, Burnout_composite,
Q57, Q63, Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r) %>%
drop_na() %>%
cor(method = "pearson")
print(round(cor_matrix, 3))
## POS_composite Burnout_composite Q57 Q63 Q3_r Q46_r
## POS_composite 1.000 -0.773 0.933 0.922 -0.533 -0.695
## Burnout_composite -0.773 1.000 -0.742 -0.690 0.816 0.865
## Q57 0.933 -0.742 1.000 0.721 -0.514 -0.665
## Q63 0.922 -0.690 0.721 1.000 -0.473 -0.624
## Q3_r -0.533 0.816 -0.514 -0.473 1.000 0.623
## Q46_r -0.695 0.865 -0.665 -0.624 0.623 1.000
## Q62_r -0.835 0.779 -0.798 -0.749 0.501 0.661
## Q70_r -0.650 0.878 -0.619 -0.586 0.681 0.771
## Q86_r -0.584 0.871 -0.568 -0.513 0.720 0.656
## Q88_r -0.597 0.855 -0.575 -0.531 0.634 0.670
## Q62_r Q70_r Q86_r Q88_r
## POS_composite -0.835 -0.650 -0.584 -0.597
## Burnout_composite 0.779 0.878 0.871 0.855
## Q57 -0.798 -0.619 -0.568 -0.575
## Q63 -0.749 -0.586 -0.513 -0.531
## Q3_r 0.501 0.681 0.720 0.634
## Q46_r 0.661 0.771 0.656 0.670
## Q62_r 1.000 0.610 0.558 0.581
## Q70_r 0.610 1.000 0.720 0.675
## Q86_r 0.558 0.720 1.000 0.762
## Q88_r 0.581 0.675 0.762 1.000
# Visualization
corrplot(
cor_matrix,
method = "color",
type = "upper",
addCoef.col = "black",
number.cex = 0.7,
tl.col = "black",
tl.srt = 45,
col = colorRampPalette(c("#8B3A3A", "white", "#2C5F8A"))(200),
title = "Correlation Matrix — POS & Burnout (FEVS 2024)",
mar = c(0, 0, 2, 0)
)
#SECTION 10: Simple linear regression
cat("\n========== SIMPLE LINEAR REGRESSION ==========\n")
##
## ========== SIMPLE LINEAR REGRESSION ==========
cat("Model: Burnout_composite ~ POS_composite\n\n")
## Model: Burnout_composite ~ POS_composite
model <- lm(Burnout_composite ~ POS_composite, data = FEVS_2024_1)
print(summary(model))
##
## Call:
## lm(formula = Burnout_composite ~ POS_composite, data = FEVS_2024_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7732 -0.3377 -0.0689 0.3074 3.7214
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.3969008 0.0024377 1804 <2e-16 ***
## POS_composite -0.6236613 0.0006585 -947 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6054 on 649626 degrees of freedom
## (24579 observations deleted due to missingness)
## Multiple R-squared: 0.5799, Adjusted R-squared: 0.5799
## F-statistic: 8.969e+05 on 1 and 649626 DF, p-value: < 2.2e-16
cat("\n--- Standardized coefficients (beta) ---\n")
##
## --- Standardized coefficients (beta) ---
cat("\n--- Plain-language model summary ---\n")
##
## --- Plain-language model summary ---
b <- round(coef(model)[2], 3)
r2 <- round(summary(model)$r.squared, 3)
p <- round(summary(model)$coefficients[2, 4], 4)
cat(sprintf(
"For every 1-unit increase in POS, burnout changes by %.3f units (β = %.3f, p = %.4f).\n",
b, b, p
))
## For every 1-unit increase in POS, burnout changes by -0.624 units (β = -0.624, p = 0.0000).
cat(sprintf("POS explains %.1f%% of the variance in burnout (R² = %.3f).\n",
r2 * 100, r2))
## POS explains 58.0% of the variance in burnout (R² = 0.580).
#SECTION 11: Regression diagnostics
cat("\n--- Regression diagnostics ---\n")
##
## --- Regression diagnostics ---
# Residual plots (4-panel)
par(mfrow = c(2,2))
plot(model, main = "Regression Diagnostics")
par(mfrow = c(1, 1))
resids <- residuals(model)
if (length(resids) > 5000) {
set.seed(42)
resids <- sample(resids, 5000)
}
cat("\nShapiro-Wilk test on residuals:\n")
##
## Shapiro-Wilk test on residuals:
print(shapiro.test(resids))
##
## Shapiro-Wilk normality test
##
## data: resids
## W = 0.97425, p-value < 2.2e-16
cat("\n--- Reminder ---\n")
##
## --- Reminder ---
cat("With large federal survey samples, Shapiro-Wilk almost always rejects\n")
## With large federal survey samples, Shapiro-Wilk almost always rejects
cat("normality. Visually inspect the Q-Q plot — mild deviations are fine.\n")
## normality. Visually inspect the Q-Q plot — mild deviations are fine.
cat("OLS is robust to non-normality with large n by the Central Limit Theorem.\n")
## OLS is robust to non-normality with large n by the Central Limit Theorem.
#SECTION 12: Visualizations ## Scatter plot with regression line
ggplot(FEVS_2024_1 %>% drop_na(POS_composite, Burnout_composite),
aes(x = POS_composite, y = Burnout_composite)) +
geom_jitter(alpha = 0.06, color = "#2C5F8A", size = 0.7, width = 0.05) +
geom_smooth(method = "lm", se = TRUE,
color = "#C8A951", fill = "#C8A951", alpha = 0.15,
linewidth = 1.3) +
scale_x_continuous(breaks = 1:5, limits = c(0.8, 5.2)) +
scale_y_continuous(breaks = 1:5, limits = c(0.8, 5.2)) +
labs(
title = "Perceived Organizational Support and Employee Burnout",
subtitle = "2024 Federal Employee Viewpoint Survey (FEVS)",
x = "POS Composite Score\n(1 = low support → 5 = high support)",
y = "Burnout Composite Score\n(1 = low burnout → 5 = high burnout)",
caption = paste0(
"Note: Burnout items reverse-coded (6 − original) so higher scores reflect more burnout.\n",
"POS items: Q57 (leadership motivation) + Q63 (Work-Life support). ",
"Burnout items: Q3, Q46, Q62, Q70, Q86, Q88."
)
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(color = "gray50", size = 10),
plot.caption = element_text(color = "gray60", size = 8, hjust = 0)
)
# Distribution: POS composite
ggplot(FEVS_2024_1 %>% drop_na(POS_composite), aes(x = POS_composite)) +
geom_histogram(binwidth = 0.25, fill = "#2C5F8A",
color = "white", alpha = 0.85) +
geom_vline(aes(xintercept = mean(POS_composite, na.rm = TRUE)),
color = "#C8A951", linetype = "dashed", linewidth = 1) +
labs(
title = "Distribution of POS Composite Scores",
x = "POS Composite (1–5)", y = "Count",
caption = "Dashed line = mean"
) +
theme_minimal()
# Distribution: Burnout composite
ggplot(FEVS_2024_1%>% drop_na(Burnout_composite), aes(x = Burnout_composite)) +
geom_histogram(binwidth = 0.25, fill = "#8B3A3A",
color = "white", alpha = 0.85) +
geom_vline(aes(xintercept = mean(Burnout_composite, na.rm = TRUE)),
color = "#C8A951", linetype = "dashed", linewidth = 1) +
labs(
title = "Distribution of Burnout Composite Scores (Reversed)",
x = "Burnout Composite (1–5, higher = more burnout)", y = "Count",
caption = "Dashed line = mean"
) +
theme_minimal()
#SECTION 13: Export clean data (optional)
FEVS_2024_1 %>%
select(POS_composite, Burnout_composite,
Q57, Q63, Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r) %>%
write.csv("fevs_pos_burnout_composites.csv", row.names = FALSE)
#updated analysis #recode demographics control variables All four variables use letter codes (A, B, C) in the FEVS. # convert them to labeled factors so R treats them correctly in the regression (as categorical, not numeric). #1st level reference catergory
demo_vars <- c("DAGEGRP", "DSUPER", "DFEDTEN", "DSEX")
missing_vars <- demo_vars[!demo_vars %in% names(FEVS_2024_1)]
if (length(missing_vars) > 0) {
cat("WARNING — these variables were not found:", paste(missing_vars, collapse = ", "), "\n")
cat("Check if tenure is spelled DEFEDTEN in your dataset.\n")
# Try alternate spelling for tenure
if ("DEFEDTEN" %in% names(fevs)) {
fevs$DFEDTEN <- fevs$DEFEDTEN
cat("Found DEFEDTEN — copied to DFEDTEN for consistency.\n")
}
} else {
cat("All four demographic variables confirmed in dataset.\n")
}
## All four demographic variables confirmed in dataset.
# Recode each variable into a labeled factor
FEVS_2024_1 <- FEVS_2024_1 %>%
mutate(
# Age group: A = Under 40 (reference), B = 40 or Older
age_group = factor(
case_when(
DAGEGRP == "A" ~ "Under 40",
DAGEGRP == "B" ~ "40 or Older",
TRUE ~ NA_character_
),
levels = c("Under 40", "40 or Older") # Under 40 = reference
),
# Supervisory status: A = Non-Supervisor (reference), B = Supervisor
supervisory = factor(
case_when(
DSUPER == "A" ~ "Non-Supervisor",
DSUPER == "B" ~ "Supervisor/Manager",
TRUE ~ NA_character_
),
levels = c("Non-Supervisor", "Supervisor/Manager") # Non-Supervisor = reference
),
# Federal tenure: A = ≤10 yrs (reference), B = 11–20 yrs, C = 20+ yrs
fed_tenure = factor(
case_when(
DFEDTEN == "A" ~ "10 yrs or fewer",
DFEDTEN == "B" ~ "11 to 20 yrs",
DFEDTEN == "C" ~ "More than 20 yrs",
TRUE ~ NA_character_
),
levels = c("10 yrs or fewer", "11 to 20 yrs", "More than 20 yrs") # Newest = reference
),
# Gender: A = Male (reference), B = Female
gender = factor(
case_when(
DSEX == "A" ~ "Male",
DSEX == "B" ~ "Female",
TRUE ~ NA_character_
),
levels = c("Male", "Female") # Male = reference
)
)
cat("\n--- Age Group ---\n")
##
## --- Age Group ---
print(table(FEVS_2024_1$age_group, useNA = "ifany"))
##
## Under 40 40 or Older <NA>
## 151314 441972 80921
cat("\n--- Supervisory Status ---\n")
##
## --- Supervisory Status ---
print(table(FEVS_2024_1$supervisory, useNA = "ifany"))
##
## Non-Supervisor Supervisor/Manager <NA>
## 494780 133618 45809
cat("\n--- Federal Tenure ---\n")
##
## --- Federal Tenure ---
print(table(FEVS_2024_1$fed_tenure, useNA = "ifany"))
##
## 10 yrs or fewer 11 to 20 yrs More than 20 yrs <NA>
## 276499 199531 152612 45565
cat("\n--- Gender ---\n")
##
## --- Gender ---
print(table(FEVS_2024_1$gender, useNA = "ifany"))
##
## Male Female <NA>
## 269549 256446 148212
#Descriptive statistics for control variables
cat("\n========== CONTROL VARIABLE DESCRIPTIVES ==========\n")
##
## ========== CONTROL VARIABLE DESCRIPTIVES ==========
cat("\nAge Group (%):\n")
##
## Age Group (%):
print(round(prop.table(table(FEVS_2024_1$age_group)) * 100, 1))
##
## Under 40 40 or Older
## 25.5 74.5
cat("\nSupervisory Status (%):\n")
##
## Supervisory Status (%):
print(round(prop.table(table(FEVS_2024_1$supervisory)) * 100, 1))
##
## Non-Supervisor Supervisor/Manager
## 78.7 21.3
cat("\nFederal Tenure (%):\n")
##
## Federal Tenure (%):
print(round(prop.table(table(FEVS_2024_1$fed_tenure)) * 100, 1))
##
## 10 yrs or fewer 11 to 20 yrs More than 20 yrs
## 44.0 31.7 24.3
cat("\nGender (%):\n")
##
## Gender (%):
print(round(prop.table(table(FEVS_2024_1$gender)) * 100, 1))
##
## Male Female
## 51.2 48.8
#Model 1 — Original (POS only, no controls)
# Keep the original model for comparison
cat("\n========== MODEL 1: POS Only (Original) ==========\n")
##
## ========== MODEL 1: POS Only (Original) ==========
model1 <- lm(Burnout_composite ~ POS_composite, data = FEVS_2024_1)
summary(model1)
##
## Call:
## lm(formula = Burnout_composite ~ POS_composite, data = FEVS_2024_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7732 -0.3377 -0.0689 0.3074 3.7214
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.3969008 0.0024377 1804 <2e-16 ***
## POS_composite -0.6236613 0.0006585 -947 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6054 on 649626 degrees of freedom
## (24579 observations deleted due to missingness)
## Multiple R-squared: 0.5799, Adjusted R-squared: 0.5799
## F-statistic: 8.969e+05 on 1 and 649626 DF, p-value: < 2.2e-16
#Model 2 — POS + Demographic Controls
cat("\n========== MODEL 2: POS + Demographic Controls ==========\n")
##
## ========== MODEL 2: POS + Demographic Controls ==========
cat("Controls: age group, supervisory status, federal tenure, gender\n\n")
## Controls: age group, supervisory status, federal tenure, gender
model2 <- lm(
Burnout_composite ~ POS_composite +
age_group +
supervisory +
fed_tenure +
gender,
data = FEVS_2024_1
)
summary(model2)
##
## Call:
## lm(formula = Burnout_composite ~ POS_composite + age_group +
## supervisory + fed_tenure + gender, data = FEVS_2024_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7925 -0.3226 -0.0694 0.3085 3.3854
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4100301 0.0031786 1387.418 < 2e-16 ***
## POS_composite -0.6174951 0.0007358 -839.203 < 2e-16 ***
## age_group40 or Older -0.0364928 0.0021750 -16.779 < 2e-16 ***
## supervisorySupervisor/Manager -0.1110891 0.0021556 -51.536 < 2e-16 ***
## fed_tenure11 to 20 yrs 0.0331483 0.0021071 15.732 < 2e-16 ***
## fed_tenureMore than 20 yrs 0.0074923 0.0023757 3.154 0.00161 **
## genderFemale -0.0166628 0.0016638 -10.015 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5961 on 519054 degrees of freedom
## (155146 observations deleted due to missingness)
## Multiple R-squared: 0.5809, Adjusted R-squared: 0.5809
## F-statistic: 1.199e+05 on 6 and 519054 DF, p-value: < 2.2e-16
#Model comparison — did the controls matter?
cat("\n========== MODEL COMPARISON ==========\n")
##
## ========== MODEL COMPARISON ==========
# Side-by-side R-squared comparison
cat(sprintf("Model 1 (POS only) — R² = %.3f\n", summary(model1)$r.squared))
## Model 1 (POS only) — R² = 0.580
cat(sprintf("Model 2 (POS + controls)— R² = %.3f\n", summary(model2)$r.squared))
## Model 2 (POS + controls)— R² = 0.581
cat(sprintf("Change in R² — ΔR² = %.3f\n",
summary(model2)$r.squared - summary(model1)$r.squared))
## Change in R² — ΔR² = 0.001
# F-test: does adding controls significantly improve the model?
cat("\nF-test — does Model 2 explain significantly more variance than Model 1?\n")
##
## F-test — does Model 2 explain significantly more variance than Model 1?
# Refit Model 1 on the SAME rows as Model 2 so sizes match
model1_matched <- lm(Burnout_composite ~ POS_composite,
data = na.omit(FEVS_2024_1[, c("Burnout_composite", "POS_composite",
"age_group", "supervisory",
"fed_tenure", "gender")]))
cat(sprintf("Model 1 (POS only) — R² = %.3f\n", summary(model1_matched)$r.squared))
## Model 1 (POS only) — R² = 0.578
cat(sprintf("Model 2 (POS + controls) — R² = %.3f\n", summary(model2)$r.squared))
## Model 2 (POS + controls) — R² = 0.581
cat(sprintf("Change in R² — ΔR² = %.3f\n",
summary(model2)$r.squared - summary(model1_matched)$r.squared))
## Change in R² — ΔR² = 0.003
print(anova(model1_matched, model2))
## Analysis of Variance Table
##
## Model 1: Burnout_composite ~ POS_composite
## Model 2: Burnout_composite ~ POS_composite + age_group + supervisory +
## fed_tenure + gender
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 519059 185666
## 2 519054 184444 5 1222.1 687.81 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#plain-language coefficient summary
cat("\n========== PLAIN-LANGUAGE COEFFICIENT SUMMARY ==========\n")
##
## ========== PLAIN-LANGUAGE COEFFICIENT SUMMARY ==========
coefs <- summary(model2)$coefficients
cat("\nFor each predictor in Model 2:\n\n")
##
## For each predictor in Model 2:
for (i in 1:nrow(coefs)) {
var <- rownames(coefs)[i]
b <- round(coefs[i, "Estimate"], 3)
p <- coefs[i, "Pr(>|t|)"]
sig <- ifelse(p < .001, "***", ifelse(p < .01, "**", ifelse(p < .05, "*", "ns")))
cat(sprintf(" %-40s b = %6.3f %s\n", var, b, sig))
}
## (Intercept) b = 4.410 ***
## POS_composite b = -0.617 ***
## age_group40 or Older b = -0.036 ***
## supervisorySupervisor/Manager b = -0.111 ***
## fed_tenure11 to 20 yrs b = 0.033 ***
## fed_tenureMore than 20 yrs b = 0.007 **
## genderFemale b = -0.017 ***
cat("\n*** p < .001 | ** p < .01 | * p < .05 | ns = not significant\n")
##
## *** p < .001 | ** p < .01 | * p < .05 | ns = not significant
cat("\nReference categories:\n")
##
## Reference categories:
cat(" age_group → Under 40\n")
## age_group → Under 40
cat(" supervisory → Non-Supervisor\n")
## supervisory → Non-Supervisor
cat(" fed_tenure → 10 yrs or fewer\n")
## fed_tenure → 10 yrs or fewer
cat(" gender → Male\n")
## gender → Male
# Extract Model 2 coefficients for plotting (exclude intercept)
coef_df <- as.data.frame(summary(model2)$coefficients) %>%
rownames_to_column("term") %>%
filter(term != "(Intercept)") %>%
rename(estimate = Estimate, se = `Std. Error`, p = `Pr(>|t|)`) %>%
mutate(
significant = ifelse(p < .05, "Significant", "Not Significant"),
term = case_when(
term == "POS_composite" ~ "POS Composite",
term == "age_group40 or Older" ~ "Age: 40 or Older\n(vs. Under 40)",
term == "supervisorySupervisor/Manager" ~ "Supervisor/Manager\n(vs. Non-Supervisor)",
term == "fed_tenure11 to 20 yrs" ~ "Tenure: 11-20 yrs\n(vs. 10 yrs or fewer)",
term == "fed_tenureMore than 20 yrs" ~ "Tenure: 20+ yrs\n(vs. 10 yrs or fewer)",
term == "genderFemale" ~ "Female\n(vs. Male)",
TRUE ~ term
)
)
ggplot(coef_df, aes(x = reorder(term, estimate), y = estimate, color = significant)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = estimate - 1.96 * se,
ymax = estimate + 1.96 * se), width = 0.2) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
coord_flip() +
scale_color_manual(values = c("Significant" = "#2C5F8A",
"Not Significant" = "#AAAAAA")) +
labs(
title = "Model 2 Regression Coefficients — POS + Demographic Controls",
subtitle = "Error bars = 95% confidence intervals | FEVS 2024",
x = NULL,
y = "Coefficient (b)",
color = NULL,
caption = "Reference groups: Under 40 | Non-Supervisor | ≤10 yrs tenure | Male"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(color = "gray50", size = 10),
legend.position = "bottom"
)
#Interpretation guide (read in console)