Research Question: # What is the relationship between POS and # burnout among federal employees? # # IV: Perceived Organizational Support (POS) # DV: Burnout (reverse-coded engagement/satisfaction items) #SECTION 1: Load Packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(psych)
## Warning: package 'psych' was built under R version 4.5.2
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(carData)
library(corrplot)
## corrplot 0.95 loaded
library(ggplot2)
FEVS_2024_1_ <- read.csv("FEVS 2024 (1).csv")
cat("Respondents loaded:", nrow(FEVS_2024_1_), "\n")
## Respondents loaded: 674207
cat("Total variables:", ncol(FEVS_2024_1_), "\n")
## Total variables: 96
cat("\nSample of target variables:\n")
##
## Sample of target variables:
dim(FEVS_2024_1_)
## [1] 674207 96
names(FEVS_2024_1_)[5:24]
## [1] "Q1" "Q2" "Q3" "Q4" "Q5" "Q6" "Q7" "Q8" "Q9"
## [10] "Q10" "Q11" "Q12" "Q13" "Q14" "Q15" "Q16_1" "Q16_2" "Q16_3"
## [19] "Q16_4" "Q16_5"
FEVS_2024_1_<- read.csv("FEVS 2024 (1).csv")
cat("Respondents loaded:", nrow(FEVS_2024_1_), "\n")
## Respondents loaded: 674207
cat("Total variables:", ncol(FEVS_2024_1_), "\n")
## Total variables: 96
# print(head(FEVS_2024_1_[, c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")], 5))
vars_of_interest <- c(“Q3”, “Q46”, “Q57”, “Q62”, “Q63”, “Q70”, “Q86”, “Q88”)
vars_of_interest <- c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")
FEVS_2024_1_ <- FEVS_2024_1_ %>%
mutate(across(all_of(vars_of_interest), ~ {
x <- suppressWarnings(as.numeric(as.character(.)))
ifelse(x %in% 1:5, x, NA)
}))
cat("\nMissing values after recode:\n")
##
## Missing values after recode:
print(colSums(is.na(FEVS_2024_1_[, vars_of_interest])))
## Q3 Q46 Q57 Q62 Q63 Q70 Q86 Q88
## 9648 14655 34063 28295 58657 27662 29117 27871
# Preview the 8 variables of interest
cat("\nSample of target variables:\n")
##
## Sample of target variables:
print(head(FEVS_2024_1_[, c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")], 5))
## Q3 Q46 Q57 Q62 Q63 Q70 Q86 Q88
## 1 4 5 3 4 3 4 4 5
## 2 4 3 2 3 3 3 3 3
## 3 3 1 1 1 NA 1 2 1
## 4 5 4 4 4 5 5 5 5
## 5 4 4 3 4 3 4 5 4
#Per the 2024 FEVS codebook: # Q57 and Q63 include “X” = “Do Not Know” (non-substantive) # All target items use a 1–5 numeric scale # Anything outside 1–5 (including “X”, blanks, or other codes) # should be treated as missing before we build composites.
vars_of_interest <- c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")
FEVS_2024_1_<- FEVS_2024_1_ %>%
mutate(across(all_of(vars_of_interest), ~ {
x <- suppressWarnings(as.numeric(as.character(.)))
ifelse(x %in% 1:5, x, NA)
}))
cat("\nMissing values after recode:\n")
##
## Missing values after recode:
print(colSums(is.na(FEVS_2024_1_[, vars_of_interest])))
## Q3 Q46 Q57 Q62 Q63 Q70 Q86 Q88
## 9648 14655 34063 28295 58657 27662 29117 27871
pos_items_extended <- c("Q57", "Q63", "Q62")
burnout_items <- c("Q3", "Q46", "Q70", "Q86", "Q88")
SECTION 5: Reverse-code burnout items
FEVS_2024_1_ <- FEVS_2024_1_ %>%
mutate(
Q3_r = 6 - Q3, # "My work gives me a feeling of personal accomplishment"
Q46_r = 6 - Q46, # "I recommend my organization as a good place to work"
Q62_r = 6 - Q62, # "I have a high level of respect for my organization's senior leaders"
Q70_r = 6 - Q70, # "Considering everything, how satisfied are you with your job?"
Q86_r = 6 - Q86, # "My job inspires me"
Q88_r = 6 - Q88 # "I feel a strong personal attachment to my organization"
)
cat("\nReverse-coded items created successfully.\n")
##
## Reverse-coded items created successfully.
SECTION 6: Build composite variables
FEVS_2024_1_<- FEVS_2024_1_ %>%
mutate(
POS_composite = rowMeans(select(., Q57, Q63), na.rm = TRUE),
Burnout_composite = rowMeans(select(., Q3_r, Q46_r, Q62_r,
Q70_r, Q86_r, Q88_r), na.rm = TRUE)
)
cat("\nComposite variables created.\n")
##
## Composite variables created.
print(summary(FEVS_2024_1_ %>% select(POS_composite, Burnout_composite)))
## POS_composite Burnout_composite
## Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:1.500
## Median :4.000 Median :2.000
## Mean :3.522 Mean :2.206
## 3rd Qu.:4.500 3rd Qu.:2.667
## Max. :5.000 Max. :5.000
## NA's :24572 NA's :274
SECTION 7: Reliability (Cronbach’s Alpha)
cat("\n========== INTERNAL CONSISTENCY (Cronbach's Alpha) ==========\n")
##
## ========== INTERNAL CONSISTENCY (Cronbach's Alpha) ==========
cat("\n--- POS scale (Q57, Q63) ---\n")
##
## --- POS scale (Q57, Q63) ---
pos_alpha <- psych::alpha(FEVS_2024_1_ %>% select(Q57, Q63) %>% drop_na())
cat("Alpha =", round(pos_alpha$total$raw_alpha, 3), "\n")
## Alpha = 0.836
cat("\n--- Burnout scale (6 reversed items) ---\n")
##
## --- Burnout scale (6 reversed items) ---
burnout_alpha <- psych::alpha(
FEVS_2024_1_ %>% select(Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r) %>% drop_na()
)
cat("Alpha =", round(burnout_alpha$total$raw_alpha, 3), "\n")
## Alpha = 0.917
NOTE: With only 2 items, POS alpha may be lower than ideal. # instead for 2-item scales. The correlation between Q57 and Q63 # is printed below as a supplement.
cat("\nCorrelation between Q57 and Q63 (POS items):\n")
##
## Correlation between Q57 and Q63 (POS items):
print(cor.test(FEVS_2024_1_$Q57, FEVS_2024_1_$Q63, use = "complete.obs"))
##
## Pearson's product-moment correlation
##
## data: FEVS_2024_1_$Q57 and FEVS_2024_1_$Q63
## t = 808.45, df = 606057, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7191121 0.7215347
## sample estimates:
## cor
## 0.7203256
cat("\n========== DESCRIPTIVE STATISTICS ==========\n")
##
## ========== DESCRIPTIVE STATISTICS ==========
desc_data <- FEVS_2024_1_ %>%
select(
POS_composite, Burnout_composite,
Q57, Q63,
Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r
) %>%
drop_na()
describe(desc_data) %>%
select(n, mean, sd, median, min, max, skew, kurtosis) %>%
round(3) %>%
print()
## n mean sd median min max skew kurtosis
## POS_composite 578079 3.57 1.13 4 1 5 -0.63 -0.36
## Burnout_composite 578079 2.17 0.93 2 1 5 0.88 0.40
## Q57 578079 3.42 1.26 4 1 5 -0.51 -0.74
## Q63 578079 3.72 1.17 4 1 5 -0.82 -0.07
## Q3_r 578079 2.02 1.04 2 1 5 1.12 0.84
## Q46_r 578079 2.10 1.09 2 1 5 0.98 0.39
## Q62_r 578079 2.24 1.18 2 1 5 0.85 -0.06
## Q70_r 578079 2.15 1.08 2 1 5 0.95 0.32
## Q86_r 578079 2.24 1.10 2 1 5 0.78 -0.04
## Q88_r 578079 2.24 1.13 2 1 5 0.76 -0.15
cat("\n--- Frequency: Q57 (Leaders generate motivation & commitment) ---\n")
##
## --- Frequency: Q57 (Leaders generate motivation & commitment) ---
print(table(FEVS_2024_1_$Q57, useNA = "ifany"))
##
## 1 2 3 4 5 <NA>
## 73285 84881 140075 212422 129481 34063
cat("\n--- Frequency: Q63 (Leaders support Work-Life programs) ---\n")
##
## --- Frequency: Q63 (Leaders support Work-Life programs) ---
print(table(FEVS_2024_1_$Q63, useNA = "ifany"))
##
## 1 2 3 4 5 <NA>
## 45021 47625 122673 224754 175477 58657
cat("\n--- Frequency: Q70_r (Job satisfaction — reversed to burnout) ---\n")
##
## --- Frequency: Q70_r (Job satisfaction — reversed to burnout) ---
print(table(FEVS_2024_1_$Q70_r, useNA = "ifany"))
##
## 1 2 3 4 5 <NA>
## 185684 273691 99326 57632 30212 27662
cat("\n========== PEARSON CORRELATION ==========\n")
##
## ========== PEARSON CORRELATION ==========
r_result <- cor.test(
FEVS_2024_1_$POS_composite,
FEVS_2024_1_$Burnout_composite,
method = "pearson",
use = "complete.obs"
)
print(r_result)
##
## Pearson's product-moment correlation
##
## data: FEVS_2024_1_$POS_composite and FEVS_2024_1_$Burnout_composite
## t = -947.04, df = 649626, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7625592 -0.7605162
## sample estimates:
## cor
## -0.7615396
cat("\n--- Plain-language summary ---\n")
##
## --- Plain-language summary ---
cat(sprintf(
"r = %.3f | p = %.4f | 95%% CI [%.3f, %.3f]\n",
r_result$estimate,
r_result$p.value,
r_result$conf.int[1],
r_result$conf.int[2]
))
## r = -0.762 | p = 0.0000 | 95% CI [-0.763, -0.761]
if (r_result$p.value < 0.05) {
direction <- ifelse(r_result$estimate < 0, "negative", "positive")
cat(sprintf(
"Interpretation: There is a statistically significant %s correlation.\n",
direction
))
cat("Higher POS is associated with",
ifelse(r_result$estimate < 0, "LOWER", "HIGHER"),
"burnout scores.\n")
} else {
cat("Interpretation: The correlation is not statistically significant (p ≥ .05).\n")
}
## Interpretation: There is a statistically significant negative correlation.
## Higher POS is associated with LOWER burnout scores.
cat("\n--- Item-level correlation matrix ---\n")
##
## --- Item-level correlation matrix ---
cor_matrix <- FEVS_2024_1_ %>%
select(POS_composite, Burnout_composite,
Q57, Q63, Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r) %>%
drop_na() %>%
cor(method = "pearson")
print(round(cor_matrix, 3))
## POS_composite Burnout_composite Q57 Q63 Q3_r Q46_r
## POS_composite 1.000 -0.773 0.933 0.922 -0.533 -0.695
## Burnout_composite -0.773 1.000 -0.742 -0.690 0.816 0.865
## Q57 0.933 -0.742 1.000 0.721 -0.514 -0.665
## Q63 0.922 -0.690 0.721 1.000 -0.473 -0.624
## Q3_r -0.533 0.816 -0.514 -0.473 1.000 0.623
## Q46_r -0.695 0.865 -0.665 -0.624 0.623 1.000
## Q62_r -0.835 0.779 -0.798 -0.749 0.501 0.661
## Q70_r -0.650 0.878 -0.619 -0.586 0.681 0.771
## Q86_r -0.584 0.871 -0.568 -0.513 0.720 0.656
## Q88_r -0.597 0.855 -0.575 -0.531 0.634 0.670
## Q62_r Q70_r Q86_r Q88_r
## POS_composite -0.835 -0.650 -0.584 -0.597
## Burnout_composite 0.779 0.878 0.871 0.855
## Q57 -0.798 -0.619 -0.568 -0.575
## Q63 -0.749 -0.586 -0.513 -0.531
## Q3_r 0.501 0.681 0.720 0.634
## Q46_r 0.661 0.771 0.656 0.670
## Q62_r 1.000 0.610 0.558 0.581
## Q70_r 0.610 1.000 0.720 0.675
## Q86_r 0.558 0.720 1.000 0.762
## Q88_r 0.581 0.675 0.762 1.000
# Visualization
corrplot(
cor_matrix,
method = "color",
type = "upper",
addCoef.col = "black",
number.cex = 0.7,
tl.col = "black",
tl.srt = 45,
col = colorRampPalette(c("#8B3A3A", "white", "#2C5F8A"))(200),
title = "Correlation Matrix — POS & Burnout (FEVS 2024)",
mar = c(0, 0, 2, 0)
)
#SECTION 10: Simple linear regression
cat("\n========== SIMPLE LINEAR REGRESSION ==========\n")
##
## ========== SIMPLE LINEAR REGRESSION ==========
cat("Model: Burnout_composite ~ POS_composite\n\n")
## Model: Burnout_composite ~ POS_composite
model <- lm(Burnout_composite ~ POS_composite, data = FEVS_2024_1_)
print(summary(model))
##
## Call:
## lm(formula = Burnout_composite ~ POS_composite, data = FEVS_2024_1_)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7732 -0.3377 -0.0689 0.3074 3.7214
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.3969008 0.0024377 1804 <2e-16 ***
## POS_composite -0.6236613 0.0006585 -947 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6054 on 649626 degrees of freedom
## (24579 observations deleted due to missingness)
## Multiple R-squared: 0.5799, Adjusted R-squared: 0.5799
## F-statistic: 8.969e+05 on 1 and 649626 DF, p-value: < 2.2e-16
cat("\n--- Standardized coefficients (beta) ---\n")
##
## --- Standardized coefficients (beta) ---
cat("\n--- Plain-language model summary ---\n")
##
## --- Plain-language model summary ---
b <- round(coef(model)[2], 3)
r2 <- round(summary(model)$r.squared, 3)
p <- round(summary(model)$coefficients[2, 4], 4)
cat(sprintf(
"For every 1-unit increase in POS, burnout changes by %.3f units (β = %.3f, p = %.4f).\n",
b, b, p
))
## For every 1-unit increase in POS, burnout changes by -0.624 units (β = -0.624, p = 0.0000).
cat(sprintf("POS explains %.1f%% of the variance in burnout (R² = %.3f).\n",
r2 * 100, r2))
## POS explains 58.0% of the variance in burnout (R² = 0.580).
#SECTION 11: Regression diagnostics
cat("\n--- Regression diagnostics ---\n")
##
## --- Regression diagnostics ---
# Residual plots (4-panel)
par(mfrow = c(2, 2))
plot(model, main = "Regression Diagnostics")
par(mfrow = c(1, 1))
resids <- residuals(model)
if (length(resids) > 5000) {
set.seed(42)
resids <- sample(resids, 5000)
}
cat("\nShapiro-Wilk test on residuals:\n")
##
## Shapiro-Wilk test on residuals:
print(shapiro.test(resids))
##
## Shapiro-Wilk normality test
##
## data: resids
## W = 0.97425, p-value < 2.2e-16
cat("\n--- Reminder ---\n")
##
## --- Reminder ---
cat("With large federal survey samples, Shapiro-Wilk almost always rejects\n")
## With large federal survey samples, Shapiro-Wilk almost always rejects
cat("normality. Visually inspect the Q-Q plot — mild deviations are fine.\n")
## normality. Visually inspect the Q-Q plot — mild deviations are fine.
cat("OLS is robust to non-normality with large n by the Central Limit Theorem.\n")
## OLS is robust to non-normality with large n by the Central Limit Theorem.
#SECTION 12: Visualizations ## Scatter plot with regression line
ggplot(FEVS_2024_1_ %>% drop_na(POS_composite, Burnout_composite),
aes(x = POS_composite, y = Burnout_composite)) +
geom_jitter(alpha = 0.06, color = "#2C5F8A", size = 0.7, width = 0.05) +
geom_smooth(method = "lm", se = TRUE,
color = "#C8A951", fill = "#C8A951", alpha = 0.15,
linewidth = 1.3) +
scale_x_continuous(breaks = 1:5, limits = c(0.8, 5.2)) +
scale_y_continuous(breaks = 1:5, limits = c(0.8, 5.2)) +
labs(
title = "Perceived Organizational Support and Employee Burnout",
subtitle = "2024 Federal Employee Viewpoint Survey (FEVS)",
x = "POS Composite Score\n(1 = low support → 5 = high support)",
y = "Burnout Composite Score\n(1 = low burnout → 5 = high burnout)",
caption = paste0(
"Note: Burnout items reverse-coded (6 − original) so higher scores reflect more burnout.\n",
"POS items: Q57 (leadership motivation) + Q63 (Work-Life support). ",
"Burnout items: Q3, Q46, Q62, Q70, Q86, Q88."
)
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(color = "gray50", size = 10),
plot.caption = element_text(color = "gray60", size = 8, hjust = 0)
)
## `geom_smooth()` using formula = 'y ~ x'
# Distribution: POS composite
ggplot(FEVS_2024_1_ %>% drop_na(POS_composite), aes(x = POS_composite)) +
geom_histogram(binwidth = 0.25, fill = "#2C5F8A",
color = "white", alpha = 0.85) +
geom_vline(aes(xintercept = mean(POS_composite, na.rm = TRUE)),
color = "#C8A951", linetype = "dashed", linewidth = 1) +
labs(
title = "Distribution of POS Composite Scores",
x = "POS Composite (1–5)", y = "Count",
caption = "Dashed line = mean"
) +
theme_minimal()
# Distribution: Burnout composite
ggplot(FEVS_2024_1_%>% drop_na(Burnout_composite), aes(x = Burnout_composite)) +
geom_histogram(binwidth = 0.25, fill = "#8B3A3A",
color = "white", alpha = 0.85) +
geom_vline(aes(xintercept = mean(Burnout_composite, na.rm = TRUE)),
color = "#C8A951", linetype = "dashed", linewidth = 1) +
labs(
title = "Distribution of Burnout Composite Scores (Reversed)",
x = "Burnout Composite (1–5, higher = more burnout)", y = "Count",
caption = "Dashed line = mean"
) +
theme_minimal()
#SECTION 13: Export clean data (optional)
FEVS_2024_1_ %>%
select(POS_composite, Burnout_composite,
Q57, Q63, Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r) %>%
write.csv("fevs_pos_burnout_composites.csv", row.names = FALSE)