Research Question: # What is the relationship between POS and # burnout among federal employees? # # IV: Perceived Organizational Support (POS) # DV: Burnout (reverse-coded engagement/satisfaction items) #SECTION 1: Load Packages

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(psych)
## Warning: package 'psych' was built under R version 4.5.2
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(carData)
library(corrplot)
## corrplot 0.95 loaded
library(ggplot2)

import dataset

FEVS_2024_1_ <- read.csv("FEVS 2024 (1).csv")
cat("Respondents loaded:", nrow(FEVS_2024_1_), "\n")
## Respondents loaded: 674207
cat("Total variables:", ncol(FEVS_2024_1_), "\n")
## Total variables: 96
cat("\nSample of target variables:\n")
## 
## Sample of target variables:
dim(FEVS_2024_1_)
## [1] 674207     96
names(FEVS_2024_1_)[5:24]
##  [1] "Q1"    "Q2"    "Q3"    "Q4"    "Q5"    "Q6"    "Q7"    "Q8"    "Q9"   
## [10] "Q10"   "Q11"   "Q12"   "Q13"   "Q14"   "Q15"   "Q16_1" "Q16_2" "Q16_3"
## [19] "Q16_4" "Q16_5"
FEVS_2024_1_<- read.csv("FEVS 2024 (1).csv")

cat("Respondents loaded:", nrow(FEVS_2024_1_), "\n")
## Respondents loaded: 674207
cat("Total variables:", ncol(FEVS_2024_1_), "\n")
## Total variables: 96
# print(head(FEVS_2024_1_[, c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")], 5))

vars_of_interest <- c(“Q3”, “Q46”, “Q57”, “Q62”, “Q63”, “Q70”, “Q86”, “Q88”)

vars_of_interest <- c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")

FEVS_2024_1_ <- FEVS_2024_1_ %>%
  mutate(across(all_of(vars_of_interest), ~ {
    x <- suppressWarnings(as.numeric(as.character(.)))
    ifelse(x %in% 1:5, x, NA)
  }))

cat("\nMissing values after recode:\n")
## 
## Missing values after recode:
print(colSums(is.na(FEVS_2024_1_[, vars_of_interest])))
##    Q3   Q46   Q57   Q62   Q63   Q70   Q86   Q88 
##  9648 14655 34063 28295 58657 27662 29117 27871
# Preview the 8 variables of interest
cat("\nSample of target variables:\n")
## 
## Sample of target variables:
print(head(FEVS_2024_1_[, c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")], 5))
##   Q3 Q46 Q57 Q62 Q63 Q70 Q86 Q88
## 1  4   5   3   4   3   4   4   5
## 2  4   3   2   3   3   3   3   3
## 3  3   1   1   1  NA   1   2   1
## 4  5   4   4   4   5   5   5   5
## 5  4   4   3   4   3   4   5   4

SECTION 3: Recode non-response values to NA

#Per the 2024 FEVS codebook: # Q57 and Q63 include “X” = “Do Not Know” (non-substantive) # All target items use a 1–5 numeric scale # Anything outside 1–5 (including “X”, blanks, or other codes) # should be treated as missing before we build composites.

vars_of_interest <- c("Q3", "Q46", "Q57", "Q62", "Q63", "Q70", "Q86", "Q88")
FEVS_2024_1_<- FEVS_2024_1_ %>%
  mutate(across(all_of(vars_of_interest), ~ {
    x <- suppressWarnings(as.numeric(as.character(.)))
    ifelse(x %in% 1:5, x, NA)
  }))
 
cat("\nMissing values after recode:\n")
## 
## Missing values after recode:
print(colSums(is.na(FEVS_2024_1_[, vars_of_interest])))
##    Q3   Q46   Q57   Q62   Q63   Q70   Q86   Q88 
##  9648 14655 34063 28295 58657 27662 29117 27871

Move Q62 to POS (aligns with codebook classification)

pos_items_extended <- c("Q57", "Q63", "Q62")
burnout_items      <- c("Q3", "Q46", "Q70", "Q86", "Q88")

SECTION 5: Reverse-code burnout items

All burnout indicators are positively worded:

High score = positive experience (accomplishment, inspiration, etc.)

Reversed them so that HIGH score = MORE burnout.

Formula for a 5-point scale: reversed = (5 + 1) - original = 6 - x

Codebook scale labels (for reference):

Q3, Q46, Q62, Q86, Q88: Strongly Agree(5) → Strongly Disagree(1)

Q70: Very Satisfied(5) → Very Dissatisfied(1)

FEVS_2024_1_ <- FEVS_2024_1_ %>%
  mutate(
    Q3_r  = 6 - Q3,    # "My work gives me a feeling of personal accomplishment"
    Q46_r = 6 - Q46,   # "I recommend my organization as a good place to work"
    Q62_r = 6 - Q62,   # "I have a high level of respect for my organization's senior leaders"
    Q70_r = 6 - Q70,   # "Considering everything, how satisfied are you with your job?"
    Q86_r = 6 - Q86,   # "My job inspires me"
    Q88_r = 6 - Q88    # "I feel a strong personal attachment to my organization"
  )
 
cat("\nReverse-coded items created successfully.\n")
## 
## Reverse-coded items created successfully.

SECTION 6: Build composite variables

POS Composite (average of Q57 and Q63):

Q57 — Leaders Lead Subindex: senior leaders generate motivation & commitment

Q63 — Work-Life Support: senior leaders demonstrate support for Work-Life programs

Higher score = STRONGER perceived organizational support

Burnout Composite (average of 6 reversed items):

Higher score = MORE burnout (less satisfaction, less inspiration, etc.)

FEVS_2024_1_<- FEVS_2024_1_ %>%
  mutate(
    POS_composite     = rowMeans(select(., Q57, Q63), na.rm = TRUE),
    Burnout_composite = rowMeans(select(., Q3_r, Q46_r, Q62_r,
                                        Q70_r, Q86_r, Q88_r), na.rm = TRUE)
  )
cat("\nComposite variables created.\n")
## 
## Composite variables created.
print(summary(FEVS_2024_1_ %>% select(POS_composite, Burnout_composite)))
##  POS_composite   Burnout_composite
##  Min.   :1.000   Min.   :1.000    
##  1st Qu.:3.000   1st Qu.:1.500    
##  Median :4.000   Median :2.000    
##  Mean   :3.522   Mean   :2.206    
##  3rd Qu.:4.500   3rd Qu.:2.667    
##  Max.   :5.000   Max.   :5.000    
##  NA's   :24572   NA's   :274

SECTION 7: Reliability (Cronbach’s Alpha)

Determines whether our composite items hang together well.

α ≥ 0.70 = acceptable; α ≥ 0.80 = good; α ≥ 0.90 = excellent.

cat("\n========== INTERNAL CONSISTENCY (Cronbach's Alpha) ==========\n")
## 
## ========== INTERNAL CONSISTENCY (Cronbach's Alpha) ==========
cat("\n--- POS scale (Q57, Q63) ---\n")
## 
## --- POS scale (Q57, Q63) ---
pos_alpha <- psych::alpha(FEVS_2024_1_ %>% select(Q57, Q63) %>% drop_na())
cat("Alpha =", round(pos_alpha$total$raw_alpha, 3), "\n")
## Alpha = 0.836
cat("\n--- Burnout scale (6 reversed items) ---\n")
## 
## --- Burnout scale (6 reversed items) ---
burnout_alpha <- psych::alpha(
  FEVS_2024_1_ %>% select(Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r) %>% drop_na()
)
cat("Alpha =", round(burnout_alpha$total$raw_alpha, 3), "\n")
## Alpha = 0.917

NOTE: With only 2 items, POS alpha may be lower than ideal. # instead for 2-item scales. The correlation between Q57 and Q63 # is printed below as a supplement.

cat("\nCorrelation between Q57 and Q63 (POS items):\n")
## 
## Correlation between Q57 and Q63 (POS items):
print(cor.test(FEVS_2024_1_$Q57, FEVS_2024_1_$Q63, use = "complete.obs"))
## 
##  Pearson's product-moment correlation
## 
## data:  FEVS_2024_1_$Q57 and FEVS_2024_1_$Q63
## t = 808.45, df = 606057, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7191121 0.7215347
## sample estimates:
##       cor 
## 0.7203256

SECTION 8: Descriptive statistics

cat("\n========== DESCRIPTIVE STATISTICS ==========\n")
## 
## ========== DESCRIPTIVE STATISTICS ==========
desc_data <- FEVS_2024_1_ %>%
  select(
    POS_composite, Burnout_composite,
    Q57, Q63,
    Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r
  ) %>%
  drop_na()
 
describe(desc_data) %>%
  select(n, mean, sd, median, min, max, skew, kurtosis) %>%
  round(3) %>%
  print()
##                        n mean   sd median min max  skew kurtosis
## POS_composite     578079 3.57 1.13      4   1   5 -0.63    -0.36
## Burnout_composite 578079 2.17 0.93      2   1   5  0.88     0.40
## Q57               578079 3.42 1.26      4   1   5 -0.51    -0.74
## Q63               578079 3.72 1.17      4   1   5 -0.82    -0.07
## Q3_r              578079 2.02 1.04      2   1   5  1.12     0.84
## Q46_r             578079 2.10 1.09      2   1   5  0.98     0.39
## Q62_r             578079 2.24 1.18      2   1   5  0.85    -0.06
## Q70_r             578079 2.15 1.08      2   1   5  0.95     0.32
## Q86_r             578079 2.24 1.10      2   1   5  0.78    -0.04
## Q88_r             578079 2.24 1.13      2   1   5  0.76    -0.15

Frequency distributions — useful for determining/ spotting response patterns

cat("\n--- Frequency: Q57 (Leaders generate motivation & commitment) ---\n")
## 
## --- Frequency: Q57 (Leaders generate motivation & commitment) ---
print(table(FEVS_2024_1_$Q57, useNA = "ifany"))
## 
##      1      2      3      4      5   <NA> 
##  73285  84881 140075 212422 129481  34063
cat("\n--- Frequency: Q63 (Leaders support Work-Life programs) ---\n")
## 
## --- Frequency: Q63 (Leaders support Work-Life programs) ---
print(table(FEVS_2024_1_$Q63, useNA = "ifany"))
## 
##      1      2      3      4      5   <NA> 
##  45021  47625 122673 224754 175477  58657
cat("\n--- Frequency: Q70_r (Job satisfaction — reversed to burnout) ---\n")
## 
## --- Frequency: Q70_r (Job satisfaction — reversed to burnout) ---
print(table(FEVS_2024_1_$Q70_r, useNA = "ifany"))
## 
##      1      2      3      4      5   <NA> 
## 185684 273691  99326  57632  30212  27662

SECTION 9: Pearson correlation

cat("\n========== PEARSON CORRELATION ==========\n")
## 
## ========== PEARSON CORRELATION ==========
r_result <- cor.test(
  FEVS_2024_1_$POS_composite,
  FEVS_2024_1_$Burnout_composite,
  method = "pearson",
  use    = "complete.obs"
)
 
print(r_result)
## 
##  Pearson's product-moment correlation
## 
## data:  FEVS_2024_1_$POS_composite and FEVS_2024_1_$Burnout_composite
## t = -947.04, df = 649626, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7625592 -0.7605162
## sample estimates:
##        cor 
## -0.7615396
cat("\n--- Plain-language summary ---\n")
## 
## --- Plain-language summary ---
cat(sprintf(
  "r = %.3f | p = %.4f | 95%% CI [%.3f, %.3f]\n",
  r_result$estimate,
  r_result$p.value,
  r_result$conf.int[1],
  r_result$conf.int[2]
))
## r = -0.762 | p = 0.0000 | 95% CI [-0.763, -0.761]
if (r_result$p.value < 0.05) {
  direction <- ifelse(r_result$estimate < 0, "negative", "positive")
  cat(sprintf(
    "Interpretation: There is a statistically significant %s correlation.\n",
    direction
  ))
  cat("Higher POS is associated with",
      ifelse(r_result$estimate < 0, "LOWER", "HIGHER"),
      "burnout scores.\n")
} else {
  cat("Interpretation: The correlation is not statistically significant (p ≥ .05).\n")
}
## Interpretation: There is a statistically significant negative correlation.
## Higher POS is associated with LOWER burnout scores.

Full item-level correlation matrix

cat("\n--- Item-level correlation matrix ---\n")
## 
## --- Item-level correlation matrix ---
cor_matrix <- FEVS_2024_1_ %>%
  select(POS_composite, Burnout_composite,
         Q57, Q63, Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r) %>%
  drop_na() %>%
  cor(method = "pearson")
print(round(cor_matrix, 3))
##                   POS_composite Burnout_composite    Q57    Q63   Q3_r  Q46_r
## POS_composite             1.000            -0.773  0.933  0.922 -0.533 -0.695
## Burnout_composite        -0.773             1.000 -0.742 -0.690  0.816  0.865
## Q57                       0.933            -0.742  1.000  0.721 -0.514 -0.665
## Q63                       0.922            -0.690  0.721  1.000 -0.473 -0.624
## Q3_r                     -0.533             0.816 -0.514 -0.473  1.000  0.623
## Q46_r                    -0.695             0.865 -0.665 -0.624  0.623  1.000
## Q62_r                    -0.835             0.779 -0.798 -0.749  0.501  0.661
## Q70_r                    -0.650             0.878 -0.619 -0.586  0.681  0.771
## Q86_r                    -0.584             0.871 -0.568 -0.513  0.720  0.656
## Q88_r                    -0.597             0.855 -0.575 -0.531  0.634  0.670
##                    Q62_r  Q70_r  Q86_r  Q88_r
## POS_composite     -0.835 -0.650 -0.584 -0.597
## Burnout_composite  0.779  0.878  0.871  0.855
## Q57               -0.798 -0.619 -0.568 -0.575
## Q63               -0.749 -0.586 -0.513 -0.531
## Q3_r               0.501  0.681  0.720  0.634
## Q46_r              0.661  0.771  0.656  0.670
## Q62_r              1.000  0.610  0.558  0.581
## Q70_r              0.610  1.000  0.720  0.675
## Q86_r              0.558  0.720  1.000  0.762
## Q88_r              0.581  0.675  0.762  1.000

# Visualization

corrplot(
  cor_matrix,
  method       = "color",
  type         = "upper",
  addCoef.col  = "black",
  number.cex   = 0.7,
  tl.col       = "black",
  tl.srt       = 45,
  col          = colorRampPalette(c("#8B3A3A", "white", "#2C5F8A"))(200),
  title        = "Correlation Matrix — POS & Burnout (FEVS 2024)",
  mar          = c(0, 0, 2, 0)
)

#SECTION 10: Simple linear regression

cat("\n========== SIMPLE LINEAR REGRESSION ==========\n")
## 
## ========== SIMPLE LINEAR REGRESSION ==========
cat("Model: Burnout_composite ~ POS_composite\n\n")
## Model: Burnout_composite ~ POS_composite
model <- lm(Burnout_composite ~ POS_composite, data = FEVS_2024_1_)
print(summary(model))
## 
## Call:
## lm(formula = Burnout_composite ~ POS_composite, data = FEVS_2024_1_)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7732 -0.3377 -0.0689  0.3074  3.7214 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    4.3969008  0.0024377    1804   <2e-16 ***
## POS_composite -0.6236613  0.0006585    -947   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6054 on 649626 degrees of freedom
##   (24579 observations deleted due to missingness)
## Multiple R-squared:  0.5799, Adjusted R-squared:  0.5799 
## F-statistic: 8.969e+05 on 1 and 649626 DF,  p-value: < 2.2e-16

Standardized coefficients (beta) — easier to interpret in survey research

cat("\n--- Standardized coefficients (beta) ---\n")
## 
## --- Standardized coefficients (beta) ---
cat("\n--- Plain-language model summary ---\n")
## 
## --- Plain-language model summary ---
b  <- round(coef(model)[2], 3)
r2 <- round(summary(model)$r.squared, 3)
p  <- round(summary(model)$coefficients[2, 4], 4)
cat(sprintf(
  "For every 1-unit increase in POS, burnout changes by %.3f units (β = %.3f, p = %.4f).\n",
  b, b, p
))
## For every 1-unit increase in POS, burnout changes by -0.624 units (β = -0.624, p = 0.0000).
cat(sprintf("POS explains %.1f%% of the variance in burnout (R² = %.3f).\n",
            r2 * 100, r2))
## POS explains 58.0% of the variance in burnout (R² = 0.580).

#SECTION 11: Regression diagnostics

cat("\n--- Regression diagnostics ---\n")
## 
## --- Regression diagnostics ---
# Residual plots (4-panel)
par(mfrow = c(2, 2))
plot(model, main = "Regression Diagnostics")

par(mfrow = c(1, 1))

Normality of residuals

resids <- residuals(model)
if (length(resids) > 5000) {
  set.seed(42)
  resids <- sample(resids, 5000)
}
cat("\nShapiro-Wilk test on residuals:\n")
## 
## Shapiro-Wilk test on residuals:
print(shapiro.test(resids))
## 
##  Shapiro-Wilk normality test
## 
## data:  resids
## W = 0.97425, p-value < 2.2e-16
cat("\n--- Reminder ---\n")
## 
## --- Reminder ---
cat("With large federal survey samples, Shapiro-Wilk almost always rejects\n")
## With large federal survey samples, Shapiro-Wilk almost always rejects
cat("normality. Visually inspect the Q-Q plot — mild deviations are fine.\n")
## normality. Visually inspect the Q-Q plot — mild deviations are fine.
cat("OLS is robust to non-normality with large n by the Central Limit Theorem.\n")
## OLS is robust to non-normality with large n by the Central Limit Theorem.

#SECTION 12: Visualizations ## Scatter plot with regression line

ggplot(FEVS_2024_1_ %>% drop_na(POS_composite, Burnout_composite),
       aes(x = POS_composite, y = Burnout_composite)) +
  geom_jitter(alpha = 0.06, color = "#2C5F8A", size = 0.7, width = 0.05) +
  geom_smooth(method = "lm", se = TRUE,
              color = "#C8A951", fill = "#C8A951", alpha = 0.15,
              linewidth = 1.3) +
  scale_x_continuous(breaks = 1:5, limits = c(0.8, 5.2)) +
  scale_y_continuous(breaks = 1:5, limits = c(0.8, 5.2)) +
  labs(
    title    = "Perceived Organizational Support and Employee Burnout",
    subtitle = "2024 Federal Employee Viewpoint Survey (FEVS)",
    x        = "POS Composite Score\n(1 = low support  →  5 = high support)",
    y        = "Burnout Composite Score\n(1 = low burnout  →  5 = high burnout)",
    caption  = paste0(
      "Note: Burnout items reverse-coded (6 − original) so higher scores reflect more burnout.\n",
      "POS items: Q57 (leadership motivation) + Q63 (Work-Life support). ",
      "Burnout items: Q3, Q46, Q62, Q70, Q86, Q88."
    )
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title    = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "gray50", size = 10),
    plot.caption  = element_text(color = "gray60", size = 8, hjust = 0)
  )
## `geom_smooth()` using formula = 'y ~ x'

# Distribution: POS composite

ggplot(FEVS_2024_1_ %>% drop_na(POS_composite), aes(x = POS_composite)) +
  geom_histogram(binwidth = 0.25, fill = "#2C5F8A",
                 color = "white", alpha = 0.85) +
  geom_vline(aes(xintercept = mean(POS_composite, na.rm = TRUE)),
             color = "#C8A951", linetype = "dashed", linewidth = 1) +
  labs(
    title = "Distribution of POS Composite Scores",
    x = "POS Composite (1–5)", y = "Count",
    caption = "Dashed line = mean"
  ) +
  theme_minimal()

# Distribution: Burnout composite

ggplot(FEVS_2024_1_%>% drop_na(Burnout_composite), aes(x = Burnout_composite)) +
  geom_histogram(binwidth = 0.25, fill = "#8B3A3A",
                 color = "white", alpha = 0.85) +
  geom_vline(aes(xintercept = mean(Burnout_composite, na.rm = TRUE)),
             color = "#C8A951", linetype = "dashed", linewidth = 1) +
  labs(
    title = "Distribution of Burnout Composite Scores (Reversed)",
    x = "Burnout Composite (1–5, higher = more burnout)", y = "Count",
    caption = "Dashed line = mean"
  ) +
  theme_minimal()

#SECTION 13: Export clean data (optional)

FEVS_2024_1_ %>%
select(POS_composite, Burnout_composite,
Q57, Q63, Q3_r, Q46_r, Q62_r, Q70_r, Q86_r, Q88_r) %>%
write.csv("fevs_pos_burnout_composites.csv", row.names = FALSE)