Ed_thriving_TWB

# Load packages
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(janitor)

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(psych)

## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(haven)
library(skimr)
library(lubridate)
library(readr)
library(knitr)
library(kableExtra)

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(dplyr)
library(tinytex)

#upload data
#df <- ET_District_Survey_Pleasantville_LMC_Spring_2025_5_24_25_analysis

df_up <- Spring_2025_NEA_Data_Survey_data_1_

# Remove the first row from spring data item prompts
df <- df_up[-1, ] 


#convert to factor
df$PID <- as.factor(df$PID)
df$TWB39 <- as.factor(df$TWB39)


#update other variables to num  
# Identify character columns excluding PID and twb39
char_cols <- sapply(df, is.character)
char_cols[c("PID", "TWB39")] <- FALSE  # exclude these two

# Convert remaining character columns to numeric
df[, char_cols] <- lapply(df[, char_cols], function(x) as.numeric(x))


df

## # A tibble: 3,814 × 54
##    PID    TWB1  TWB2  TWB3  TWB4  TWB5  TWB6  TWB7  TWB8  TWB9 TWB10 TWB11 TWB12
##    <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 Clif…     1     1     1     1     1     1     1     1     1     1     1     1
##  2 Clif…     1     1     1     1     1     1     1     1     1     1     1     1
##  3 Clif…     1     1     1     1     1     6     6     1     1     1     1     1
##  4 Clif…     1     3     4     5     4     1     4     2     5     4     2     4
##  5 Clif…     1     1     1     1     1     1     1     1     1     1     1     1
##  6 Clif…     1     1     1     1     1     1     1     1     1     1     1     1
##  7 Clif…     1     1     1     1     1     1     1     1     1     1     1     1
##  8 Clif…     1     1     2     3     2     2     1     3     2     2     1     1
##  9 Clif…     6     5     5     5     6     6     4     5     6     6     4     6
## 10 Clif…     6     6     6     6     6     6     6     6     6     6     6     6
## # ℹ 3,804 more rows
## # ℹ 41 more variables: TWB13 <dbl>, TWB14 <dbl>, TWB15 <dbl>, TWB16 <dbl>,
## #   TWB17 <dbl>, TWB18 <dbl>, TWB19 <dbl>, TWB20 <dbl>, TWB21 <dbl>,
## #   TWB22 <dbl>, TWB23 <dbl>, TWB24 <dbl>, TWB25 <dbl>, TWB26 <dbl>,
## #   TWB27 <dbl>, TWB28 <dbl>, TWB29 <dbl>, TWB30 <dbl>, TWB31 <dbl>,
## #   TWB32 <dbl>, TWB33 <dbl>, TWB34 <dbl>, TWB35 <dbl>, TWB36 <dbl>,
## #   TWB37 <dbl>, TWB38 <dbl>, TWB39 <fct>, TWB40 <dbl>, TWB41 <dbl>, …

# TWB: Responsive Leadership & Supportive Culture (TWB1–TWB14)
df <- df %>%
  mutate(
    twb_sub_score = (
      TWB1 + TWB2 + TWB3 + TWB4 + TWB5 + TWB6 + TWB7 +
      TWB8 + TWB9 + TWB10 + TWB11 + TWB12 + TWB13 + TWB14
    ) / 14
  )

# Growth (TWB15–TWB17)
df <- df %>%
  mutate(
    growth_score = (TWB15 + TWB16 + TWB17) / 3
  )

# Well-being (TWB18–TWB19)
df <- df %>%
  mutate(
    wellbeing_score = (TWB18 + TWB19) / 2
  )

# Acceptance (TWB20–TWB21)
df <- df %>%
  mutate(
    acceptance_score = (TWB20 + TWB21) / 2
  )

# Adaptability (TWB22–TWB23)
df <- df %>%
  mutate(
    adaptability_score = (TWB22 + TWB23) / 2
  )

# Depletion (TWB24–TWB25)
df <- df %>%
  mutate(
    depletion_score = (TWB24 + TWB25) / 2
  )

# Foundational Support (TWB26–TWB31)
df <- df %>%
  mutate(
    foundational_support_score = (
      TWB26 + TWB27 + TWB28 + TWB29 + TWB30 + TWB31
    ) / 6
  )

# TWB composite (all above subscales)
df <- df %>%
  mutate(
    twb_composite_score = (
      twb_sub_score +
      growth_score +
      wellbeing_score +
      acceptance_score +
      adaptability_score +
      depletion_score +
      foundational_support_score
    ) 
  )

# Create item-level dataframe for reliability checks

twb_items <- df %>%
  select(TWB1:TWB14) %>%
  na.omit()

growth_items <- df %>%
  select(TWB15, TWB16, TWB17) %>%
  na.omit()

wellbeing_items <- df %>%
  select(TWB18, TWB19) %>%
  na.omit()

acceptance_items <- df %>%
  select(TWB20, TWB21) %>%
  na.omit()

adaptability_items <- df %>%
  select(TWB22, TWB23) %>%
  na.omit()

depletion_items <- df %>%
  select(TWB24, TWB25) %>%
  na.omit()

foundational_items <- df %>%
  select(TWB26:TWB31) %>%
  na.omit()


#alphas
psych::alpha(twb_items)

## 
## Reliability analysis   
## Call: psych::alpha(x = twb_items)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N     ase mean  sd median_r
##       0.97      0.97    0.97       0.7  33 0.00075  4.5 1.2     0.69
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.97  0.97  0.97
## Duhachek  0.97  0.97  0.97
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## TWB1       0.97      0.97    0.97      0.69  29  0.00084 0.0037  0.69
## TWB2       0.97      0.97    0.97      0.70  30  0.00083 0.0040  0.69
## TWB3       0.97      0.97    0.97      0.70  30  0.00083 0.0038  0.69
## TWB4       0.97      0.97    0.97      0.70  30  0.00083 0.0037  0.69
## TWB5       0.97      0.97    0.97      0.69  30  0.00084 0.0038  0.69
## TWB6       0.97      0.97    0.97      0.70  30  0.00081 0.0043  0.69
## TWB7       0.97      0.97    0.97      0.70  30  0.00083 0.0039  0.69
## TWB8       0.97      0.97    0.97      0.70  30  0.00082 0.0041  0.69
## TWB9       0.97      0.97    0.97      0.70  30  0.00081 0.0041  0.69
## TWB10      0.97      0.97    0.97      0.71  31  0.00079 0.0041  0.71
## TWB11      0.97      0.97    0.97      0.70  31  0.00079 0.0038  0.69
## TWB12      0.97      0.97    0.97      0.71  32  0.00077 0.0031  0.71
## TWB13      0.97      0.97    0.97      0.71  32  0.00079 0.0039  0.71
## TWB14      0.97      0.97    0.97      0.71  32  0.00078 0.0039  0.71
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## TWB1  3307  0.90  0.90  0.90   0.89  4.6 1.4
## TWB2  3307  0.88  0.88  0.87   0.86  4.4 1.5
## TWB3  3307  0.88  0.88  0.88   0.86  4.7 1.3
## TWB4  3307  0.89  0.89  0.88   0.87  4.6 1.4
## TWB5  3307  0.89  0.89  0.89   0.87  4.6 1.4
## TWB6  3307  0.86  0.86  0.84   0.83  4.5 1.5
## TWB7  3307  0.89  0.89  0.88   0.87  4.5 1.4
## TWB8  3307  0.87  0.86  0.85   0.84  4.4 1.5
## TWB9  3307  0.85  0.86  0.84   0.83  4.8 1.3
## TWB10 3307  0.80  0.80  0.78   0.77  4.8 1.3
## TWB11 3307  0.83  0.82  0.81   0.79  4.3 1.5
## TWB12 3307  0.76  0.76  0.74   0.72  4.9 1.4
## TWB13 3307  0.80  0.80  0.78   0.77  4.3 1.4
## TWB14 3307  0.79  0.79  0.77   0.76  4.2 1.4
## 
## Non missing response frequency for each item
##          1    2    3    4    5    6 miss
## TWB1  0.05 0.06 0.08 0.17 0.31 0.33    0
## TWB2  0.07 0.08 0.09 0.20 0.29 0.26    0
## TWB3  0.04 0.06 0.06 0.18 0.31 0.35    0
## TWB4  0.05 0.06 0.09 0.19 0.33 0.28    0
## TWB5  0.05 0.07 0.07 0.20 0.33 0.29    0
## TWB6  0.07 0.08 0.08 0.18 0.27 0.32    0
## TWB7  0.05 0.07 0.08 0.20 0.34 0.25    0
## TWB8  0.07 0.08 0.09 0.17 0.33 0.26    0
## TWB9  0.03 0.04 0.06 0.17 0.35 0.34    0
## TWB10 0.03 0.04 0.06 0.18 0.35 0.33    0
## TWB11 0.08 0.08 0.09 0.18 0.32 0.25    0
## TWB12 0.04 0.05 0.04 0.13 0.31 0.43    0
## TWB13 0.06 0.07 0.09 0.22 0.33 0.22    0
## TWB14 0.07 0.08 0.12 0.22 0.32 0.19    0

psych::alpha(growth_items)

## 
## Reliability analysis   
## Call: psych::alpha(x = growth_items)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.83      0.83    0.77      0.62 4.9 0.0051  5.2 0.74     0.62
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.82  0.83  0.84
## Duhachek  0.82  0.83  0.84
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## TWB15      0.76      0.76    0.62      0.62 3.2   0.0082    NA  0.62
## TWB16      0.75      0.77    0.62      0.62 3.3   0.0081    NA  0.62
## TWB17      0.77      0.77    0.63      0.63 3.4   0.0080    NA  0.63
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean   sd
## TWB15 3328  0.88  0.87  0.76   0.69  5.2 0.95
## TWB16 3328  0.86  0.86  0.76   0.69  5.2 0.84
## TWB17 3328  0.85  0.86  0.75   0.69  5.4 0.75
## 
## Non missing response frequency for each item
##          1    2    3    4    5    6 miss
## TWB15 0.01 0.02 0.03 0.13 0.39 0.42    0
## TWB16 0.00 0.01 0.02 0.14 0.43 0.40    0
## TWB17 0.00 0.00 0.01 0.10 0.40 0.49    0

psych::alpha(wellbeing_items)

## 
## Reliability analysis   
## Call: psych::alpha(x = wellbeing_items)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.73      0.74    0.58      0.58 2.8 0.0091  5.2 0.84     0.58
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.72  0.73  0.75
## Duhachek  0.72  0.73  0.75
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## TWB18      0.63      0.58    0.34      0.58 1.4       NA     0  0.58
## TWB19      0.54      0.58    0.34      0.58 1.4       NA     0  0.58
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean   sd
## TWB18 3332  0.90  0.89  0.68   0.58  5.1 0.98
## TWB19 3332  0.88  0.89  0.68   0.58  5.3 0.91
## 
## Non missing response frequency for each item
##          1    2    3    4    5    6 miss
## TWB18 0.01 0.02 0.03 0.14 0.38 0.42    0
## TWB19 0.01 0.01 0.03 0.10 0.38 0.47    0

psych::alpha(acceptance_items)

## 
## Reliability analysis   
## Call: psych::alpha(x = acceptance_items)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.73      0.73    0.58      0.58 2.7 0.0093  4.8 0.83     0.58
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.71  0.73  0.75
## Duhachek  0.71  0.73  0.75
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## TWB20      0.60      0.58    0.33      0.58 1.4       NA     0  0.58
## TWB21      0.56      0.58    0.33      0.58 1.4       NA     0  0.58
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean   sd
## TWB20 3317  0.89  0.89  0.68   0.58  4.8 0.95
## TWB21 3317  0.88  0.89  0.68   0.58  4.8 0.91
## 
## Non missing response frequency for each item
##          1    2    3    4    5    6 miss
## TWB20 0.01 0.02 0.05 0.24 0.46 0.22    0
## TWB21 0.00 0.02 0.06 0.25 0.48 0.19    0

psych::alpha(adaptability_items)

## 
## Reliability analysis   
## Call: psych::alpha(x = adaptability_items)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.68      0.68    0.52      0.52 2.2 0.011  5.2 0.66     0.52
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.66  0.68   0.7
## Duhachek  0.66  0.68   0.7
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## TWB22      0.58      0.52    0.27      0.52 1.1       NA     0  0.52
## TWB23      0.47      0.52    0.27      0.52 1.1       NA     0  0.52
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean   sd
## TWB22 3321  0.89  0.87  0.63   0.52  5.1 0.80
## TWB23 3321  0.86  0.87  0.63   0.52  5.2 0.72
## 
## Non missing response frequency for each item
##       1    2    3    4    5    6 miss
## TWB22 0 0.01 0.02 0.13 0.52 0.31    0
## TWB23 0 0.00 0.01 0.12 0.51 0.36    0

psych::alpha(depletion_items)

## 
## Reliability analysis   
## Call: psych::alpha(x = depletion_items)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
##       0.76      0.76    0.61      0.61 3.1 0.0085  3.6 1.3     0.61
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.74  0.76  0.77
## Duhachek  0.74  0.76  0.77
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## TWB24      0.62      0.61    0.37      0.61 1.6       NA     0  0.61
## TWB25      0.59      0.61    0.37      0.61 1.6       NA     0  0.61
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## TWB24 3313  0.90   0.9   0.7   0.61  3.8 1.4
## TWB25 3313  0.89   0.9   0.7   0.61  3.3 1.4
## 
## Non missing response frequency for each item
##          1    2    3    4    5    6 miss
## TWB24 0.05 0.16 0.14 0.32 0.18 0.14    0
## TWB25 0.09 0.28 0.16 0.27 0.14 0.06    0

psych::alpha(foundational_items)

## 
## Reliability analysis   
## Call: psych::alpha(x = foundational_items)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
##       0.87      0.87    0.86      0.53 6.7 0.0049  3.7 1.1     0.57
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.86  0.87  0.88
## Duhachek  0.86  0.87  0.88
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## TWB26      0.89      0.89    0.87      0.62 8.0   0.0042 0.0066  0.61
## TWB27      0.85      0.85    0.84      0.53 5.6   0.0059 0.0279  0.57
## TWB28      0.84      0.85    0.84      0.52 5.5   0.0060 0.0255  0.55
## TWB29      0.82      0.83    0.81      0.49 4.8   0.0066 0.0160  0.51
## TWB30      0.83      0.83    0.81      0.49 4.9   0.0065 0.0148  0.51
## TWB31      0.84      0.85    0.84      0.52 5.5   0.0060 0.0245  0.58
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## TWB26 1793  0.60  0.59  0.45   0.43  3.0 1.5
## TWB27 1793  0.78  0.78  0.71   0.67  3.5 1.5
## TWB28 1793  0.80  0.79  0.74   0.69  3.5 1.6
## TWB29 1793  0.86  0.87  0.86   0.79  4.0 1.4
## TWB30 1793  0.85  0.85  0.84   0.77  4.0 1.4
## TWB31 1793  0.78  0.79  0.74   0.69  4.3 1.4
## 
## Non missing response frequency for each item
##          1    2    3    4    5    6 miss
## TWB26 0.25 0.18 0.16 0.21 0.17 0.04    0
## TWB27 0.12 0.17 0.17 0.24 0.22 0.09    0
## TWB28 0.15 0.14 0.16 0.22 0.23 0.10    0
## TWB29 0.08 0.09 0.13 0.27 0.30 0.13    0
## TWB30 0.08 0.08 0.13 0.27 0.32 0.12    0
## TWB31 0.07 0.05 0.10 0.22 0.40 0.16    0

# Combine all TWB items (TWB1 to TWB31) into one dataframe
twb_full_items <- df %>%
  select(TWB1:TWB31) %>%
  na.omit()  # only include complete cases for alpha

# Cronbach's alpha for full TWB scale (31 items)
twb_full_alpha <- psych::alpha(twb_full_items)

## Some items ( TWB24 TWB25 ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option

print(twb_full_alpha) #.93 high alpha for collective measure

## 
## Reliability analysis   
## Call: psych::alpha(x = twb_full_items)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.94      0.94    0.96      0.34  16 0.0018  4.6 0.76      0.3
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.94  0.94  0.94
## Duhachek  0.94  0.94  0.94
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## TWB1       0.94      0.94    0.96      0.33  14   0.0019 0.070  0.29
## TWB2       0.94      0.94    0.96      0.33  15   0.0019 0.071  0.29
## TWB3       0.94      0.94    0.96      0.33  15   0.0019 0.071  0.29
## TWB4       0.94      0.94    0.96      0.33  14   0.0019 0.070  0.29
## TWB5       0.94      0.94    0.96      0.33  14   0.0019 0.070  0.29
## TWB6       0.94      0.94    0.96      0.33  15   0.0019 0.071  0.29
## TWB7       0.94      0.94    0.96      0.33  15   0.0019 0.071  0.29
## TWB8       0.94      0.94    0.96      0.33  15   0.0019 0.071  0.29
## TWB9       0.94      0.94    0.96      0.33  15   0.0019 0.071  0.29
## TWB10      0.94      0.94    0.96      0.33  15   0.0019 0.072  0.29
## TWB11      0.94      0.94    0.96      0.33  15   0.0019 0.071  0.29
## TWB12      0.94      0.94    0.96      0.33  15   0.0019 0.073  0.29
## TWB13      0.94      0.94    0.96      0.33  15   0.0019 0.072  0.29
## TWB14      0.94      0.94    0.96      0.33  15   0.0019 0.072  0.29
## TWB15      0.94      0.94    0.96      0.34  15   0.0018 0.076  0.29
## TWB16      0.94      0.94    0.96      0.34  15   0.0018 0.076  0.29
## TWB17      0.94      0.94    0.96      0.34  16   0.0018 0.076  0.30
## TWB18      0.94      0.94    0.96      0.35  16   0.0017 0.073  0.30
## TWB19      0.94      0.94    0.96      0.35  16   0.0017 0.074  0.31
## TWB20      0.94      0.94    0.96      0.34  16   0.0018 0.075  0.30
## TWB21      0.94      0.94    0.96      0.34  15   0.0018 0.075  0.30
## TWB22      0.94      0.94    0.96      0.34  16   0.0018 0.075  0.30
## TWB23      0.94      0.94    0.96      0.34  16   0.0018 0.076  0.30
## TWB24      0.95      0.95    0.96      0.37  18   0.0016 0.059  0.31
## TWB25      0.95      0.95    0.96      0.37  17   0.0016 0.060  0.31
## TWB26      0.94      0.94    0.96      0.35  16   0.0017 0.075  0.30
## TWB27      0.94      0.94    0.96      0.34  15   0.0018 0.075  0.29
## TWB28      0.94      0.94    0.96      0.34  15   0.0018 0.075  0.29
## TWB29      0.94      0.94    0.96      0.33  15   0.0019 0.073  0.29
## TWB30      0.94      0.94    0.96      0.33  15   0.0019 0.074  0.29
## TWB31      0.94      0.94    0.96      0.33  15   0.0018 0.074  0.29
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean   sd
## TWB1  1759  0.85  0.83  0.84   0.83  4.9 1.30
## TWB2  1759  0.84  0.82  0.82   0.82  4.6 1.40
## TWB3  1759  0.83  0.81  0.81   0.81  5.0 1.25
## TWB4  1759  0.85  0.84  0.84   0.83  4.8 1.30
## TWB5  1759  0.85  0.84  0.84   0.84  4.8 1.32
## TWB6  1759  0.82  0.81  0.81   0.80  4.7 1.45
## TWB7  1759  0.84  0.82  0.82   0.82  4.7 1.28
## TWB8  1759  0.83  0.81  0.82   0.81  4.6 1.45
## TWB9  1759  0.81  0.80  0.80   0.79  4.9 1.23
## TWB10 1759  0.79  0.78  0.78   0.77  4.9 1.24
## TWB11 1759  0.81  0.80  0.80   0.79  4.5 1.51
## TWB12 1759  0.72  0.71  0.70   0.69  5.1 1.27
## TWB13 1759  0.80  0.79  0.79   0.78  4.5 1.37
## TWB14 1759  0.77  0.76  0.76   0.75  4.4 1.37
## TWB15 1759  0.51  0.55  0.53   0.47  5.2 0.96
## TWB16 1759  0.46  0.51  0.49   0.43  5.2 0.83
## TWB17 1759  0.42  0.47  0.45   0.39  5.4 0.76
## TWB18 1759  0.26  0.31  0.28   0.22  5.1 1.00
## TWB19 1759  0.27  0.32  0.29   0.24  5.3 0.91
## TWB20 1759  0.40  0.44  0.42   0.36  4.9 0.97
## TWB21 1759  0.45  0.50  0.48   0.42  4.8 0.93
## TWB22 1759  0.41  0.46  0.43   0.38  5.1 0.83
## TWB23 1759  0.42  0.47  0.44   0.39  5.3 0.72
## TWB24 1759 -0.16 -0.18 -0.21  -0.22  3.9 1.46
## TWB25 1759 -0.13 -0.15 -0.18  -0.18  3.2 1.43
## TWB26 1759  0.39  0.37  0.34   0.33  3.0 1.54
## TWB27 1759  0.59  0.57  0.56   0.55  3.5 1.50
## TWB28 1759  0.61  0.59  0.57   0.57  3.5 1.58
## TWB29 1759  0.74  0.72  0.72   0.71  4.0 1.43
## TWB30 1759  0.71  0.70  0.69   0.68  4.0 1.40
## TWB31 1759  0.67  0.65  0.64   0.64  4.3 1.36
## 
## Non missing response frequency for each item
##          1    2    3    4    5    6 miss
## TWB1  0.04 0.03 0.06 0.15 0.32 0.41    0
## TWB2  0.05 0.05 0.07 0.19 0.31 0.33    0
## TWB3  0.03 0.04 0.05 0.15 0.31 0.43    0
## TWB4  0.04 0.04 0.07 0.17 0.34 0.35    0
## TWB5  0.04 0.05 0.05 0.17 0.34 0.35    0
## TWB6  0.06 0.04 0.07 0.16 0.26 0.40    0
## TWB7  0.04 0.04 0.06 0.18 0.37 0.31    0
## TWB8  0.06 0.05 0.08 0.15 0.33 0.33    0
## TWB9  0.03 0.03 0.04 0.15 0.34 0.40    0
## TWB10 0.03 0.04 0.04 0.16 0.35 0.38    0
## TWB11 0.07 0.07 0.09 0.17 0.30 0.31    0
## TWB12 0.03 0.04 0.03 0.11 0.31 0.48    0
## TWB13 0.05 0.06 0.08 0.21 0.34 0.26    0
## TWB14 0.05 0.06 0.10 0.21 0.34 0.23    0
## TWB15 0.01 0.02 0.03 0.12 0.36 0.47    0
## TWB16 0.00 0.01 0.02 0.13 0.42 0.42    0
## TWB17 0.00 0.00 0.01 0.09 0.37 0.52    0
## TWB18 0.01 0.02 0.04 0.14 0.37 0.42    0
## TWB19 0.01 0.01 0.03 0.10 0.37 0.48    0
## TWB20 0.01 0.02 0.04 0.20 0.45 0.28    0
## TWB21 0.01 0.01 0.05 0.23 0.47 0.23    0
## TWB22 0.00 0.01 0.02 0.13 0.50 0.34    0
## TWB23 0.00 0.00 0.01 0.10 0.48 0.41    0
## TWB24 0.06 0.17 0.13 0.31 0.18 0.16    0
## TWB25 0.10 0.29 0.14 0.26 0.14 0.06    0
## TWB26 0.25 0.18 0.16 0.21 0.17 0.04    0
## TWB27 0.12 0.17 0.16 0.24 0.22 0.09    0
## TWB28 0.15 0.14 0.16 0.22 0.23 0.10    0
## TWB29 0.08 0.09 0.13 0.27 0.30 0.13    0
## TWB30 0.08 0.07 0.13 0.27 0.33 0.12    0
## TWB31 0.07 0.05 0.10 0.22 0.40 0.16    0

# Install if needed
# install.packages("lavaan")

library(lavaan)

# Define 7-factor model using lavaan syntax
twb_model <- '
  TWB =~ TWB1 + TWB2 + TWB3 + TWB4 + TWB5 + TWB6 + TWB7 + TWB8 + TWB9 + TWB10 + TWB11 + TWB12 + TWB13 + TWB14
  Growth =~ TWB15 + TWB16 + TWB17
  Wellbeing =~ TWB18 + TWB19
  Acceptance =~ TWB20 + TWB21
  Adaptability =~ TWB22 + TWB23
  Depletion =~ TWB24 + TWB25
  Foundation =~ TWB26 + TWB27 + TWB28 + TWB29 + TWB30 + TWB31
'

# Run CFA on full data (remove rows with missing values in TWB1–TWB31)
twb_cfa_data <- df %>%
  select(TWB1:TWB31) %>%
  na.omit()

# Fit the model
twb_cfa_fit <- cfa(model = twb_model, data = twb_cfa_data, std.lv = TRUE)

# Print summary with fit indices
summary(twb_cfa_fit, fit.measures = TRUE, standardized = TRUE)

## lavaan 0.6-19 ended normally after 46 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        83
## 
##   Number of observations                          1759
## 
## Model Test User Model:
##                                                       
##   Test statistic                              2031.398
##   Degrees of freedom                               413
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                             40973.122
##   Degrees of freedom                               465
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.960
##   Tucker-Lewis Index (TLI)                       0.955
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -68229.690
##   Loglikelihood unrestricted model (H1)     -67213.991
##                                                       
##   Akaike (AIC)                              136625.380
##   Bayesian (BIC)                            137079.598
##   Sample-size adjusted Bayesian (SABIC)     136815.914
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.047
##   90 Percent confidence interval - lower         0.045
##   90 Percent confidence interval - upper         0.049
##   P-value H_0: RMSEA <= 0.050                    0.988
##   P-value H_0: RMSEA >= 0.080                    0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.029
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   TWB =~                                                                
##     TWB1              1.170    0.024   48.759    0.000    1.170    0.901
##     TWB2              1.223    0.026   46.331    0.000    1.223    0.873
##     TWB3              1.088    0.024   46.232    0.000    1.088    0.872
##     TWB4              1.162    0.024   48.167    0.000    1.162    0.894
##     TWB5              1.188    0.024   48.790    0.000    1.188    0.901
##     TWB6              1.238    0.028   44.553    0.000    1.238    0.853
##     TWB7              1.131    0.024   47.155    0.000    1.131    0.883
##     TWB8              1.241    0.028   44.973    0.000    1.241    0.858
##     TWB9              1.057    0.023   44.985    0.000    1.057    0.858
##     TWB10             1.001    0.024   40.883    0.000    1.001    0.806
##     TWB11             1.274    0.029   43.824    0.000    1.274    0.844
##     TWB12             0.931    0.026   35.845    0.000    0.931    0.736
##     TWB13             1.075    0.027   39.152    0.000    1.075    0.783
##     TWB14             1.052    0.028   38.176    0.000    1.052    0.769
##   Growth =~                                                             
##     TWB15             0.751    0.021   35.885    0.000    0.751    0.783
##     TWB16             0.629    0.018   34.277    0.000    0.629    0.756
##     TWB17             0.603    0.017   36.219    0.000    0.603    0.788
##   Wellbeing =~                                                          
##     TWB18             0.743    0.026   28.782    0.000    0.743    0.740
##     TWB19             0.686    0.024   29.159    0.000    0.686    0.751
##   Acceptance =~                                                         
##     TWB20             0.706    0.022   32.183    0.000    0.706    0.726
##     TWB21             0.747    0.021   36.109    0.000    0.747    0.804
##   Adaptability =~                                                       
##     TWB22             0.570    0.020   29.025    0.000    0.570    0.690
##     TWB23             0.502    0.017   29.504    0.000    0.502    0.701
##   Depletion =~                                                          
##     TWB24             1.135    0.041   27.339    0.000    1.135    0.777
##     TWB25             1.121    0.041   27.490    0.000    1.121    0.783
##   Foundation =~                                                         
##     TWB26             0.658    0.036   18.088    0.000    0.658    0.427
##     TWB27             1.029    0.032   31.782    0.000    1.029    0.685
##     TWB28             1.144    0.033   34.316    0.000    1.144    0.725
##     TWB29             1.275    0.027   46.809    0.000    1.275    0.892
##     TWB30             1.223    0.027   45.098    0.000    1.223    0.872
##     TWB31             1.022    0.028   36.116    0.000    1.022    0.752
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   TWB ~~                                                                
##     Growth            0.455    0.022   20.887    0.000    0.455    0.455
##     Wellbeing         0.224    0.027    8.166    0.000    0.224    0.224
##     Acceptance        0.406    0.024   16.757    0.000    0.406    0.406
##     Adaptability      0.443    0.026   17.209    0.000    0.443    0.443
##     Depletion        -0.210    0.027   -7.857    0.000   -0.210   -0.210
##     Foundation        0.702    0.014   51.758    0.000    0.702    0.702
##   Growth ~~                                                             
##     Wellbeing         0.516    0.025   20.328    0.000    0.516    0.516
##     Acceptance        0.513    0.025   20.768    0.000    0.513    0.513
##     Adaptability      0.601    0.025   23.798    0.000    0.601    0.601
##     Depletion        -0.177    0.030   -5.971    0.000   -0.177   -0.177
##     Foundation        0.393    0.024   16.418    0.000    0.393    0.393
##   Wellbeing ~~                                                          
##     Acceptance        0.540    0.027   20.274    0.000    0.540    0.540
##     Adaptability      0.460    0.031   14.979    0.000    0.460    0.460
##     Depletion        -0.460    0.028  -16.456    0.000   -0.460   -0.460
##     Foundation        0.226    0.028    7.980    0.000    0.226    0.226
##   Acceptance ~~                                                         
##     Adaptability      0.935    0.020   47.912    0.000    0.935    0.935
##     Depletion        -0.399    0.028  -14.123    0.000   -0.399   -0.399
##     Foundation        0.390    0.025   15.339    0.000    0.390    0.390
##   Adaptability ~~                                                       
##     Depletion        -0.250    0.033   -7.656    0.000   -0.250   -0.250
##     Foundation        0.392    0.028   14.183    0.000    0.392    0.392
##   Depletion ~~                                                          
##     Foundation       -0.275    0.027  -10.215    0.000   -0.275   -0.275
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .TWB1              0.319    0.012   26.484    0.000    0.319    0.189
##    .TWB2              0.465    0.017   27.279    0.000    0.465    0.237
##    .TWB3              0.372    0.014   27.305    0.000    0.372    0.239
##    .TWB4              0.339    0.013   26.710    0.000    0.339    0.201
##    .TWB5              0.328    0.012   26.471    0.000    0.328    0.188
##    .TWB6              0.576    0.021   27.692    0.000    0.576    0.273
##    .TWB7              0.362    0.013   27.046    0.000    0.362    0.221
##    .TWB8              0.554    0.020   27.604    0.000    0.554    0.265
##    .TWB9              0.401    0.015   27.602    0.000    0.401    0.264
##    .TWB10             0.539    0.019   28.286    0.000    0.539    0.350
##    .TWB11             0.657    0.024   27.833    0.000    0.657    0.288
##    .TWB12             0.734    0.025   28.786    0.000    0.734    0.459
##    .TWB13             0.729    0.026   28.488    0.000    0.729    0.387
##    .TWB14             0.762    0.027   28.586    0.000    0.762    0.408
##    .TWB15             0.356    0.018   20.141    0.000    0.356    0.387
##    .TWB16             0.297    0.014   21.697    0.000    0.297    0.429
##    .TWB17             0.221    0.011   19.784    0.000    0.221    0.379
##    .TWB18             0.457    0.028   16.256    0.000    0.457    0.453
##    .TWB19             0.363    0.023   15.484    0.000    0.363    0.436
##    .TWB20             0.445    0.020   22.366    0.000    0.445    0.472
##    .TWB21             0.304    0.018   17.010    0.000    0.304    0.353
##    .TWB22             0.357    0.016   22.055    0.000    0.357    0.524
##    .TWB23             0.260    0.012   21.420    0.000    0.260    0.508
##    .TWB24             0.843    0.073   11.556    0.000    0.843    0.396
##    .TWB25             0.790    0.071   11.166    0.000    0.790    0.386
##    .TWB26             1.939    0.067   29.082    0.000    1.939    0.817
##    .TWB27             1.196    0.044   27.356    0.000    1.196    0.530
##    .TWB28             1.180    0.044   26.762    0.000    1.180    0.474
##    .TWB29             0.418    0.022   19.270    0.000    0.418    0.205
##    .TWB30             0.473    0.022   21.133    0.000    0.473    0.240
##    .TWB31             0.801    0.031   26.244    0.000    0.801    0.434
##     TWB               1.000                               1.000    1.000
##     Growth            1.000                               1.000    1.000
##     Wellbeing         1.000                               1.000    1.000
##     Acceptance        1.000                               1.000    1.000
##     Adaptability      1.000                               1.000    1.000
##     Depletion         1.000                               1.000    1.000
##     Foundation        1.000                               1.000    1.000

strong 7 factor fit: | Fit Index | Value | Interpretation | | ————– | ————————— | ————————————————— | | Chi-Square | 744.46 (df = 413, p < .001) | Significant (common in large samples), but expected | | CFI | 0.963 | Excellent fit (> .95 is strong) | | TLI | 0.958 | Excellent fit | | RMSEA | 0.046 (CI: .041–.052) | Good fit (< .06 is desired) | | SRMR | 0.037 | Excellent fit (< .08 is desired) |

TWB (Responsive Leadership & Supportive Culture) All items (TWB1–TWB14) load strongly (.79–.91), e.g.:

TWB6 loading = .912

TWB1 loading = .899

Growth Mindset (TWB15–TWB17) Loadings range from .73–.80

Well-being (TWB18–TWB19) Both loadings are ~.80 → very clean 2-item factor

Acceptance, Adaptability, Depletion, Foundation All items load at acceptable to strong levels

Lowest loading: TWB23 at .594 (still okay)

Some weaker loadings for Foundational Support (e.g., TWB26 at .491) — might indicate room for refinement- same as first data set

Conclusion: Factor loadings are mostly strong, with all standardized loadings > .49 and most > .70. The items are measuring their intended constructs well.

library(lavaan)

# --- Define the 1-Factor Model (TWB1 to TWB31) ---
twb_1factor_model <- '
  TWB =~ TWB1 + TWB2 + TWB3 + TWB4 + TWB5 + TWB6 + TWB7 + TWB8 + TWB9 + TWB10 + 
         TWB11 + TWB12 + TWB13 + TWB14 + TWB15 + TWB16 + TWB17 + TWB18 + TWB19 + 
         TWB20 + TWB21 + TWB22 + TWB23 + TWB24 + TWB25 + TWB26 + TWB27 + TWB28 + 
         TWB29 + TWB30 + TWB31
'

# --- Reuse the 7-Factor Model from before ---
twb_7factor_model <- '
  TWB =~ TWB1 + TWB2 + TWB3 + TWB4 + TWB5 + TWB6 + TWB7 + TWB8 + TWB9 + TWB10 + TWB11 + TWB12 + TWB13 + TWB14
  Growth =~ TWB15 + TWB16 + TWB17
  Wellbeing =~ TWB18 + TWB19
  Acceptance =~ TWB20 + TWB21
  Adaptability =~ TWB22 + TWB23
  Depletion =~ TWB24 + TWB25
  Foundation =~ TWB26 + TWB27 + TWB28 + TWB29 + TWB30 + TWB31
'

# --- Prepare data (complete cases for TWB1–TWB31) ---
twb_cfa_data <- df %>%
  select(TWB1:TWB31) %>%
  na.omit()

# --- Fit both models ---
fit_1factor <- cfa(twb_1factor_model, data = twb_cfa_data, std.lv = TRUE)
fit_7factor <- cfa(twb_7factor_model, data = twb_cfa_data, std.lv = TRUE)

# --- Compare fit using Chi-square difference test ---
anova(fit_1factor, fit_7factor)

## 
## Chi-Squared Difference Test
## 
##              Df    AIC    BIC   Chisq Chisq diff   RMSEA Df diff Pr(>Chisq)    
## fit_7factor 413 136625 137080  2031.4                                          
## fit_1factor 434 144691 145030 10138.6     8107.2 0.46788      21  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The difference in χ² = 1883.3 with 21 df is statistically significant (p < .00001), indicating that the improvement in fit is not due to chance.

The AIC and BIC are also much lower in the 7-factor model, which strongly favors it (lower is better).

The RMSEA for the 1-factor model is 0.4876 — which is extremely poor fit (good models are ≤ .06).

basically it is multidimensional so we should look at the predictive models as such instead of composite the 7 factor is much mor emeaningful to look at at the factor level but adequate to look as single construct scale as well

summary(fit_1factor, fit.measures = TRUE, standardized = TRUE)

## lavaan 0.6-19 ended normally after 34 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        62
## 
##   Number of observations                          1759
## 
## Model Test User Model:
##                                                        
##   Test statistic                              10138.642
##   Degrees of freedom                                434
##   P-value (Chi-square)                            0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                             40973.122
##   Degrees of freedom                               465
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.760
##   Tucker-Lewis Index (TLI)                       0.743
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -72283.312
##   Loglikelihood unrestricted model (H1)     -67213.991
##                                                       
##   Akaike (AIC)                              144690.624
##   Bayesian (BIC)                            145029.919
##   Sample-size adjusted Bayesian (SABIC)     144832.950
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.113
##   90 Percent confidence interval - lower         0.111
##   90 Percent confidence interval - upper         0.115
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.101
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   TWB =~                                                                
##     TWB1              1.161    0.024   48.107    0.000    1.161    0.893
##     TWB2              1.218    0.026   46.012    0.000    1.218    0.870
##     TWB3              1.081    0.024   45.724    0.000    1.081    0.866
##     TWB4              1.156    0.024   47.784    0.000    1.156    0.890
##     TWB5              1.182    0.024   48.340    0.000    1.182    0.896
##     TWB6              1.235    0.028   44.364    0.000    1.235    0.850
##     TWB7              1.124    0.024   46.696    0.000    1.124    0.878
##     TWB8              1.240    0.028   44.948    0.000    1.240    0.857
##     TWB9              1.050    0.024   44.526    0.000    1.050    0.852
##     TWB10             1.002    0.024   40.949    0.000    1.002    0.807
##     TWB11             1.271    0.029   43.656    0.000    1.271    0.842
##     TWB12             0.927    0.026   35.670    0.000    0.927    0.733
##     TWB13             1.089    0.027   39.905    0.000    1.089    0.793
##     TWB14             1.058    0.027   38.520    0.000    1.058    0.774
##     TWB15             0.410    0.022   18.584    0.000    0.410    0.427
##     TWB16             0.311    0.019   16.094    0.000    0.311    0.374
##     TWB17             0.256    0.018   14.312    0.000    0.256    0.335
##     TWB18             0.187    0.024    7.798    0.000    0.187    0.186
##     TWB19             0.181    0.022    8.286    0.000    0.181    0.198
##     TWB20             0.303    0.023   13.247    0.000    0.303    0.312
##     TWB21             0.339    0.022   15.684    0.000    0.339    0.365
##     TWB22             0.275    0.019   14.232    0.000    0.275    0.333
##     TWB23             0.238    0.017   14.202    0.000    0.238    0.333
##     TWB24            -0.289    0.035   -8.293    0.000   -0.289   -0.198
##     TWB25            -0.247    0.034   -7.215    0.000   -0.247   -0.173
##     TWB26             0.461    0.036   12.709    0.000    0.461    0.300
##     TWB27             0.751    0.034   22.188    0.000    0.751    0.500
##     TWB28             0.841    0.035   23.888    0.000    0.841    0.533
##     TWB29             0.947    0.030   31.177    0.000    0.947    0.663
##     TWB30             0.895    0.030   29.731    0.000    0.895    0.638
##     TWB31             0.816    0.030   27.590    0.000    0.816    0.601
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .TWB1              0.341    0.013   26.856    0.000    0.341    0.202
##    .TWB2              0.478    0.017   27.458    0.000    0.478    0.244
##    .TWB3              0.388    0.014   27.527    0.000    0.388    0.250
##    .TWB4              0.352    0.013   26.963    0.000    0.352    0.208
##    .TWB5              0.343    0.013   26.775    0.000    0.343    0.197
##    .TWB6              0.585    0.021   27.811    0.000    0.585    0.277
##    .TWB7              0.377    0.014   27.284    0.000    0.377    0.230
##    .TWB8              0.555    0.020   27.696    0.000    0.555    0.265
##    .TWB9              0.416    0.015   27.780    0.000    0.416    0.274
##    .TWB10             0.537    0.019   28.335    0.000    0.537    0.349
##    .TWB11             0.666    0.024   27.939    0.000    0.666    0.292
##    .TWB12             0.740    0.026   28.835    0.000    0.740    0.463
##    .TWB13             0.699    0.025   28.457    0.000    0.699    0.371
##    .TWB14             0.749    0.026   28.599    0.000    0.749    0.401
##    .TWB15             0.752    0.026   29.499    0.000    0.752    0.818
##    .TWB16             0.595    0.020   29.541    0.000    0.595    0.860
##    .TWB17             0.519    0.018   29.567    0.000    0.519    0.888
##    .TWB18             0.974    0.033   29.631    0.000    0.974    0.965
##    .TWB19             0.801    0.027   29.628    0.000    0.801    0.961
##    .TWB20             0.852    0.029   29.580    0.000    0.852    0.903
##    .TWB21             0.747    0.025   29.548    0.000    0.747    0.867
##    .TWB22             0.606    0.021   29.568    0.000    0.606    0.889
##    .TWB23             0.456    0.015   29.568    0.000    0.456    0.889
##    .TWB24             2.047    0.069   29.628    0.000    2.047    0.961
##    .TWB25             1.985    0.067   29.635    0.000    1.985    0.970
##    .TWB26             2.159    0.073   29.587    0.000    2.159    0.910
##    .TWB27             1.691    0.057   29.421    0.000    1.691    0.750
##    .TWB28             1.783    0.061   29.376    0.000    1.783    0.716
##    .TWB29             1.146    0.039   29.103    0.000    1.146    0.561
##    .TWB30             1.166    0.040   29.170    0.000    1.166    0.593
##    .TWB31             1.178    0.040   29.257    0.000    1.178    0.639
##     TWB               1.000                               1.000    1.000

summary(fit_7factor, fit.measures = TRUE, standardized = TRUE)

## lavaan 0.6-19 ended normally after 46 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        83
## 
##   Number of observations                          1759
## 
## Model Test User Model:
##                                                       
##   Test statistic                              2031.398
##   Degrees of freedom                               413
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                             40973.122
##   Degrees of freedom                               465
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.960
##   Tucker-Lewis Index (TLI)                       0.955
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -68229.690
##   Loglikelihood unrestricted model (H1)     -67213.991
##                                                       
##   Akaike (AIC)                              136625.380
##   Bayesian (BIC)                            137079.598
##   Sample-size adjusted Bayesian (SABIC)     136815.914
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.047
##   90 Percent confidence interval - lower         0.045
##   90 Percent confidence interval - upper         0.049
##   P-value H_0: RMSEA <= 0.050                    0.988
##   P-value H_0: RMSEA >= 0.080                    0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.029
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   TWB =~                                                                
##     TWB1              1.170    0.024   48.759    0.000    1.170    0.901
##     TWB2              1.223    0.026   46.331    0.000    1.223    0.873
##     TWB3              1.088    0.024   46.232    0.000    1.088    0.872
##     TWB4              1.162    0.024   48.167    0.000    1.162    0.894
##     TWB5              1.188    0.024   48.790    0.000    1.188    0.901
##     TWB6              1.238    0.028   44.553    0.000    1.238    0.853
##     TWB7              1.131    0.024   47.155    0.000    1.131    0.883
##     TWB8              1.241    0.028   44.973    0.000    1.241    0.858
##     TWB9              1.057    0.023   44.985    0.000    1.057    0.858
##     TWB10             1.001    0.024   40.883    0.000    1.001    0.806
##     TWB11             1.274    0.029   43.824    0.000    1.274    0.844
##     TWB12             0.931    0.026   35.845    0.000    0.931    0.736
##     TWB13             1.075    0.027   39.152    0.000    1.075    0.783
##     TWB14             1.052    0.028   38.176    0.000    1.052    0.769
##   Growth =~                                                             
##     TWB15             0.751    0.021   35.885    0.000    0.751    0.783
##     TWB16             0.629    0.018   34.277    0.000    0.629    0.756
##     TWB17             0.603    0.017   36.219    0.000    0.603    0.788
##   Wellbeing =~                                                          
##     TWB18             0.743    0.026   28.782    0.000    0.743    0.740
##     TWB19             0.686    0.024   29.159    0.000    0.686    0.751
##   Acceptance =~                                                         
##     TWB20             0.706    0.022   32.183    0.000    0.706    0.726
##     TWB21             0.747    0.021   36.109    0.000    0.747    0.804
##   Adaptability =~                                                       
##     TWB22             0.570    0.020   29.025    0.000    0.570    0.690
##     TWB23             0.502    0.017   29.504    0.000    0.502    0.701
##   Depletion =~                                                          
##     TWB24             1.135    0.041   27.339    0.000    1.135    0.777
##     TWB25             1.121    0.041   27.490    0.000    1.121    0.783
##   Foundation =~                                                         
##     TWB26             0.658    0.036   18.088    0.000    0.658    0.427
##     TWB27             1.029    0.032   31.782    0.000    1.029    0.685
##     TWB28             1.144    0.033   34.316    0.000    1.144    0.725
##     TWB29             1.275    0.027   46.809    0.000    1.275    0.892
##     TWB30             1.223    0.027   45.098    0.000    1.223    0.872
##     TWB31             1.022    0.028   36.116    0.000    1.022    0.752
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   TWB ~~                                                                
##     Growth            0.455    0.022   20.887    0.000    0.455    0.455
##     Wellbeing         0.224    0.027    8.166    0.000    0.224    0.224
##     Acceptance        0.406    0.024   16.757    0.000    0.406    0.406
##     Adaptability      0.443    0.026   17.209    0.000    0.443    0.443
##     Depletion        -0.210    0.027   -7.857    0.000   -0.210   -0.210
##     Foundation        0.702    0.014   51.758    0.000    0.702    0.702
##   Growth ~~                                                             
##     Wellbeing         0.516    0.025   20.328    0.000    0.516    0.516
##     Acceptance        0.513    0.025   20.768    0.000    0.513    0.513
##     Adaptability      0.601    0.025   23.798    0.000    0.601    0.601
##     Depletion        -0.177    0.030   -5.971    0.000   -0.177   -0.177
##     Foundation        0.393    0.024   16.418    0.000    0.393    0.393
##   Wellbeing ~~                                                          
##     Acceptance        0.540    0.027   20.274    0.000    0.540    0.540
##     Adaptability      0.460    0.031   14.979    0.000    0.460    0.460
##     Depletion        -0.460    0.028  -16.456    0.000   -0.460   -0.460
##     Foundation        0.226    0.028    7.980    0.000    0.226    0.226
##   Acceptance ~~                                                         
##     Adaptability      0.935    0.020   47.912    0.000    0.935    0.935
##     Depletion        -0.399    0.028  -14.123    0.000   -0.399   -0.399
##     Foundation        0.390    0.025   15.339    0.000    0.390    0.390
##   Adaptability ~~                                                       
##     Depletion        -0.250    0.033   -7.656    0.000   -0.250   -0.250
##     Foundation        0.392    0.028   14.183    0.000    0.392    0.392
##   Depletion ~~                                                          
##     Foundation       -0.275    0.027  -10.215    0.000   -0.275   -0.275
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .TWB1              0.319    0.012   26.484    0.000    0.319    0.189
##    .TWB2              0.465    0.017   27.279    0.000    0.465    0.237
##    .TWB3              0.372    0.014   27.305    0.000    0.372    0.239
##    .TWB4              0.339    0.013   26.710    0.000    0.339    0.201
##    .TWB5              0.328    0.012   26.471    0.000    0.328    0.188
##    .TWB6              0.576    0.021   27.692    0.000    0.576    0.273
##    .TWB7              0.362    0.013   27.046    0.000    0.362    0.221
##    .TWB8              0.554    0.020   27.604    0.000    0.554    0.265
##    .TWB9              0.401    0.015   27.602    0.000    0.401    0.264
##    .TWB10             0.539    0.019   28.286    0.000    0.539    0.350
##    .TWB11             0.657    0.024   27.833    0.000    0.657    0.288
##    .TWB12             0.734    0.025   28.786    0.000    0.734    0.459
##    .TWB13             0.729    0.026   28.488    0.000    0.729    0.387
##    .TWB14             0.762    0.027   28.586    0.000    0.762    0.408
##    .TWB15             0.356    0.018   20.141    0.000    0.356    0.387
##    .TWB16             0.297    0.014   21.697    0.000    0.297    0.429
##    .TWB17             0.221    0.011   19.784    0.000    0.221    0.379
##    .TWB18             0.457    0.028   16.256    0.000    0.457    0.453
##    .TWB19             0.363    0.023   15.484    0.000    0.363    0.436
##    .TWB20             0.445    0.020   22.366    0.000    0.445    0.472
##    .TWB21             0.304    0.018   17.010    0.000    0.304    0.353
##    .TWB22             0.357    0.016   22.055    0.000    0.357    0.524
##    .TWB23             0.260    0.012   21.420    0.000    0.260    0.508
##    .TWB24             0.843    0.073   11.556    0.000    0.843    0.396
##    .TWB25             0.790    0.071   11.166    0.000    0.790    0.386
##    .TWB26             1.939    0.067   29.082    0.000    1.939    0.817
##    .TWB27             1.196    0.044   27.356    0.000    1.196    0.530
##    .TWB28             1.180    0.044   26.762    0.000    1.180    0.474
##    .TWB29             0.418    0.022   19.270    0.000    0.418    0.205
##    .TWB30             0.473    0.022   21.133    0.000    0.473    0.240
##    .TWB31             0.801    0.031   26.244    0.000    0.801    0.434
##     TWB               1.000                               1.000    1.000
##     Growth            1.000                               1.000    1.000
##     Wellbeing         1.000                               1.000    1.000
##     Acceptance        1.000                               1.000    1.000
##     Adaptability      1.000                               1.000    1.000
##     Depletion         1.000                               1.000    1.000
##     Foundation        1.000                               1.000    1.000

library(dplyr)
df <- df %>%
  mutate(
  
    # View of the Union: TWB41–TWB43
    union_view_score = (TWB41 + TWB42 + TWB43) / 3,

    # School-Level LMC: TWB44–TWB47 + TWB48 reversed
    school_lmc_score = (TWB44 + TWB45 + TWB46 + TWB47 ) / 4,

    # District-Level LMC: TWB48–TWB53
    district_lmc_score = (TWB48 + TWB49 + TWB50 + TWB51 + TWB52 + TWB53 ) / 6,

    # Overall LMC Composite Score
    lmc_composite_score = (union_view_score + school_lmc_score + district_lmc_score) 
  )

# Create item-level dataframes for alpha
lmc_union_view_items <- df %>%
  select(TWB41, TWB42, TWB43) %>%
  na.omit()

lmc_school_items <- df %>%
  select(TWB44, TWB45, TWB46, TWB47) %>%
  na.omit()

lmc_district_items <- df %>%
  select(TWB48:TWB53) %>%
  na.omit()

lmc_full_items <- df %>%
  select(TWB41:TWB53) %>%
  na.omit()

#alphas
psych::alpha(lmc_union_view_items)

## 
## Reliability analysis   
## Call: psych::alpha(x = lmc_union_view_items)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
##       0.78      0.77    0.75      0.53 3.4 0.0079  4.6 1.1     0.48
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.76  0.78  0.79
## Duhachek  0.76  0.78  0.79
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## TWB41      0.86      0.86    0.76      0.76 6.3   0.0056    NA  0.76
## TWB42      0.53      0.53    0.36      0.36 1.1   0.0190    NA  0.36
## TWB43      0.64      0.64    0.48      0.48 1.8   0.0145    NA  0.48
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## TWB41 2413  0.71  0.74  0.49   0.44  5.1 1.2
## TWB42 2413  0.90  0.90  0.86   0.76  4.5 1.3
## TWB43 2413  0.87  0.85  0.79   0.66  4.3 1.4
## 
## Non missing response frequency for each item
##          1    2    3    4    5    6 miss
## TWB41 0.03 0.03 0.02 0.09 0.32 0.50    0
## TWB42 0.05 0.06 0.07 0.24 0.35 0.23    0
## TWB43 0.07 0.07 0.08 0.25 0.32 0.20    0

psych::alpha(lmc_school_items, check.keys = TRUE)

## 
## Reliability analysis   
## Call: psych::alpha(x = lmc_school_items, check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.85      0.85    0.82      0.59 5.8 0.005  4.4 1.1     0.58
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.84  0.85  0.86
## Duhachek  0.84  0.85  0.86
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## TWB44      0.80      0.80    0.74      0.57 4.0   0.0072 0.0057  0.56
## TWB45      0.81      0.82    0.76      0.60 4.4   0.0068 0.0135  0.56
## TWB46      0.85      0.86    0.80      0.66 5.9   0.0053 0.0036  0.66
## TWB47      0.78      0.78    0.71      0.54 3.5   0.0079 0.0034  0.51
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## TWB44 2342  0.85  0.85  0.80   0.72  4.4 1.2
## TWB45 2342  0.84  0.83  0.75   0.69  4.5 1.4
## TWB46 2342  0.77  0.77  0.64   0.59  4.5 1.3
## TWB47 2342  0.88  0.88  0.85   0.77  4.2 1.3
## 
## Non missing response frequency for each item
##          1    2    3    4    5    6 miss
## TWB44 0.04 0.06 0.09 0.28 0.38 0.15    0
## TWB45 0.05 0.06 0.09 0.21 0.37 0.23    0
## TWB46 0.05 0.05 0.08 0.19 0.42 0.22    0
## TWB47 0.05 0.06 0.11 0.33 0.32 0.13    0

psych::alpha(lmc_district_items)

## 
## Reliability analysis   
## Call: psych::alpha(x = lmc_district_items)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean sd median_r
##       0.92      0.92    0.91      0.65  11 0.0028  4.4  1     0.59
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.91  0.92  0.92
## Duhachek  0.91  0.92  0.92
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r  S/N alpha se  var.r med.r
## TWB48      0.90      0.90    0.89      0.63  8.6   0.0035 0.0092  0.59
## TWB49      0.89      0.89    0.88      0.63  8.4   0.0036 0.0079  0.59
## TWB50      0.89      0.89    0.88      0.62  8.1   0.0037 0.0060  0.59
## TWB51      0.92      0.92    0.91      0.69 11.2   0.0028 0.0088  0.69
## TWB52      0.90      0.90    0.89      0.63  8.7   0.0035 0.0141  0.58
## TWB53      0.91      0.91    0.90      0.67 10.2   0.0030 0.0135  0.69
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## TWB48 2313  0.86  0.86  0.84   0.80  4.4 1.2
## TWB49 2313  0.88  0.88  0.87   0.82  4.4 1.1
## TWB50 2313  0.89  0.90  0.89   0.84  4.3 1.2
## TWB51 2313  0.75  0.75  0.67   0.64  4.4 1.2
## TWB52 2313  0.86  0.86  0.83   0.79  4.3 1.2
## TWB53 2313  0.79  0.79  0.72   0.69  4.4 1.3
## 
## Non missing response frequency for each item
##          1    2    3    4    5    6 miss
## TWB48 0.03 0.05 0.09 0.30 0.40 0.13    0
## TWB49 0.03 0.05 0.08 0.30 0.41 0.12    0
## TWB50 0.04 0.06 0.09 0.32 0.38 0.11    0
## TWB51 0.04 0.05 0.08 0.25 0.43 0.15    0
## TWB52 0.05 0.05 0.11 0.31 0.37 0.12    0
## TWB53 0.04 0.05 0.09 0.24 0.40 0.17    0

psych::alpha(lmc_full_items, , check.keys = TRUE)

## 
## Reliability analysis   
## Call: psych::alpha(x = lmc_full_items, check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.92      0.93    0.94      0.49  12 0.0024  4.4 0.91     0.49
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.92  0.92  0.93
## Duhachek  0.92  0.92  0.93
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## TWB41      0.93      0.93    0.95      0.52  13   0.0023 0.014  0.51
## TWB42      0.92      0.92    0.94      0.50  12   0.0025 0.021  0.50
## TWB43      0.92      0.92    0.94      0.49  12   0.0025 0.021  0.50
## TWB44      0.92      0.92    0.94      0.48  11   0.0026 0.021  0.48
## TWB45      0.92      0.92    0.94      0.50  12   0.0025 0.020  0.49
## TWB46      0.92      0.92    0.94      0.50  12   0.0025 0.021  0.50
## TWB47      0.92      0.92    0.94      0.48  11   0.0026 0.020  0.48
## TWB48      0.92      0.92    0.94      0.48  11   0.0026 0.019  0.48
## TWB49      0.92      0.92    0.93      0.48  11   0.0026 0.018  0.48
## TWB50      0.91      0.92    0.93      0.47  11   0.0027 0.017  0.48
## TWB51      0.92      0.92    0.94      0.49  11   0.0025 0.022  0.49
## TWB52      0.91      0.92    0.94      0.48  11   0.0027 0.019  0.48
## TWB53      0.92      0.92    0.94      0.48  11   0.0026 0.022  0.48
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## TWB41 2273  0.48  0.48  0.42   0.40  5.2 1.2
## TWB42 2273  0.67  0.67  0.65   0.60  4.5 1.3
## TWB43 2273  0.70  0.69  0.67   0.63  4.3 1.4
## TWB44 2273  0.78  0.78  0.76   0.73  4.4 1.2
## TWB45 2273  0.68  0.67  0.64   0.61  4.5 1.3
## TWB46 2273  0.67  0.67  0.64   0.61  4.5 1.3
## TWB47 2273  0.78  0.78  0.76   0.73  4.2 1.3
## TWB48 2273  0.79  0.80  0.78   0.75  4.4 1.2
## TWB49 2273  0.79  0.79  0.79   0.74  4.4 1.1
## TWB50 2273  0.81  0.82  0.82   0.77  4.3 1.2
## TWB51 2273  0.72  0.72  0.70   0.67  4.4 1.2
## TWB52 2273  0.81  0.81  0.80   0.77  4.3 1.2
## TWB53 2273  0.76  0.76  0.73   0.71  4.4 1.3
## 
## Non missing response frequency for each item
##          1    2    3    4    5    6 miss
## TWB41 0.03 0.03 0.02 0.09 0.32 0.51    0
## TWB42 0.05 0.06 0.06 0.24 0.36 0.23    0
## TWB43 0.07 0.07 0.08 0.26 0.33 0.20    0
## TWB44 0.04 0.06 0.09 0.28 0.38 0.15    0
## TWB45 0.05 0.06 0.08 0.21 0.37 0.23    0
## TWB46 0.05 0.05 0.08 0.19 0.42 0.22    0
## TWB47 0.05 0.06 0.11 0.32 0.32 0.13    0
## TWB48 0.03 0.05 0.09 0.30 0.40 0.13    0
## TWB49 0.03 0.05 0.08 0.30 0.42 0.12    0
## TWB50 0.04 0.06 0.09 0.32 0.38 0.11    0
## TWB51 0.04 0.05 0.08 0.25 0.43 0.15    0
## TWB52 0.05 0.05 0.11 0.31 0.37 0.12    0
## TWB53 0.04 0.05 0.08 0.24 0.40 0.17    0

adequate alphas for single factors of LMC scale and collective

library(psych)

# Prepare complete case data for LMC items
lmc_efa_data <- df %>%
  select(TWB41:TWB53) %>%
  na.omit()

# Check sampling adequacy
KMO(lmc_efa_data)  # > 0.6 is acceptable

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = lmc_efa_data)
## Overall MSA =  0.91
## MSA for each item = 
## TWB41 TWB42 TWB43 TWB44 TWB45 TWB46 TWB47 TWB48 TWB49 TWB50 TWB51 TWB52 TWB53 
##  0.88  0.84  0.86  0.94  0.92  0.86  0.92  0.96  0.91  0.90  0.89  0.95  0.97

# Bartlett's test of sphericity
cortest.bartlett(cor(lmc_efa_data), n = nrow(lmc_efa_data))

## $chisq
## [1] 20579.39
## 
## $p.value
## [1] 0
## 
## $df
## [1] 78

# Parallel analysis to suggest # of factors
fa.parallel(lmc_efa_data, fa = "fa", n.iter = 100)

## Parallel analysis suggests that the number of factors =  5  and the number of components =  NA

# Try EFA with 3 factors (based on theory)
efa_3f <- fa(lmc_efa_data, nfactors = 3, rotate = "oblimin", fm = "ml")  # oblique rotation
print(efa_3f$loadings, cutoff = 0.3)

## 
## Loadings:
##       ML1    ML3    ML2   
## TWB41                0.443
## TWB42                0.920
## TWB43                0.773
## TWB44         0.709       
## TWB45         0.789       
## TWB46         0.692       
## TWB47         0.837       
## TWB48  0.803              
## TWB49  0.946              
## TWB50  0.941              
## TWB51  0.308  0.306       
## TWB52  0.603              
## TWB53  0.406              
## 
##                  ML1   ML3   ML2
## SS loadings    3.072 2.455 1.831
## Proportion Var 0.236 0.189 0.141
## Cumulative Var 0.236 0.425 0.566

# Optionally test 1-factor and 2-factor models for comparison
efa_1f <- fa(lmc_efa_data, nfactors = 1, rotate = "oblimin", fm = "ml")
efa_2f <- fa(lmc_efa_data, nfactors = 2, rotate = "oblimin", fm = "ml")

# Compare solutions
print(efa_1f$loadings, cutoff = 0.3)

## 
## Loadings:
##       ML1  
## TWB41 0.356
## TWB42 0.558
## TWB43 0.611
## TWB44 0.730
## TWB45 0.633
## TWB46 0.578
## TWB47 0.742
## TWB48 0.833
## TWB49 0.839
## TWB50 0.872
## TWB51 0.673
## TWB52 0.827
## TWB53 0.721
## 
##                  ML1
## SS loadings    6.446
## Proportion Var 0.496

print(efa_2f$loadings, cutoff = 0.3)

## 
## Loadings:
##       ML1    ML2   
## TWB41         0.471
## TWB42         0.897
## TWB43         0.785
## TWB44  0.521       
## TWB45  0.608       
## TWB46  0.398       
## TWB47  0.603       
## TWB48  0.829       
## TWB49  0.936       
## TWB50  0.986       
## TWB51  0.536       
## TWB52  0.696       
## TWB53  0.532       
## 
##                  ML1   ML2
## SS loadings    4.766 1.987
## Proportion Var 0.367 0.153
## Cumulative Var 0.367 0.519

EFA 3 factor model predicts 54% of the variance of the model which makjes it solid, the items generally load high on their respective factors but a few are low coudl be dropped or adjusted (TWB56)

library(lavaan)



# Step 2: Prepare the dataset for CFA
lmc_cfa_data <- df %>%
  select(TWB41:TWB53) %>%
  na.omit()

# Step 3: Define 3-factor model with reversed TWB48
lmc_model_revised <- '
  UnionView =~ TWB41 + TWB42 + TWB43

  SchoolLMC =~ TWB44 + TWB45 + TWB46 + TWB47 

  DistrictLMC =~ TWB48 + TWB49 + TWB50 + TWB51 + TWB52 + TWB53 
'

# Step 4: Fit the model
fit_lmc_revised <- cfa(model = lmc_model_revised, data = lmc_cfa_data, std.lv = TRUE)

# Step 5: View model fit and loadings
summary(fit_lmc_revised, fit.measures = TRUE, standardized = TRUE)

## lavaan 0.6-19 ended normally after 28 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        29
## 
##   Number of observations                          2273
## 
## Model Test User Model:
##                                                       
##   Test statistic                              2218.510
##   Degrees of freedom                                62
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                             20635.377
##   Degrees of freedom                                78
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.895
##   Tucker-Lewis Index (TLI)                       0.868
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -39281.926
##   Loglikelihood unrestricted model (H1)     -38172.671
##                                                       
##   Akaike (AIC)                               78621.852
##   Bayesian (BIC)                             78787.988
##   Sample-size adjusted Bayesian (SABIC)      78695.850
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.124
##   90 Percent confidence interval - lower         0.119
##   90 Percent confidence interval - upper         0.128
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.064
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   UnionView =~                                                          
##     TWB41             0.575    0.025   23.115    0.000    0.575    0.484
##     TWB42             1.169    0.024   47.790    0.000    1.169    0.879
##     TWB43             1.225    0.026   46.607    0.000    1.225    0.862
##   SchoolLMC =~                                                          
##     TWB44             1.017    0.022   46.634    0.000    1.017    0.829
##     TWB45             0.991    0.025   39.294    0.000    0.991    0.736
##     TWB46             0.832    0.026   32.376    0.000    0.832    0.636
##     TWB47             1.105    0.022   50.000    0.000    1.105    0.868
##   DistrictLMC =~                                                        
##     TWB48             0.988    0.020   50.353    0.000    0.988    0.856
##     TWB49             1.009    0.019   52.786    0.000    1.009    0.881
##     TWB50             1.084    0.019   56.007    0.000    1.084    0.912
##     TWB51             0.779    0.023   33.820    0.000    0.779    0.646
##     TWB52             1.002    0.021   47.058    0.000    1.002    0.820
##     TWB53             0.875    0.023   37.348    0.000    0.875    0.697
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   UnionView ~~                                                          
##     SchoolLMC         0.595    0.017   35.489    0.000    0.595    0.595
##     DistrictLMC       0.592    0.016   36.944    0.000    0.592    0.592
##   SchoolLMC ~~                                                          
##     DistrictLMC       0.786    0.010   75.552    0.000    0.786    0.786
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .TWB41             1.079    0.033   32.251    0.000    1.079    0.765
##    .TWB42             0.402    0.028   14.181    0.000    0.402    0.227
##    .TWB43             0.517    0.032   16.057    0.000    0.517    0.256
##    .TWB44             0.471    0.019   24.688    0.000    0.471    0.313
##    .TWB45             0.831    0.029   28.977    0.000    0.831    0.458
##    .TWB46             1.019    0.033   31.029    0.000    1.019    0.596
##    .TWB47             0.401    0.019   21.284    0.000    0.401    0.247
##    .TWB48             0.357    0.013   27.888    0.000    0.357    0.268
##    .TWB49             0.294    0.011   26.295    0.000    0.294    0.224
##    .TWB50             0.237    0.010   23.117    0.000    0.237    0.168
##    .TWB51             0.847    0.026   32.215    0.000    0.847    0.582
##    .TWB52             0.491    0.017   29.383    0.000    0.491    0.328
##    .TWB53             0.810    0.026   31.733    0.000    0.810    0.514
##     UnionView         1.000                               1.000    1.000
##     SchoolLMC         1.000                               1.000    1.000
##     DistrictLMC       1.000                               1.000    1.000

TWB41 probably should be dropped weakloading and weakens the model not a three or strong single factor model fit either, they do all load well on their respective factors though, likely better to just look at single factors

# Define single-factor LMC model
lmc_model_1factor <- '
  LMC =~ TWB41 + TWB42 + TWB43 + TWB44 + TWB45 + TWB46 + TWB47 + TWB48 + 
         TWB49 + TWB50 + TWB51 + TWB52 + TWB53 
'

# Fit 1-factor model
lmc_fit_1factor <- cfa(model = lmc_model_1factor, data = lmc_cfa_data, std.lv = TRUE)

# Compare models
anova(lmc_fit_1factor, fit_lmc_revised)

## 
## Chi-Squared Difference Test
## 
##                 Df   AIC   BIC  Chisq Chisq diff   RMSEA Df diff Pr(>Chisq)    
## fit_lmc_revised 62 78622 78788 2218.5                                          
## lmc_fit_1factor 65 81338 81487 4940.4     2721.9 0.63145       3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

the 3 factor solution is better than a single factor so when looking at relationships should use the factors for the scales like the TWB measure

df <- df %>%
  mutate(
    job_satisfaction_score = TWB35,
    job_stress_score = TWB36
  )

# Drop rows with missing values for retention items, then calculate mean
df <- df %>%
  filter(!is.na(TWB32) & !is.na(TWB33) & !is.na(TWB34)) %>%
  mutate(
    retention_score = (TWB32 + TWB33 + TWB34) / 3
  )





retention_items <- df %>%
  select(TWB32, TWB33, TWB34) %>%
  na.omit()

psych::alpha(retention_items)

## 
## Reliability analysis   
## Call: psych::alpha(x = retention_items)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.71      0.72     0.7      0.46 2.6 0.011    4 0.87     0.38
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.69  0.71  0.73
## Duhachek  0.69  0.71  0.73
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r  S/N alpha se var.r med.r
## TWB32      0.55      0.55    0.38      0.38 1.23   0.0187    NA  0.38
## TWB33      0.42      0.43    0.27      0.27 0.74   0.0237    NA  0.27
## TWB34      0.85      0.85    0.74      0.74 5.73   0.0062    NA  0.74
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## TWB32 2306  0.81  0.84  0.77   0.59  4.4 1.0
## TWB33 2306  0.87  0.88  0.85   0.68  4.3 1.1
## TWB34 2306  0.72  0.69  0.39   0.35  3.4 1.2
## 
## Non missing response frequency for each item
##          1    2    3    4    5 miss
## TWB32 0.03 0.03 0.11 0.16 0.66    0
## TWB33 0.04 0.03 0.14 0.17 0.63    0
## TWB34 0.09 0.12 0.29 0.29 0.21    0

df <- df %>%
  mutate(
    nps_score = (TWB37 + TWB38) / 2
  )

# Select relevant variables for correlation
correlation_df <- df %>%
  select(
    # TWB composite and subscales
    twb_composite_score,
    twb_sub_score,
    growth_score,
    wellbeing_score,
    acceptance_score,
    adaptability_score,
    depletion_score,
    foundational_support_score,

    # LMC composite and subscales
    lmc_composite_score,
    union_view_score,
    school_lmc_score,
    district_lmc_score,

    # Outcomes
    job_satisfaction_score,
    job_stress_score,
    retention_score,
    nps_score
  ) %>%
  drop_na()  # ensure complete data for all variables

# Compute correlation matrix
cor_matrix <- cor(correlation_df, use = "complete.obs")

# Print correlation matrix (rounded for readability)
round(cor_matrix, 2)

##                            twb_composite_score twb_sub_score growth_score
## twb_composite_score                       1.00          0.76         0.69
## twb_sub_score                             0.76          1.00         0.43
## growth_score                              0.69          0.43         1.00
## wellbeing_score                           0.49          0.22         0.41
## acceptance_score                          0.64          0.35         0.41
## adaptability_score                        0.66          0.37         0.46
## depletion_score                           0.00         -0.20        -0.14
## foundational_support_score                0.69          0.66         0.35
## lmc_composite_score                       0.46          0.40         0.28
## union_view_score                          0.22          0.10         0.18
## school_lmc_score                          0.49          0.50         0.28
## district_lmc_score                        0.48          0.43         0.27
## job_satisfaction_score                    0.54          0.59         0.37
## job_stress_score                         -0.15         -0.24        -0.11
## retention_score                           0.42          0.42         0.28
## nps_score                                 0.55          0.62         0.34
##                            wellbeing_score acceptance_score adaptability_score
## twb_composite_score                   0.49             0.64               0.66
## twb_sub_score                         0.22             0.35               0.37
## growth_score                          0.41             0.41               0.46
## wellbeing_score                       1.00             0.41               0.34
## acceptance_score                      0.41             1.00               0.65
## adaptability_score                    0.34             0.65               1.00
## depletion_score                      -0.35            -0.31              -0.19
## foundational_support_score            0.20             0.31               0.30
## lmc_composite_score                   0.18             0.22               0.23
## union_view_score                      0.14             0.14               0.16
## school_lmc_score                      0.15             0.21               0.23
## district_lmc_score                    0.17             0.22               0.22
## job_satisfaction_score                0.24             0.32               0.28
## job_stress_score                     -0.16            -0.25              -0.15
## retention_score                       0.14             0.20               0.18
## nps_score                             0.15             0.27               0.28
##                            depletion_score foundational_support_score
## twb_composite_score                   0.00                       0.69
## twb_sub_score                        -0.20                       0.66
## growth_score                         -0.14                       0.35
## wellbeing_score                      -0.35                       0.20
## acceptance_score                     -0.31                       0.31
## adaptability_score                   -0.19                       0.30
## depletion_score                       1.00                      -0.26
## foundational_support_score           -0.26                       1.00
## lmc_composite_score                  -0.08                       0.51
## union_view_score                     -0.04                       0.22
## school_lmc_score                     -0.07                       0.53
## district_lmc_score                   -0.09                       0.58
## job_satisfaction_score               -0.33                       0.65
## job_stress_score                      0.50                      -0.34
## retention_score                      -0.15                       0.51
## nps_score                            -0.24                       0.67
##                            lmc_composite_score union_view_score
## twb_composite_score                       0.46             0.22
## twb_sub_score                             0.40             0.10
## growth_score                              0.28             0.18
## wellbeing_score                           0.18             0.14
## acceptance_score                          0.22             0.14
## adaptability_score                        0.23             0.16
## depletion_score                          -0.08            -0.04
## foundational_support_score                0.51             0.22
## lmc_composite_score                       1.00             0.82
## union_view_score                          0.82             1.00
## school_lmc_score                          0.89             0.56
## district_lmc_score                        0.89             0.56
## job_satisfaction_score                    0.42             0.21
## job_stress_score                         -0.14            -0.06
## retention_score                           0.40             0.24
## nps_score                                 0.45             0.21
##                            school_lmc_score district_lmc_score
## twb_composite_score                    0.49               0.48
## twb_sub_score                          0.50               0.43
## growth_score                           0.28               0.27
## wellbeing_score                        0.15               0.17
## acceptance_score                       0.21               0.22
## adaptability_score                     0.23               0.22
## depletion_score                       -0.07              -0.09
## foundational_support_score             0.53               0.58
## lmc_composite_score                    0.89               0.89
## union_view_score                       0.56               0.56
## school_lmc_score                       1.00               0.78
## district_lmc_score                     0.78               1.00
## job_satisfaction_score                 0.44               0.45
## job_stress_score                      -0.13              -0.19
## retention_score                        0.39               0.43
## nps_score                              0.47               0.49
##                            job_satisfaction_score job_stress_score
## twb_composite_score                          0.54            -0.15
## twb_sub_score                                0.59            -0.24
## growth_score                                 0.37            -0.11
## wellbeing_score                              0.24            -0.16
## acceptance_score                             0.32            -0.25
## adaptability_score                           0.28            -0.15
## depletion_score                             -0.33             0.50
## foundational_support_score                   0.65            -0.34
## lmc_composite_score                          0.42            -0.14
## union_view_score                             0.21            -0.06
## school_lmc_score                             0.44            -0.13
## district_lmc_score                           0.45            -0.19
## job_satisfaction_score                       1.00            -0.37
## job_stress_score                            -0.37             1.00
## retention_score                              0.56            -0.20
## nps_score                                    0.74            -0.31
##                            retention_score nps_score
## twb_composite_score                   0.42      0.55
## twb_sub_score                         0.42      0.62
## growth_score                          0.28      0.34
## wellbeing_score                       0.14      0.15
## acceptance_score                      0.20      0.27
## adaptability_score                    0.18      0.28
## depletion_score                      -0.15     -0.24
## foundational_support_score            0.51      0.67
## lmc_composite_score                   0.40      0.45
## union_view_score                      0.24      0.21
## school_lmc_score                      0.39      0.47
## district_lmc_score                    0.43      0.49
## job_satisfaction_score                0.56      0.74
## job_stress_score                     -0.20     -0.31
## retention_score                       1.00      0.57
## nps_score                             0.57      1.00

library(psych)

cor_test_result <- corr.test(correlation_df, use = "pairwise.complete.obs")
cor_test_result$r    # correlation coefficients

##                            twb_composite_score twb_sub_score growth_score
## twb_composite_score                1.000000000     0.7566815    0.6895244
## twb_sub_score                      0.756681519     1.0000000    0.4324160
## growth_score                       0.689524404     0.4324160    1.0000000
## wellbeing_score                    0.494277215     0.2183531    0.4137432
## acceptance_score                   0.641239539     0.3530281    0.4081834
## adaptability_score                 0.661706332     0.3681852    0.4597182
## depletion_score                   -0.004717223    -0.2028791   -0.1393486
## foundational_support_score         0.690250642     0.6616363    0.3496003
## lmc_composite_score                0.455683418     0.3957809    0.2768983
## union_view_score                   0.220671334     0.1047084    0.1765675
## school_lmc_score                   0.493767011     0.5032059    0.2764987
## district_lmc_score                 0.481256809     0.4345898    0.2710826
## job_satisfaction_score             0.541604487     0.5883221    0.3717968
## job_stress_score                  -0.151916478    -0.2359759   -0.1085983
## retention_score                    0.419955007     0.4201940    0.2801225
## nps_score                          0.549225386     0.6231290    0.3438317
##                            wellbeing_score acceptance_score adaptability_score
## twb_composite_score              0.4942772        0.6412395          0.6617063
## twb_sub_score                    0.2183531        0.3530281          0.3681852
## growth_score                     0.4137432        0.4081834          0.4597182
## wellbeing_score                  1.0000000        0.4127218          0.3398025
## acceptance_score                 0.4127218        1.0000000          0.6530894
## adaptability_score               0.3398025        0.6530894          1.0000000
## depletion_score                 -0.3468168       -0.3097684         -0.1879590
## foundational_support_score       0.2035196        0.3099500          0.2956683
## lmc_composite_score              0.1756600        0.2186820          0.2338535
## union_view_score                 0.1403354        0.1370450          0.1583018
## school_lmc_score                 0.1508585        0.2147298          0.2289725
## district_lmc_score               0.1669072        0.2204463          0.2237594
## job_satisfaction_score           0.2407085        0.3209581          0.2754101
## job_stress_score                -0.1614493       -0.2461240         -0.1537617
## retention_score                  0.1437810        0.2022098          0.1812599
## nps_score                        0.1457336        0.2707030          0.2824174
##                            depletion_score foundational_support_score
## twb_composite_score           -0.004717223                  0.6902506
## twb_sub_score                 -0.202879098                  0.6616363
## growth_score                  -0.139348579                  0.3496003
## wellbeing_score               -0.346816804                  0.2035196
## acceptance_score              -0.309768438                  0.3099500
## adaptability_score            -0.187959030                  0.2956683
## depletion_score                1.000000000                 -0.2585060
## foundational_support_score    -0.258505989                  1.0000000
## lmc_composite_score           -0.080094434                  0.5056475
## union_view_score              -0.043292674                  0.2207401
## school_lmc_score              -0.073756772                  0.5326269
## district_lmc_score            -0.093247366                  0.5758653
## job_satisfaction_score        -0.326868499                  0.6491439
## job_stress_score               0.497084259                 -0.3383081
## retention_score               -0.147874585                  0.5103111
## nps_score                     -0.244976590                  0.6663052
##                            lmc_composite_score union_view_score
## twb_composite_score                 0.45568342       0.22067133
## twb_sub_score                       0.39578089       0.10470840
## growth_score                        0.27689832       0.17656754
## wellbeing_score                     0.17565999       0.14033541
## acceptance_score                    0.21868204       0.13704497
## adaptability_score                  0.23385347       0.15830180
## depletion_score                    -0.08009443      -0.04329267
## foundational_support_score          0.50564746       0.22074012
## lmc_composite_score                 1.00000000       0.81957193
## union_view_score                    0.81957193       1.00000000
## school_lmc_score                    0.89443896       0.55853774
## district_lmc_score                  0.89095718       0.55743177
## job_satisfaction_score              0.41773500       0.20780129
## job_stress_score                   -0.14464716      -0.05740210
## retention_score                     0.40398578       0.24262730
## nps_score                           0.45051661       0.21427274
##                            school_lmc_score district_lmc_score
## twb_composite_score              0.49376701         0.48125681
## twb_sub_score                    0.50320590         0.43458980
## growth_score                     0.27649871         0.27108264
## wellbeing_score                  0.15085847         0.16690721
## acceptance_score                 0.21472977         0.22044627
## adaptability_score               0.22897252         0.22375942
## depletion_score                 -0.07375677        -0.09324737
## foundational_support_score       0.53262690         0.57586531
## lmc_composite_score              0.89443896         0.89095718
## union_view_score                 0.55853774         0.55743177
## school_lmc_score                 1.00000000         0.77758211
## district_lmc_score               0.77758211         1.00000000
## job_satisfaction_score           0.44037041         0.44797508
## job_stress_score                -0.13362952        -0.19034982
## retention_score                  0.38967102         0.42587932
## nps_score                        0.47408323         0.49460375
##                            job_satisfaction_score job_stress_score
## twb_composite_score                     0.5416045       -0.1519165
## twb_sub_score                           0.5883221       -0.2359759
## growth_score                            0.3717968       -0.1085983
## wellbeing_score                         0.2407085       -0.1614493
## acceptance_score                        0.3209581       -0.2461240
## adaptability_score                      0.2754101       -0.1537617
## depletion_score                        -0.3268685        0.4970843
## foundational_support_score              0.6491439       -0.3383081
## lmc_composite_score                     0.4177350       -0.1446472
## union_view_score                        0.2078013       -0.0574021
## school_lmc_score                        0.4403704       -0.1336295
## district_lmc_score                      0.4479751       -0.1903498
## job_satisfaction_score                  1.0000000       -0.3709560
## job_stress_score                       -0.3709560        1.0000000
## retention_score                         0.5619743       -0.1951259
## nps_score                               0.7374753       -0.3093455
##                            retention_score  nps_score
## twb_composite_score              0.4199550  0.5492254
## twb_sub_score                    0.4201940  0.6231290
## growth_score                     0.2801225  0.3438317
## wellbeing_score                  0.1437810  0.1457336
## acceptance_score                 0.2022098  0.2707030
## adaptability_score               0.1812599  0.2824174
## depletion_score                 -0.1478746 -0.2449766
## foundational_support_score       0.5103111  0.6663052
## lmc_composite_score              0.4039858  0.4505166
## union_view_score                 0.2426273  0.2142727
## school_lmc_score                 0.3896710  0.4740832
## district_lmc_score               0.4258793  0.4946038
## job_satisfaction_score           0.5619743  0.7374753
## job_stress_score                -0.1951259 -0.3093455
## retention_score                  1.0000000  0.5689682
## nps_score                        0.5689682  1.0000000

cor_test_result$p    # p-values

##                            twb_composite_score twb_sub_score  growth_score
## twb_composite_score               0.000000e+00 2.449574e-276 8.570017e-210
## twb_sub_score                    2.111702e-278  0.000000e+00  2.187768e-67
## growth_score                     7.584086e-212  2.700948e-69  0.000000e+00
## wellbeing_score                   4.174680e-93  1.258624e-17  5.048802e-63
## acceptance_score                 2.814636e-174  3.344775e-45  3.127432e-61
## adaptability_score               2.397601e-189  2.654829e-49  3.342682e-79
## depletion_score                   8.552500e-01  2.222256e-15  6.119068e-08
## foundational_support_score       1.812581e-212 2.712358e-189  2.637120e-44
## lmc_composite_score               1.110905e-77  2.355895e-57  9.009708e-28
## union_view_score                  5.598537e-18  4.898511e-05  5.889132e-12
## school_lmc_score                  6.881746e-93  5.775538e-97  1.079248e-27
## district_lmc_score                1.114242e-87  4.736272e-70  1.210806e-26
## job_satisfaction_score           6.188563e-115 3.430497e-140  2.593971e-50
## job_stress_score                  3.437811e-09  2.110107e-20  2.529573e-05
## retention_score                   4.577014e-65  3.811802e-65  2.076316e-28
## nps_score                        8.737269e-119 7.171718e-162  8.040676e-43
##                            wellbeing_score acceptance_score adaptability_score
## twb_composite_score           3.798958e-91    3.011660e-172      2.661337e-187
## twb_sub_score                 4.405183e-16     2.240999e-43       1.805283e-47
## growth_score                  3.837089e-61     2.314300e-59       2.908133e-77
## wellbeing_score               0.000000e+00     8.128081e-61       5.282999e-40
## acceptance_score              1.083744e-62     0.000000e+00      8.032768e-181
## adaptability_score            8.385713e-42    7.369512e-183       0.000000e+00
## depletion_score               1.384097e-43     1.122787e-34       2.231249e-13
## foundational_support_score    1.808255e-15     1.022376e-34       1.328588e-31
## lmc_composite_score           7.574750e-12     1.122596e-17       4.688144e-20
## union_view_score              4.924400e-08     1.010068e-07       7.243190e-10
## school_lmc_score              4.422466e-09     4.383428e-17       2.853150e-19
## district_lmc_score            8.019640e-11     6.059071e-18       1.875821e-18
## job_satisfaction_score        3.458413e-21     3.083916e-37       1.762246e-27
## job_stress_score              3.282932e-10     4.159292e-22       2.206429e-09
## retention_score               2.279612e-08     2.754417e-15       1.568933e-12
## nps_score                     1.461341e-08     1.431492e-26       7.217792e-29
##                            depletion_score foundational_support_score
## twb_composite_score           8.552500e-01              2.066343e-210
## twb_sub_score                 6.666767e-14              2.983593e-187
## growth_score                  6.730975e-07               1.740499e-42
## wellbeing_score               8.996632e-42               5.605590e-14
## acceptance_score              6.512166e-33               6.032020e-33
## adaptability_score            5.801248e-12               7.440093e-30
## depletion_score               0.000000e+00               1.292673e-22
## foundational_support_score    2.693069e-24               0.000000e+00
## lmc_composite_score           1.919761e-03               4.855613e-98
## union_view_score              9.393689e-02               5.464808e-18
## school_lmc_score              4.287580e-03              1.592828e-110
## district_lmc_score            3.016137e-04              4.684031e-133
## job_satisfaction_score        1.234899e-38              5.855855e-180
## job_stress_score              2.628541e-94               1.983305e-41
## retention_score               8.913773e-09              4.053215e-100
## nps_score                     6.543466e-22              6.731896e-193
##                            lmc_composite_score union_view_score
## twb_composite_score               9.553784e-76     2.131275e-16
## twb_sub_score                     1.696245e-55     3.428958e-04
## growth_score                      4.775145e-26     1.413392e-10
## wellbeing_score                   1.742193e-10     5.909280e-07
## acceptance_score                  4.041347e-16     1.010068e-06
## adaptability_score                1.969020e-18     1.448638e-08
## depletion_score                   9.598803e-03     1.878738e-01
## foundational_support_score        4.612833e-96     2.131275e-16
## lmc_composite_score               0.000000e+00     0.000000e+00
## union_view_score                  0.000000e+00     0.000000e+00
## school_lmc_score                  0.000000e+00    1.257964e-123
## district_lmc_score                0.000000e+00    4.816576e-123
## job_satisfaction_score            2.486096e-64     4.478238e-16
## job_stress_score                  1.872914e-08     2.630572e-02
## retention_score                   6.696099e-60     1.642653e-21
## nps_score                         9.216425e-76     5.122473e-17
##                            school_lmc_score district_lmc_score
## twb_composite_score            6.193572e-91       9.916752e-86
## twb_sub_score                  5.429006e-95       3.883743e-68
## growth_score                   5.612091e-26       6.054032e-25
## wellbeing_score                7.518192e-08       1.764321e-09
## acceptance_score               1.490366e-15       2.241856e-16
## adaptability_score             1.169792e-17       7.503284e-17
## depletion_score                1.715032e-02       1.809682e-03
## foundational_support_score    1.545043e-108      4.871392e-131
## lmc_composite_score            0.000000e+00       0.000000e+00
## union_view_score              1.270543e-121      4.816576e-121
## school_lmc_score               0.000000e+00      1.114778e-301
## district_lmc_score            9.528016e-304       0.000000e+00
## job_satisfaction_score         4.342942e-72       7.877491e-75
## job_stress_score               2.091936e-07       1.092773e-13
## retention_score                1.667787e-55       4.700171e-67
## nps_score                      8.668514e-85       3.030387e-93
##                            job_satisfaction_score job_stress_score
## twb_composite_score                 6.064792e-113     6.188059e-08
## twb_sub_score                       3.602022e-138     9.073460e-19
## growth_score                         1.815780e-48     2.023658e-04
## wellbeing_score                      1.521702e-19     6.894156e-09
## acceptance_score                     1.850350e-35     1.954867e-20
## adaptability_score                   8.987456e-26     4.192215e-08
## depletion_score                      7.532883e-37     2.444543e-92
## foundational_support_score          6.324323e-178     1.229649e-39
## lmc_composite_score                  1.914294e-62     2.622080e-07
## union_view_score                     1.433036e-14     7.891715e-02
## school_lmc_score                     3.604642e-70     1.882743e-06
## district_lmc_score                   6.617093e-73     2.950486e-12
## job_satisfaction_score               0.000000e+00     3.084081e-48
## job_stress_score                     4.469683e-50     0.000000e+00
## retention_score                     1.878508e-125     2.552567e-14
## nps_score                           2.785438e-257     1.396223e-34
##                            retention_score     nps_score
## twb_composite_score           3.570071e-63 8.649896e-117
## twb_sub_score                 3.011324e-63 7.602021e-160
## growth_score                  1.121211e-26  5.146033e-41
## wellbeing_score               2.963496e-07  2.192011e-07
## acceptance_score              7.987810e-14  7.014309e-25
## adaptability_score            3.922333e-11  3.969785e-27
## depletion_score               1.426204e-07  3.009994e-20
## foundational_support_score    3.891087e-98 7.539723e-191
## lmc_composite_score           4.888153e-58  7.833961e-74
## union_view_score              7.391937e-20  1.690416e-15
## school_lmc_score              1.184129e-53  7.628292e-83
## district_lmc_score            3.760137e-65  2.787956e-91
## job_satisfaction_score       1.916078e-123 3.203253e-255
## job_stress_score              7.147188e-13  7.958473e-33
## retention_score               0.000000e+00 3.190715e-127
## nps_score                    3.097781e-129  0.000000e+00

library(corrplot)

corrplot(cor_matrix, method = "color", type = "upper", tl.cex = 0.8, addCoef.col = "black")

library(dplyr)
# Rename the leadership subscale
 df <- df %>%
   rename(twb_leader_score = twb_sub_score)

# Select all relevant variables
correlation_df <- df %>%
  select(
    # TWB composite and subscales
    twb_composite_score,
    twb_leader_score,
    growth_score,
    wellbeing_score,
    acceptance_score,
    adaptability_score,
    depletion_score,
    foundational_support_score,

    # LMC composite and subscales
    lmc_composite_score,
    union_view_score,
    school_lmc_score,
    district_lmc_score,

    # Outcomes
    job_satisfaction_score,
    job_stress_score,
    retention_score,
    nps_score
  ) %>%
  drop_na()

# Run correlation matrix with significance tests
library(psych)

cor_results <- corr.test(correlation_df, use = "pairwise")

# Extract r values and p values
cor_matrix <- cor_results$r
p_matrix <- cor_results$p

# Function to format with significance asterisks
format_cor <- function(r, p) {
  stars <- ifelse(p < 0.001, "***",
            ifelse(p < 0.01, "**",
            ifelse(p < 0.05, "*", "")))
  sprintf("%.2f%s", r, stars)
}

# Apply formatting
formatted_matrix <- matrix("", nrow = nrow(cor_matrix), ncol = ncol(cor_matrix),
                           dimnames = dimnames(cor_matrix))

for (i in 1:nrow(cor_matrix)) {
  for (j in 1:ncol(cor_matrix)) {
    formatted_matrix[i, j] <- format_cor(cor_matrix[i, j], p_matrix[i, j])
  }
}

# Print the formatted correlation matrix with stars
formatted_matrix

##                            twb_composite_score twb_leader_score growth_score
## twb_composite_score        "1.00***"           "0.76***"        "0.69***"   
## twb_leader_score           "0.76***"           "1.00***"        "0.43***"   
## growth_score               "0.69***"           "0.43***"        "1.00***"   
## wellbeing_score            "0.49***"           "0.22***"        "0.41***"   
## acceptance_score           "0.64***"           "0.35***"        "0.41***"   
## adaptability_score         "0.66***"           "0.37***"        "0.46***"   
## depletion_score            "-0.00"             "-0.20***"       "-0.14***"  
## foundational_support_score "0.69***"           "0.66***"        "0.35***"   
## lmc_composite_score        "0.46***"           "0.40***"        "0.28***"   
## union_view_score           "0.22***"           "0.10***"        "0.18***"   
## school_lmc_score           "0.49***"           "0.50***"        "0.28***"   
## district_lmc_score         "0.48***"           "0.43***"        "0.27***"   
## job_satisfaction_score     "0.54***"           "0.59***"        "0.37***"   
## job_stress_score           "-0.15***"          "-0.24***"       "-0.11***"  
## retention_score            "0.42***"           "0.42***"        "0.28***"   
## nps_score                  "0.55***"           "0.62***"        "0.34***"   
##                            wellbeing_score acceptance_score adaptability_score
## twb_composite_score        "0.49***"       "0.64***"        "0.66***"         
## twb_leader_score           "0.22***"       "0.35***"        "0.37***"         
## growth_score               "0.41***"       "0.41***"        "0.46***"         
## wellbeing_score            "1.00***"       "0.41***"        "0.34***"         
## acceptance_score           "0.41***"       "1.00***"        "0.65***"         
## adaptability_score         "0.34***"       "0.65***"        "1.00***"         
## depletion_score            "-0.35***"      "-0.31***"       "-0.19***"        
## foundational_support_score "0.20***"       "0.31***"        "0.30***"         
## lmc_composite_score        "0.18***"       "0.22***"        "0.23***"         
## union_view_score           "0.14***"       "0.14***"        "0.16***"         
## school_lmc_score           "0.15***"       "0.21***"        "0.23***"         
## district_lmc_score         "0.17***"       "0.22***"        "0.22***"         
## job_satisfaction_score     "0.24***"       "0.32***"        "0.28***"         
## job_stress_score           "-0.16***"      "-0.25***"       "-0.15***"        
## retention_score            "0.14***"       "0.20***"        "0.18***"         
## nps_score                  "0.15***"       "0.27***"        "0.28***"         
##                            depletion_score foundational_support_score
## twb_composite_score        "-0.00"         "0.69***"                 
## twb_leader_score           "-0.20***"      "0.66***"                 
## growth_score               "-0.14***"      "0.35***"                 
## wellbeing_score            "-0.35***"      "0.20***"                 
## acceptance_score           "-0.31***"      "0.31***"                 
## adaptability_score         "-0.19***"      "0.30***"                 
## depletion_score            "1.00***"       "-0.26***"                
## foundational_support_score "-0.26***"      "1.00***"                 
## lmc_composite_score        "-0.08**"       "0.51***"                 
## union_view_score           "-0.04"         "0.22***"                 
## school_lmc_score           "-0.07**"       "0.53***"                 
## district_lmc_score         "-0.09***"      "0.58***"                 
## job_satisfaction_score     "-0.33***"      "0.65***"                 
## job_stress_score           "0.50***"       "-0.34***"                
## retention_score            "-0.15***"      "0.51***"                 
## nps_score                  "-0.24***"      "0.67***"                 
##                            lmc_composite_score union_view_score
## twb_composite_score        "0.46***"           "0.22***"       
## twb_leader_score           "0.40***"           "0.10***"       
## growth_score               "0.28***"           "0.18***"       
## wellbeing_score            "0.18***"           "0.14***"       
## acceptance_score           "0.22***"           "0.14***"       
## adaptability_score         "0.23***"           "0.16***"       
## depletion_score            "-0.08**"           "-0.04"         
## foundational_support_score "0.51***"           "0.22***"       
## lmc_composite_score        "1.00***"           "0.82***"       
## union_view_score           "0.82***"           "1.00***"       
## school_lmc_score           "0.89***"           "0.56***"       
## district_lmc_score         "0.89***"           "0.56***"       
## job_satisfaction_score     "0.42***"           "0.21***"       
## job_stress_score           "-0.14***"          "-0.06*"        
## retention_score            "0.40***"           "0.24***"       
## nps_score                  "0.45***"           "0.21***"       
##                            school_lmc_score district_lmc_score
## twb_composite_score        "0.49***"        "0.48***"         
## twb_leader_score           "0.50***"        "0.43***"         
## growth_score               "0.28***"        "0.27***"         
## wellbeing_score            "0.15***"        "0.17***"         
## acceptance_score           "0.21***"        "0.22***"         
## adaptability_score         "0.23***"        "0.22***"         
## depletion_score            "-0.07*"         "-0.09**"         
## foundational_support_score "0.53***"        "0.58***"         
## lmc_composite_score        "0.89***"        "0.89***"         
## union_view_score           "0.56***"        "0.56***"         
## school_lmc_score           "1.00***"        "0.78***"         
## district_lmc_score         "0.78***"        "1.00***"         
## job_satisfaction_score     "0.44***"        "0.45***"         
## job_stress_score           "-0.13***"       "-0.19***"        
## retention_score            "0.39***"        "0.43***"         
## nps_score                  "0.47***"        "0.49***"         
##                            job_satisfaction_score job_stress_score
## twb_composite_score        "0.54***"              "-0.15***"      
## twb_leader_score           "0.59***"              "-0.24***"      
## growth_score               "0.37***"              "-0.11***"      
## wellbeing_score            "0.24***"              "-0.16***"      
## acceptance_score           "0.32***"              "-0.25***"      
## adaptability_score         "0.28***"              "-0.15***"      
## depletion_score            "-0.33***"             "0.50***"       
## foundational_support_score "0.65***"              "-0.34***"      
## lmc_composite_score        "0.42***"              "-0.14***"      
## union_view_score           "0.21***"              "-0.06"         
## school_lmc_score           "0.44***"              "-0.13***"      
## district_lmc_score         "0.45***"              "-0.19***"      
## job_satisfaction_score     "1.00***"              "-0.37***"      
## job_stress_score           "-0.37***"             "1.00***"       
## retention_score            "0.56***"              "-0.20***"      
## nps_score                  "0.74***"              "-0.31***"      
##                            retention_score nps_score 
## twb_composite_score        "0.42***"       "0.55***" 
## twb_leader_score           "0.42***"       "0.62***" 
## growth_score               "0.28***"       "0.34***" 
## wellbeing_score            "0.14***"       "0.15***" 
## acceptance_score           "0.20***"       "0.27***" 
## adaptability_score         "0.18***"       "0.28***" 
## depletion_score            "-0.15***"      "-0.24***"
## foundational_support_score "0.51***"       "0.67***" 
## lmc_composite_score        "0.40***"       "0.45***" 
## union_view_score           "0.24***"       "0.21***" 
## school_lmc_score           "0.39***"       "0.47***" 
## district_lmc_score         "0.43***"       "0.49***" 
## job_satisfaction_score     "0.56***"       "0.74***" 
## job_stress_score           "-0.20***"      "-0.31***"
## retention_score            "1.00***"       "0.57***" 
## nps_score                  "0.57***"       "1.00***"

# Install if needed
# install.packages("corrplot")

library(corrplot)

# Re-run correlation test to get r and p
cor_results <- psych::corr.test(correlation_df, use = "pairwise")
cor_matrix <- cor_results$r
p_matrix <- cor_results$p

# Visualize: Mask non-significant correlations (p > .05)
corrplot(cor_matrix,
         method = "color",
         type = "upper",
         order = "hclust",
         tl.cex = 0.8,
         addCoef.col = "black",
         number.cex = 0.7,
         p.mat = p_matrix,
         sig.level = 0.05,
         insig = "blank",     # hide insignificant coefficients
         diag = FALSE)

#what is the relationship between TWB and outcomes?
#models minus the general twb factor, they create different issues controlling for the general so we can model the relationship between each factor and the outcomes
# Make sure TWB factors are numeric (if not already)



# 1. Predict LMC Composite
model_lmc_composite <- lm(lmc_composite_score ~
  #twb_composite_score +
  twb_leader_score +
  growth_score +
  wellbeing_score +
  acceptance_score +
  adaptability_score +
  depletion_score +
  foundational_support_score,
  data = df
)


# 2. Predict School-Level LMC
model_school_lmc <- lm(school_lmc_score ~
  #twb_composite_score +
  twb_leader_score +
  growth_score +
  wellbeing_score +
  acceptance_score +
  adaptability_score +
  depletion_score +
  foundational_support_score,
  data = df
)


# 3. Predict District-Level LMC
model_district_lmc <- lm(district_lmc_score ~
  #twb_composite_score +
  twb_leader_score +
  growth_score +
  wellbeing_score +
  acceptance_score +
  adaptability_score +
  depletion_score +
  foundational_support_score,
  data = df
)


# 4. Predict Job Stress
model_stress <- lm(job_stress_score ~
  #twb_composite_score +
  twb_leader_score +
  growth_score +
  wellbeing_score +
  acceptance_score +
  adaptability_score +
  depletion_score +
  foundational_support_score,
  data = df
)


# 5. Predict Job Satisfaction
model_satisfaction <- lm(job_satisfaction_score ~
 # twb_composite_score +
  twb_leader_score +
  growth_score +
  wellbeing_score +
  acceptance_score +
  adaptability_score +
  depletion_score +
  foundational_support_score,
  data = df
)


# 6. Predict Retention Intention
model_retention <- lm(retention_score ~
  #twb_composite_score +
  twb_leader_score +
  growth_score +
  wellbeing_score +
  acceptance_score +
  adaptability_score +
  depletion_score +
  foundational_support_score,
  data = df
)


# 7. Predict Net Promoter Score
model_nps <- lm(nps_score ~
  #twb_composite_score +
  twb_leader_score +
  growth_score +
  wellbeing_score +
  acceptance_score +
  adaptability_score +
  depletion_score +
  foundational_support_score,
  data = df
)

summary(model_lmc_composite)

## 
## Call:
## lm(formula = lmc_composite_score ~ twb_leader_score + growth_score + 
##     wellbeing_score + acceptance_score + adaptability_score + 
##     depletion_score + foundational_support_score, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.5916  -1.0856   0.2497   1.5148   7.4954 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 4.67085    0.66512   7.023  3.3e-12 ***
## twb_leader_score            0.16315    0.07445   2.191 0.028574 *  
## growth_score                0.22406    0.10353   2.164 0.030615 *  
## wellbeing_score             0.19344    0.08616   2.245 0.024905 *  
## acceptance_score            0.03439    0.10271   0.335 0.737793    
## adaptability_score          0.17886    0.12861   1.391 0.164523    
## depletion_score             0.18472    0.05252   3.517 0.000449 ***
## foundational_support_score  1.06110    0.07308  14.519  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.379 on 1490 degrees of freedom
##   (808 observations deleted due to missingness)
## Multiple R-squared:  0.2787, Adjusted R-squared:  0.2753 
## F-statistic: 82.23 on 7 and 1490 DF,  p-value: < 2.2e-16

summary(model_school_lmc)

## 
## Call:
## lm(formula = school_lmc_score ~ twb_leader_score + growth_score + 
##     wellbeing_score + acceptance_score + adaptability_score + 
##     depletion_score + foundational_support_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3369 -0.4323  0.0567  0.5658  2.8565 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 1.14592    0.24571   4.664 3.38e-06 ***
## twb_leader_score            0.22836    0.02757   8.282 2.63e-16 ***
## growth_score                0.04897    0.03849   1.272    0.203    
## wellbeing_score             0.03315    0.03202   1.035    0.301    
## acceptance_score            0.02004    0.03797   0.528    0.598    
## adaptability_score          0.02532    0.04764   0.531    0.595    
## depletion_score             0.07937    0.01936   4.100 4.36e-05 ***
## foundational_support_score  0.34569    0.02707  12.771  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8891 on 1520 degrees of freedom
##   (778 observations deleted due to missingness)
## Multiple R-squared:  0.3285, Adjusted R-squared:  0.3255 
## F-statistic: 106.2 on 7 and 1520 DF,  p-value: < 2.2e-16

summary(model_district_lmc)

## 
## Call:
## lm(formula = district_lmc_score ~ twb_leader_score + growth_score + 
##     wellbeing_score + acceptance_score + adaptability_score + 
##     depletion_score + foundational_support_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4315 -0.4326  0.0715  0.5144  2.9482 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 1.21659    0.23363   5.207 2.18e-07 ***
## twb_leader_score            0.05696    0.02608   2.184 0.029099 *  
## growth_score                0.04919    0.03634   1.354 0.176015    
## wellbeing_score             0.05514    0.03023   1.824 0.068386 .  
## acceptance_score            0.01936    0.03596   0.538 0.590372    
## adaptability_score          0.03282    0.04519   0.726 0.467722    
## depletion_score             0.06941    0.01844   3.764 0.000173 ***
## foundational_support_score  0.47724    0.02543  18.765  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.838 on 1502 degrees of freedom
##   (796 observations deleted due to missingness)
## Multiple R-squared:  0.3503, Adjusted R-squared:  0.3472 
## F-statistic: 115.7 on 7 and 1502 DF,  p-value: < 2.2e-16

summary(model_stress)

## 
## Call:
## lm(formula = job_stress_score ~ twb_leader_score + growth_score + 
##     wellbeing_score + acceptance_score + adaptability_score + 
##     depletion_score + foundational_support_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.8258 -1.1488  0.1394  1.2779  6.2431 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 4.868741   0.512943   9.492  < 2e-16 ***
## twb_leader_score            0.008818   0.057210   0.154  0.87753    
## growth_score                0.140065   0.080137   1.748  0.08069 .  
## wellbeing_score             0.130537   0.066830   1.953  0.05096 .  
## acceptance_score           -0.232543   0.079079  -2.941  0.00332 ** 
## adaptability_score          0.064079   0.099365   0.645  0.51909    
## depletion_score             0.760367   0.040368  18.836  < 2e-16 ***
## foundational_support_score -0.447647   0.055788  -8.024    2e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.869 on 1554 degrees of freedom
##   (744 observations deleted due to missingness)
## Multiple R-squared:  0.2999, Adjusted R-squared:  0.2968 
## F-statistic: 95.11 on 7 and 1554 DF,  p-value: < 2.2e-16

summary(model_satisfaction)

## 
## Call:
## lm(formula = job_satisfaction_score ~ twb_leader_score + growth_score + 
##     wellbeing_score + acceptance_score + adaptability_score + 
##     depletion_score + foundational_support_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.8145 -0.7823  0.0911  0.8621  5.2083 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 2.33444    0.37687   6.194 7.48e-10 ***
## twb_leader_score            0.38334    0.04203   9.120  < 2e-16 ***
## growth_score                0.25409    0.05888   4.316 1.69e-05 ***
## wellbeing_score             0.01154    0.04910   0.235   0.8142    
## acceptance_score            0.09814    0.05810   1.689   0.0914 .  
## adaptability_score         -0.09578    0.07301  -1.312   0.1897    
## depletion_score            -0.21979    0.02966  -7.411 2.05e-13 ***
## foundational_support_score  0.71313    0.04099  17.398  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.373 on 1554 degrees of freedom
##   (744 observations deleted due to missingness)
## Multiple R-squared:  0.4948, Adjusted R-squared:  0.4926 
## F-statistic: 217.5 on 7 and 1554 DF,  p-value: < 2.2e-16

summary(model_retention)

## 
## Call:
## lm(formula = retention_score ~ twb_leader_score + growth_score + 
##     wellbeing_score + acceptance_score + adaptability_score + 
##     depletion_score + foundational_support_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4138 -0.3190  0.1428  0.4654  1.9883 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 2.119299   0.202887  10.446  < 2e-16 ***
## twb_leader_score            0.078230   0.022616   3.459 0.000557 ***
## growth_score                0.123121   0.031709   3.883 0.000108 ***
## wellbeing_score            -0.017352   0.026417  -0.657 0.511365    
## acceptance_score            0.031842   0.031154   1.022 0.306900    
## adaptability_score         -0.051518   0.039292  -1.311 0.189997    
## depletion_score            -0.002841   0.015969  -0.178 0.858833    
## foundational_support_score  0.313243   0.022081  14.186  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.74 on 1556 degrees of freedom
##   (742 observations deleted due to missingness)
## Multiple R-squared:  0.2792, Adjusted R-squared:  0.276 
## F-statistic: 86.11 on 7 and 1556 DF,  p-value: < 2.2e-16

summary(model_nps)

## 
## Call:
## lm(formula = nps_score ~ twb_leader_score + growth_score + wellbeing_score + 
##     acceptance_score + adaptability_score + depletion_score + 
##     foundational_support_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.1581 -0.9338  0.1334  1.1209  6.4714 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 0.62699    0.47597   1.317 0.187935    
## twb_leader_score            0.60700    0.05309  11.434  < 2e-16 ***
## growth_score                0.21430    0.07436   2.882 0.004008 ** 
## wellbeing_score            -0.20717    0.06201  -3.341 0.000855 ***
## acceptance_score           -0.04088    0.07338  -0.557 0.577527    
## adaptability_score          0.12763    0.09220   1.384 0.166471    
## depletion_score            -0.16305    0.03746  -4.353 1.43e-05 ***
## foundational_support_score  0.95470    0.05177  18.442  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.734 on 1554 degrees of freedom
##   (744 observations deleted due to missingness)
## Multiple R-squared:  0.5054, Adjusted R-squared:  0.5031 
## F-statistic: 226.8 on 7 and 1554 DF,  p-value: < 2.2e-16

TWB and foundational support emerge as consistent strong predictors of positive outcomes LMC Composite R² = .46 → moderate prediction

Strongest predictors:

🟢 twb_leader_score (+) — feeling supported by leadership relates to better LMC

🟢 foundational_support_score (+) — infrastructure/resources strongly relate to LMC

Others not significant once these are accounted for

School-Level LMC R² = .52 → strong model

🟢 twb_leader_score and foundational_support_score again are significant positive predictors

District-Level LMC R² = .42 → solid prediction

🟢 foundational_support_score (very strong)

🟢 acceptance_score (+)

🟢 twb_leader_score (+)

🔸 depletion_score (p = .055) borderline positive — might need follow-up

R² = .31 → modest prediction

🟢 depletion_score (+) — as expected

🔴 foundational_support_score (−) — better support = less stress

Job Satisfaction R² = .53 → strong model

🟢 twb_leader_score and foundational_support_score — strong positive effects

🔴 depletion_score — negative predictor (expected)

Retention Intention R² = .28 → moderate

🟢 twb_leader_score, growth_score, foundational_support_score all positively predict retention

Net Promoter Score R² = .44 → strong

🟢 twb_leader_score and foundational_support_score — very strong predictors

🔴 depletion_score — negative

Takeaways to Report TWB leadership support and foundational supports are the clearest, most consistent drivers of positive outcomes across collaboration, satisfaction, retention, and advocacy.

Depletion reliably predicts higher stress and lower NPS.

Other subscales (growth, adaptability, etc.) may still be useful in certain contexts but show weaker unique effects once major predictors are in the model.

#now use composite score as it generally works together to model relationships

# 1. Predict LMC Composite
model_lmc_composite_comp <- lm(lmc_composite_score ~ twb_composite_score, data = df)

# 2. Predict School-Level LMC
model_school_lmc_comp <- lm(school_lmc_score ~ twb_composite_score, data = df)

# 3. Predict District-Level LMC
model_district_lmc_comp <- lm(district_lmc_score ~ twb_composite_score, data = df)

# 4. Predict Job Stress
model_stress_comp <- lm(job_stress_score ~ twb_composite_score, data = df)

# 5. Predict Job Satisfaction
model_satisfaction_comp <- lm(job_satisfaction_score ~ twb_composite_score, data = df)

# 6. Predict Retention Intention
model_retention_comp <- lm(retention_score ~ twb_composite_score, data = df)

# 7. Predict Net Promoter Score
model_nps_comp <- lm(nps_score ~ twb_composite_score, data = df)

summary(model_lmc_composite_comp)

## 
## Call:
## lm(formula = lmc_composite_score ~ twb_composite_score, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.6355  -1.2481   0.3746   1.6238   7.6152 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1.75495    0.58873   2.981  0.00292 ** 
## twb_composite_score  0.35614    0.01799  19.800  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.488 on 1496 degrees of freedom
##   (808 observations deleted due to missingness)
## Multiple R-squared:  0.2076, Adjusted R-squared:  0.2071 
## F-statistic:   392 on 1 and 1496 DF,  p-value: < 2.2e-16

summary(model_school_lmc_comp)

## 
## Call:
## lm(formula = school_lmc_score ~ twb_composite_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0040 -0.4517  0.1453  0.5973  3.4530 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -0.390906   0.220540  -1.772   0.0765 .  
## twb_composite_score  0.149071   0.006735  22.134   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9421 on 1526 degrees of freedom
##   (778 observations deleted due to missingness)
## Multiple R-squared:  0.243,  Adjusted R-squared:  0.2425 
## F-statistic: 489.9 on 1 and 1526 DF,  p-value: < 2.2e-16

summary(model_district_lmc_comp)

## 
## Call:
## lm(formula = district_lmc_score ~ twb_composite_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1827 -0.4587  0.1355  0.5863  3.9803 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -0.230051   0.214440  -1.073    0.284    
## twb_composite_score  0.139985   0.006552  21.364   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9091 on 1508 degrees of freedom
##   (796 observations deleted due to missingness)
## Multiple R-squared:  0.2323, Adjusted R-squared:  0.2318 
## F-statistic: 456.4 on 1 and 1508 DF,  p-value: < 2.2e-16

summary(model_stress_comp)

## 
## Call:
## lm(formula = job_stress_score ~ twb_composite_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.9052 -1.4753  0.3759  1.5444  4.2688 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          9.39010    0.51333  18.293  < 2e-16 ***
## twb_composite_score -0.08712    0.01568  -5.556 3.24e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.207 on 1560 degrees of freedom
##   (744 observations deleted due to missingness)
## Multiple R-squared:  0.0194, Adjusted R-squared:  0.01877 
## F-statistic: 30.87 on 1 and 1560 DF,  p-value: 3.243e-08

summary(model_satisfaction_comp)

## 
## Call:
## lm(formula = job_satisfaction_score ~ twb_composite_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.8865 -0.9114  0.1910  1.1218  8.2701 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -2.05139    0.37803  -5.427 6.65e-08 ***
## twb_composite_score  0.29087    0.01155  25.189  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.626 on 1560 degrees of freedom
##   (744 observations deleted due to missingness)
## Multiple R-squared:  0.2891, Adjusted R-squared:  0.2887 
## F-statistic: 634.5 on 1 and 1560 DF,  p-value: < 2.2e-16

summary(model_retention_comp)

## 
## Call:
## lm(formula = retention_score ~ twb_composite_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3595 -0.3416  0.1750  0.5421  2.8957 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.778910   0.183174   4.252 2.24e-05 ***
## twb_composite_score 0.101955   0.005596  18.218  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.79 on 1562 degrees of freedom
##   (742 observations deleted due to missingness)
## Multiple R-squared:  0.1752, Adjusted R-squared:  0.1747 
## F-statistic: 331.9 on 1 and 1562 DF,  p-value: < 2.2e-16

summary(model_nps_comp)

## 
## Call:
## lm(formula = nps_score ~ twb_composite_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.2592 -1.0044  0.3173  1.3400 10.2927 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -5.1515     0.4812  -10.71   <2e-16 ***
## twb_composite_score   0.3738     0.0147   25.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.069 on 1560 degrees of freedom
##   (744 observations deleted due to missingness)
## Multiple R-squared:  0.2931, Adjusted R-squared:  0.2926 
## F-statistic: 646.7 on 1 and 1560 DF,  p-value: < 2.2e-16

MODEL-BY-MODEL INTERPRETATION 1. LMC Composite R² = .33

TWB composite score significantly predicts overall LMC

But: Detailed model adds ~13% more explained variance

School-Level LMC R² = .38

TWB composite score is a strong single predictor

However, leadership and foundational support carry unique weight

District-Level LMC R² = .26

TWB composite significantly predicts perceptions of collaboration at the district level

The individual model (R² = .42) gives more nuance — especially via foundational support

Job Stress ❌ Not significant — R² = .003

TWB composite does not predict job stress

Detailed model shows depletion and support are essential

Job Satisfaction R² = .34

Composite predicts satisfaction well — simple and strong model

Still, individual predictors offer more actionable insight

Retention R² = .18

TWB composite is significantly associated with intentions to stay

Slightly less predictive than full model (R² = .28)

Net Promoter Score R² = .23

Composite explains meaningful variance in advocacy

Stronger results in full model due to leader support and depletion

What This Means Practically The TWB composite score is a useful single predictor for most outcomes — especially satisfaction, LMC, and NPS.

However, more nuanced predictors (leader support, foundational support, depletion) offer:

More predictive power

More actionable targets for intervention

# Create a helper function
get_r2 <- function(model) summary(model)$r.squared

# Composite-only models
r2_composite <- c(
  LMC_Composite   = get_r2(model_lmc_composite_comp),
  School_LMC      = get_r2(model_school_lmc_comp),
  District_LMC    = get_r2(model_district_lmc_comp),
  Job_Satisfaction = get_r2(model_satisfaction_comp),
  Job_Stress      = get_r2(model_stress_comp),
  Retention       = get_r2(model_retention_comp),
  NPS             = get_r2(model_nps_comp)
)

# Subscale models
r2_subscales <- c(
  LMC_Composite   = get_r2(model_lmc_composite),
  School_LMC      = get_r2(model_school_lmc),
  District_LMC    = get_r2(model_district_lmc),
  Job_Satisfaction = get_r2(model_satisfaction),
  Job_Stress      = get_r2(model_stress),
  Retention       = get_r2(model_retention),
  NPS             = get_r2(model_nps)
)

# Combine into a data frame for plotting
r2_df <- data.frame(
  Outcome = factor(names(r2_composite), levels = rev(names(r2_composite))),
  Composite = unname(r2_composite),
  Subscales = unname(r2_subscales)
)

# Reshape to long format for ggplot
library(tidyr)
r2_long <- pivot_longer(r2_df, cols = c(Composite, Subscales),
                        names_to = "Model", values_to = "R2")

# View the final tidy dataset
print(r2_long)

## # A tibble: 14 × 3
##    Outcome          Model         R2
##    <fct>            <chr>      <dbl>
##  1 LMC_Composite    Composite 0.208 
##  2 LMC_Composite    Subscales 0.279 
##  3 School_LMC       Composite 0.243 
##  4 School_LMC       Subscales 0.329 
##  5 District_LMC     Composite 0.232 
##  6 District_LMC     Subscales 0.350 
##  7 Job_Satisfaction Composite 0.289 
##  8 Job_Satisfaction Subscales 0.495 
##  9 Job_Stress       Composite 0.0194
## 10 Job_Stress       Subscales 0.300 
## 11 Retention        Composite 0.175 
## 12 Retention        Subscales 0.279 
## 13 NPS              Composite 0.293 
## 14 NPS              Subscales 0.505

# Make sure you have ggplot2 loaded
library(ggplot2)

# Barplot: Predictive Power (R²)
ggplot(r2_long, aes(x = Outcome, y = R2, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  scale_fill_manual(values = c("Composite" = "#4C78A8", "Subscales" = "#F58518")) +
  labs(
    title = "Predictive Power of TWB Composite vs Subscales",
    x = NULL,
    y = "R-squared (Explained Variance)",
    fill = "Model Type"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text.x = element_text(angle = 30, hjust = 1),
    panel.grid.major.y = element_line(color = "grey90")
  )

Here’s the visualization comparing the predictive power (R²) of the TWB composite score vs subscales across all major outcomes:

🟢 TWB Subscales consistently outperform the composite for all outcomes

🧠 The biggest gains are in:

Job Stress (where the composite fails entirely)

School LMC and Job Satisfaction (with major jumps in R²)

This suggests that while the TWB composite is a strong general predictor, breaking it into subdomains offers richer, more precise insight — especially for actionable organizational improvements.

#need to upload the demographic data and pair based on PID
#import dataset
demo_df <- Spring_2025_NEA_Data_Demographics_1_

# Merge with main df by PID
df_combined <- df %>%
  left_join(demo_df, by = "PID")


# Clean column names
df_combined <- df_combined %>%
  clean_names()

# View cleaned names
glimpse(df_combined)

## Rows: 2,306
## Columns: 83
## $ pid                                                 <chr> "Clifton1", "Clift…
## $ twb1                                                <dbl> 1, 1, 1, 1, 1, 1, …
## $ twb2                                                <dbl> 1, 1, 1, 3, 1, 1, …
## $ twb3                                                <dbl> 1, 1, 1, 4, 1, 1, …
## $ twb4                                                <dbl> 1, 1, 1, 5, 1, 1, …
## $ twb5                                                <dbl> 1, 1, 1, 4, 1, 1, …
## $ twb6                                                <dbl> 1, 1, 6, 1, 1, 1, …
## $ twb7                                                <dbl> 1, 1, 6, 4, 1, 1, …
## $ twb8                                                <dbl> 1, 1, 1, 2, 1, 1, …
## $ twb9                                                <dbl> 1, 1, 1, 5, 1, 1, …
## $ twb10                                               <dbl> 1, 1, 1, 4, 1, 1, …
## $ twb11                                               <dbl> 1, 1, 1, 2, 1, 1, …
## $ twb12                                               <dbl> 1, 1, 1, 4, 1, 1, …
## $ twb13                                               <dbl> 1, 4, 1, 5, 1, 1, …
## $ twb14                                               <dbl> 1, 1, 1, 1, 1, 1, …
## $ twb15                                               <dbl> 1, 1, 1, 4, 1, 3, …
## $ twb16                                               <dbl> 1, 1, 6, 4, 1, 3, …
## $ twb17                                               <dbl> 1, 1, 1, 5, 1, 2, …
## $ twb18                                               <dbl> 1, 4, 6, 4, 1, 3, …
## $ twb19                                               <dbl> 1, 4, 6, 6, 1, 3, …
## $ twb20                                               <dbl> 1, 1, 6, 5, 5, 1, …
## $ twb21                                               <dbl> 1, 1, 5, 5, 4, 1, …
## $ twb22                                               <dbl> 1, 1, 5, 6, 5, 1, …
## $ twb23                                               <dbl> 1, 1, 5, 6, 5, 1, …
## $ twb24                                               <dbl> 3, 4, 4, 5, 4, 3, …
## $ twb25                                               <dbl> 1, 1, 2, 2, 3, 1, …
## $ twb26                                               <dbl> 6, 2, 1, 3, 2, 1, …
## $ twb27                                               <dbl> 6, 4, 6, 4, 4, 3, …
## $ twb28                                               <dbl> 6, 4, 5, 3, 4, 3, …
## $ twb29                                               <dbl> 6, 5, 6, 5, 3, 3, …
## $ twb30                                               <dbl> 6, 5, 4, 5, 4, 3, …
## $ twb31                                               <dbl> 6, 5, 2, 4, 5, 3, …
## $ twb32                                               <dbl> 5, 5, 2, 5, 5, 3, …
## $ twb33                                               <dbl> 5, 5, 3, 5, 5, 3, …
## $ twb34                                               <dbl> 5, 5, 3, 5, 4, 4, …
## $ twb35                                               <dbl> 10, 9, 8, 6, 10, 1…
## $ twb36                                               <dbl> 8, 4, 9, 7, 5, 10,…
## $ twb37                                               <dbl> 10, 9, 10, 5, 10, …
## $ twb38                                               <dbl> 10, 10, 7, 10, 10,…
## $ twb39                                               <fct> No, Yes, No, Yes, …
## $ twb40                                               <dbl> 1, 2, 1, 1, 2, 1, …
## $ twb41                                               <dbl> 2, 1, 2, 6, 6, 1, …
## $ twb42                                               <dbl> 5, 4, 4, 6, 4, 1, …
## $ twb43                                               <dbl> 5, 6, 4, 4, 4, 1, …
## $ twb44                                               <dbl> 5, 5, 5, 5, 5, 6, …
## $ twb45                                               <dbl> 5, 5, 5, 5, 4, 2, …
## $ twb46                                               <dbl> 5, 5, 2, 6, 6, 3, …
## $ twb47                                               <dbl> 5, 5, 1, 5, 4, 3, …
## $ twb48                                               <dbl> 5, 5, 4, 5, 5, 3, …
## $ twb49                                               <dbl> 5, 6, 5, 5, 5, 3, …
## $ twb50                                               <dbl> 5, 6, 3, 5, 5, 3, …
## $ twb51                                               <dbl> 5, 6, 4, 5, 5, 4, …
## $ twb52                                               <dbl> 5, 6, 4, 5, 5, 3, …
## $ twb53                                               <dbl> 5, 6, 3, 5, 5, 3, …
## $ twb_leader_score                                    <dbl> 1.000000, 1.214286…
## $ growth_score                                        <dbl> 1.000000, 1.000000…
## $ wellbeing_score                                     <dbl> 1.0, 4.0, 6.0, 5.0…
## $ acceptance_score                                    <dbl> 1.0, 1.0, 5.5, 5.0…
## $ adaptability_score                                  <dbl> 1.0, 1.0, 5.0, 6.0…
## $ depletion_score                                     <dbl> 2.0, 2.5, 3.0, 3.5…
## $ foundational_support_score                          <dbl> 6.000000, 4.166667…
## $ twb_composite_score                                 <dbl> 13.00000, 14.88095…
## $ union_view_score                                    <dbl> 4.000000, 3.666667…
## $ school_lmc_score                                    <dbl> 5.00, 5.00, 3.25, …
## $ district_lmc_score                                  <dbl> 5.000000, 5.833333…
## $ lmc_composite_score                                 <dbl> 14.000000, 14.5000…
## $ job_satisfaction_score                              <dbl> 10, 9, 8, 6, 10, 1…
## $ job_stress_score                                    <dbl> 8, 4, 9, 7, 5, 10,…
## $ retention_score                                     <dbl> 5.000000, 5.000000…
## $ nps_score                                           <dbl> 10.0, 9.5, 8.5, 7.…
## $ district                                            <chr> "Clifton", "Clifto…
## $ lmc                                                 <chr> "Yes", "Yes", "Yes…
## $ are_you_a_district_or_building_based_administrator  <chr> "No", "No", "No", …
## $ school                                              <chr> "School Three", "S…
## $ gender                                              <chr> "Female", "Female"…
## $ race_ethnicity                                      <chr> "White or Caucasio…
## $ years                                               <dbl> 10, 1, NA, 10, 25,…
## $ years_group                                         <chr> "4-10 years", "0-3…
## $ collaboration                                       <chr> "3+ hours per week…
## $ role                                                <chr> "Non-Certificated …
## $ grade                                               <chr> "Elementary School…
## $ are_you_a_member_of_the_education_association_union <chr> "No", "Yes", "No",…
## $ how_involved_are_you_with_the_union_1_none_4_a_lot  <chr> "1", "2", "1", "1"…

#rename
df_combined <- df_combined %>%
  rename(union_involvement = "how_involved_are_you_with_the_union_1_none_4_a_lot")


df_combined <- df_combined %>%
  rename(district_admin = "are_you_a_district_or_building_based_administrator")


df_combined <- df_combined %>%
  rename(union_member = "are_you_a_member_of_the_education_association_union")

df_combined

## # A tibble: 2,306 × 83
##    pid    twb1  twb2  twb3  twb4  twb5  twb6  twb7  twb8  twb9 twb10 twb11 twb12
##    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 Clif…     1     1     1     1     1     1     1     1     1     1     1     1
##  2 Clif…     1     1     1     1     1     1     1     1     1     1     1     1
##  3 Clif…     1     1     1     1     1     6     6     1     1     1     1     1
##  4 Clif…     1     3     4     5     4     1     4     2     5     4     2     4
##  5 Clif…     1     1     1     1     1     1     1     1     1     1     1     1
##  6 Clif…     1     1     1     1     1     1     1     1     1     1     1     1
##  7 Clif…     1     1     1     1     1     1     1     1     1     1     1     1
##  8 Clif…     1     1     2     3     2     2     1     3     2     2     1     1
##  9 Clif…     6     5     5     5     6     6     4     5     6     6     4     6
## 10 Clif…     6     6     6     6     6     6     6     6     6     6     6     6
## # ℹ 2,296 more rows
## # ℹ 70 more variables: twb13 <dbl>, twb14 <dbl>, twb15 <dbl>, twb16 <dbl>,
## #   twb17 <dbl>, twb18 <dbl>, twb19 <dbl>, twb20 <dbl>, twb21 <dbl>,
## #   twb22 <dbl>, twb23 <dbl>, twb24 <dbl>, twb25 <dbl>, twb26 <dbl>,
## #   twb27 <dbl>, twb28 <dbl>, twb29 <dbl>, twb30 <dbl>, twb31 <dbl>,
## #   twb32 <dbl>, twb33 <dbl>, twb34 <dbl>, twb35 <dbl>, twb36 <dbl>,
## #   twb37 <dbl>, twb38 <dbl>, twb39 <fct>, twb40 <dbl>, twb41 <dbl>, …

#change to proper type
df_combined$pid <- as.factor(df_combined$pid)
df_combined$district <- as.factor(df_combined$district)
df_combined$lmc <- as.factor(df_combined$lmc)
df_combined$district_admin <- as.factor(df_combined$district_admin)
df_combined$school <- as.factor(df_combined$school)
df_combined$gender <- as.factor(df_combined$gender)
df_combined$race_ethnicity <- as.factor(df_combined$race_ethnicity)
df_combined$years_group <- as.factor(df_combined$years_group)
df_combined$collaboration <- as.factor(df_combined$collaboration)
df_combined$role <- as.factor(df_combined$role)
df_combined$grade <- as.factor(df_combined$grade)
df_combined$union_member <- as.factor(df_combined$union_member)

summary(aov(twb_composite_score ~ district, data = df_combined))

##               Df Sum Sq Mean Sq F value Pr(>F)    
## district       2   1468   733.9   62.05 <2e-16 ***
## Residuals   1561  18462    11.8                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 742 observations deleted due to missingness

summary(aov(twb_composite_score ~ gender, data = df_combined))

##               Df Sum Sq Mean Sq F value Pr(>F)  
## gender         4    159   39.65   3.127 0.0142 *
## Residuals   1532  19427   12.68                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 769 observations deleted due to missingness

summary(aov(twb_composite_score ~ race_ethnicity, data = df_combined))

##                  Df Sum Sq Mean Sq F value  Pr(>F)   
## race_ethnicity   22    605   27.51   2.188 0.00119 **
## Residuals      1500  18863   12.57                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 783 observations deleted due to missingness

summary(aov(twb_composite_score ~ role, data = df_combined))

##               Df Sum Sq Mean Sq F value Pr(>F)    
## role           8   1593  199.10   16.92 <2e-16 ***
## Residuals   1532  18027   11.77                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 765 observations deleted due to missingness

summary(aov(twb_composite_score ~ grade, data = df_combined))

##               Df Sum Sq Mean Sq F value  Pr(>F)    
## grade          2    776   388.1   31.69 3.3e-14 ***
## Residuals   1514  18542    12.2                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 789 observations deleted due to missingness

summary(aov(twb_composite_score ~ collaboration, data = df_combined))

##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## collaboration    2    643   321.5   25.82 9.44e-12 ***
## Residuals     1495  18613    12.4                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 808 observations deleted due to missingness

summary(aov(twb_composite_score ~ union_involvement, data = df_combined))

##                     Df Sum Sq Mean Sq F value Pr(>F)
## union_involvement    3     77   25.65   2.023  0.109
## Residuals         1557  19741   12.68               
## 745 observations deleted due to missingness

#lmc composite scores as a function of demographics
summary(aov(lmc_composite_score ~ district, data = df_combined))

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## district       3    388  129.25   17.32 4.34e-11 ***
## Residuals   1785  13317    7.46                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 517 observations deleted due to missingness

summary(aov(lmc_composite_score ~ gender, data = df_combined))

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## gender         4    212   53.01   7.045 1.26e-05 ***
## Residuals   1775  13358    7.53                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 526 observations deleted due to missingness

summary(aov(lmc_composite_score ~ race_ethnicity, data = df_combined))

##                  Df Sum Sq Mean Sq F value   Pr(>F)    
## race_ethnicity   23    537  23.333   3.142 7.86e-07 ***
## Residuals      1743  12945   7.427                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 539 observations deleted due to missingness

summary(aov(lmc_composite_score ~ role, data = df_combined))

##               Df Sum Sq Mean Sq F value  Pr(>F)    
## role          10    563   56.32   7.667 4.3e-12 ***
## Residuals   1774  13033    7.35                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 521 observations deleted due to missingness

summary(aov(lmc_composite_score ~ grade, data = df_combined))

##               Df Sum Sq Mean Sq F value  Pr(>F)   
## grade          3     94   31.19   4.148 0.00613 **
## Residuals   1751  13167    7.52                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 551 observations deleted due to missingness

summary(aov(lmc_composite_score ~ collaboration, data = df_combined))

##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## collaboration    2    472  235.85   31.97 2.31e-14 ***
## Residuals     1747  12887    7.38                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 556 observations deleted due to missingness

summary(aov(lmc_composite_score ~ union_involvement, data = df_combined))

##                     Df Sum Sq Mean Sq F value Pr(>F)    
## union_involvement    3   1416   472.0   68.53 <2e-16 ***
## Residuals         1784  12288     6.9                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 518 observations deleted due to missingness

# Step 1: Inspect unique grade levels
table(df_combined$district)

## 
##       Clifton     Hopatcong Pleasantville    White Bear 
##          1199            96           309           702

# Run the ANOVA
twb_district_aov <- aov(twb_composite_score ~ district, data = df_combined)

# Summary of ANOVA
summary(twb_district_aov)

##               Df Sum Sq Mean Sq F value Pr(>F)    
## district       2   1468   733.9   62.05 <2e-16 ***
## Residuals   1561  18462    11.8                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 742 observations deleted due to missingness

# Post-hoc test: Tukey HSD to find which districts differ
tukey_district <- TukeyHSD(twb_district_aov)
print(tukey_district)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = twb_composite_score ~ district, data = df_combined)
## 
## $district
##                               diff        lwr       upr     p adj
## Hopatcong-Clifton       -2.3403544 -3.2227912 -1.457918 0.0000000
## Pleasantville-Clifton   -2.2050673 -2.7263444 -1.683790 0.0000000
## Pleasantville-Hopatcong  0.1352871 -0.8339634  1.104538 0.9426145

# Optional: Convert to data frame for easy inspection
tukey_df <- as.data.frame(tukey_district$district)
tukey_df$comparison <- rownames(tukey_df)

# Load ggplot2 if not already
library(ggplot2)

# Boxplot to visualize TWB differences across districts
ggplot(df_combined, aes(x = district, y = twb_composite_score, fill = district)) +
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.2) +
  theme_minimal() +
  labs(
    title = "Teacher Well-Being by District",
    x = "District",
    y = "TWB Composite Score"
  ) +
  theme(legend.position = "none")

clifton seems to have significantly greater wellbeing measured by the composite

# Step 1: Inspect unique grade levels
table(df_combined$district)

## 
##       Clifton     Hopatcong Pleasantville    White Bear 
##          1199            96           309           702

# Run the ANOVA
lmc_district_aov <- aov(lmc_composite_score ~ district, data = df_combined)

# Summary of ANOVA
summary(lmc_district_aov)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## district       3    388  129.25   17.32 4.34e-11 ***
## Residuals   1785  13317    7.46                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 517 observations deleted due to missingness

# Post-hoc test: Tukey HSD to find which districts differ
tukey_lmc_district <- TukeyHSD(lmc_district_aov)
print(tukey_lmc_district)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = lmc_composite_score ~ district, data = df_combined)
## 
## $district
##                                diff         lwr         upr     p adj
## Hopatcong-Clifton        -0.8676040 -1.64044380 -0.09476415 0.0205423
## Pleasantville-Clifton    -1.2330901 -1.69727519 -0.76890497 0.0000000
## White Bear-Clifton       -0.1034675 -0.58141869  0.37448367 0.9447454
## Pleasantville-Hopatcong  -0.3654861 -1.21801323  0.48704102 0.6880983
## White Bear-Hopatcong      0.7641365 -0.09596353  1.62423646 0.1019883
## White Bear-Pleasantville  1.1296226  0.53132630  1.72791884 0.0000078

# Optional: Convert to data frame for easy inspection
tukey_lmc_district <- as.data.frame(tukey_lmc_district$district)
tukey_lmc_district$comparison <- rownames(tukey_lmc_district)

# Load ggplot2 if not already
library(ggplot2)

# Boxplot to visualize TWB differences across districts
ggplot(df_combined, aes(x = district, y = lmc_composite_score, fill = district)) +
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.2) +
  theme_minimal() +
  labs(
    title = "LMC by District",
    x = "District",
    y = "LMC Composite Score"
  ) +
  theme(legend.position = "none")

Statistically Significant Differences Pleasantville vs. Clifton: Pleasantville scores are 1.23 points lower, highly significant (p < .001)

Hopatcong vs. Clifton: Hopatcong scores are 0.87 points lower, significant (p = .021)

White Bear vs. Pleasantville: White Bear scores are 1.13 points higher, significant (p < .001) Clifton generally reports higher well-being than Pleasantville and Hopatcong

White Bear is significantly higher than Pleasantville, but not different from Clifton or Hopatcong

Pleasantville and Hopatcong report similarly lower scores

#742 missing rowws removed when looking at the model so investigate further, think it is due to teh composite score not the district variable
# Check how many rows are missing either variable
df_combined %>%
  filter(is.na(twb_composite_score) | is.na(district)) %>%
  nrow()

## [1] 742

df_combined %>%
  filter(district == "White Bear") %>%
  summarise(
    n_total = n(),
    n_twb_complete = sum(!is.na(twb_composite_score))
  )

## # A tibble: 1 × 2
##   n_total n_twb_complete
##     <int>          <int>
## 1     702              0

#ya no one from the white bear district have complete measure so must have had short responses or somethign liek that

library(naniar)

# Visual summary of missingness
miss_var_summary(df_combined)

## # A tibble: 83 × 3
##    variable                   n_miss pct_miss
##    <chr>                       <int>    <num>
##  1 twb_composite_score           742     32.2
##  2 foundational_support_score    710     30.8
##  3 twb29                         707     30.7
##  4 twb30                         703     30.5
##  5 twb31                         540     23.4
##  6 lmc_composite_score           517     22.4
##  7 district_lmc_score            493     21.4
##  8 twb50                         483     20.9
##  9 twb49                         479     20.8
## 10 twb51                         476     20.6
## # ℹ 73 more rows

# Where missing data occur
gg_miss_upset(df_combined, nsets = 10)

# Filter complete cases and drop unused factor levels
df_district_subset <- df_combined %>%
  filter(!is.na(twb_composite_score) & !is.na(district)) %>%
  droplevels()

# Run ANOVA and Tukey HSD
twb_district_aov <- aov(twb_composite_score ~ district, data = df_district_subset)
summary(twb_district_aov)

##               Df Sum Sq Mean Sq F value Pr(>F)    
## district       2   1468   733.9   62.05 <2e-16 ***
## Residuals   1561  18462    11.8                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

tukey_district <- TukeyHSD(twb_district_aov)
tukey_df <- as.data.frame(tukey_district$district)
tukey_df$comparison <- rownames(tukey_df)

# Calculate means and 95% CIs by district
district_summary <- df_district_subset %>%
  group_by(district) %>%
  summarise(
    mean_twb = mean(twb_composite_score, na.rm = TRUE),
    se = sd(twb_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_twb - 1.96 * se,
    ci_upper = mean_twb + 1.96 * se
  )

# Plot with error bars
ggplot(district_summary, aes(x = reorder(district, -mean_twb), y = mean_twb)) +
  geom_col(fill = "steelblue", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "Teacher Well-Being by District",
    x = "District",
    y = "Mean TWB Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

dropped whitebear from visualization due to missing data. Clifton still seems to show greater TWB measured by the composite

potentially look at the leadership factor and the outcomes

# Subset the data to include only relevant gender categories
df_gender_subset <- df_combined %>%
  filter(gender %in% c("Male", "Female", "Prefer not to say"))

# Run ANOVA
twb_gender_aov <- aov(twb_composite_score ~ gender, data = df_gender_subset)
summary(twb_gender_aov)

##               Df Sum Sq Mean Sq F value  Pr(>F)   
## gender         2    140   69.85   5.499 0.00417 **
## Residuals   1522  19331   12.70                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 724 observations deleted due to missingness

# Tukey HSD for gender
tukey_gender <- TukeyHSD(twb_gender_aov)

# Convert to data frame and extract specific comparisons
tukey_gender_df <- as.data.frame(tukey_gender$gender)
tukey_gender_df$comparison <- rownames(tukey_gender_df)

# Filter comparisons of interest
tukey_gender_df_filtered <- tukey_gender_df %>%
  filter(
    grepl("Male-Female|Female-Male|Male-Prefer not to say|Prefer not to say-Male|Female-Prefer not to say|Prefer not to say-Female", comparison)
  )

print(tukey_gender_df_filtered)

##                                diff        lwr         upr       p adj
## Male-Female              -0.1454754 -0.7320466  0.44109591 0.829872021
## Prefer not to say-Female -1.1675669 -1.9946393 -0.34049453 0.002727362
## Prefer not to say-Male   -1.0220916 -1.9751458 -0.06903727 0.032085760
##                                        comparison
## Male-Female                           Male-Female
## Prefer not to say-Female Prefer not to say-Female
## Prefer not to say-Male     Prefer not to say-Male

# Visualization
library(ggplot2)

ggplot(df_gender_subset, aes(x = gender, y = twb_composite_score, fill = gender)) +
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.2) +
  theme_minimal() +
  labs(
    title = "Teacher Well-Being by Gender (Selected Groups)",
    x = "Gender",
    y = "TWB Composite Score"
  ) +
  theme(legend.position = "none")

#look at the count for responses, way more women to be expected 
df_gender_subset %>%
  count(gender) %>%
  mutate(percent = round(100 * n / sum(n), 1))

## # A tibble: 3 × 3
##   gender                n percent
##   <fct>             <int>   <dbl>
## 1 Female             1706    75.9
## 2 Male                365    16.2
## 3 Prefer not to say   178     7.9

peopel who select prefer not to say report lower wellbeing than female and male, but not differneces emerge between male and females

# Subset only the key gender groups and drop missing TWB values
df_gender_subset <- df_combined %>%
  filter(gender %in% c("Male", "Female", "Prefer not to say") & !is.na(twb_composite_score)) %>%
  droplevels()

# Run ANOVA
twb_gender_aov <- aov(twb_composite_score ~ gender, data = df_gender_subset)
summary(twb_gender_aov)

##               Df Sum Sq Mean Sq F value  Pr(>F)   
## gender         2    140   69.85   5.499 0.00417 **
## Residuals   1522  19331   12.70                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Tukey post-hoc test
tukey_gender <- TukeyHSD(twb_gender_aov)
tukey_gender_df <- as.data.frame(tukey_gender$gender)
tukey_gender_df$comparison <- rownames(tukey_gender_df)

# Optional: Filter comparisons of interest
tukey_gender_df_filtered <- tukey_gender_df %>%
  filter(
    grepl("Male-Female|Female-Male|Male-Prefer not to say|Prefer not to say-Male|Female-Prefer not to say|Prefer not to say-Female", comparison)
  )
print(tukey_gender_df_filtered)

##                                diff        lwr         upr       p adj
## Male-Female              -0.1454754 -0.7320466  0.44109591 0.829872021
## Prefer not to say-Female -1.1675669 -1.9946393 -0.34049453 0.002727362
## Prefer not to say-Male   -1.0220916 -1.9751458 -0.06903727 0.032085760
##                                        comparison
## Male-Female                           Male-Female
## Prefer not to say-Female Prefer not to say-Female
## Prefer not to say-Male     Prefer not to say-Male

# Summary statistics: Mean + 95% CI for each gender
gender_summary <- df_gender_subset %>%
  group_by(gender) %>%
  summarise(
    mean_twb = mean(twb_composite_score, na.rm = TRUE),
    se = sd(twb_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_twb - 1.96 * se,
    ci_upper = mean_twb + 1.96 * se
  )

# Visualization
ggplot(gender_summary, aes(x = reorder(gender, -mean_twb), y = mean_twb)) +
  geom_col(fill = "steelblue", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "Teacher Well-Being by Gender",
    x = "Gender",
    y = "Mean TWB Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

df_gender_subset %>%
  count(gender) %>%
  mutate(percent = round(100 * n / sum(n), 1))

## # A tibble: 3 × 3
##   gender                n percent
##   <fct>             <int>   <dbl>
## 1 Female             1167    76.5
## 2 Male                246    16.1
## 3 Prefer not to say   112     7.3

# Subset only the key gender groups and drop missing LMC values
df_gender_lmc <- df_combined %>%
  filter(gender %in% c("Male", "Female", "Prefer not to say") & !is.na(lmc_composite_score)) %>%
  droplevels()

# Run ANOVA
lmc_gender_aov <- aov(lmc_composite_score ~ gender, data = df_gender_lmc)
summary(lmc_gender_aov)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## gender         2    189   94.33   12.51 4.03e-06 ***
## Residuals   1763  13292    7.54                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Tukey post-hoc test
tukey_lmc_gender <- TukeyHSD(lmc_gender_aov)
tukey_lmc_gender_df <- as.data.frame(tukey_lmc_gender$gender)
tukey_lmc_gender_df$comparison <- rownames(tukey_lmc_gender_df)

# Optional: Filter comparisons of interest
tukey_lmc_gender_df_filtered <- tukey_lmc_gender_df %>%
  filter(
    grepl("Male-Female|Female-Male|Male-Prefer not to say|Prefer not to say-Male|Female-Prefer not to say|Prefer not to say-Female", comparison)
  )
print(tukey_lmc_gender_df_filtered)

##                                diff        lwr        upr        p adj
## Male-Female              -0.2863625 -0.7026435  0.1299185 2.400928e-01
## Prefer not to say-Female -1.2570864 -1.8592574 -0.6549155 3.176979e-06
## Prefer not to say-Male   -0.9707239 -1.6595203 -0.2819276 2.776983e-03
##                                        comparison
## Male-Female                           Male-Female
## Prefer not to say-Female Prefer not to say-Female
## Prefer not to say-Male     Prefer not to say-Male

# Summary statistics: Mean + 95% CI for each gender
lmc_gender_summary <- df_gender_lmc %>%
  group_by(gender) %>%
  summarise(
    mean_lmc = mean(lmc_composite_score, na.rm = TRUE),
    se = sd(lmc_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_lmc - 1.96 * se,
    ci_upper = mean_lmc + 1.96 * se
  )

# Visualization
ggplot(lmc_gender_summary, aes(x = reorder(gender, -mean_lmc), y = mean_lmc)) +
  geom_col(fill = "darkgreen", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "LMC Composite Score by Gender",
    x = "Gender",
    y = "Mean LMC Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Optional: View group sizes
df_gender_lmc %>%
  count(gender) %>%
  mutate(percent = round(100 * n / sum(n), 1))

## # A tibble: 3 × 3
##   gender                n percent
##   <fct>             <int>   <dbl>
## 1 Female             1350    76.4
## 2 Male                291    16.5
## 3 Prefer not to say   125     7.1

People who chose “Prefer not to say” for gender report substantially lower LMC composite scores, and this difference is both statistically and practically meaningful.

The lack of difference between men and women suggests that the observed variation is mostly driven by the third group.

This might warrant further investigation—e.g., is “Prefer not to say” associated with other forms of marginalization or lower perceived collaboration/support?

library(dplyr)

# Step 1: Inspect unique grade levels
table(df_combined$race_ethnicity)

## 
##                                                               American Indian/Native American or Alaska Native 
##                                                                                                              2 
##                           American Indian/Native American or Alaska Native,Spanish, Hispanic, or LatinX origin 
##                                                                                                              1 
##                                                                                                          Asian 
##                                                                                                             41 
##                                                                Asian,Native Hawaiian or Other Pacific Islander 
##                                                                                                              1 
##                                                                                                    Asian,Other 
##                                                                                                              1 
##                                                                                        Asian,Prefer not to say 
##                                                                                                              1 
##                                                                      Asian,Spanish, Hispanic, or LatinX origin 
##                                                                                                              3 
##                                                                                      Black or African American 
##                                                                                                             77 
##                                     Black or African American,American Indian/Native American or Alaska Native 
##                                                                                                              2 
## Black or African American,American Indian/Native American or Alaska Native,Spanish, Hispanic, or LatinX origin 
##                                                                                                              1 
##                                                                    Black or African American,Prefer not to say 
##                                                                                                              2 
##                                                  Black or African American,Spanish, Hispanic, or LatinX origin 
##                                                                                                              3 
##                                                                                                          Other 
##                                                                                                             55 
##                                                                                              Prefer not to say 
##                                                                                                            238 
##                                                                            Spanish, Hispanic, or LatinX origin 
##                                                                                                            206 
##                                                                      Spanish, Hispanic, or LatinX origin,Other 
##                                                                                                              1 
##                                                                                             White or Caucasion 
##                                                                                                           1573 
##                                            White or Caucasion,American Indian/Native American or Alaska Native 
##                                                                                                              4 
##        White or Caucasion,American Indian/Native American or Alaska Native,Spanish, Hispanic, or LatinX origin 
##                                                                                                              1 
##                                                                                       White or Caucasion,Asian 
##                                                                                                              6 
##                                                                   White or Caucasion,Black or African American 
##                                                                                                              4 
##            White or Caucasion,Black or African American,American Indian/Native American or Alaska Native,Asian 
##                                                                                                              1 
##                                                   White or Caucasion,Native Hawaiian or Other Pacific Islander 
##                                                                                                              1 
##                                                                                       White or Caucasion,Other 
##                                                                                                              2 
##                                                                           White or Caucasion,Prefer not to say 
##                                                                                                              6 
##                                                         White or Caucasion,Spanish, Hispanic, or LatinX origin 
##                                                                                                             20

df_combined %>%
  count(race_ethnicity, sort = TRUE) %>%
  mutate(percent = round(100 * n / sum(n), 1)) %>%
  rename(
    Race_Ethnicity = race_ethnicity,
    Count = n,
    Percent = percent
  )

## # A tibble: 27 × 3
##    Race_Ethnicity                                         Count Percent
##    <fct>                                                  <int>   <dbl>
##  1 White or Caucasion                                      1573    68.2
##  2 Prefer not to say                                        238    10.3
##  3 Spanish, Hispanic, or LatinX origin                      206     8.9
##  4 Black or African American                                 77     3.3
##  5 Other                                                     55     2.4
##  6 <NA>                                                      53     2.3
##  7 Asian                                                     41     1.8
##  8 White or Caucasion,Spanish, Hispanic, or LatinX origin    20     0.9
##  9 White or Caucasion,Asian                                   6     0.3
## 10 White or Caucasion,Prefer not to say                       6     0.3
## # ℹ 17 more rows

# Step 1: Filter out NA race_ethnicity and run ANOVA
df_race_subset <- df_combined %>%
  filter(!is.na(race_ethnicity))

twb_race_aov <- aov(twb_composite_score ~ race_ethnicity, data = df_race_subset)
summary(twb_race_aov)

##                  Df Sum Sq Mean Sq F value  Pr(>F)   
## race_ethnicity   22    605   27.51   2.188 0.00119 **
## Residuals      1500  18863   12.57                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 730 observations deleted due to missingness

# Step 2: Tukey post-hoc
tukey_race <- TukeyHSD(twb_race_aov)
tukey_race_df <- as.data.frame(tukey_race$race_ethnicity)
tukey_race_df$comparison <- rownames(tukey_race_df)

# Step 3: View top comparisons (sorted by p-value)
tukey_race_df %>%
  arrange(`p adj`) %>%
  head(10)  # view the 10 most significant differences

##                                                                                                                                               diff
## Spanish, Hispanic, or LatinX origin-Prefer not to say                                                                                    2.1115578
## White or Caucasion-Spanish, Hispanic, or LatinX origin                                                                                  -1.1421779
## White or Caucasion-Prefer not to say                                                                                                     0.9693799
## Spanish, Hispanic, or LatinX origin-Black or African American                                                                            1.5414588
## Prefer not to say-Asian                                                                                                                 -1.8530321
## Prefer not to say-Asian,Prefer not to say                                                                                               -9.4622758
## White or Caucasion,Prefer not to say-Asian,Prefer not to say                                                                           -10.0000000
## Black or African American,American Indian/Native American or Alaska Native,Spanish, Hispanic, or LatinX origin-Asian,Prefer not to say -12.5238095
## Black or African American-Asian,Prefer not to say                                                                                       -8.8921769
## White or Caucasion-Asian,Prefer not to say                                                                                              -8.4928959
##                                                                                                                                                lwr
## Spanish, Hispanic, or LatinX origin-Prefer not to say                                                                                    0.7170619
## White or Caucasion-Spanish, Hispanic, or LatinX origin                                                                                  -2.1625374
## White or Caucasion-Prefer not to say                                                                                                    -0.1439773
## Spanish, Hispanic, or LatinX origin-Black or African American                                                                           -0.2558860
## Prefer not to say-Asian                                                                                                                 -4.2871441
## Prefer not to say-Asian,Prefer not to say                                                                                              -22.3497239
## White or Caucasion,Prefer not to say-Asian,Prefer not to say                                                                           -24.0718780
## Black or African American,American Indian/Native American or Alaska Native,Spanish, Hispanic, or LatinX origin-Asian,Prefer not to say -30.6905259
## Black or African American-Asian,Prefer not to say                                                                                      -21.8294156
## White or Caucasion-Asian,Prefer not to say                                                                                             -21.3452433
##                                                                                                                                               upr
## Spanish, Hispanic, or LatinX origin-Prefer not to say                                                                                   3.5060536
## White or Caucasion-Spanish, Hispanic, or LatinX origin                                                                                 -0.1218183
## White or Caucasion-Prefer not to say                                                                                                    2.0827370
## Spanish, Hispanic, or LatinX origin-Black or African American                                                                           3.3388036
## Prefer not to say-Asian                                                                                                                 0.5810798
## Prefer not to say-Asian,Prefer not to say                                                                                               3.4251722
## White or Caucasion,Prefer not to say-Asian,Prefer not to say                                                                            4.0718780
## Black or African American,American Indian/Native American or Alaska Native,Spanish, Hispanic, or LatinX origin-Asian,Prefer not to say  5.6429069
## Black or African American-Asian,Prefer not to say                                                                                       4.0450619
## White or Caucasion-Asian,Prefer not to say                                                                                              4.3594514
##                                                                                                                                               p adj
## Spanish, Hispanic, or LatinX origin-Prefer not to say                                                                                  1.195613e-05
## White or Caucasion-Spanish, Hispanic, or LatinX origin                                                                                 1.052915e-02
## White or Caucasion-Prefer not to say                                                                                                   1.948681e-01
## Spanish, Hispanic, or LatinX origin-Black or African American                                                                          2.188284e-01
## Prefer not to say-Asian                                                                                                                4.505172e-01
## Prefer not to say-Asian,Prefer not to say                                                                                              5.278436e-01
## White or Caucasion,Prefer not to say-Asian,Prefer not to say                                                                           5.962382e-01
## Black or African American,American Indian/Native American or Alaska Native,Spanish, Hispanic, or LatinX origin-Asian,Prefer not to say 6.568727e-01
## Black or African American-Asian,Prefer not to say                                                                                      6.626113e-01
## White or Caucasion-Asian,Prefer not to say                                                                                             7.341314e-01
##                                                                                                                                                                                                                                                                    comparison
## Spanish, Hispanic, or LatinX origin-Prefer not to say                                                                                                                                                                   Spanish, Hispanic, or LatinX origin-Prefer not to say
## White or Caucasion-Spanish, Hispanic, or LatinX origin                                                                                                                                                                 White or Caucasion-Spanish, Hispanic, or LatinX origin
## White or Caucasion-Prefer not to say                                                                                                                                                                                                     White or Caucasion-Prefer not to say
## Spanish, Hispanic, or LatinX origin-Black or African American                                                                                                                                                   Spanish, Hispanic, or LatinX origin-Black or African American
## Prefer not to say-Asian                                                                                                                                                                                                                               Prefer not to say-Asian
## Prefer not to say-Asian,Prefer not to say                                                                                                                                                                                           Prefer not to say-Asian,Prefer not to say
## White or Caucasion,Prefer not to say-Asian,Prefer not to say                                                                                                                                                     White or Caucasion,Prefer not to say-Asian,Prefer not to say
## Black or African American,American Indian/Native American or Alaska Native,Spanish, Hispanic, or LatinX origin-Asian,Prefer not to say Black or African American,American Indian/Native American or Alaska Native,Spanish, Hispanic, or LatinX origin-Asian,Prefer not to say
## Black or African American-Asian,Prefer not to say                                                                                                                                                                           Black or African American-Asian,Prefer not to say
## White or Caucasion-Asian,Prefer not to say                                                                                                                                                                                         White or Caucasion-Asian,Prefer not to say

# Step 4: Plot means with 95% CIs
library(ggplot2)
library(dplyr)

df_race_subset %>%
  group_by(race_ethnicity) %>%
  summarise(
    mean_twb = mean(twb_composite_score, na.rm = TRUE),
    se = sd(twb_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_twb - 1.96 * se,
    ci_upper = mean_twb + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(race_ethnicity, -mean_twb), y = mean_twb)) +
  geom_col(fill = "steelblue", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "Teacher Well-Being by Race/Ethnicity",
    x = "Race/Ethnicity",
    y = "Mean TWB Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

harder to look at ethnicity since people identify many different ways, need to adjust the groups if so likely White or Caucasion , Prefer not to say and Spanish, Hispanic, or LatinX origin
Black or African American

library(dplyr)
library(ggplot2)

# Recode race_ethnicity into broader race_group
df_combined <- df_combined %>%
  mutate(race_group = case_when(
    grepl("^White or Caucasion$", race_ethnicity) ~ "White",
    grepl("^White or Caucasion,", race_ethnicity) ~ "White",
    grepl("Black or African American", race_ethnicity) ~ "Black",
    grepl("Spanish|Hispanic|Latinx", race_ethnicity, ignore.case = TRUE) ~ "Latinx",
    grepl("Asian", race_ethnicity) ~ "Asian",
    grepl("American Indian|Alaska Native", race_ethnicity) ~ "Indigenous",
    race_ethnicity == "Prefer not to say" ~ "Prefer not to say",
    TRUE ~ "Multiracial / Other"
  ))

# Check the distribution after recoding
df_combined %>%
  count(race_group, sort = TRUE)

## # A tibble: 7 × 2
##   race_group              n
##   <chr>               <int>
## 1 White                1618
## 2 Prefer not to say     238
## 3 Latinx                211
## 4 Multiracial / Other   108
## 5 Black                  85
## 6 Asian                  44
## 7 Indigenous              2

# Filter and drop NAs for analysis
df_race_subset <- df_combined %>%
  filter(!is.na(race_group), !is.na(twb_composite_score)) %>%
  droplevels()

# ANOVA
twb_racegroup_aov <- aov(twb_composite_score ~ race_group, data = df_race_subset)
summary(twb_racegroup_aov)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## race_group     6    462    77.0   6.159 2.14e-06 ***
## Residuals   1557  19467    12.5                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Tukey HSD
tukey_racegroup <- TukeyHSD(twb_racegroup_aov)
tukey_racegroup_df <- as.data.frame(tukey_racegroup$race_group)
tukey_racegroup_df$comparison <- rownames(tukey_racegroup_df)

# View significant comparisons
tukey_racegroup_df %>%
  arrange(`p adj`) %>%
  head(10)

##                                             diff         lwr         upr
## Prefer not to say-Latinx              -2.0699096 -3.19654273 -0.94327646
## White-Latinx                          -1.1027214 -1.92081349 -0.28462940
## Prefer not to say-Asian               -2.2138846 -4.12509974 -0.30266941
## Latinx-Black                           1.5485738  0.14255042  2.95459716
## White-Prefer not to say                0.9671881  0.06431972  1.87005658
## Multiracial / Other-Latinx            -1.4022766 -2.74391811 -0.06063516
## Black-Asian                           -1.6925488 -3.78069218  0.39559465
## Multiracial / Other-Asian             -1.5462516 -3.59159829  0.49909507
## White-Asian                           -1.2466964 -2.99389870  0.50050585
## Prefer not to say-Multiracial / Other -0.6676330 -2.06258767  0.72732175
##                                              p adj
## Prefer not to say-Latinx              1.407607e-06
## White-Latinx                          1.406277e-03
## Prefer not to say-Asian               1.144698e-02
## Latinx-Black                          2.005614e-02
## White-Prefer not to say               2.661902e-02
## Multiracial / Other-Latinx            3.373211e-02
## Black-Asian                           2.021238e-01
## Multiracial / Other-Asian             2.789556e-01
## White-Asian                           3.491881e-01
## Prefer not to say-Multiracial / Other 7.950011e-01
##                                                                  comparison
## Prefer not to say-Latinx                           Prefer not to say-Latinx
## White-Latinx                                                   White-Latinx
## Prefer not to say-Asian                             Prefer not to say-Asian
## Latinx-Black                                                   Latinx-Black
## White-Prefer not to say                             White-Prefer not to say
## Multiracial / Other-Latinx                       Multiracial / Other-Latinx
## Black-Asian                                                     Black-Asian
## Multiracial / Other-Asian                         Multiracial / Other-Asian
## White-Asian                                                     White-Asian
## Prefer not to say-Multiracial / Other Prefer not to say-Multiracial / Other

# Plot TWB by race_group with 95% CIs
df_race_subset %>%
  group_by(race_group) %>%
  summarise(
    mean_twb = mean(twb_composite_score),
    se = sd(twb_composite_score) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_twb - 1.96 * se,
    ci_upper = mean_twb + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(race_group, -mean_twb), y = mean_twb)) +
  geom_col(fill = "steelblue", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "Teacher Well-Being by Race/Ethnicity Group",
    x = "Race/Ethnicity Group",
    y = "Mean TWB Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

| Comparison | Difference | 95% CI | p adj | Interpretation | | —————————————– | ———- | ————— | ——- | ———————————————————————- | | Prefer not to say - Latinx | -2.07 | [-3.20, -0.94] | < 0.001 | Significantly lower TWB among those who prefer not to say vs Latinx | | White - Latinx | -1.10 | [-1.92, -0.28] | 0.001 | White participants had lower TWB than Latinx participants | | Prefer not to say - Asian | -2.21 | [-4.13, -0.30] | 0.011 | “Prefer not to say” group had lower TWB than Asian participants | | Latinx - Black | 1.55 | [0.14, 2.95] | 0.020 | Latinx participants had higher TWB than Black participants | | White - Prefer not to say | 0.97 | [0.06, 1.87] | 0.027 | White participants scored higher on TWB than “Prefer not to say” group | | Multiracial / Other - Latinx | -1.40 | [-2.74, -0.06] | 0.034 | Multiracial/Other had lower TWB than Latinx | | Black - Asian | -1.69 | [-3.78, 0.40] | 0.202 | Not statistically significant | | White - Asian | -1.25 | [-2.99, 0.50] | 0.349 | Not statistically significant | | Prefer not to say - Multiracial/Other | -0.67 | [-2.06, 0.73] | 0.795 | Not statistically significant |

# Check distribution
df_combined %>%
  count(race_group, sort = TRUE)

## # A tibble: 7 × 2
##   race_group              n
##   <chr>               <int>
## 1 White                1618
## 2 Prefer not to say     238
## 3 Latinx                211
## 4 Multiracial / Other   108
## 5 Black                  85
## 6 Asian                  44
## 7 Indigenous              2

# Subset data and drop NAs
df_race_subset_lmc <- df_combined %>%
  filter(!is.na(race_group), !is.na(lmc_composite_score)) %>%
  droplevels()

# Run ANOVA
lmc_racegroup_aov <- aov(lmc_composite_score ~ race_group, data = df_race_subset_lmc)
summary(lmc_racegroup_aov)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## race_group     6    358   59.74   7.976 1.65e-08 ***
## Residuals   1782  13347    7.49                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Tukey HSD post-hoc test
tukey_racegroup_lmc <- TukeyHSD(lmc_racegroup_aov)
tukey_racegroup_lmc_df <- as.data.frame(tukey_racegroup_lmc$race_group)
tukey_racegroup_lmc_df$comparison <- rownames(tukey_racegroup_lmc_df)

# View most significant pairwise comparisons
tukey_racegroup_lmc_df %>%
  arrange(`p adj`) %>%
  head(10)

##                                             diff         lwr         upr
## White-Prefer not to say                1.3524747  0.69477189  2.01017748
## Prefer not to say-Latinx              -1.4665453 -2.30854391 -0.62454668
## Prefer not to say-Asian               -1.3513897 -2.75530523  0.05252587
## White-Black                            0.8553052 -0.09957851  1.81018883
## Latinx-Black                           0.9693758 -0.12066621  2.05941776
## Multiracial / Other-Latinx            -0.7916553 -1.89229032  0.30897974
## White-Multiracial / Other              0.6775847 -0.28937384  1.64454319
## Prefer not to say-Multiracial / Other -0.6748900 -1.79792892  0.44814891
## Black-Asian                           -0.8542202 -2.41953441  0.71109410
## Prefer not to say-Black               -0.4971695 -1.60982870  0.61548966
##                                              p adj
## White-Prefer not to say               3.273428e-08
## Prefer not to say-Latinx              6.292493e-06
## Prefer not to say-Asian               6.809517e-02
## White-Black                           1.136665e-01
## Latinx-Black                          1.190670e-01
## Multiracial / Other-Latinx            3.393481e-01
## White-Multiracial / Other             3.721064e-01
## Prefer not to say-Multiracial / Other 5.660330e-01
## Black-Asian                           6.753937e-01
## Prefer not to say-Black               8.432489e-01
##                                                                  comparison
## White-Prefer not to say                             White-Prefer not to say
## Prefer not to say-Latinx                           Prefer not to say-Latinx
## Prefer not to say-Asian                             Prefer not to say-Asian
## White-Black                                                     White-Black
## Latinx-Black                                                   Latinx-Black
## Multiracial / Other-Latinx                       Multiracial / Other-Latinx
## White-Multiracial / Other                         White-Multiracial / Other
## Prefer not to say-Multiracial / Other Prefer not to say-Multiracial / Other
## Black-Asian                                                     Black-Asian
## Prefer not to say-Black                             Prefer not to say-Black

# Plot LMC by race_group with 95% CIs
df_race_subset_lmc %>%
  group_by(race_group) %>%
  summarise(
    mean_lmc = mean(lmc_composite_score),
    se = sd(lmc_composite_score) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_lmc - 1.96 * se,
    ci_upper = mean_lmc + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(race_group, -mean_lmc), y = mean_lmc)) +
  geom_col(fill = "darkgreen", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "LMC Composite Score by Race/Ethnicity Group",
    x = "Race/Ethnicity Group",
    y = "Mean LMC Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Respondents who chose not to report their race or reported indigenous consistently reported the lowest LMC scores, with large and statistically significant differences compared to White and Latinx respondents.

library(dplyr)

df_combined %>%
  count(role, sort = TRUE) %>%
  mutate(percent = round(100 * n / sum(n), 1)) %>%
  rename(
    Role = role,
    Count = n,
    Percent = percent
  )

## # A tibble: 12 × 3
##    Role                                                            Count Percent
##    <fct>                                                           <int>   <dbl>
##  1 Certificated Staff (e.g., classroom teacher, counselor, servic…   983    42.6
##  2 Certified Educator (e.g., classroom teacher, counselor, servic…   526    22.8
##  3 Professional Certified Staff                                      234    10.1
##  4 Classified Staff (e.g., paraprofessional, clerical, etc.)         172     7.5
##  5 Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)   170     7.4
##  6 Certified Educator                                                 64     2.8
##  7 Educational Support Professionals                                  49     2.1
##  8 <NA>                                                               29     1.3
##  9 Classified Staff                                                   27     1.2
## 10 School Site Administrator                                          22     1  
## 11 Support Personnel                                                  17     0.7
## 12 District Administrator                                             13     0.6

library(dplyr)
library(ggplot2)

# Step 1: Define the roles of interest
selected_roles <- c(
  "Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)",
  "Certified Educator (e.g., classroom teacher, counselor, service provider etc.)",
  "Professional Certified Staff",
  "Classified Staff (e.g., paraprofessional, clerical, etc.)",
  "Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)"
)

# Filter the dataset to only those roles
df_role_focus <- df_combined %>%
  filter(role %in% selected_roles)

# Step 2: Run ANOVA
twb_role_focus_aov <- aov(twb_composite_score ~ role, data = df_role_focus)
summary(twb_role_focus_aov)

##               Df Sum Sq Mean Sq F value Pr(>F)    
## role           2   1110   555.1   47.87 <2e-16 ***
## Residuals   1352  15679    11.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 730 observations deleted due to missingness

# Step 3: Tukey HSD post-hoc test
tukey_role_focus <- TukeyHSD(twb_role_focus_aov)
tukey_role_focus_df <- as.data.frame(tukey_role_focus$role)
tukey_role_focus_df$comparison <- rownames(tukey_role_focus_df)

# View top comparisons
tukey_role_focus_df %>%
  arrange(`p adj`) %>%
  head(10)

##                                                                                                                                                      diff
## Professional Certified Staff-Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)                                    -2.4122502
## Professional Certified Staff-Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)                                                   -2.4898535
## Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)-Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)  0.0776033
##                                                                                                                                                      lwr
## Professional Certified Staff-Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)                                    -3.001256
## Professional Certified Staff-Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)                                                   -3.320814
## Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)-Certificated Staff (e.g., classroom teacher, counselor, service provider etc.) -0.611549
##                                                                                                                                                       upr
## Professional Certified Staff-Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)                                    -1.8232443
## Professional Certified Staff-Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)                                                   -1.6588932
## Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)-Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)  0.7667556
##                                                                                                                                                       p adj
## Professional Certified Staff-Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)                                    3.741452e-14
## Professional Certified Staff-Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)                                                   9.812262e-12
## Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)-Certificated Staff (e.g., classroom teacher, counselor, service provider etc.) 9.622516e-01
##                                                                                                                                                                                                                                                                                    comparison
## Professional Certified Staff-Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)                                                                       Professional Certified Staff-Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)
## Professional Certified Staff-Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)                                                                                                     Professional Certified Staff-Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)
## Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)-Certificated Staff (e.g., classroom teacher, counselor, service provider etc.) Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)-Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)

# Step 4: Plot means with 95% CIs
df_role_focus %>%
  group_by(role) %>%
  summarise(
    mean_twb = mean(twb_composite_score, na.rm = TRUE),
    se = sd(twb_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_twb - 1.96 * se,
    ci_upper = mean_twb + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(role, -mean_twb), y = mean_twb)) +
  geom_col(fill = "darkorange", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "Teacher Well-Being by Role (Focused Comparison)",
    x = "Role",
    y = "Mean TWB Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

ptofessional certified staff report lower well being than certificated staff, and non certificated staff

library(dplyr)
library(ggplot2)

# Recode role categories into broader groups
df_combined <- df_combined %>%
  mutate(role_group = case_when(
    role %in% c("Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)",
                "Certified Educator (e.g., classroom teacher, counselor, service provider etc.)",
                "Professional Certified Staff") ~ "Certified Staff",
    role %in% c("Classified Staff (e.g., paraprofessional, clerical, etc.)",
                "Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)",
                "Educational Support Professionals") ~ "Classified Staff",
    grepl("School Site Admin", role, ignore.case = TRUE) ~ "School Site Admin",
    TRUE ~ "Other / NA"
  ))

# View updated role group distribution
df_combined %>%
  count(role_group, sort = TRUE) %>%
  mutate(percent = round(100 * n / sum(n), 1)) %>%
  rename(
    Role = role_group,
    Count = n,
    Percent = percent
  )

## # A tibble: 4 × 3
##   Role              Count Percent
##   <chr>             <int>   <dbl>
## 1 Certified Staff    1743    75.6
## 2 Classified Staff    391    17  
## 3 Other / NA          150     6.5
## 4 School Site Admin    22     1

# Filter out "Other / NA" for analysis
df_role_focus <- df_combined %>%
  filter(role_group %in% c("Certified Staff", "Classified Staff", "School Site Admin"))

# Run ANOVA
twb_role_focus_aov <- aov(twb_composite_score ~ role_group, data = df_role_focus)
summary(twb_role_focus_aov)

##               Df Sum Sq Mean Sq F value Pr(>F)  
## role_group     2     78   39.24   3.194 0.0413 *
## Residuals   1422  17468   12.28                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 731 observations deleted due to missingness

# Post-hoc Tukey HSD test
tukey_role_focus <- TukeyHSD(twb_role_focus_aov)
tukey_role_focus_df <- as.data.frame(tukey_role_focus$role_group)
tukey_role_focus_df$comparison <- rownames(tukey_role_focus_df)

# Plot means with 95% confidence intervals
df_role_focus %>%
  group_by(role_group) %>%
  summarise(
    mean_twb = mean(twb_composite_score, na.rm = TRUE),
    se = sd(twb_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_twb - 1.96 * se,
    ci_upper = mean_twb + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(role_group, -mean_twb), y = mean_twb)) +
  geom_col(fill = "darkorange", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "Teacher Well-Being by Role Group",
    x = "Role Group",
    y = "Mean TWB Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

# View updated role group distribution
df_combined %>%
  count(role_group, sort = TRUE) %>%
  mutate(percent = round(100 * n / sum(n), 1)) %>%
  rename(
    Role = role_group,
    Count = n,
    Percent = percent
  )

## # A tibble: 4 × 3
##   Role              Count Percent
##   <chr>             <int>   <dbl>
## 1 Certified Staff    1743    75.6
## 2 Classified Staff    391    17  
## 3 Other / NA          150     6.5
## 4 School Site Admin    22     1

# Filter out "Other / NA" for analysis
df_role_focus <- df_combined %>%
  filter(role_group %in% c("Certified Staff", "Classified Staff", "School Site Admin"))

# Run ANOVA for LMC
lmc_role_focus_aov <- aov(lmc_composite_score ~ role_group, data = df_role_focus)
summary(lmc_role_focus_aov)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## role_group     2    108   53.99   7.113 0.000839 ***
## Residuals   1665  12639    7.59                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 488 observations deleted due to missingness

# Post-hoc Tukey HSD test
tukey_lmc_role_focus <- TukeyHSD(lmc_role_focus_aov)
tukey_lmc_role_focus_df <- as.data.frame(tukey_lmc_role_focus$role_group)
tukey_lmc_role_focus_df$comparison <- rownames(tukey_lmc_role_focus_df)

# Display only significant mean comparisons (p < 0.05)
significant_diffs <- tukey_lmc_role_focus_df %>%
  filter(`p adj` < 0.05) %>%
  arrange(`p adj`)
print(significant_diffs)

##                                        diff       lwr      upr       p adj
## School Site Admin-Classified Staff 2.143588 0.7031420 3.584034 0.001433278
## School Site Admin-Certified Staff  1.695453 0.3068267 3.084080 0.011784720
##                                                            comparison
## School Site Admin-Classified Staff School Site Admin-Classified Staff
## School Site Admin-Certified Staff   School Site Admin-Certified Staff

# Plot means with 95% confidence intervals
df_role_focus %>%
  group_by(role_group) %>%
  summarise(
    mean_lmc = mean(lmc_composite_score, na.rm = TRUE),
    se = sd(lmc_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_lmc - 1.96 * se,
    ci_upper = mean_lmc + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(role_group, -mean_lmc), y = mean_lmc)) +
  geom_col(fill = "darkgreen", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "LMC Composite Score by Role Group",
    x = "Role Group",
    y = "Mean LMC Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

admin report higher LMC than staff

library(dplyr)
library(ggplot2)

# Step 1: Inspect unique grade levels
table(df_combined$grade)

## 
##   Early Childhood Elementary School       High School     Middle School 
##                36              1191               516               444

# Step 2: Filter out NAs
df_grade_subset <- df_combined %>%
  filter(!is.na(grade))

# Step 3: Run ANOVA
twb_grade_aov <- aov(twb_composite_score ~ grade, data = df_grade_subset)
summary(twb_grade_aov)

##               Df Sum Sq Mean Sq F value  Pr(>F)    
## grade          2    776   388.1   31.69 3.3e-14 ***
## Residuals   1514  18542    12.2                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 670 observations deleted due to missingness

# Step 4: Tukey HSD post-hoc
tukey_grade <- TukeyHSD(twb_grade_aov)
tukey_grade_df <- as.data.frame(tukey_grade$grade)
tukey_grade_df$comparison <- rownames(tukey_grade_df)

# Step 5: View most significant comparisons
tukey_grade_df %>%
  arrange(`p adj`) %>%
  head(10)

##                                       diff        lwr        upr        p adj
## High School-Elementary School   -1.5755421 -2.1107573 -1.0403270 1.967815e-11
## Middle School-Elementary School -1.2952183 -1.8310423 -0.7593942 5.091151e-08
## Middle School-High School        0.2803238 -0.3672235  0.9278712 5.671012e-01
##                                                      comparison
## High School-Elementary School     High School-Elementary School
## Middle School-Elementary School Middle School-Elementary School
## Middle School-High School             Middle School-High School

# Step 6: Plot means with 95% CIs
df_grade_subset %>%
  group_by(grade) %>%
  summarise(
    mean_twb = mean(twb_composite_score, na.rm = TRUE),
    se = sd(twb_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_twb - 1.96 * se,
    ci_upper = mean_twb + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(grade, -mean_twb), y = mean_twb)) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "Teacher Well-Being by Grade Level",
    x = "Grade Level",
    y = "Mean TWB Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

elementary school staff report significantly higher wellbeing than middle and high school staff. no difference between middle and high school.

library(dplyr)
library(ggplot2)

# Step 1: Inspect unique grade levels
table(df_combined$grade)

## 
##   Early Childhood Elementary School       High School     Middle School 
##                36              1191               516               444

# Step 2: Filter out NAs for LMC composite
df_grade_subset <- df_combined %>%
  filter(!is.na(grade), !is.na(lmc_composite_score))

# Step 3: Run ANOVA
lmc_grade_aov <- aov(lmc_composite_score ~ grade, data = df_grade_subset)
summary(lmc_grade_aov)

##               Df Sum Sq Mean Sq F value  Pr(>F)   
## grade          3     94   31.19   4.148 0.00613 **
## Residuals   1751  13167    7.52                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Step 4: Tukey HSD post-hoc
tukey_grade <- TukeyHSD(lmc_grade_aov)
tukey_grade_df <- as.data.frame(tukey_grade$grade)
tukey_grade_df$comparison <- rownames(tukey_grade_df)

# Step 5: View most significant comparisons
tukey_grade_df %>%
  arrange(`p adj`) %>%
  head(10)

##                                          diff        lwr         upr
## Middle School-High School          0.66862134  0.1610090  1.17623367
## High School-Elementary School     -0.43330331 -0.8479735 -0.01863316
## Middle School-Elementary School    0.23531804 -0.1985515  0.66918754
## High School-Early Childhood       -0.70881877 -2.9656072  1.54796965
## Elementary School-Early Childhood -0.27551546 -2.5168692  1.96583825
## Middle School-Early Childhood     -0.04019743 -2.3005924  2.22019753
##                                         p adj                        comparison
## Middle School-High School         0.004020654         Middle School-High School
## High School-Elementary School     0.036553437     High School-Elementary School
## Middle School-Elementary School   0.502804664   Middle School-Elementary School
## High School-Early Childhood       0.850864167       High School-Early Childhood
## Elementary School-Early Childhood 0.989067409 Elementary School-Early Childhood
## Middle School-Early Childhood     0.999965664     Middle School-Early Childhood

# Step 6: Plot means with 95% CIs
df_grade_subset %>%
  group_by(grade) %>%
  summarise(
    mean_lmc = mean(lmc_composite_score, na.rm = TRUE),
    se = sd(lmc_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_lmc - 1.96 * se,
    ci_upper = mean_lmc + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(grade, -mean_lmc), y = mean_lmc)) +
  geom_col(fill = "darkgreen", alpha = 0.8) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "Labor-Management Collaboration (LMC) by Grade Level",
    x = "Grade Level",
    y = "Mean LMC Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

High school educators report lower perceived LMC compared to both middle and elementary levels.

Middle school stands out as the most positive group in terms of LMC perception.

Early childhood differences are too imprecise to draw conclusions

library(psych)
describe(df$twb_composite_score)

##    vars    n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 1564 32.54 3.57  32.74   32.69 3.35  13  42    29 -0.6      1.5 0.09

library(dplyr)
library(ggplot2)

# Step 1: Inspect sample sizes
df_combined %>%
  count(union_involvement) %>%
  mutate(percent = round(100 * n / sum(n), 1))

## # A tibble: 6 × 3
##   union_involvement     n percent
##   <chr>             <int>   <dbl>
## 1 1                   416    18  
## 2 2                   750    32.5
## 3 3                   575    24.9
## 4 4                   153     6.6
## 5 No                    1     0  
## 6 <NA>                411    17.8

# Step 2: Filter out NAs and run ANOVA
df_union <- df_combined %>%
  filter(!is.na(union_involvement))

twb_union_aov <- aov(twb_composite_score ~ union_involvement, data = df_union)
summary(twb_union_aov)

##                     Df Sum Sq Mean Sq F value Pr(>F)
## union_involvement    3     77   25.65   2.023  0.109
## Residuals         1557  19741   12.68               
## 334 observations deleted due to missingness

# Step 3: Tukey post-hoc test
tukey_union <- TukeyHSD(twb_union_aov)
tukey_union_df <- as.data.frame(tukey_union$union_involvement)
tukey_union_df$comparison <- rownames(tukey_union_df)

# View top comparisons
tukey_union_df %>%
  arrange(`p adj`) %>%
  head(10)

##            diff        lwr        upr      p adj comparison
## 4-3 -0.91910020 -1.8995344 0.06133404 0.07542358        4-3
## 4-1 -0.85637629 -1.8528593 0.14010675 0.12086547        4-1
## 4-2 -0.78555387 -1.7344393 0.16333158 0.14431158        4-2
## 3-2  0.13354633 -0.4347217 0.70181441 0.93069805        3-2
## 2-1 -0.07082242 -0.6663522 0.52470740 0.99007282        2-1
## 3-1  0.06272391 -0.5818872 0.70733501 0.99449898        3-1

# Step 4: Plot means with 95% CIs
df_union %>%
  group_by(union_involvement) %>%
  summarise(
    mean_twb = mean(twb_composite_score, na.rm = TRUE),
    se = sd(twb_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_twb - 1.96 * se,
    ci_upper = mean_twb + 1.96 * se
  ) %>%
  ggplot(aes(x = union_involvement, y = mean_twb)) +
  geom_col(fill = "purple", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "Teacher Well-Being by Union Involvement",
    x = "Union Involvement (1 = None, 4 = A Lot)",
    y = "Mean TWB Composite Score"
  )

library(dplyr)
library(ggplot2)

# Step 1: Inspect sample sizes
df_combined %>%
  count(union_involvement) %>%
  mutate(percent = round(100 * n / sum(n), 1))

## # A tibble: 6 × 3
##   union_involvement     n percent
##   <chr>             <int>   <dbl>
## 1 1                   416    18  
## 2 2                   750    32.5
## 3 3                   575    24.9
## 4 4                   153     6.6
## 5 No                    1     0  
## 6 <NA>                411    17.8

# Step 2: Filter out NAs and run ANOVA
df_union <- df_combined %>%
  filter(!is.na(union_involvement), !is.na(lmc_composite_score))

lmc_union_aov <- aov(lmc_composite_score ~ union_involvement, data = df_union)
summary(lmc_union_aov)

##                     Df Sum Sq Mean Sq F value Pr(>F)    
## union_involvement    3   1416   472.0   68.53 <2e-16 ***
## Residuals         1784  12288     6.9                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Step 3: Tukey post-hoc test
tukey_union <- TukeyHSD(lmc_union_aov)
tukey_union_df <- as.data.frame(tukey_union$union_involvement)
tukey_union_df$comparison <- rownames(tukey_union_df)

# View top comparisons
tukey_union_df %>%
  arrange(`p adj`) %>%
  head(10)

##          diff        lwr      upr        p adj comparison
## 3-1 2.3089872  1.8568584 2.761116 9.833578e-12        3-1
## 2-1 1.5807242  1.1504229 2.011025 9.870216e-12        2-1
## 4-1 2.7049635  2.0514042 3.358523 9.870771e-12        4-1
## 3-2 0.7282631  0.3452951 1.111231 6.545805e-06        3-2
## 4-2 1.1242393  0.5164700 1.732009 1.264190e-05        4-2
## 4-3 0.3959762 -0.2274377 1.019390 3.600668e-01        4-3

# Step 4: Plot means with 95% CIs
df_union %>%
  group_by(union_involvement) %>%
  summarise(
    mean_lmc = mean(lmc_composite_score, na.rm = TRUE),
    se = sd(lmc_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_lmc - 1.96 * se,
    ci_upper = mean_lmc + 1.96 * se
  ) %>%
  ggplot(aes(x = as.factor(union_involvement), y = mean_lmc)) +
  geom_col(fill = "purple", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "LMC Composite Score by Union Involvement",
    x = "Union Involvement (1 = None, 4 = A Lot)",
    y = "Mean LMC Composite Score"
  )

There is a strong, consistent, and statistically significant positive association between union involvement and perceived LMC.

Every increase from level 1 to 4 brings a meaningful jump in LMC score—except the final jump (level 3 to 4), which plateaus.

Those with no union involvement (level 1) report much lower labor-management collaboration than those with moderate to high involvement.

library(ggplot2)
library(dplyr)
library(dplyr)
library(ggplot2)

# Step 1: Create new data frame and ensure numeric type
df_union_cont <- df_combined %>%
  select(union_involvement, twb_composite_score) %>%
  filter(!is.na(union_involvement), !is.na(twb_composite_score)) %>%
  mutate(union_involvement = as.numeric(as.character(union_involvement)))

# Step 2: Confirm variable type
str(df_union_cont)

## tibble [1,561 × 2] (S3: tbl_df/tbl/data.frame)
##  $ union_involvement  : num [1:1561] 1 2 1 1 2 1 1 2 1 2 ...
##  $ twb_composite_score: num [1:1561] 13 14.9 27.9 31 19.7 ...

# Step 3: Run linear regression
union_lm <- lm(twb_composite_score ~ union_involvement, data = df_union_cont)
summary(union_lm)

## 
## Call:
## lm(formula = twb_composite_score ~ union_involvement, data = df_union_cont)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.6803  -2.0877   0.2429   2.3857   9.6687 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        32.7966     0.2419  135.59   <2e-16 ***
## union_involvement  -0.1163     0.1030   -1.13    0.259    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.564 on 1559 degrees of freedom
## Multiple R-squared:  0.0008177,  Adjusted R-squared:  0.0001768 
## F-statistic: 1.276 on 1 and 1559 DF,  p-value: 0.2588

# Step 4: Plot relationship with regression line
ggplot(df_union_cont, aes(x = union_involvement, y = twb_composite_score)) +
  geom_jitter(width = 0.1, alpha = 0.3) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  theme_minimal() +
  labs(
    title = "Teacher Well-Being by Union Involvement (Continuous)",
    x = "Union Involvement (1 = None, 4 = A Lot)",
    y = "TWB Composite Score"
  )

the rating of union involvement as a continous variable is not associated with greater wellbeing, in fact the opposite relationship is evident but not significant.

library(ggplot2)
library(dplyr)

# Step 1: Create new data frame and ensure numeric type
df_union_cont <- df_combined %>%
  select(union_involvement, lmc_composite_score) %>%
  filter(!is.na(union_involvement), !is.na(lmc_composite_score)) %>%
  mutate(union_involvement = as.numeric(as.character(union_involvement)))

# Step 2: Confirm variable type
str(df_union_cont)

## tibble [1,788 × 2] (S3: tbl_df/tbl/data.frame)
##  $ union_involvement  : num [1:1788] 1 2 1 1 2 1 1 1 2 1 ...
##  $ lmc_composite_score: num [1:1788] 14 14.5 10.4 15.6 14.4 ...

# Step 3: Run linear regression
union_lm <- lm(lmc_composite_score ~ union_involvement, data = df_union_cont)
summary(union_lm)

## 
## Call:
## lm(formula = lmc_composite_score ~ union_involvement, data = df_union_cont)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.1641 -1.4141  0.4235  1.8359  5.8359 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       11.20782    0.17162   65.31   <2e-16 ***
## union_involvement  0.95623    0.07062   13.54   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.638 on 1786 degrees of freedom
## Multiple R-squared:  0.0931, Adjusted R-squared:  0.0926 
## F-statistic: 183.4 on 1 and 1786 DF,  p-value: < 2.2e-16

# Step 4: Plot relationship with regression line
ggplot(df_union_cont, aes(x = union_involvement, y = lmc_composite_score)) +
  geom_jitter(width = 0.1, alpha = 0.3) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  theme_minimal() +
  labs(
    title = "Labor-Management Collaboration by Union Involvement (Continuous)",
    x = "Union Involvement (1 = None, 4 = A Lot)",
    y = "LMC Composite Score"
  )

# Define demographic variables
demographic_vars <- c("gender", "race_group", "role", "district", "grade", "union_involvement")

# Create a summary table of counts and percentages
demo_summary <- df_combined %>%
  select(all_of(demographic_vars)) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
  filter(!is.na(Response)) %>%
  group_by(Variable, Response) %>%
  summarise(Count = n(), .groups = "drop") %>%
  group_by(Variable) %>%
  mutate(Percent = round(100 * Count / sum(Count), 1)) %>%
  arrange(Variable, desc(Count))

# Display
demo_summary %>%
  kable("html", caption = "Table 1. Demographic Characteristics", align = "lrr") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed"))

Table 1. Demographic Characteristics
Variable	Response	Count	Percent
district	Clifton	1199	52.0
district	White Bear	702	30.4
district	Pleasantville	309	13.4
district	Hopatcong	96	4.2
gender	Female	1706	75.2
gender	Male	365	16.1
gender	Prefer not to say	178	7.8
gender	Prefer to self-describe	14	0.6
gender	Non-binary / third gender	6	0.3
grade	Elementary School	1191	54.5
grade	High School	516	23.6
grade	Middle School	444	20.3
grade	Early Childhood	36	1.6
race_group	White	1618	70.2
race_group	Prefer not to say	238	10.3
race_group	Latinx	211	9.2
race_group	Multiracial / Other	108	4.7
race_group	Black	85	3.7
race_group	Asian	44	1.9
race_group	Indigenous	2	0.1
role	Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)	983	43.2
role	Certified Educator (e.g., classroom teacher, counselor, service provider etc.)	526	23.1
role	Professional Certified Staff	234	10.3
role	Classified Staff (e.g., paraprofessional, clerical, etc.)	172	7.6
role	Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)	170	7.5
role	Certified Educator	64	2.8
role	Educational Support Professionals	49	2.2
role	Classified Staff	27	1.2
role	School Site Administrator	22	1.0
role	Support Personnel	17	0.7
role	District Administrator	13	0.6
union_involvement	2	750	39.6
union_involvement	3	575	30.3
union_involvement	1	416	22.0
union_involvement	4	153	8.1
union_involvement	No	1	0.1

# Load required libraries
library(dplyr)
library(tidyr)
library(knitr)
library(kableExtra)

# Specify variables of interest
continuous_vars <- c(
  "twb_composite_score",
  "job_stress_score",
  "job_satisfaction_score",
  "retention_score",
  "nps_score",
  "union_view_score",
  "school_lmc_score",
  "district_lmc_score",
  "lmc_composite_score",
  "years"
)

# Create descriptive statistics summary
continuous_summary <- df_combined %>%
  select(all_of(continuous_vars)) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Value") %>%
  group_by(Variable) %>%
  summarise(
    N = sum(!is.na(Value)),
    Mean = round(mean(Value, na.rm = TRUE), 2),
    SD   = round(sd(Value, na.rm = TRUE), 2),
    Min  = round(min(Value, na.rm = TRUE), 2),
    Max  = round(max(Value, na.rm = TRUE), 2),
    .groups = "drop"
  )

# Display the summary table
continuous_summary %>%
  kable("html", caption = "Table 2. Summary Statistics for Key Outcome and Collaboration Measures", align = "lrrrrr") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed"))

Table 2. Summary Statistics for Key Outcome and Collaboration Measures
Variable	N	Mean	SD	Min	Max
district_lmc_score	1813	4.35	1.02	1	6
job_satisfaction_score	2304	7.32	1.89	1	10
job_stress_score	2304	6.58	2.17	1	10
lmc_composite_score	1789	13.37	2.77	3	18
nps_score	2300	7.05	2.35	1	10
retention_score	2306	4.04	0.87	1	5
school_lmc_score	1838	4.43	1.09	1	6
twb_composite_score	1564	32.54	3.57	13	42
union_view_score	1875	4.56	1.12	1	6
years	2081	13.78	8.51	0	47

# Function to summarize TWB by a grouping variable
twb_by_group <- function(var) {
  df_combined %>%
    filter(!is.na(.data[[var]]), !is.na(twb_composite_score)) %>%
    group_by(.data[[var]]) %>%
    summarise(
      N = n(),
      Mean_TWB = round(mean(twb_composite_score), 2),
      SD_TWB = round(sd(twb_composite_score), 2),
      .groups = "drop"
    ) %>%
    rename(Group = 1) %>%
    mutate(Variable = var)
}

# Apply to selected variables and combine
group_summary <- bind_rows(
  twb_by_group("race_group"),
  twb_by_group("gender"),
  twb_by_group("district"),
  twb_by_group("role")
)

# Display
group_summary %>%
  select(Variable, Group, N, Mean_TWB, SD_TWB) %>%
  kable("html", caption = "Table 3. TWB Composite Score by Key Demographics", align = "llrrr") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed"))

Table 3. TWB Composite Score by Key Demographics
Variable	Group	N	Mean_TWB	SD_TWB
race_group	Asian	37	33.75	3.17
race_group	Black	77	32.06	3.35
race_group	Indigenous	1	30.81	NA
race_group	Latinx	194	33.61	3.23
race_group	Multiracial / Other	88	32.21	3.30
race_group	Prefer not to say	154	31.54	3.28
race_group	White	1013	32.50	3.67
gender	Female	1167	32.67	3.46
gender	Male	246	32.53	4.18
gender	Non-binary / third gender	4	30.51	2.69
gender	Prefer not to say	112	31.50	3.12
gender	Prefer to self-describe	8	33.08	3.25
district	Clifton	1173	33.10	3.34
district	Hopatcong	90	30.75	3.88
district	Pleasantville	301	30.89	3.67
role	Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)	972	33.06	3.32
role	Certified Educator	61	30.52	4.05
role	Classified Staff	25	31.12	3.69
role	District Administrator	13	34.18	2.79
role	Educational Support Professionals	48	31.67	3.33
role	Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)	156	33.14	3.50
role	Professional Certified Staff	227	30.65	3.70
role	School Site Administrator	22	34.46	2.37
role	Support Personnel	17	31.84	4.38

# Load libraries
library(broom)
library(dplyr)
library(knitr)
library(kableExtra)

# List of outcomes and formulas
outcome_vars <- c(
  "lmc_composite_score",
  "school_lmc_score",
  "district_lmc_score",
  "job_stress_score",
  "job_satisfaction_score",
  "retention_score",
  "nps_score"
)

# Run models and extract key stats
model_summary <- lapply(outcome_vars, function(outcome) {
  model <- lm(as.formula(paste(outcome, "~ twb_composite_score")), data = df_combined)
  tidy_mod <- tidy(model)
  glance_mod <- glance(model)

  data.frame(
    Outcome = outcome,
    Intercept = round(tidy_mod$estimate[1], 3),
    TWB_Estimate = round(tidy_mod$estimate[2], 3),
    Std_Error = round(tidy_mod$std.error[2], 3),
    t_value = round(tidy_mod$statistic[2], 2),
    p_value = signif(tidy_mod$p.value[2], 3),
    R2 = round(glance_mod$r.squared, 3),
    N = glance_mod$nobs
  )
}) %>% bind_rows()

# Clean names
model_summary$Outcome <- recode(model_summary$Outcome,
  lmc_composite_score = "LMC (Composite)",
  school_lmc_score = "LMC (School)",
  district_lmc_score = "LMC (District)",
  job_stress_score = "Job Stress",
  job_satisfaction_score = "Job Satisfaction",
  retention_score = "Retention Intentions",
  nps_score = "NPS Score"
)

# Create and display the table
model_summary %>%
  kable("html", caption = "Table 3. Linear Models Predicting Key Outcomes from Teacher Well-Being", align = "lrrrrrrr") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed")) %>%
  add_header_above(c(" " = 1, "Coefficients" = 4, "Model Fit" = 2, " " = 1))

Table 3. Linear Models Predicting Key Outcomes from Teacher Well-Being
	Coefficients				Model Fit
Outcome	Intercept	TWB_Estimate	Std_Error	t_value	p_value	R2	N
LMC (Composite)	1.755	0.356	0.018	19.80	0	0.208	1498
LMC (School)	-0.391	0.149	0.007	22.13	0	0.243	1528
LMC (District)	-0.230	0.140	0.007	21.36	0	0.232	1510
Job Stress	9.390	-0.087	0.016	-5.56	0	0.019	1562
Job Satisfaction	-2.051	0.291	0.012	25.19	0	0.289	1562
Retention Intentions	0.779	0.102	0.006	18.22	0	0.175	1564
NPS Score	-5.151	0.374	0.015	25.43	0	0.293	1562

# Load libraries
library(broom)
library(dplyr)
library(knitr)
library(kableExtra)

# Define predictors (all used to predict LMC Composite)
predictor_vars <- c(
  "school_lmc_score",
  "district_lmc_score",
  "job_stress_score",
  "job_satisfaction_score",
  "retention_score",
  "nps_score",
  "twb_composite_score"
)

# Run models: lmc_composite_score ~ each predictor
model_summary <- lapply(predictor_vars, function(predictor) {
  model <- lm(as.formula(paste("lmc_composite_score ~", predictor)), data = df_combined)
  tidy_mod <- tidy(model)
  glance_mod <- glance(model)

  data.frame(
    Predictor = predictor,
    Intercept = round(tidy_mod$estimate[1], 2),
    Estimate = round(tidy_mod$estimate[2], 2),
    Std_Error = round(tidy_mod$std.error[2], 2),
    t_value = round(tidy_mod$statistic[2], 2),
    p_value = signif(tidy_mod$p.value[2], 2),
    R2 = round(glance_mod$r.squared, 2),
    N = glance_mod$nobs
  )
}) %>% bind_rows()

# Clean names for display
model_summary$Predictor <- recode(model_summary$Predictor,
  school_lmc_score = "LMC (School)",
  district_lmc_score = "LMC (District)",
  school_lmc_score = "LMC (School)",
  job_stress_score = "Job Stress",
  job_satisfaction_score = "Job Satisfaction",
  retention_score = "Retention Intentions",
  nps_score = "NPS Score",
  twb_composite_score = "Teacher Well-Being"
)

# Create and display the table
model_summary %>%
  kable("html", caption = "Linear Models Predicting LMC (Composite) from Key Variables", align = "lrrrrrrr") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed")) %>%
  add_header_above(c(" " = 1, "Coefficients" = 4, "Model Fit" = 2, " " = 1))

Linear Models Predicting LMC (Composite) from Key Variables
	Coefficients				Model Fit
Predictor	Intercept	Estimate	Std_Error	t_value	p_value	R2	N
LMC (School)	3.28	2.27	0.03	80.31	0	0.78	1789
LMC (District)	2.80	2.43	0.03	81.95	0	0.79	1789
Job Stress	14.68	-0.20	0.03	-6.72	0	0.02	1789
Job Satisfaction	8.97	0.60	0.03	19.36	0	0.17	1789
Retention Intentions	8.08	1.30	0.07	18.75	0	0.16	1789
NPS Score	9.60	0.54	0.02	22.55	0	0.22	1788
Teacher Well-Being	1.75	0.36	0.02	19.80	0	0.21	1498

library(broom)
library(purrr)
library(dplyr)

# Define outcome variables
outcome_vars <- c(
  "lmc_composite_score", "school_lmc_score", "district_lmc_score",
  "job_satisfaction_score", "job_stress_score",
  "retention_score", "nps_score"
)

# Run regressions and collect results
twb_leader_results <- map_dfr(outcome_vars, function(outcome) {
  model <- lm(as.formula(paste(outcome, "~ twb_leader_score")), data = df_combined)
  tidy(model) %>%
    filter(term == "twb_leader_score") %>%
    mutate(
      Outcome = outcome,
      R_squared = summary(model)$r.squared,
      N = nobs(model)
    )
})

# Display results in a clean table
twb_leader_results %>%
  select(Outcome, estimate, std.error, statistic, p.value, R_squared, N) %>%
  arrange(p.value) %>%
  knitr::kable("html", caption = "TWB Leader Score Predicting Key Outcomes") %>%
  kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed"))

TWB Leader Score Predicting Key Outcomes
Outcome	estimate	std.error	statistic	R_squared	N
nps_score	1.2307242	0.0345261	35.64620	0.3593825	2267
job_satisfaction_score	0.9662048	0.0281334	34.34366	0.3421295	2270
school_lmc_score	0.4597026	0.0192065	23.93475	0.2397083	1819
retention_score	0.3205246	0.0144800	22.13566	0.1775325	2272
district_lmc_score	0.3673739	0.0187113	19.63381	0.1771141	1793
lmc_composite_score	0.9399567	0.0517559	18.16134	0.1570016	1773
job_stress_score	-0.4105224	0.0387657	-10.58984	0.0471168	2270

# Load libraries
library(broom)
library(dplyr)
library(knitr)
library(kableExtra)

# Define function to format p-values with stars
p_format <- function(p) {
  stars <- case_when(
    p < 0.001 ~ "***",
    p < 0.01 ~ "**",
    p < 0.05 ~ "*",
    p < 0.1 ~ ".",
    TRUE ~ ""
  )
  paste0(formatC(p, format = "e", digits = 2), " ", stars)
}

# Define all predictor variables for LMC Composite model
predictor_vars <- c(
  "twb_composite_score",
  "growth_score",
  "wellbeing_score",
  "acceptance_score",
  "adaptability_score",
  "depletion_score",
  "foundational_support_score",
  "union_view_score",
  "school_lmc_score",
  "district_lmc_score",
  "job_satisfaction_score",
  "job_stress_score",
  "retention_score",
  "nps_score"
)

# Run models and extract summary info
model_summary <- lapply(predictor_vars, function(predictor) {
  model <- lm(as.formula(paste("lmc_composite_score ~", predictor)), data = df_combined)
  tidy_mod <- tidy(model)
  glance_mod <- glance(model)

  data.frame(
    Predictor = predictor,
    Intercept = round(tidy_mod$estimate[1], 2),
    Estimate = round(tidy_mod$estimate[2], 2),
    Std_Error = round(tidy_mod$std.error[2], 2),
    t_value = round(tidy_mod$statistic[2], 2),
    p_value = p_format(tidy_mod$p.value[2]),
    R2 = round(glance_mod$r.squared, 3),
    N = glance_mod$nobs
  )
}) %>% bind_rows()

# Clean display names
model_summary$Predictor <- recode(model_summary$Predictor,
  twb_composite_score = "TWB (Composite)",
  growth_score = "Growth Mindset",
  wellbeing_score = "Well-Being",
  acceptance_score = "Acceptance",
  adaptability_score = "Adaptability",
  depletion_score = "Depletion",
  foundational_support_score = "Foundational Support",
  union_view_score = "Union View",
  school_lmc_score = "LMC (School)",
  district_lmc_score = "LMC (District)",
  job_satisfaction_score = "Job Satisfaction",
  job_stress_score = "Job Stress",
  retention_score = "Retention Intentions",
  nps_score = "NPS Score"
)

# Display the results in a formatted table
model_summary %>%
  kable("html", caption = "Linear Models Predicting LMC (Composite) from Key Predictors", align = "lrrrrrrr") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed")) %>%
  add_header_above(c(" " = 1, "Coefficients" = 4, "Model Fit" = 2, " " = 1))

Linear Models Predicting LMC (Composite) from Key Predictors
	Coefficients				Model Fit
Predictor	Intercept	Estimate	Std_Error	t_value	p_value	R2	N
TWB (Composite)	1.75	0.36	0.02	19.80	1.11e-77 ***	0.208	1498
Growth Mindset	7.96	1.03	0.09	11.90	1.79e-31 ***	0.074	1786
Well-Being	10.62	0.53	0.08	7.05	2.49e-12 ***	0.027	1784
Acceptance	9.90	0.72	0.08	9.32	3.33e-20 ***	0.046	1785
Adaptability	8.34	0.97	0.10	10.01	5.28e-23 ***	0.053	1786
Depletion	14.05	-0.19	0.05	-3.78	1.64e-04 ***	0.008	1788
Foundational Support	8.72	1.24	0.05	22.78	4.83e-99 ***	0.255	1520
Union View	3.96	2.05	0.03	61.41	0.00e+00 ***	0.678	1789
LMC (School)	3.28	2.27	0.03	80.31	0.00e+00 ***	0.783	1789
LMC (District)	2.80	2.43	0.03	81.95	0.00e+00 ***	0.790	1789
Job Satisfaction	8.97	0.60	0.03	19.36	5.95e-76 ***	0.173	1789
Job Stress	14.68	-0.20	0.03	-6.72	2.36e-11 ***	0.025	1789
Retention Intentions	8.08	1.30	0.07	18.75	1.01e-71 ***	0.164	1789
NPS Score	9.60	0.54	0.02	22.55	2.95e-99 ***	0.222	1788

library(dplyr)
library(purrr)

# Define demographic variables
demographic_vars <- c("district", "gender", "race_group", "role")

# Get summary stats for twb_leader_score by demographic group
twb_leader_by_demo <- map_dfr(demographic_vars, function(var) {
  df_combined %>%
    group_by(.data[[var]]) %>%
    summarise(
      N = n(),
      Mean = round(mean(twb_leader_score, na.rm = TRUE), 2),
      SD = round(sd(twb_leader_score, na.rm = TRUE), 2),
      Min = round(min(twb_leader_score, na.rm = TRUE), 2),
      Max = round(max(twb_leader_score, na.rm = TRUE), 2),
      .groups = "drop"
    ) %>%
    mutate(Demographic = var) %>%
    rename(Group = 1)
})

# Display the table
twb_leader_by_demo %>%
  select(Demographic, Group, N, Mean, SD, Min, Max) %>%
  knitr::kable("html", caption = "TWB Leader Score by Demographic Group") %>%
  kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed"))

TWB Leader Score by Demographic Group
Demographic	Group	N	Mean	SD	Min	Max
district	Clifton	1199	4.96	0.93	1.00	6.00
district	Hopatcong	96	4.08	1.54	1.00	6.00
district	Pleasantville	309	4.09	1.45	1.00	6.00
district	White Bear	702	4.55	1.12	1.00	6.00
gender	Female	1706	4.76	1.09	1.00	6.00
gender	Male	365	4.70	1.24	1.00	6.00
gender	Non-binary / third gender	6	3.14	1.19	1.57	4.71
gender	Prefer not to say	178	4.13	1.22	1.07	6.00
gender	Prefer to self-describe	14	3.80	1.57	1.50	6.00
gender	NA	37	4.33	1.29	1.00	6.00
race_group	Asian	44	5.02	1.09	1.00	6.00
race_group	Black	85	4.78	1.13	1.64	6.00
race_group	Indigenous	2	3.93	1.82	2.64	5.21
race_group	Latinx	211	4.98	0.97	1.43	6.00
race_group	Multiracial / Other	108	4.64	1.25	1.00	6.00
race_group	Prefer not to say	238	4.20	1.23	1.07	6.00
race_group	White	1618	4.70	1.13	1.00	6.00
role	Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)	983	4.92	0.95	1.00	6.00
role	Certified Educator	64	3.99	1.57	1.00	6.00
role	Certified Educator (e.g., classroom teacher, counselor, service provider etc.)	526	4.56	1.09	1.00	6.00
role	Classified Staff	27	4.28	1.51	1.00	6.00
role	Classified Staff (e.g., paraprofessional, clerical, etc.)	172	4.53	1.18	1.07	6.00
role	District Administrator	13	5.36	0.70	3.93	6.00
role	Educational Support Professionals	49	4.44	1.23	1.43	6.00
role	Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)	170	5.16	0.79	1.00	6.00
role	Professional Certified Staff	234	3.99	1.49	1.00	6.00
role	School Site Administrator	22	4.99	0.78	2.64	5.93
role	Support Personnel	17	4.53	1.44	1.64	6.00
role	NA	29	4.30	1.40	1.00	6.00

library(dplyr)
library(purrr)

# Define demographic variables
demographic_vars <- c("district", "gender", "race_group", "role")

# Get summary stats for lmc_composite_score by demographic group
lmc_by_demo <- map_dfr(demographic_vars, function(var) {
  df_combined %>%
    group_by(.data[[var]]) %>%
    summarise(
      N = n(),
      Mean = round(mean(lmc_composite_score, na.rm = TRUE), 2),
      SD = round(sd(lmc_composite_score, na.rm = TRUE), 2),
      Min = round(min(lmc_composite_score, na.rm = TRUE), 2),
      Max = round(max(lmc_composite_score, na.rm = TRUE), 2),
      .groups = "drop"
    ) %>%
    mutate(Demographic = var) %>%
    rename(Group = 1)
})

# Display the table
lmc_by_demo %>%
  select(Demographic, Group, N, Mean, SD, Min, Max) %>%
  knitr::kable("html", caption = "LMC Composite Score by Demographic Group") %>%
  kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed"))

LMC Composite Score by Demographic Group
Demographic	Group	N	Mean	SD	Min	Max
district	Clifton	1199	13.63	2.74	3.00	18.00
district	Hopatcong	96	12.76	2.85	3.00	17.33
district	Pleasantville	309	12.39	2.81	5.33	18.00
district	White Bear	702	13.52	2.55	4.33	18.00
gender	Female	1706	13.52	2.66	3.00	18.00
gender	Male	365	13.23	2.95	3.00	18.00
gender	Non-binary / third gender	6	11.94	1.12	10.58	13.33
gender	Prefer not to say	178	12.26	3.17	4.42	18.00
gender	Prefer to self-describe	14	12.15	2.61	8.17	16.00
gender	NA	37	13.14	4.11	3.00	17.83
race_group	Asian	44	13.55	3.26	5.25	18.00
race_group	Black	85	12.70	2.76	3.00	18.00
race_group	Indigenous	2	10.08	NA	10.08	10.08
race_group	Latinx	211	13.67	2.69	3.00	18.00
race_group	Multiracial / Other	108	12.88	3.05	3.00	17.17
race_group	Prefer not to say	238	12.20	2.98	4.42	18.00
race_group	White	1618	13.56	2.67	3.00	18.00
role	Certificated Staff (e.g., classroom teacher, counselor, service provider etc.)	983	13.69	2.69	3.00	18.00
role	Certified Educator	64	12.95	2.45	4.42	17.33
role	Certified Educator (e.g., classroom teacher, counselor, service provider etc.)	526	13.61	2.57	4.33	18.00
role	Classified Staff	27	12.68	3.15	4.00	16.67
role	Classified Staff (e.g., paraprofessional, clerical, etc.)	172	12.92	2.34	7.50	18.00
role	District Administrator	13	14.38	2.21	10.17	18.00
role	Educational Support Professionals	49	13.18	2.34	7.00	18.00
role	Non-Certificated Staff (e.g., paraprofessional, clerical, etc.)	170	12.96	3.06	3.00	18.00
role	Professional Certified Staff	234	12.22	2.90	5.42	18.00
role	School Site Administrator	22	15.14	1.89	10.08	18.00
role	Support Personnel	17	12.28	2.80	5.33	15.92
role	NA	29	10.94	5.34	3.00	14.50

ggplot(df_combined, aes(x = race_group, y = twb_leader_score, fill = race_group)) +
  geom_boxplot(outlier.shape = NA, alpha = 0.7) +
  geom_jitter(width = 0.2, alpha = 0.2) +
  theme_minimal() +
  labs(title = "TWB Leadership Score by Race Group", x = "Race Group", y = "TWB Leader Score") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")

summary(aov(twb_leader_score ~ race_group, data = df_combined))

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## race_group     6   80.9  13.482    10.5 1.63e-11 ***
## Residuals   2265 2909.5   1.285                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 34 observations deleted due to missingness

TukeyHSD(aov(twb_leader_score ~ race_group, data = df_combined))

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = twb_leader_score ~ race_group, data = df_combined)
## 
## $race_group
##                                              diff        lwr         upr
## Black-Asian                           -0.24363909 -0.8733743  0.38609617
## Indigenous-Asian                      -1.09468439 -3.5140482  1.32467946
## Latinx-Asian                          -0.04111296 -0.6014059  0.51917995
## Multiracial / Other-Asian             -0.37971840 -0.9852630  0.22582620
## Prefer not to say-Asian               -0.82760364 -1.3832869 -0.27192036
## White-Asian                           -0.32078569 -0.8376322  0.19606086
## Indigenous-Black                      -0.85104530 -3.2447021  1.54261146
## Latinx-Black                           0.20252613 -0.2335924  0.63864468
## Multiracial / Other-Black             -0.13607931 -0.6289845  0.35682587
## Prefer not to say-Black               -0.58396455 -1.0141449 -0.15378416
## White-Black                           -0.07714660 -0.4558307  0.30153745
## Latinx-Indigenous                      1.05357143 -1.3227607  3.42990354
## Multiracial / Other-Indigenous         0.71496599 -1.6724406  3.10237262
## Prefer not to say-Indigenous           0.26708075 -2.1081687  2.64233023
## White-Indigenous                       0.77389870 -1.5925663  3.14036369
## Multiracial / Other-Latinx            -0.33860544 -0.7390017  0.06179081
## Prefer not to say-Latinx              -0.78649068 -1.1065169 -0.46646442
## White-Latinx                          -0.27967273 -0.5261748 -0.03317069
## Prefer not to say-Multiracial / Other -0.44788524 -0.8418052 -0.05396528
## White-Multiracial / Other              0.05893271 -0.2779937  0.39585909
## White-Prefer not to say                0.50681795  0.2709811  0.74265483
##                                           p adj
## Black-Asian                           0.9151277
## Indigenous-Asian                      0.8353750
## Latinx-Asian                          0.9999914
## Multiracial / Other-Asian             0.5138291
## Prefer not to say-Asian               0.0002337
## White-Asian                           0.5266691
## Indigenous-Black                      0.9423610
## Latinx-Black                          0.8176673
## Multiracial / Other-Black             0.9835334
## Prefer not to say-Black               0.0012452
## White-Black                           0.9967911
## Latinx-Indigenous                     0.8482712
## Multiracial / Other-Indigenous        0.9750333
## Prefer not to say-Indigenous          0.9998936
## White-Indigenous                      0.9613574
## Multiracial / Other-Latinx            0.1612336
## Prefer not to say-Latinx              0.0000000
## White-Latinx                          0.0145032
## Prefer not to say-Multiracial / Other 0.0141578
## White-Multiracial / Other             0.9986352
## White-Prefer not to say               0.0000000

df_combined %>%
  filter(!is.na(district), !is.na(twb_leader_score)) %>%
  group_by(district) %>%
  summarise(
    mean_leader = mean(twb_leader_score, na.rm = TRUE),
    se = sd(twb_leader_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_leader - 1.96 * se,
    ci_upper = mean_leader + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(district, -mean_leader), y = mean_leader)) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.3) +
  theme_minimal() +
  labs(title = "Leadership Score by District",
       x = "District", y = "Mean TWB Leadership Score") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

df_combined %>%
  filter(!is.na(gender), !is.na(twb_leader_score)) %>%
  group_by(gender) %>%
  summarise(
    mean_leader = mean(twb_leader_score, na.rm = TRUE),
    se = sd(twb_leader_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_leader - 1.96 * se,
    ci_upper = mean_leader + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(gender, -mean_leader), y = mean_leader)) +
  geom_col(fill = "seagreen", alpha = 0.8) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.3) +
  theme_minimal() +
  labs(title = "Leadership Score by Gender",
       x = "Gender", y = "Mean TWB Leadership Score") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

df_combined %>%
  filter(!is.na(race_group), !is.na(twb_leader_score)) %>%
  group_by(race_group) %>%
  summarise(
    mean_leader = mean(twb_leader_score, na.rm = TRUE),
    se = sd(twb_leader_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_leader - 1.96 * se,
    ci_upper = mean_leader + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(race_group, -mean_leader), y = mean_leader)) +
  geom_col(fill = "purple", alpha = 0.8) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.3) +
  theme_minimal() +
  labs(title = "Leadership Score by Race/Ethnicity Group",
       x = "Race/Ethnicity", y = "Mean TWB Leadership Score") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

df_combined %>%
  filter(!is.na(role_group), !is.na(twb_leader_score)) %>%
  group_by(role_group) %>%
  summarise(
    mean_leader = mean(twb_leader_score, na.rm = TRUE),
    se = sd(twb_leader_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_leader - 1.96 * se,
    ci_upper = mean_leader + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(role_group, -mean_leader), y = mean_leader)) +
  geom_col(fill = "darkgreen", alpha = 0.8) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.3) +
  theme_minimal() +
  labs(title = "Leadership Score by Role Group",
       x = "Role Group", y = "Mean TWB Leadership Score") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

df_combined %>%
  filter(union_involvement %in% 1:4, !is.na(twb_leader_score)) %>%
  group_by(union_involvement) %>%
  summarise(
    mean_leader = mean(twb_leader_score, na.rm = TRUE),
    se = sd(twb_leader_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_leader - 1.96 * se,
    ci_upper = mean_leader + 1.96 * se
  ) %>%
  ggplot(aes(x = factor(union_involvement), y = mean_leader)) +
  geom_col(fill = "purple", alpha = 0.8) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.3) +
  theme_minimal() +
  labs(title = "Leadership Score by Union Involvement Level (1–4)",
       x = "Union Involvement (1 = A little, 4 = A lot)",
       y = "Mean TWB Leadership Score") +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))

# Step 1: Inspect unique grade levels
table(df_combined$district)

## 
##       Clifton     Hopatcong Pleasantville    White Bear 
##          1199            96           309           702

# Run the ANOVA
lmc_district_aov <- aov(lmc_composite_score ~ district, data = df_combined)

# Summary of ANOVA
summary(lmc_district_aov)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## district       3    388  129.25   17.32 4.34e-11 ***
## Residuals   1785  13317    7.46                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 517 observations deleted due to missingness

# Post-hoc test: Tukey HSD to find which districts differ
tukey_lmc_district <- TukeyHSD(lmc_district_aov)
print(tukey_lmc_district)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = lmc_composite_score ~ district, data = df_combined)
## 
## $district
##                                diff         lwr         upr     p adj
## Hopatcong-Clifton        -0.8676040 -1.64044380 -0.09476415 0.0205423
## Pleasantville-Clifton    -1.2330901 -1.69727519 -0.76890497 0.0000000
## White Bear-Clifton       -0.1034675 -0.58141869  0.37448367 0.9447454
## Pleasantville-Hopatcong  -0.3654861 -1.21801323  0.48704102 0.6880983
## White Bear-Hopatcong      0.7641365 -0.09596353  1.62423646 0.1019883
## White Bear-Pleasantville  1.1296226  0.53132630  1.72791884 0.0000078

# Optional: Convert to data frame for easy inspection
tukey_lmc_district <- as.data.frame(tukey_lmc_district$district)
tukey_lmc_district$comparison <- rownames(tukey_lmc_district)

# Load ggplot2 if not already
library(ggplot2)

# Boxplot to visualize TWB differences across districts
ggplot(df_combined, aes(x = district, y = lmc_composite_score, fill = district)) +
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.2) +
  theme_minimal() +
  labs(
    title = "LMC by District",
    x = "District",
    y = "LMC Composite Score"
  ) +
  theme(legend.position = "none")

library(dplyr)
library(ggplot2)

# Step 1: Inspect unique districts
table(df_combined$district)

## 
##       Clifton     Hopatcong Pleasantville    White Bear 
##          1199            96           309           702

# Step 2: Run ANOVA
lmc_district_aov <- aov(lmc_composite_score ~ district, data = df_combined)
summary(lmc_district_aov)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## district       3    388  129.25   17.32 4.34e-11 ***
## Residuals   1785  13317    7.46                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 517 observations deleted due to missingness

# Step 3: Tukey post-hoc test
tukey_lmc_district <- TukeyHSD(lmc_district_aov)
tukey_df <- as.data.frame(tukey_lmc_district$district)
tukey_df$comparison <- rownames(tukey_df)

# Step 4: Plot bar chart with means and 95% CIs
df_combined %>%
  filter(!is.na(district)) %>%
  group_by(district) %>%
  summarise(
    mean_lmc = mean(lmc_composite_score, na.rm = TRUE),
    se = sd(lmc_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_lmc - 1.96 * se,
    ci_upper = mean_lmc + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(district, -mean_lmc), y = mean_lmc, fill = district)) +
  geom_col(alpha = 0.8) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "Mean LMC Composite Score by District",
    x = "District",
    y = "LMC Composite Score"
  ) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 30, hjust = 1))

library(dplyr)
library(ggplot2)
library(knitr)
library(kableExtra)

# Step 1: Inspect unique districts
table(df_combined$district)

## 
##       Clifton     Hopatcong Pleasantville    White Bear 
##          1199            96           309           702

# Step 2: Run ANOVA
lmc_district_aov <- aov(lmc_composite_score ~ district, data = df_combined)
summary(lmc_district_aov)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## district       3    388  129.25   17.32 4.34e-11 ***
## Residuals   1785  13317    7.46                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 517 observations deleted due to missingness

# Step 3: Tukey post-hoc test
tukey_lmc_district <- TukeyHSD(lmc_district_aov)
tukey_df <- as.data.frame(tukey_lmc_district$district)
tukey_df$comparison <- rownames(tukey_df)

# Step 4: Filter for significant comparisons (p < .05)
sig_comparisons <- tukey_df %>%
  filter(`p adj` < 0.05) %>%
  arrange(`p adj`) %>%
  mutate(across(where(is.numeric), ~round(., 3))) %>%
  rename(
    Mean_Diff = diff,
    Lower_CI = lwr,
    Upper_CI = upr,
    p_value = `p adj`
  )

# Step 5: Print significant comparisons table
sig_comparisons %>%
  select(comparison, Mean_Diff, Lower_CI, Upper_CI, p_value) %>%
  kable("html", caption = "Significant Pairwise Comparisons: LMC Composite Score by District") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed"))

Significant Pairwise Comparisons: LMC Composite Score by District
	comparison	Mean_Diff	Lower_CI	Upper_CI	p_value
Pleasantville-Clifton	Pleasantville-Clifton	-1.233	-1.697	-0.769	0.000
White Bear-Pleasantville	White Bear-Pleasantville	1.130	0.531	1.728	0.000
Hopatcong-Clifton	Hopatcong-Clifton	-0.868	-1.640	-0.095	0.021

# Step 6: Summary stats for bar chart with 95% CIs
lmc_summary <- df_combined %>%
  filter(!is.na(district)) %>%
  group_by(district) %>%
  summarise(
    mean_lmc = mean(lmc_composite_score, na.rm = TRUE),
    se = sd(lmc_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n(),
    .groups = "drop"
  ) %>%
  mutate(
    ci_lower = mean_lmc - 1.96 * se,
    ci_upper = mean_lmc + 1.96 * se
  )

# Step 7: Plot bar chart with 95% CIs
ggplot(lmc_summary, aes(x = reorder(district, -mean_lmc), y = mean_lmc, fill = district)) +
  geom_col(alpha = 0.8) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "Mean LMC Composite Score by District",
    x = "District",
    y = "LMC Composite Score"
  ) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 30, hjust = 1))

# Subset only the key gender groups and drop missing LMC values
df_gender_lmc <- df_combined %>%
  filter(gender %in% c("Male", "Female", "Prefer not to say") & !is.na(lmc_composite_score)) %>%
  droplevels()

# Run ANOVA
lmc_gender_aov <- aov(lmc_composite_score ~ gender, data = df_gender_lmc)
summary(lmc_gender_aov)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## gender         2    189   94.33   12.51 4.03e-06 ***
## Residuals   1763  13292    7.54                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Tukey post-hoc test
tukey_lmc_gender <- TukeyHSD(lmc_gender_aov)
tukey_lmc_gender_df <- as.data.frame(tukey_lmc_gender$gender)
tukey_lmc_gender_df$comparison <- rownames(tukey_lmc_gender_df)

# Optional: Filter comparisons of interest
tukey_lmc_gender_df_filtered <- tukey_lmc_gender_df %>%
  filter(
    grepl("Male-Female|Female-Male|Male-Prefer not to say|Prefer not to say-Male|Female-Prefer not to say|Prefer not to say-Female", comparison)
  )
print(tukey_lmc_gender_df_filtered)

##                                diff        lwr        upr        p adj
## Male-Female              -0.2863625 -0.7026435  0.1299185 2.400928e-01
## Prefer not to say-Female -1.2570864 -1.8592574 -0.6549155 3.176979e-06
## Prefer not to say-Male   -0.9707239 -1.6595203 -0.2819276 2.776983e-03
##                                        comparison
## Male-Female                           Male-Female
## Prefer not to say-Female Prefer not to say-Female
## Prefer not to say-Male     Prefer not to say-Male

# Summary statistics: Mean + 95% CI for each gender
lmc_gender_summary <- df_gender_lmc %>%
  group_by(gender) %>%
  summarise(
    mean_lmc = mean(lmc_composite_score, na.rm = TRUE),
    se = sd(lmc_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_lmc - 1.96 * se,
    ci_upper = mean_lmc + 1.96 * se
  )

# Visualization
ggplot(lmc_gender_summary, aes(x = reorder(gender, -mean_lmc), y = mean_lmc)) +
  geom_col(fill = "darkgreen", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "LMC Composite Score by Gender",
    x = "Gender",
    y = "Mean LMC Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Optional: View group sizes
df_gender_lmc %>%
  count(gender) %>%
  mutate(percent = round(100 * n / sum(n), 1))

## # A tibble: 3 × 3
##   gender                n percent
##   <fct>             <int>   <dbl>
## 1 Female             1350    76.4
## 2 Male                291    16.5
## 3 Prefer not to say   125     7.1

# Check distribution
df_combined %>%
  count(race_group, sort = TRUE)

## # A tibble: 7 × 2
##   race_group              n
##   <chr>               <int>
## 1 White                1618
## 2 Prefer not to say     238
## 3 Latinx                211
## 4 Multiracial / Other   108
## 5 Black                  85
## 6 Asian                  44
## 7 Indigenous              2

# Subset data and drop NAs
df_race_subset_lmc <- df_combined %>%
  filter(!is.na(race_group), !is.na(lmc_composite_score)) %>%
  droplevels()

# Run ANOVA
lmc_racegroup_aov <- aov(lmc_composite_score ~ race_group, data = df_race_subset_lmc)
summary(lmc_racegroup_aov)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## race_group     6    358   59.74   7.976 1.65e-08 ***
## Residuals   1782  13347    7.49                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Tukey HSD post-hoc test
tukey_racegroup_lmc <- TukeyHSD(lmc_racegroup_aov)
tukey_racegroup_lmc_df <- as.data.frame(tukey_racegroup_lmc$race_group)
tukey_racegroup_lmc_df$comparison <- rownames(tukey_racegroup_lmc_df)

# View most significant pairwise comparisons
tukey_racegroup_lmc_df %>%
  arrange(`p adj`) %>%
  head(10)

##                                             diff         lwr         upr
## White-Prefer not to say                1.3524747  0.69477189  2.01017748
## Prefer not to say-Latinx              -1.4665453 -2.30854391 -0.62454668
## Prefer not to say-Asian               -1.3513897 -2.75530523  0.05252587
## White-Black                            0.8553052 -0.09957851  1.81018883
## Latinx-Black                           0.9693758 -0.12066621  2.05941776
## Multiracial / Other-Latinx            -0.7916553 -1.89229032  0.30897974
## White-Multiracial / Other              0.6775847 -0.28937384  1.64454319
## Prefer not to say-Multiracial / Other -0.6748900 -1.79792892  0.44814891
## Black-Asian                           -0.8542202 -2.41953441  0.71109410
## Prefer not to say-Black               -0.4971695 -1.60982870  0.61548966
##                                              p adj
## White-Prefer not to say               3.273428e-08
## Prefer not to say-Latinx              6.292493e-06
## Prefer not to say-Asian               6.809517e-02
## White-Black                           1.136665e-01
## Latinx-Black                          1.190670e-01
## Multiracial / Other-Latinx            3.393481e-01
## White-Multiracial / Other             3.721064e-01
## Prefer not to say-Multiracial / Other 5.660330e-01
## Black-Asian                           6.753937e-01
## Prefer not to say-Black               8.432489e-01
##                                                                  comparison
## White-Prefer not to say                             White-Prefer not to say
## Prefer not to say-Latinx                           Prefer not to say-Latinx
## Prefer not to say-Asian                             Prefer not to say-Asian
## White-Black                                                     White-Black
## Latinx-Black                                                   Latinx-Black
## Multiracial / Other-Latinx                       Multiracial / Other-Latinx
## White-Multiracial / Other                         White-Multiracial / Other
## Prefer not to say-Multiracial / Other Prefer not to say-Multiracial / Other
## Black-Asian                                                     Black-Asian
## Prefer not to say-Black                             Prefer not to say-Black

# Plot LMC by race_group with 95% CIs
df_race_subset_lmc %>%
  group_by(race_group) %>%
  summarise(
    mean_lmc = mean(lmc_composite_score),
    se = sd(lmc_composite_score) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_lmc - 1.96 * se,
    ci_upper = mean_lmc + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(race_group, -mean_lmc), y = mean_lmc)) +
  geom_col(fill = "darkgreen", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "LMC Composite Score by Race/Ethnicity Group",
    x = "Race/Ethnicity Group",
    y = "Mean LMC Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# View updated role group distribution
df_combined %>%
  count(role_group, sort = TRUE) %>%
  mutate(percent = round(100 * n / sum(n), 1)) %>%
  rename(
    Role = role_group,
    Count = n,
    Percent = percent
  )

## # A tibble: 4 × 3
##   Role              Count Percent
##   <chr>             <int>   <dbl>
## 1 Certified Staff    1743    75.6
## 2 Classified Staff    391    17  
## 3 Other / NA          150     6.5
## 4 School Site Admin    22     1

# Filter out "Other / NA" for analysis
df_role_focus <- df_combined %>%
  filter(role_group %in% c("Certified Staff", "Classified Staff", "School Site Admin"))

# Run ANOVA for LMC
lmc_role_focus_aov <- aov(lmc_composite_score ~ role_group, data = df_role_focus)
summary(lmc_role_focus_aov)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## role_group     2    108   53.99   7.113 0.000839 ***
## Residuals   1665  12639    7.59                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 488 observations deleted due to missingness

# Post-hoc Tukey HSD test
tukey_lmc_role_focus <- TukeyHSD(lmc_role_focus_aov)
tukey_lmc_role_focus_df <- as.data.frame(tukey_lmc_role_focus$role_group)
tukey_lmc_role_focus_df$comparison <- rownames(tukey_lmc_role_focus_df)

# Display only significant mean comparisons (p < 0.05)
significant_diffs <- tukey_lmc_role_focus_df %>%
  filter(`p adj` < 0.05) %>%
  arrange(`p adj`)
print(significant_diffs)

##                                        diff       lwr      upr       p adj
## School Site Admin-Classified Staff 2.143588 0.7031420 3.584034 0.001433278
## School Site Admin-Certified Staff  1.695453 0.3068267 3.084080 0.011784720
##                                                            comparison
## School Site Admin-Classified Staff School Site Admin-Classified Staff
## School Site Admin-Certified Staff   School Site Admin-Certified Staff

# Plot means with 95% confidence intervals
df_role_focus %>%
  group_by(role_group) %>%
  summarise(
    mean_lmc = mean(lmc_composite_score, na.rm = TRUE),
    se = sd(lmc_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_lmc - 1.96 * se,
    ci_upper = mean_lmc + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(role_group, -mean_lmc), y = mean_lmc)) +
  geom_col(fill = "darkgreen", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "LMC Composite Score by Role Group",
    x = "Role Group",
    y = "Mean LMC Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

library(dplyr)
library(ggplot2)

# Step 1: Inspect unique grade levels
table(df_combined$grade)

## 
##   Early Childhood Elementary School       High School     Middle School 
##                36              1191               516               444

# Step 2: Filter out NAs for LMC composite
df_grade_subset <- df_combined %>%
  filter(!is.na(grade), !is.na(lmc_composite_score))

# Step 3: Run ANOVA
lmc_grade_aov <- aov(lmc_composite_score ~ grade, data = df_grade_subset)
summary(lmc_grade_aov)

##               Df Sum Sq Mean Sq F value  Pr(>F)   
## grade          3     94   31.19   4.148 0.00613 **
## Residuals   1751  13167    7.52                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Step 4: Tukey HSD post-hoc
tukey_grade <- TukeyHSD(lmc_grade_aov)
tukey_grade_df <- as.data.frame(tukey_grade$grade)
tukey_grade_df$comparison <- rownames(tukey_grade_df)

# Step 5: View most significant comparisons
tukey_grade_df %>%
  arrange(`p adj`) %>%
  head(10)

##                                          diff        lwr         upr
## Middle School-High School          0.66862134  0.1610090  1.17623367
## High School-Elementary School     -0.43330331 -0.8479735 -0.01863316
## Middle School-Elementary School    0.23531804 -0.1985515  0.66918754
## High School-Early Childhood       -0.70881877 -2.9656072  1.54796965
## Elementary School-Early Childhood -0.27551546 -2.5168692  1.96583825
## Middle School-Early Childhood     -0.04019743 -2.3005924  2.22019753
##                                         p adj                        comparison
## Middle School-High School         0.004020654         Middle School-High School
## High School-Elementary School     0.036553437     High School-Elementary School
## Middle School-Elementary School   0.502804664   Middle School-Elementary School
## High School-Early Childhood       0.850864167       High School-Early Childhood
## Elementary School-Early Childhood 0.989067409 Elementary School-Early Childhood
## Middle School-Early Childhood     0.999965664     Middle School-Early Childhood

# Step 6: Plot means with 95% CIs
df_grade_subset %>%
  group_by(grade) %>%
  summarise(
    mean_lmc = mean(lmc_composite_score, na.rm = TRUE),
    se = sd(lmc_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_lmc - 1.96 * se,
    ci_upper = mean_lmc + 1.96 * se
  ) %>%
  ggplot(aes(x = reorder(grade, -mean_lmc), y = mean_lmc)) +
  geom_col(fill = "darkgreen", alpha = 0.8) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "Labor-Management Collaboration (LMC) by Grade Level",
    x = "Grade Level",
    y = "Mean LMC Composite Score"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

library(dplyr)
library(ggplot2)

# Step 1: Inspect sample sizes
df_combined %>%
  count(union_involvement) %>%
  mutate(percent = round(100 * n / sum(n), 1))

## # A tibble: 6 × 3
##   union_involvement     n percent
##   <chr>             <int>   <dbl>
## 1 1                   416    18  
## 2 2                   750    32.5
## 3 3                   575    24.9
## 4 4                   153     6.6
## 5 No                    1     0  
## 6 <NA>                411    17.8

# Step 2: Filter out NAs and run ANOVA
df_union <- df_combined %>%
  filter(!is.na(union_involvement), !is.na(lmc_composite_score))

lmc_union_aov <- aov(lmc_composite_score ~ union_involvement, data = df_union)
summary(lmc_union_aov)

##                     Df Sum Sq Mean Sq F value Pr(>F)    
## union_involvement    3   1416   472.0   68.53 <2e-16 ***
## Residuals         1784  12288     6.9                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Step 3: Tukey post-hoc test
tukey_union <- TukeyHSD(lmc_union_aov)
tukey_union_df <- as.data.frame(tukey_union$union_involvement)
tukey_union_df$comparison <- rownames(tukey_union_df)

# View top comparisons
tukey_union_df %>%
  arrange(`p adj`) %>%
  head(10)

##          diff        lwr      upr        p adj comparison
## 3-1 2.3089872  1.8568584 2.761116 9.833578e-12        3-1
## 2-1 1.5807242  1.1504229 2.011025 9.870216e-12        2-1
## 4-1 2.7049635  2.0514042 3.358523 9.870771e-12        4-1
## 3-2 0.7282631  0.3452951 1.111231 6.545805e-06        3-2
## 4-2 1.1242393  0.5164700 1.732009 1.264190e-05        4-2
## 4-3 0.3959762 -0.2274377 1.019390 3.600668e-01        4-3

# Step 4: Plot means with 95% CIs
df_union %>%
  group_by(union_involvement) %>%
  summarise(
    mean_lmc = mean(lmc_composite_score, na.rm = TRUE),
    se = sd(lmc_composite_score, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  mutate(
    ci_lower = mean_lmc - 1.96 * se,
    ci_upper = mean_lmc + 1.96 * se
  ) %>%
  ggplot(aes(x = as.factor(union_involvement), y = mean_lmc)) +
  geom_col(fill = "purple", alpha = 0.7) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
  theme_minimal() +
  labs(
    title = "LMC Composite Score by Union Involvement",
    x = "Union Involvement (1 = None, 4 = A Lot)",
    y = "Mean LMC Composite Score"
  )

library(ggplot2)
library(dplyr)

# Step 1: Create new data frame and ensure numeric type
df_union_cont <- df_combined %>%
  select(union_involvement, lmc_composite_score) %>%
  filter(!is.na(union_involvement), !is.na(lmc_composite_score)) %>%
  mutate(union_involvement = as.numeric(as.character(union_involvement)))

# Step 2: Confirm variable type
str(df_union_cont)

## tibble [1,788 × 2] (S3: tbl_df/tbl/data.frame)
##  $ union_involvement  : num [1:1788] 1 2 1 1 2 1 1 1 2 1 ...
##  $ lmc_composite_score: num [1:1788] 14 14.5 10.4 15.6 14.4 ...

# Step 3: Run linear regression
union_lm <- lm(lmc_composite_score ~ union_involvement, data = df_union_cont)
summary(union_lm)

## 
## Call:
## lm(formula = lmc_composite_score ~ union_involvement, data = df_union_cont)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.1641 -1.4141  0.4235  1.8359  5.8359 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       11.20782    0.17162   65.31   <2e-16 ***
## union_involvement  0.95623    0.07062   13.54   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.638 on 1786 degrees of freedom
## Multiple R-squared:  0.0931, Adjusted R-squared:  0.0926 
## F-statistic: 183.4 on 1 and 1786 DF,  p-value: < 2.2e-16

# Step 4: Plot relationship with regression line
ggplot(df_union_cont, aes(x = union_involvement, y = lmc_composite_score)) +
  geom_jitter(width = 0.1, alpha = 0.3) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  theme_minimal() +
  labs(
    title = "Labor-Management Collaboration by Union Involvement (Continuous)",
    x = "Union Involvement (1 = None, 4 = A Lot)",
    y = "LMC Composite Score"
  )

why doesnt mound view show up?

Ed_thriving_TWB_LMC

K.Evans

2025-05-24