Load packages

library(corrr)
library(corrplot)
## corrplot 0.92 loaded
library(psych)
library(psy)
## 
## Attaching package: 'psy'
## The following object is masked from 'package:psych':
## 
##     wkappa
library(GPArotation)
## 
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
library(knitr)
library(kableExtra)
library(lavaan)
## This is lavaan 0.6-18
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
## 
##     cor2cov
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()      masks psych::%+%()
## ✖ ggplot2::alpha()    masks psych::alpha()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Load data

data <- read.csv("~/Google drive/My Drive/YEAR 2/PROJECTS/DEREK/Status Distribution/Studies/Study 1: Scale/Supplemental Study/raw_study1supplement_data_8.8.25.csv")

Exclusions

data <- data %>% 
  slice(-c(1:3)) %>% 
  filter(attn_bots != "14285733") %>% 
  filter(attn == 24)  %>%
   unite(geolocation, LocationLatitude, LocationLongitude) %>%
   group_by(geolocation) %>%
   mutate(geo_frequency = n()) %>%
   filter(geo_frequency < 3) %>%
   ungroup()

Data cleaning

Gather and clean numeric data

df_numeric <- data %>%
  select(c(ResponseId, dist_1R:attr_12R, admire_proneness_1:admire_proneness_12, OCBI_1:OCBI_8, UPBs_1:UPBs_6, SZSB_1:SZSB_8R, Extraversion:Open, sdo_1:sdo_8R, need.for.status_1:need.for.status_8, dom_1:prest_9R, IRI_1:IRI_7, fv_inclusivity)) %>% 
  mutate(across(-ResponseId, as.numeric))
df_numeric <- df_numeric %>%
  mutate(across(ends_with("R"), ~ 8 - ., .names = "{.col}_Recoded")) 

Create average scores

# Using 13-item scale here, but can update as needed

df_numeric <- df_numeric %>%
  mutate(
    status_expansion_belief_new = rowMeans(select(., 
      dist_1R_Recoded,
      dist_2R_Recoded, 
      dist_5R_Recoded, 
      dist_11,
      dist_7, 
      dist_8, 
      dist_10, 
      dist_12R_Recoded,
      attr_3, 
      attr_4, 
      attr_5, 
      attr_11R_Recoded,
      attr_6, 
      attr_8, 
      attr_10,
      attr_12R_Recoded), 
      na.rm = TRUE)
  )
# Again, including the extra distribution prescriptive item here

df_numeric <- df_numeric %>%
  mutate(
    dist_avg = rowMeans(select(., 
      dist_1R_Recoded,
      dist_2R_Recoded, 
      dist_5R_Recoded, 
      dist_11,
      dist_7, 
      dist_8, 
      dist_10,
      dist_12R_Recoded), 
      na.rm = TRUE)
  )
df_numeric <- df_numeric %>%
  mutate(
    attribute_avg = rowMeans(select(., 
      attr_3, 
      attr_4, 
      attr_5, 
      attr_11R_Recoded,
      attr_6, 
      attr_8, 
      attr_10,
      attr_12R_Recoded
    ), na.rm = TRUE)
  )
df_numeric <- df_numeric %>%
  mutate(
    admire_avg = rowMeans(select(., 
      admire_proneness_1, 
      admire_proneness_2, 
      admire_proneness_3, 
      admire_proneness_4, 
      admire_proneness_5, 
      admire_proneness_6, 
      admire_proneness_7, 
      admire_proneness_8, 
      admire_proneness_9, 
      admire_proneness_10, 
      admire_proneness_11, 
      admire_proneness_12
    ), na.rm = TRUE)
  )
df_numeric <- df_numeric %>%
  mutate(
    OCBI_avg = rowMeans(select(., 
      OCBI_1, 
      OCBI_2, 
      OCBI_3, 
      OCBI_4, 
      OCBI_5, 
      OCBI_6, 
      OCBI_7, 
      OCBI_8
    ), na.rm = TRUE)
  )
df_numeric <- df_numeric %>%
  mutate(
    UPB_avg = rowMeans(select(., 
      UPBs_1, 
      UPBs_2, 
      UPBs_3, 
      UPBs_4, 
      UPBs_5, 
      UPBs_6
    ), na.rm = TRUE)
  )
df_numeric <- df_numeric %>%
  mutate(
    szsb_avg = rowMeans(select(., 
      SZSB_1, 
      SZSB_2, 
      SZSB_3, 
      SZSB_4, 
      SZSB_5R_Recoded, 
      SZSB_6R_Recoded, 
      SZSB_7R_Recoded, 
      SZSB_8R_Recoded
    ), na.rm = TRUE)
  )
df_numeric <- df_numeric %>%
  mutate(
    nfs_avg = rowMeans(select(., 
      need.for.status_1, 
      need.for.status_2R_Recoded, 
      need.for.status_3, 
      need.for.status_4, 
      need.for.status_5, 
      need.for.status_6, 
      need.for.status_7R_Recoded, 
      need.for.status_8
    ), na.rm = TRUE)
  )
df_numeric <- df_numeric %>%
  mutate(
    sdo_avg = rowMeans(select(., 
      sdo_1, 
      sdo_2,
      sdo_3R_Recoded,
      sdo_4R_Recoded, 
      sdo_5,
      sdo_6,
      sdo_7R_Recoded,
      sdo_8R_Recoded
    ), na.rm = TRUE)
  )
df_numeric <- df_numeric %>%
  mutate(
    dominance_avg = rowMeans(select(., 
      dom_1, 
      dom_2, 
      dom_3, 
      dom_4, 
      dom_5R_Recoded, 
      dom_6, 
      dom_7R_Recoded, 
      dom_8
    ), na.rm = TRUE)
  )
df_numeric <- df_numeric %>%
  mutate(
    prestige_avg = rowMeans(select(., 
      prest_1, 
      prest_2R_Recoded, 
      prest_3, 
      prest_4R_Recoded, 
      prest_5, 
      prest_6, 
      prest_7, 
      prest_8, 
      prest_9R_Recoded
    ), na.rm = TRUE)
  )
df_numeric <- df_numeric %>%
  mutate(
    perspective_avg = rowMeans(select(., 
      IRI_1,
      IRI_2R_Recoded,
      IRI_3,
      IRI_4,
      IRI_5R_Recoded, 
      IRI_6, 
      IRI_7
    ), na.rm = TRUE)
  )

Initial metrics w/ new items (16-item scale)

items <- df_numeric %>% 
  select(dist_1R_Recoded,
      dist_2R_Recoded, 
      dist_5R_Recoded,
      dist_11,
      dist_7, 
      dist_8, 
      dist_10,
      dist_12R_Recoded,
      attr_3, 
      attr_4, 
      attr_5,
      attr_11R_Recoded,
      attr_6, 
      attr_8, 
      attr_10,
      attr_12R_Recoded) %>% 
  na.omit()

cronbach(items) # alpha = 0.8887
## $sample.size
## [1] 98
## 
## $number.of.items
## [1] 16
## 
## $alpha
## [1] 0.8887143
x <- cor(items)

corrplot(x,method="number")

bartlett.test(items)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  items
## Bartlett's K-squared = 150.26, df = 15, p-value < 2.2e-16
KMO(x) # >.5 minimum, >.9 is great
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = x)
## Overall MSA =  0.85
## MSA for each item = 
##  dist_1R_Recoded  dist_2R_Recoded  dist_5R_Recoded          dist_11 
##             0.85             0.82             0.87             0.84 
##           dist_7           dist_8          dist_10 dist_12R_Recoded 
##             0.80             0.84             0.87             0.89 
##           attr_3           attr_4           attr_5 attr_11R_Recoded 
##             0.91             0.87             0.91             0.86 
##           attr_6           attr_8          attr_10 attr_12R_Recoded 
##             0.81             0.87             0.89             0.62

CFA w/ new items

model_quad <- '
  dist_desc =~ dist_1R_Recoded + dist_2R_Recoded + dist_5R_Recoded + dist_11
  dist_prescr =~ dist_7 + dist_8 + dist_10 + dist_12R_Recoded
  attr_desc =~ attr_3 + attr_4 + attr_5 + attr_11R_Recoded
  attr_prescr =~ attr_6 + attr_8 + attr_10 + attr_12R_Recoded
'

fit_quad <- cfa(model_quad, data = df_numeric, estimator = "MLM")
## Warning: lavaan->lav_object_post_check():  
##    covariance matrix of latent variables is not positive definite ; use 
##    lavInspect(fit, "cov.lv") to investigate.
summary(fit_quad, fit.measures = TRUE, standardized = TRUE)
## lavaan 0.6-18 ended normally after 50 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        38
## 
##   Number of observations                            98
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                               251.969     192.247
##   Degrees of freedom                                98          98
##   P-value (Chi-square)                           0.000       0.000
##   Scaling correction factor                                  1.311
##     Satorra-Bentler correction                                    
## 
## Model Test Baseline Model:
## 
##   Test statistic                               963.762     671.614
##   Degrees of freedom                               120         120
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.435
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.818       0.829
##   Tucker-Lewis Index (TLI)                       0.777       0.791
##                                                                   
##   Robust Comparative Fit Index (CFI)                         0.844
##   Robust Tucker-Lewis Index (TLI)                            0.809
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -2325.621   -2325.621
##   Loglikelihood unrestricted model (H1)      -2199.637   -2199.637
##                                                                   
##   Akaike (AIC)                                4727.242    4727.242
##   Bayesian (BIC)                              4825.471    4825.471
##   Sample-size adjusted Bayesian (SABIC)       4705.472    4705.472
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.127       0.099
##   90 Percent confidence interval - lower         0.107       0.081
##   90 Percent confidence interval - upper         0.146       0.117
##   P-value H_0: RMSEA <= 0.050                    0.000       0.000
##   P-value H_0: RMSEA >= 0.080                    1.000       0.957
##                                                                   
##   Robust RMSEA                                               0.113
##   90 Percent confidence interval - lower                     0.090
##   90 Percent confidence interval - upper                     0.137
##   P-value H_0: Robust RMSEA <= 0.050                         0.000
##   P-value H_0: Robust RMSEA >= 0.080                         0.988
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.108       0.108
## 
## Parameter Estimates:
## 
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   dist_desc =~                                                          
##     dist_1R_Recodd    1.000                               1.488    0.854
##     dist_2R_Recodd    1.072    0.073   14.686    0.000    1.595    0.926
##     dist_5R_Recodd    1.009    0.079   12.740    0.000    1.502    0.878
##     dist_11           0.567    0.102    5.539    0.000    0.844    0.542
##   dist_prescr =~                                                        
##     dist_7            1.000                               0.811    0.488
##     dist_8            0.976    0.207    4.721    0.000    0.791    0.705
##     dist_10           1.110    0.209    5.318    0.000    0.900    0.584
##     dist_12R_Recdd    1.255    0.255    4.924    0.000    1.017    0.602
##   attr_desc =~                                                          
##     attr_3            1.000                               0.834    0.807
##     attr_4            0.899    0.116    7.756    0.000    0.750    0.800
##     attr_5            1.043    0.089   11.689    0.000    0.870    0.805
##     attr_11R_Recdd    0.929    0.195    4.764    0.000    0.775    0.508
##   attr_prescr =~                                                        
##     attr_6            1.000                               0.697    0.699
##     attr_8            1.245    0.173    7.187    0.000    0.868    0.796
##     attr_10           0.957    0.119    8.062    0.000    0.667    0.640
##     attr_12R_Recdd    0.351    0.194    1.807    0.071    0.245    0.163
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   dist_desc ~~                                                          
##     dist_prescr       0.773    0.223    3.460    0.001    0.641    0.641
##     attr_desc         0.883    0.190    4.636    0.000    0.711    0.711
##     attr_prescr       0.371    0.155    2.397    0.017    0.357    0.357
##   dist_prescr ~~                                                        
##     attr_desc         0.413    0.147    2.813    0.005    0.610    0.610
##     attr_prescr       0.551    0.176    3.136    0.002    0.974    0.974
##   attr_desc ~~                                                          
##     attr_prescr       0.414    0.131    3.160    0.002    0.711    0.711
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .dist_1R_Recodd    0.819    0.211    3.888    0.000    0.819    0.270
##    .dist_2R_Recodd    0.426    0.109    3.927    0.000    0.426    0.143
##    .dist_5R_Recodd    0.668    0.222    3.013    0.003    0.668    0.228
##    .dist_11           1.717    0.272    6.322    0.000    1.717    0.707
##    .dist_7            2.104    0.324    6.490    0.000    2.104    0.762
##    .dist_8            0.632    0.193    3.274    0.001    0.632    0.502
##    .dist_10           1.563    0.442    3.534    0.000    1.563    0.659
##    .dist_12R_Recdd    1.816    0.314    5.781    0.000    1.816    0.637
##    .attr_3            0.372    0.091    4.075    0.000    0.372    0.348
##    .attr_4            0.317    0.093    3.410    0.001    0.317    0.360
##    .attr_5            0.410    0.104    3.926    0.000    0.410    0.351
##    .attr_11R_Recdd    1.726    0.236    7.311    0.000    1.726    0.742
##    .attr_6            0.509    0.097    5.224    0.000    0.509    0.512
##    .attr_8            0.436    0.092    4.754    0.000    0.436    0.366
##    .attr_10           0.643    0.296    2.177    0.029    0.643    0.591
##    .attr_12R_Recdd    2.192    0.317    6.912    0.000    2.192    0.973
##     dist_desc         2.215    0.346    6.395    0.000    1.000    1.000
##     dist_prescr       0.657    0.255    2.580    0.010    1.000    1.000
##     attr_desc         0.696    0.171    4.063    0.000    1.000    1.000
##     attr_prescr       0.486    0.163    2.979    0.003    1.000    1.000

Confirmatory factor analysis of the four-factor model (distribution × attribute × descriptive × prescriptive) indicated poor model fit in this sample: robust CFI = 0.829, TLI = 0.791, SRMR = 0.108, and RMSEA = 0.099 (90% CI = [.081, .117]). Several items loaded significantly onto their respective factors, but the model raised a warning that the covariance matrix of latent variables is not positive definite, and overall fit indices fell below commonly accepted thresholds. These results suggest that the hypothesized structure did not hold well in this sample (N = 98), potentially due to sample size, item overlap, or problematic covariances between latent factors.

Discriminant validity

Look at subscales

dist_desc <- df_numeric %>% 
  select(dist_1R_Recoded,
      dist_2R_Recoded, 
      dist_5R_Recoded, 
      dist_11) %>% 
  na.omit()

dist_prescr <- df_numeric %>% 
  select(dist_7, 
      dist_8,
      dist_10,
      dist_12R_Recoded) %>% 
  na.omit()

attr_desc <- df_numeric %>% 
  select(attr_3, 
      attr_4, 
      attr_5,
      attr_11R_Recoded) %>% 
  na.omit()

attr_prescr <- df_numeric %>% 
  select(attr_6, 
      attr_8, 
      attr_10,
      attr_12R_Recoded) %>% 
  na.omit()


cronbach(dist_desc) # alpha = 0.87
## $sample.size
## [1] 98
## 
## $number.of.items
## [1] 4
## 
## $alpha
## [1] 0.8726284
cronbach(dist_prescr) # alpha = 0.67
## $sample.size
## [1] 98
## 
## $number.of.items
## [1] 4
## 
## $alpha
## [1] 0.6741665
cronbach(attr_desc) # alpha = 0.75
## $sample.size
## [1] 98
## 
## $number.of.items
## [1] 4
## 
## $alpha
## [1] 0.759744
cronbach(attr_prescr) # alpha = 0.61
## $sample.size
## [1] 98
## 
## $number.of.items
## [1] 4
## 
## $alpha
## [1] 0.6179648
psych::alpha(dist_desc)
## 
## Reliability analysis   
## Call: psych::alpha(x = dist_desc)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.87      0.87    0.87      0.63 6.7 0.021  4.3 1.4     0.64
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.83  0.87  0.91
## Duhachek  0.83  0.87  0.91
## 
##  Reliability if an item is dropped:
##                 raw_alpha std.alpha G6(smc) average_r  S/N alpha se  var.r
## dist_1R_Recoded      0.82      0.82    0.80      0.60  4.5    0.032 0.0362
## dist_2R_Recoded      0.79      0.79    0.75      0.56  3.8    0.036 0.0255
## dist_5R_Recoded      0.79      0.79    0.77      0.56  3.8    0.036 0.0520
## dist_11              0.92      0.92    0.89      0.79 11.0    0.015 0.0026
##                 med.r
## dist_1R_Recoded  0.54
## dist_2R_Recoded  0.54
## dist_5R_Recoded  0.44
## dist_11          0.81
## 
##  Item statistics 
##                  n raw.r std.r r.cor r.drop mean  sd
## dist_1R_Recoded 98  0.88  0.87  0.84   0.77  4.2 1.8
## dist_2R_Recoded 98  0.91  0.91  0.90   0.83  4.3 1.7
## dist_5R_Recoded 98  0.91  0.91  0.88   0.83  4.3 1.7
## dist_11         98  0.69  0.71  0.54   0.50  4.4 1.6
## 
## Non missing response frequency for each item
##                    1    2    3    4    5    6    7 miss
## dist_1R_Recoded 0.05 0.15 0.18 0.16 0.16 0.18 0.10    0
## dist_2R_Recoded 0.03 0.12 0.24 0.15 0.13 0.18 0.13    0
## dist_5R_Recoded 0.05 0.09 0.27 0.14 0.15 0.18 0.11    0
## dist_11         0.02 0.07 0.26 0.16 0.17 0.22 0.09    0
psych::alpha(dist_prescr) # could keep in 12R
## 
## Reliability analysis   
## Call: psych::alpha(x = dist_prescr)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.67      0.69    0.65      0.36 2.2 0.054  5.1 1.1     0.37
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.55  0.67  0.77
## Duhachek  0.57  0.67  0.78
## 
##  Reliability if an item is dropped:
##                  raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r
## dist_7                0.64      0.66    0.57      0.40 2.0    0.062 0.0033
## dist_8                0.60      0.61    0.53      0.34 1.5    0.071 0.0184
## dist_10               0.53      0.56    0.47      0.30 1.3    0.082 0.0116
## dist_12R_Recoded      0.66      0.66    0.58      0.40 2.0    0.057 0.0126
##                  med.r
## dist_7            0.42
## dist_8            0.33
## dist_10           0.27
## dist_12R_Recoded  0.44
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop mean  sd
## dist_7           98  0.71  0.68  0.51   0.41  4.3 1.7
## dist_8           98  0.68  0.74  0.61   0.50  5.9 1.1
## dist_10          98  0.78  0.78  0.69   0.57  5.4 1.5
## dist_12R_Recoded 98  0.70  0.68  0.50   0.39  4.9 1.7
## 
## Non missing response frequency for each item
##                     1    2    3    4    5    6    7 miss
## dist_7           0.09 0.05 0.16 0.24 0.17 0.20 0.07    0
## dist_8           0.00 0.02 0.02 0.08 0.10 0.45 0.33    0
## dist_10          0.03 0.03 0.08 0.07 0.24 0.28 0.27    0
## dist_12R_Recoded 0.00 0.12 0.14 0.11 0.13 0.30 0.19    0
psych::alpha(attr_desc)
## 
## Reliability analysis   
## Call: psych::alpha(x = attr_desc)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.76       0.8    0.78       0.5   4 0.042  5.4 0.89     0.52
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.67  0.76  0.83
## Duhachek  0.68  0.76  0.84
## 
##  Reliability if an item is dropped:
##                  raw_alpha std.alpha G6(smc) average_r S/N alpha se   var.r
## attr_3                0.64      0.69    0.65      0.43 2.2    0.065 4.5e-02
## attr_4                0.67      0.71    0.66      0.45 2.4    0.060 3.9e-02
## attr_5                0.66      0.71    0.66      0.45 2.4    0.060 3.6e-02
## attr_11R_Recoded      0.86      0.86    0.80      0.67 6.1    0.025 1.5e-05
##                  med.r
## attr_3            0.31
## attr_4            0.37
## attr_5            0.37
## attr_11R_Recoded  0.67
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop mean   sd
## attr_3           98  0.83  0.86  0.81   0.69  5.7 1.04
## attr_4           98  0.80  0.84  0.79   0.66  5.7 0.94
## attr_5           98  0.81  0.84  0.79   0.65  5.7 1.09
## attr_11R_Recoded 98  0.71  0.63  0.40   0.37  4.6 1.53
## 
## Non missing response frequency for each item
##                     1    2    3    4    5    6    7 miss
## attr_3           0.00 0.01 0.03 0.04 0.29 0.39 0.24    0
## attr_4           0.00 0.01 0.02 0.05 0.30 0.47 0.15    0
## attr_5           0.00 0.02 0.02 0.06 0.27 0.40 0.23    0
## attr_11R_Recoded 0.01 0.07 0.19 0.22 0.19 0.17 0.13    0
psych::alpha(attr_prescr)
## 
## Reliability analysis   
## Call: psych::alpha(x = attr_prescr)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.62      0.67    0.66      0.33   2 0.066  5.6 0.81     0.41
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.48  0.62  0.73
## Duhachek  0.49  0.62  0.75
## 
##  Reliability if an item is dropped:
##                  raw_alpha std.alpha G6(smc) average_r  S/N alpha se   var.r
## attr_6                0.41      0.47    0.44      0.23 0.87    0.107 6.8e-02
## attr_8                0.48      0.53    0.52      0.28 1.15    0.094 6.2e-02
## attr_10               0.54      0.58    0.52      0.32 1.40    0.082 3.3e-02
## attr_12R_Recoded      0.76      0.76    0.68      0.51 3.14    0.042 2.9e-05
##                  med.r
## attr_6            0.14
## attr_8            0.31
## attr_10           0.31
## attr_12R_Recoded  0.51
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop mean  sd
## attr_6           98  0.79  0.82  0.76   0.62  5.7 1.0
## attr_8           98  0.73  0.77  0.66   0.50  5.9 1.1
## attr_10          98  0.67  0.72  0.62   0.42  5.8 1.0
## attr_12R_Recoded 98  0.62  0.52  0.26   0.19  4.8 1.5
## 
## Non missing response frequency for each item
##                     1    2    3    4    5    6    7 miss
## attr_6           0.00 0.01 0.00 0.09 0.30 0.36 0.24    0
## attr_8           0.00 0.01 0.05 0.01 0.19 0.40 0.34    0
## attr_10          0.01 0.00 0.01 0.07 0.22 0.42 0.27    0
## attr_12R_Recoded 0.02 0.06 0.11 0.17 0.26 0.24 0.13    0

Ok, there are issues here. Let’s look at the performance of the original three items per subscale (4-items for distribution prescriptive).

Re-do process w/ 13-item scale

df_numeric <- df_numeric %>%
  mutate(
    status_expansion_belief = rowMeans(select(., 
      dist_1R_Recoded,
      dist_2R_Recoded, 
      dist_5R_Recoded, 
      dist_7, 
      dist_8, 
      dist_10, 
      dist_12R_Recoded,
      attr_3, 
      attr_4, 
      attr_5, 
      attr_6, 
      attr_8, 
      attr_10), 
      na.rm = TRUE)
  )

Initial metrics w/ 13-item scale

items <- df_numeric %>% 
  select(dist_1R_Recoded,
      dist_2R_Recoded, 
      dist_5R_Recoded,
      dist_7, 
      dist_8, 
      dist_10,
      dist_12R_Recoded,
      attr_3, 
      attr_4, 
      attr_5,
      attr_6, 
      attr_8, 
      attr_10) %>% 
  na.omit()

cronbach(items) # alpha = 0.8793
## $sample.size
## [1] 98
## 
## $number.of.items
## [1] 13
## 
## $alpha
## [1] 0.8793732
x <- cor(items)

corrplot(x,method="number")

bartlett.test(items)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  items
## Bartlett's K-squared = 144.56, df = 12, p-value < 2.2e-16
KMO(x) # >.5 minimum, >.9 is great
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = x)
## Overall MSA =  0.85
## MSA for each item = 
##  dist_1R_Recoded  dist_2R_Recoded  dist_5R_Recoded           dist_7 
##             0.81             0.79             0.84             0.74 
##           dist_8          dist_10 dist_12R_Recoded           attr_3 
##             0.87             0.85             0.87             0.91 
##           attr_4           attr_5           attr_6           attr_8 
##             0.90             0.90             0.84             0.87 
##          attr_10 
##             0.89

CFA w/ 13-item scale

model_quad <- '
  dist_desc =~ dist_1R_Recoded + dist_2R_Recoded + dist_5R_Recoded 
  dist_prescr =~ dist_7 + dist_8 + dist_10 + dist_12R_Recoded
  attr_desc =~ attr_3 + attr_4 + attr_5 
  attr_prescr =~ attr_6 + attr_8 + attr_10 
'

fit_quad <- cfa(model_quad, data = df_numeric, estimator = "MLM")
## Warning: lavaan->lav_object_post_check():  
##    covariance matrix of latent variables is not positive definite ; use 
##    lavInspect(fit, "cov.lv") to investigate.
summary(fit_quad, fit.measures = TRUE, standardized = TRUE)
## lavaan 0.6-18 ended normally after 49 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        32
## 
##   Number of observations                            98
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                               105.718      83.537
##   Degrees of freedom                                59          59
##   P-value (Chi-square)                           0.000       0.019
##   Scaling correction factor                                  1.266
##     Satorra-Bentler correction                                    
## 
## Model Test Baseline Model:
## 
##   Test statistic                               761.568     486.635
##   Degrees of freedom                                78          78
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.565
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.932       0.940
##   Tucker-Lewis Index (TLI)                       0.910       0.921
##                                                                   
##   Robust Comparative Fit Index (CFI)                         0.951
##   Robust Tucker-Lewis Index (TLI)                            0.936
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -1811.764   -1811.764
##   Loglikelihood unrestricted model (H1)      -1758.905   -1758.905
##                                                                   
##   Akaike (AIC)                                3687.528    3687.528
##   Bayesian (BIC)                              3770.247    3770.247
##   Sample-size adjusted Bayesian (SABIC)       3669.196    3669.196
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.090       0.065
##   90 Percent confidence interval - lower         0.061       0.033
##   90 Percent confidence interval - upper         0.117       0.092
##   P-value H_0: RMSEA <= 0.050                    0.014       0.192
##   P-value H_0: RMSEA >= 0.080                    0.735       0.198
##                                                                   
##   Robust RMSEA                                               0.073
##   90 Percent confidence interval - lower                     0.031
##   90 Percent confidence interval - upper                     0.108
##   P-value H_0: Robust RMSEA <= 0.050                         0.152
##   P-value H_0: Robust RMSEA >= 0.080                         0.398
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.072       0.072
## 
## Parameter Estimates:
## 
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   dist_desc =~                                                          
##     dist_1R_Recodd    1.000                               1.499    0.861
##     dist_2R_Recodd    1.081    0.074   14.698    0.000    1.621    0.941
##     dist_5R_Recodd    0.985    0.085   11.552    0.000    1.477    0.864
##   dist_prescr =~                                                        
##     dist_7            1.000                               0.786    0.473
##     dist_8            1.021    0.223    4.585    0.000    0.803    0.716
##     dist_10           1.138    0.221    5.154    0.000    0.895    0.581
##     dist_12R_Recdd    1.295    0.275    4.712    0.000    1.018    0.603
##   attr_desc =~                                                          
##     attr_3            1.000                               0.839    0.812
##     attr_4            0.918    0.120    7.659    0.000    0.770    0.821
##     attr_5            1.063    0.089   11.883    0.000    0.892    0.825
##   attr_prescr =~                                                        
##     attr_6            1.000                               0.685    0.686
##     attr_8            1.270    0.184    6.906    0.000    0.870    0.798
##     attr_10           0.977    0.124    7.864    0.000    0.669    0.641
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   dist_desc ~~                                                          
##     dist_prescr       0.729    0.220    3.318    0.001    0.618    0.618
##     attr_desc         0.802    0.191    4.199    0.000    0.638    0.638
##     attr_prescr       0.345    0.153    2.260    0.024    0.336    0.336
##   dist_prescr ~~                                                        
##     attr_desc         0.387    0.144    2.690    0.007    0.587    0.587
##     attr_prescr       0.524    0.175    3.000    0.003    0.974    0.974
##   attr_desc ~~                                                          
##     attr_prescr       0.407    0.130    3.124    0.002    0.710    0.710
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .dist_1R_Recodd    0.785    0.208    3.772    0.000    0.785    0.259
##    .dist_2R_Recodd    0.343    0.098    3.503    0.000    0.343    0.115
##    .dist_5R_Recodd    0.742    0.241    3.073    0.002    0.742    0.254
##    .dist_7            2.143    0.330    6.501    0.000    2.143    0.776
##    .dist_8            0.614    0.194    3.163    0.002    0.614    0.488
##    .dist_10           1.572    0.445    3.532    0.000    1.572    0.663
##    .dist_12R_Recdd    1.814    0.315    5.758    0.000    1.814    0.636
##    .attr_3            0.364    0.099    3.694    0.000    0.364    0.341
##    .attr_4            0.287    0.092    3.127    0.002    0.287    0.326
##    .attr_5            0.373    0.101    3.707    0.000    0.373    0.319
##    .attr_6            0.527    0.100    5.274    0.000    0.527    0.529
##    .attr_8            0.433    0.089    4.880    0.000    0.433    0.364
##    .attr_10           0.641    0.294    2.178    0.029    0.641    0.589
##     dist_desc         2.248    0.353    6.375    0.000    1.000    1.000
##     dist_prescr       0.618    0.252    2.456    0.014    1.000    1.000
##     attr_desc         0.703    0.176    3.985    0.000    1.000    1.000
##     attr_prescr       0.469    0.163    2.870    0.004    1.000    1.000

Confirmatory factor analysis of the four-factor model (distribution × attribute × descriptive × prescriptive) indicated acceptable to borderline fit in this sample: robust CFI = 0.951, TLI = 0.936, SRMR = 0.072, and RMSEA = 0.065 (90% CI = [.033, .092]). All items loaded significantly onto their respective factors (all p < .001), with standardized loadings ranging from 0.34 to 0.94. Inter-factor correlations ranged from r = .34 to r = .71, suggesting the four constructs are conceptually distinct but moderately related. These results offer preliminary support for the multidimensional structure of status beliefs, though some fit indices fall just short of ideal thresholds, likely due to small sample size (N = 98).

Model Fit Indices: Robust CFI = 0.951 (> .95 = good) Robust TLI = 0.936 (> .95 = ideal) SRMR = 0.072 (< .08 = acceptable) Robust RMSEA = 0.065 (< .06 = borderline; upper CI = 0.092) Chi-square (SB) χ²(59) = 83.54, p = .019 (significant; not unusual given small N)

Discriminant validity

Look at subscales

dist_desc <- df_numeric %>% 
  select(dist_1R_Recoded,
      dist_2R_Recoded, 
      dist_5R_Recoded) %>% 
  na.omit()

dist_prescr <- df_numeric %>% 
  select(dist_7, 
      dist_8,
      dist_10, 
      dist_12R_Recoded) %>% 
  na.omit()

attr_desc <- df_numeric %>% 
  select(attr_3, 
      attr_4, 
      attr_5) %>% 
  na.omit()

attr_prescr <- df_numeric %>% 
  select(attr_6, 
      attr_8, 
      attr_10) %>% 
  na.omit()


cronbach(dist_desc) # alpha = 0.92
## $sample.size
## [1] 98
## 
## $number.of.items
## [1] 3
## 
## $alpha
## [1] 0.916475
cronbach(dist_prescr) # alpha = 0.674
## $sample.size
## [1] 98
## 
## $number.of.items
## [1] 4
## 
## $alpha
## [1] 0.6741665
cronbach(attr_desc) # alpha = 0.85
## $sample.size
## [1] 98
## 
## $number.of.items
## [1] 3
## 
## $alpha
## [1] 0.8578114
cronbach(attr_prescr) # alpha = 0.76
## $sample.size
## [1] 98
## 
## $number.of.items
## [1] 3
## 
## $alpha
## [1] 0.7579787

This looks better, let’s go forward with just these items. The original + new dist_prescr (with one additional item)

Make subscale

# Distribution × Descriptive (3 items)
df_numeric$dist_desc <- rowMeans(df_numeric[, c("dist_1R_Recoded", "dist_2R_Recoded", "dist_5R_Recoded")], na.rm = TRUE)

# Distribution × Prescriptive (4 items)
df_numeric$dist_prescr <- rowMeans(df_numeric[, c("dist_7", "dist_8", "dist_10", "dist_12R_Recoded")], na.rm = TRUE)

# Attribute × Descriptive (3 items)
df_numeric$attr_desc <- rowMeans(df_numeric[, c("attr_3", "attr_4", "attr_5")], na.rm = TRUE)

# Attribute × Prescriptive (3 items)
df_numeric$attr_prescr <- rowMeans(df_numeric[, c("attr_6", "attr_8", "attr_10")], na.rm = TRUE)
library(psych)

psych::alpha(dist_desc)
## 
## Reliability analysis   
## Call: psych::alpha(x = dist_desc)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.92      0.92    0.89      0.79  11 0.015  4.3 1.6     0.81
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.88  0.92  0.94
## Duhachek  0.89  0.92  0.95
## 
##  Reliability if an item is dropped:
##                 raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## dist_1R_Recoded      0.90      0.90    0.81      0.81 8.5    0.021    NA  0.81
## dist_2R_Recoded      0.84      0.84    0.73      0.73 5.3    0.032    NA  0.73
## dist_5R_Recoded      0.90      0.90    0.82      0.82 9.1    0.020    NA  0.82
## 
##  Item statistics 
##                  n raw.r std.r r.cor r.drop mean  sd
## dist_1R_Recoded 98  0.92  0.92  0.85   0.81  4.2 1.8
## dist_2R_Recoded 98  0.95  0.95  0.92   0.88  4.3 1.7
## dist_5R_Recoded 98  0.91  0.91  0.84   0.81  4.3 1.7
## 
## Non missing response frequency for each item
##                    1    2    3    4    5    6    7 miss
## dist_1R_Recoded 0.05 0.15 0.18 0.16 0.16 0.18 0.10    0
## dist_2R_Recoded 0.03 0.12 0.24 0.15 0.13 0.18 0.13    0
## dist_5R_Recoded 0.05 0.09 0.27 0.14 0.15 0.18 0.11    0
psych::alpha(dist_prescr)
## 
## Reliability analysis   
## Call: psych::alpha(x = dist_prescr)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.67      0.69    0.65      0.36 2.2 0.054  5.1 1.1     0.37
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.55  0.67  0.77
## Duhachek  0.57  0.67  0.78
## 
##  Reliability if an item is dropped:
##                  raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r
## dist_7                0.64      0.66    0.57      0.40 2.0    0.062 0.0033
## dist_8                0.60      0.61    0.53      0.34 1.5    0.071 0.0184
## dist_10               0.53      0.56    0.47      0.30 1.3    0.082 0.0116
## dist_12R_Recoded      0.66      0.66    0.58      0.40 2.0    0.057 0.0126
##                  med.r
## dist_7            0.42
## dist_8            0.33
## dist_10           0.27
## dist_12R_Recoded  0.44
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop mean  sd
## dist_7           98  0.71  0.68  0.51   0.41  4.3 1.7
## dist_8           98  0.68  0.74  0.61   0.50  5.9 1.1
## dist_10          98  0.78  0.78  0.69   0.57  5.4 1.5
## dist_12R_Recoded 98  0.70  0.68  0.50   0.39  4.9 1.7
## 
## Non missing response frequency for each item
##                     1    2    3    4    5    6    7 miss
## dist_7           0.09 0.05 0.16 0.24 0.17 0.20 0.07    0
## dist_8           0.00 0.02 0.02 0.08 0.10 0.45 0.33    0
## dist_10          0.03 0.03 0.08 0.07 0.24 0.28 0.27    0
## dist_12R_Recoded 0.00 0.12 0.14 0.11 0.13 0.30 0.19    0
psych::alpha(attr_desc)
## 
## Reliability analysis   
## Call: psych::alpha(x = attr_desc)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.86      0.86     0.8      0.67 6.1 0.025  5.7 0.9     0.67
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.80  0.86  0.90
## Duhachek  0.81  0.86  0.91
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## attr_3       0.8      0.81    0.67      0.67 4.1     0.04    NA  0.67
## attr_4       0.8      0.80    0.67      0.67 4.1     0.04    NA  0.67
## attr_5       0.8      0.80    0.67      0.67 4.0     0.04    NA  0.67
## 
##  Item statistics 
##         n raw.r std.r r.cor r.drop mean   sd
## attr_3 98  0.88  0.88  0.79   0.73  5.7 1.04
## attr_4 98  0.87  0.88  0.79   0.73  5.7 0.94
## attr_5 98  0.89  0.89  0.80   0.74  5.7 1.09
## 
## Non missing response frequency for each item
##           2    3    4    5    6    7 miss
## attr_3 0.01 0.03 0.04 0.29 0.39 0.24    0
## attr_4 0.01 0.02 0.05 0.30 0.47 0.15    0
## attr_5 0.02 0.02 0.06 0.27 0.40 0.23    0
psych::alpha(attr_prescr)
## 
## Reliability analysis   
## Call: psych::alpha(x = attr_prescr)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.76      0.76    0.68      0.51 3.1 0.042  5.8 0.86     0.51
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.66  0.76  0.83
## Duhachek  0.68  0.76  0.84
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## attr_6       0.68      0.68    0.52      0.52 2.1    0.064    NA  0.52
## attr_8       0.68      0.68    0.51      0.51 2.1    0.066    NA  0.51
## attr_10      0.67      0.67    0.51      0.51 2.1    0.066    NA  0.51
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## attr_6  98  0.81  0.82  0.67   0.58  5.7 1.0
## attr_8  98  0.83  0.82  0.68   0.59  5.9 1.1
## attr_10 98  0.82  0.82  0.68   0.59  5.8 1.0
## 
## Non missing response frequency for each item
##            1    2    3    4    5    6    7 miss
## attr_6  0.00 0.01 0.00 0.09 0.30 0.36 0.24    0
## attr_8  0.00 0.01 0.05 0.01 0.19 0.40 0.34    0
## attr_10 0.01 0.00 0.01 0.07 0.22 0.42 0.27    0
cor_vars <- df_numeric %>%
  select(dist_desc, dist_prescr, attr_desc, attr_prescr,
         sdo_avg, nfs_avg, szsb_avg, dominance_avg, prestige_avg)

cor_matrix <- cor(cor_vars, use = "pairwise.complete.obs")
round(cor_matrix, 2)
##               dist_desc dist_prescr attr_desc attr_prescr sdo_avg nfs_avg
## dist_desc          1.00        0.52      0.57        0.25   -0.37    0.11
## dist_prescr        0.52        1.00      0.42        0.64   -0.34    0.19
## attr_desc          0.57        0.42      1.00        0.58   -0.11    0.34
## attr_prescr        0.25        0.64      0.58        1.00   -0.28    0.40
## sdo_avg           -0.37       -0.34     -0.11       -0.28    1.00   -0.13
## nfs_avg            0.11        0.19      0.34        0.40   -0.13    1.00
## szsb_avg          -0.25       -0.35     -0.22       -0.35    0.25   -0.21
## dominance_avg     -0.30       -0.34     -0.22       -0.28    0.28    0.24
## prestige_avg       0.10        0.19      0.27        0.34   -0.18    0.82
##               szsb_avg dominance_avg prestige_avg
## dist_desc        -0.25         -0.30         0.10
## dist_prescr      -0.35         -0.34         0.19
## attr_desc        -0.22         -0.22         0.27
## attr_prescr      -0.35         -0.28         0.34
## sdo_avg           0.25          0.28        -0.18
## nfs_avg          -0.21          0.24         0.82
## szsb_avg          1.00          0.40        -0.13
## dominance_avg     0.40          1.00         0.20
## prestige_avg     -0.13          0.20         1.00

Fornell-Larcker test

library(semTools)
## 
## ###############################################################################
## This is semTools 0.5-7
## All users of R (or SEM) are invited to submit functions or ideas for functions.
## ###############################################################################
## 
## Attaching package: 'semTools'
## The following object is masked from 'package:readr':
## 
##     clipboard
## The following objects are masked from 'package:psych':
## 
##     reliability, skew
AVE(fit_quad) 
##   dist_desc dist_prescr   attr_desc attr_prescr 
##       0.791       0.335       0.671       0.511
lavInspect(fit_quad, "cor.lv")  # get latent variable correlations
##             dst_ds dst_pr attr_d attr_p
## dist_desc    1.000                     
## dist_prescr  0.618  1.000              
## attr_desc    0.638  0.587  1.000       
## attr_prescr  0.336  0.974  0.710  1.000
ave_values <- AVE(fit_quad)
cor_lv <- lavInspect(fit_quad, "cor.lv")
squared_cor_lv <- cor_lv^2
for (i in names(ave_values)) {
  max_sq_corr <- max(squared_cor_lv[i, names(ave_values) != i])
  cat(i, ": AVE =", round(ave_values[i], 3),
      "| Max shared variance =", round(max_sq_corr, 3),
      "|", ifelse(ave_values[i] > max_sq_corr, "Pass", "Fail"), "\n")
}
## dist_desc : AVE = 0.791 | Max shared variance = 0.407 | Pass 
## dist_prescr : AVE = 0.335 | Max shared variance = 0.948 | Fail 
## attr_desc : AVE = 0.671 | Max shared variance = 0.504 | Pass 
## attr_prescr : AVE = 0.511 | Max shared variance = 0.948 | Fail

The two prescriptive ones fail…

Distributions

df_long <- df_numeric %>%
  select(dist_desc, dist_prescr, attr_desc, attr_prescr) %>%
  pivot_longer(cols = everything(), names_to = "Subscale", values_to = "Score")

# Plot
ggplot(df_long, aes(x = Score, fill = Subscale, color = Subscale)) +
  geom_density(alpha = 0.4) +
  labs(
    title = "Overlaid Density of Status Belief Subscales",
    x = "Subscale Score",
    y = "Density"
  ) +
  theme_minimal()

Preregistered Analysis: Using Status Expansion Beliefs (13-item scale) to Predict OCBs

fit_ocbi <- lm(OCBI_avg ~ status_expansion_belief + szsb_avg + nfs_avg + sdo_avg + dominance_avg + prestige_avg, data = df_numeric)

summary(fit_ocbi)
## 
## Call:
## lm(formula = OCBI_avg ~ status_expansion_belief + szsb_avg + 
##     nfs_avg + sdo_avg + dominance_avg + prestige_avg, data = df_numeric)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.88714 -0.75595  0.06271  0.68730  2.26863 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.94178    1.09370   0.861 0.391450    
## status_expansion_belief  0.53333    0.14959   3.565 0.000582 ***
## szsb_avg                 0.02301    0.14331   0.161 0.872803    
## nfs_avg                  0.35147    0.18210   1.930 0.056705 .  
## sdo_avg                  0.08264    0.08321   0.993 0.323320    
## dominance_avg           -0.13878    0.12294  -1.129 0.261947    
## prestige_avg            -0.10637    0.19654  -0.541 0.589699    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.058 on 91 degrees of freedom
## Multiple R-squared:  0.2741, Adjusted R-squared:  0.2263 
## F-statistic: 5.728 on 6 and 91 DF,  p-value: 4.335e-05

Supports preregistered hypothesis: Status expansion beliefs (i.e., beliefs that status is and should be widely accessible and based on diverse attributes) predict greater organizational citizenship behaviors.

Exploratory Analyses

1. We will perform our primary regression model with UPB as the outcome variable.

fit_upb <- lm(UPB_avg ~ status_expansion_belief + szsb_avg + nfs_avg + sdo_avg + dominance_avg + prestige_avg, data = df_numeric)

summary(fit_upb)
## 
## Call:
## lm(formula = UPB_avg ~ status_expansion_belief + szsb_avg + nfs_avg + 
##     sdo_avg + dominance_avg + prestige_avg, data = df_numeric)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.90011 -0.66585 -0.04967  0.70594  3.05654 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              2.17691    1.08414   2.008   0.0476 *  
## status_expansion_belief -0.28057    0.14829  -1.892   0.0617 .  
## szsb_avg                 0.04559    0.14206   0.321   0.7490    
## nfs_avg                 -0.38451    0.18051  -2.130   0.0359 *  
## sdo_avg                  0.04130    0.08249   0.501   0.6178    
## dominance_avg            0.65696    0.12187   5.391 5.49e-07 ***
## prestige_avg             0.31956    0.19482   1.640   0.1044    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.049 on 91 degrees of freedom
## Multiple R-squared:  0.4468, Adjusted R-squared:  0.4103 
## F-statistic: 12.25 on 6 and 91 DF,  p-value: 4.648e-10

Result doesn’t replicate (but is marginal)

2. We will perform regression analyses using individual subscales (distribution descriptive, distribution prescriptive, attribution descriptive, attribution prescriptive) as predictors.

fit_ocbi_subscale <- lm(OCBI_avg ~ dist_desc + dist_prescr + attr_desc + attr_prescr + szsb_avg + nfs_avg + sdo_avg + dominance_avg + prestige_avg, data = df_numeric)

summary(fit_ocbi_subscale)
## 
## Call:
## lm(formula = OCBI_avg ~ dist_desc + dist_prescr + attr_desc + 
##     attr_prescr + szsb_avg + nfs_avg + sdo_avg + dominance_avg + 
##     prestige_avg, data = df_numeric)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.83891 -0.77191  0.04196  0.68839  2.24580 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    0.78814    1.19305   0.661   0.5106  
## dist_desc      0.15213    0.10689   1.423   0.1582  
## dist_prescr    0.12319    0.15608   0.789   0.4321  
## attr_desc      0.03679    0.19235   0.191   0.8488  
## attr_prescr    0.24442    0.22007   1.111   0.2698  
## szsb_avg       0.02919    0.14655   0.199   0.8426  
## nfs_avg        0.34055    0.19151   1.778   0.0788 .
## sdo_avg        0.09552    0.09019   1.059   0.2925  
## dominance_avg -0.13265    0.12613  -1.052   0.2958  
## prestige_avg  -0.10512    0.19993  -0.526   0.6004  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.074 on 88 degrees of freedom
## Multiple R-squared:  0.277,  Adjusted R-squared:  0.203 
## F-statistic: 3.746 on 9 and 88 DF,  p-value: 0.0005062

Effect of attr_prescr doesn’t replicate.

fit_upb_subscale <- lm(UPB_avg ~ dist_desc + dist_prescr + attr_desc + attr_prescr + szsb_avg + nfs_avg + sdo_avg + dominance_avg + prestige_avg, data = df_numeric)

summary(fit_upb_subscale)
## 
## Call:
## lm(formula = UPB_avg ~ dist_desc + dist_prescr + attr_desc + 
##     attr_prescr + szsb_avg + nfs_avg + sdo_avg + dominance_avg + 
##     prestige_avg, data = df_numeric)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.88299 -0.71027 -0.02998  0.65852  2.94949 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.57631    1.17017   2.202   0.0303 *  
## dist_desc     -0.13655    0.10484  -1.302   0.1962    
## dist_prescr    0.02527    0.15308   0.165   0.8693    
## attr_desc      0.14116    0.18866   0.748   0.4563    
## attr_prescr   -0.37297    0.21585  -1.728   0.0875 .  
## szsb_avg       0.03142    0.14374   0.219   0.8274    
## nfs_avg       -0.35341    0.18784  -1.881   0.0632 .  
## sdo_avg        0.01057    0.08846   0.120   0.9051    
## dominance_avg  0.64062    0.12371   5.179  1.4e-06 ***
## prestige_avg   0.31547    0.19609   1.609   0.1113    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.054 on 88 degrees of freedom
## Multiple R-squared:  0.4605, Adjusted R-squared:  0.4053 
## F-statistic: 8.346 on 9 and 88 DF,  p-value: 6.771e-09

3. We will perform regression analyses controlling for Big 5 personality traits, race, gender, SES, and political orientation.

demo <- data %>% 
  select(ResponseId, race, gender, ses, overall_poli) %>% 
  mutate(race = ifelse(grepl(",", race), "8", race),
    race = as.factor(race),
    gender = as.factor(gender),
    ses = as.factor(ses),
    overall_poli = as.numeric(overall_poli))
  
df_numeric_demo <- df_numeric %>% 
  left_join(demo, by = "ResponseId")
fit_ocbi_demo <- lm(OCBI_avg ~ status_expansion_belief + szsb_avg + nfs_avg + sdo_avg + dominance_avg + prestige_avg + Extraversion + Agreeable + Conscientious + Stable + Open + race + gender + ses + overall_poli, data = df_numeric_demo)

summary(fit_ocbi_demo)
## 
## Call:
## lm(formula = OCBI_avg ~ status_expansion_belief + szsb_avg + 
##     nfs_avg + sdo_avg + dominance_avg + prestige_avg + Extraversion + 
##     Agreeable + Conscientious + Stable + Open + race + gender + 
##     ses + overall_poli, data = df_numeric_demo)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.57383 -0.45323  0.01125  0.62284  1.99716 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)              0.77899    1.70812   0.456   0.6497  
## status_expansion_belief  0.34888    0.16924   2.061   0.0428 *
## szsb_avg                 0.01957    0.15236   0.128   0.8982  
## nfs_avg                  0.11654    0.19966   0.584   0.5612  
## sdo_avg                 -0.02909    0.12340  -0.236   0.8143  
## dominance_avg           -0.12321    0.13618  -0.905   0.3685  
## prestige_avg             0.07756    0.20488   0.379   0.7061  
## Extraversion             0.11468    0.07348   1.561   0.1228  
## Agreeable                0.18138    0.14138   1.283   0.2035  
## Conscientious            0.03932    0.12458   0.316   0.7532  
## Stable                   0.05535    0.10896   0.508   0.6130  
## Open                    -0.02622    0.12146  -0.216   0.8297  
## race2                   -0.26590    0.39979  -0.665   0.5081  
## race3                   -0.59470    0.48377  -1.229   0.2229  
## race5                   -0.74519    0.48394  -1.540   0.1279  
## race7                    1.99348    1.16528   1.711   0.0913 .
## race8                   -0.38890    0.41412  -0.939   0.3507  
## race9                   -0.80436    1.11675  -0.720   0.4736  
## gender2                  0.51001    0.25598   1.992   0.0500 .
## gender3                  0.18001    1.09487   0.164   0.8699  
## ses3                     0.19207    0.31541   0.609   0.5444  
## ses4                     0.42451    0.42613   0.996   0.3224  
## ses5                     0.09091    0.40958   0.222   0.8249  
## overall_poli            -0.10925    0.08581  -1.273   0.2069  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.006 on 74 degrees of freedom
## Multiple R-squared:  0.4669, Adjusted R-squared:  0.3013 
## F-statistic: 2.818 on 23 and 74 DF,  p-value: 0.0004085

This replicates, good.

4. We will examine whether admiration proneness, perspective taking, and inclusion (separately) mediate the relationship between status expansion beliefs and OCBs.

Admiration proneness

# Define the SEM model with specified coefficients
library(lavaan)
library(parallel)


model <- '
  # Regression coefficients
  admire_avg ~ a*status_expansion_belief
  OCBI_avg ~ cprime*status_expansion_belief + b*admire_avg

  # Indirect effect
  indirect := a*b
'

# Fit the model
fit <- sem(model, data = df_numeric)

# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                            98
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   admire_avg ~                                        
##     stts_x_    (a)    0.628    0.127    4.947    0.000
##   OCBI_avg ~                                          
##     stts_x_ (cprm)    0.388    0.124    3.128    0.002
##     admr_vg    (b)    0.394    0.088    4.451    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .admire_avg        1.218    0.174    7.000    0.000
##    .OCBI_avg          0.933    0.133    7.000    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect          0.247    0.075    3.309    0.001
library(lavaanPlot)

lavaanPlot(model = fit, 
           coefs = TRUE, 
           stars = "regress",
           node_options = list(shape = "box", fontname = "Helvetica"), 
           edge_options = list(color = "grey"))

Admiration proneness partially mediates the relationship between status_expansion_belief and OCBI_avg. Both the indirect and direct effects are meaningful: 1. People who believe status can expand tend to admire others more → leading to more OCBI. 2. But there’s also a direct path, suggesting additional mechanisms beyond admiration may connect belief in status expansion and prosocial behavior.

Perspective taking

model <- '
  # Regression coefficients
  perspective_avg ~ a*status_expansion_belief
  OCBI_avg ~ cprime*status_expansion_belief + b*perspective_avg

  # Indirect effect
  indirect := a*b
'

# Fit the model
fit <- sem(model, data = df_numeric)

# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                            98
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                     Estimate  Std.Err  z-value  P(>|z|)
##   perspective_avg ~                                    
##     stts_x_    (a)     0.323    0.104    3.097    0.002
##   OCBI_avg ~                                           
##     stts_x_ (cprm)     0.609    0.127    4.784    0.000
##     prspct_    (b)     0.082    0.118    0.696    0.487
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .perspective_vg    0.824    0.118    7.000    0.000
##    .OCBI_avg          1.116    0.159    7.000    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect          0.026    0.039    0.679    0.497
lavaanPlot(model = fit, 
           coefs = TRUE, 
           stars = "regress",
           node_options = list(shape = "box", fontname = "Helvetica"), 
           edge_options = list(color = "grey"))

Perspective taking does not mediate

Inclusivity

model <- '
  # Regression coefficients
  fv_inclusivity ~ a*status_expansion_belief
  OCBI_avg ~ cprime*status_expansion_belief + b*fv_inclusivity

  # Indirect effect
  indirect := a*b
'

# Fit the model
fit <- sem(model, data = df_numeric)

# Summarize results
summary(fit)
## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                            98
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   fv_inclusivity ~                                    
##     stts_x_    (a)    0.492    0.087    5.679    0.000
##   OCBI_avg ~                                          
##     stts_x_ (cprm)    0.264    0.118    2.228    0.026
##     fv_ncls    (b)    0.755    0.120    6.307    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .fv_inclusivity    0.568    0.081    7.000    0.000
##    .OCBI_avg          0.798    0.114    7.000    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect          0.372    0.088    4.220    0.000
lavaanPlot(model = fit, 
           coefs = TRUE, 
           stars = "regress",
           node_options = list(shape = "box", fontname = "Helvetica"), 
           edge_options = list(color = "grey"))

Full mediation doesn’t replicate.