library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(fs) 
library(here)
## here() starts at /Users/sjg2222/Library/CloudStorage/GoogleDrive-sjg2222@columbia.edu/My Drive/YEAR 1/PROJECTS/SHAI/Dominance and Job Preference/Code
library(stringr)
library(digest)
set.seed(34349785)

options(scipen = 999) # turn off scientific notation
##### Load Data ######

d_study <- read.csv("~/Google drive/My Drive/YEAR 1/PROJECTS/SHAI/Dominance and Job Preference/Data/study2B.csv")

# Remove first two rows
d_study = d_study[-1,]
d_study = d_study[-1,]


##### Exclusions  ######
d_study = d_study %>% mutate(workerID = row_number())
# Remove previews and non-finished
d_study <- d_study %>% 
  filter(!((Finished == "Finished") |
             Finished == "{\"ImportId\":\"finished\"}")) %>% 
  filter(attn == 24) %>% #  n = 492
  filter(employment == 1 | employment == 2) # n = 366... yeesh. we are losing so much data here! we are going to be super underpowered

d_study$updated_selfinterest_1 = as.numeric(d_study$updated_selfinterest_1)
d_study$updated_selfinterest_2 = as.numeric(d_study$updated_selfinterest_2)
d_study$updated_selfinterest_3 = as.numeric(d_study$updated_selfinterest_3)

d_study = d_study %>% 
  dplyr::rename(zsb_si_order = FL_114_DO) %>% 
  select(-contains("DO")) %>% 
  rowwise() %>% 
  mutate(updated_selfinterest_3 = 8 - updated_selfinterest_3) %>% 
  ungroup()
zsb = d_study %>% 
  select(contains("zsb")) %>% 
  names()

d_study$zeroSumBeliefs_score = d_study %>% 
  select(all_of(zsb)) %>% 
  mutate_all(as.numeric) %>% 
  rowMeans(na.rm = T)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `zsb_si_order = .Primitive("as.double")(zsb_si_order)`.
## Caused by warning:
## ! NAs introduced by coercion
selfinterest = d_study %>% 
  select(contains("selfinterest")) %>% 
  names()

d_study$selfinterest_score = d_study %>% select(all_of(selfinterest)) %>% mutate_all(as.numeric) %>% rowMeans(na.rm = T)


socialGoodNames = d_study %>% 
  select(contains("socialimpact")) %>% 
  names()

d_study$socialimpact_score = d_study %>% 
  select(all_of(socialGoodNames)) %>% 
  mutate_all(as.numeric) %>% 
  rowMeans(na.rm = T)

Primary models

model1 <- d_study %>%
  mutate(type = relevel(as.factor(type), ref = "non-profit")) %>% 
  lm(zeroSumBeliefs_score ~ type, .)

summary(model1)
## 
## Call:
## lm(formula = zeroSumBeliefs_score ~ type, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5463 -1.0463 -0.0664  0.9537  3.6836 
## 
## Coefficients:
##                Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)      2.8164     0.1006  27.984 < 0.0000000000000002 ***
## typefor-profit   0.7299     0.1401   5.212          0.000000314 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.339 on 364 degrees of freedom
## Multiple R-squared:  0.06944,    Adjusted R-squared:  0.06688 
## F-statistic: 27.16 on 1 and 364 DF,  p-value: 0.0000003141
model2 <- d_study %>%
  mutate(type = relevel(as.factor(type), ref = "non-profit")) %>% 
  lm(selfinterest_score ~ type, .)

summary(model2)
## 
## Call:
## lm(formula = selfinterest_score ~ type, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.98942 -0.85499  0.01058  0.81168  3.14501 
## 
## Coefficients:
##                Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)     3.18832    0.08346   38.20 <0.0000000000000002 ***
## typefor-profit  1.46776    0.11615   12.64 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.11 on 364 degrees of freedom
## Multiple R-squared:  0.3049, Adjusted R-squared:  0.303 
## F-statistic: 159.7 on 1 and 364 DF,  p-value: < 0.00000000000000022
# Define the SEM model with specified coefficients
library(lavaan)
## This is lavaan 0.6-17
## lavaan is FREE software! Please report any bugs.
library(parallel)


model <- '
  # Regression coefficients
  selfinterest_score ~ a*type
  zeroSumBeliefs_score ~ cprime*type + b*selfinterest_score

  # Indirect effect
  indirect := a*b
'

# Fit the model
fit <- sem(model, data = d_study)

# Summarize results
summary(fit)
## lavaan 0.6.17 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           366
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                          Estimate  Std.Err  z-value  P(>|z|)
##   selfinterest_score ~                                      
##     type       (a)         -1.468    0.116  -12.672    0.000
##   zeroSumBeliefs_score ~                                    
##     type    (cprm)          0.077    0.149    0.514    0.607
##     slfntr_    (b)          0.550    0.056    9.795    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .selfintrst_scr    1.226    0.091   13.528    0.000
##    .zeroSmBlfs_scr    1.413    0.104   13.528    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect         -0.807    0.104   -7.750    0.000

Exploratory models

Order effects?

model3 <- d_study %>%
  mutate(type = relevel(as.factor(type), ref = "non-profit")) %>% 
  lm(zeroSumBeliefs_score ~ type * zsb_si_order, .)

summary(model3)
## 
## Call:
## lm(formula = zeroSumBeliefs_score ~ type * zsb_si_order, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6413 -0.8986 -0.1246  1.1014  3.7247 
## 
## Coefficients:
##                                             Estimate Std. Error t value
## (Intercept)                                  2.85795    0.14280  20.014
## typefor-profit                               0.78335    0.18973   4.129
## zsb_si_orderZSB|SelfInterest                -0.08267    0.20138  -0.411
## typefor-profit:zsb_si_orderZSB|SelfInterest -0.15998    0.28357  -0.564
##                                                         Pr(>|t|)    
## (Intercept)                                 < 0.0000000000000002 ***
## typefor-profit                                         0.0000453 ***
## zsb_si_orderZSB|SelfInterest                               0.682    
## typefor-profit:zsb_si_orderZSB|SelfInterest                0.573    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.34 on 362 degrees of freedom
## Multiple R-squared:  0.07365,    Adjusted R-squared:  0.06598 
## F-statistic: 9.594 on 3 and 362 DF,  p-value: 0.000004132

Alpha for mediator

psych::alpha(d_study[,c("updated_selfinterest_1", "updated_selfinterest_2", "updated_selfinterest_3")])
## 
## Reliability analysis   
## Call: psych::alpha(x = d_study[, c("updated_selfinterest_1", "updated_selfinterest_2", 
##     "updated_selfinterest_3")])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.84      0.83    0.81      0.63   5 0.015  3.9 1.3     0.54
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.81  0.84  0.86
## Duhachek  0.81  0.84  0.87
## 
##  Reliability if an item is dropped:
##                        raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r
## updated_selfinterest_1      0.68      0.68    0.52      0.52 2.1    0.033    NA
## updated_selfinterest_2      0.70      0.71    0.54      0.54 2.4    0.031    NA
## updated_selfinterest_3      0.90      0.90    0.82      0.82 9.1    0.010    NA
##                        med.r
## updated_selfinterest_1  0.52
## updated_selfinterest_2  0.54
## updated_selfinterest_3  0.82
## 
##  Item statistics 
##                          n raw.r std.r r.cor r.drop mean  sd
## updated_selfinterest_1 366  0.92  0.91  0.88   0.79  4.1 1.6
## updated_selfinterest_2 366  0.91  0.90  0.86   0.77  4.3 1.6
## updated_selfinterest_3 366  0.78  0.79  0.59   0.56  3.4 1.4
## 
## Non missing response frequency for each item
##                           1    2    3    4    5    6    7 miss
## updated_selfinterest_1 0.05 0.14 0.18 0.17 0.24 0.17 0.04    0
## updated_selfinterest_2 0.04 0.13 0.14 0.17 0.26 0.21 0.05    0
## updated_selfinterest_3 0.05 0.24 0.25 0.22 0.14 0.08 0.02    0
# alpha = 0.83 -- all good!