library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(fs) 
library(here)
## here() starts at /Users/sjg2222/Library/CloudStorage/GoogleDrive-sjg2222@columbia.edu/My Drive/YEAR 1/PROJECTS/SHAI/Dominance and Job Preference/Code
library(stringr)
library(digest)
set.seed(34349785)

options(scipen = 999) # turn off scientific notation
##### Load Data ######

d_study <- read.csv("~/Google drive/My Drive/YEAR 1/PROJECTS/SHAI/Dominance and Job Preference/Data/study2A_pilot.csv")

# Remove first two rows
d_study = d_study[-1,]
d_study = d_study[-1,]


##### Exclusions  ######
d_study = d_study %>% mutate(workerID = row_number())
# Remove previews and non-finished
d_study <- d_study %>% 
  filter(!((Finished == "Finished") |
             Finished == "{\"ImportId\":\"finished\"}")) %>% 
  filter(attn == 24) # %>% 
  # filter(employment == 1 | employment == 2) 


d_study = d_study %>% 
  select(-contains("DO"))
zsb = d_study %>% 
  select(contains("zsb")) %>% 
  names()

d_study$zeroSumBeliefs_score = d_study %>% 
  select(all_of(zsb)) %>% 
  mutate_all(as.numeric) %>% 
  rowMeans(na.rm = T)


socialGoodNames = d_study %>% 
  select(contains("socialimpact")) %>% 
  names()

d_study$socialimpact_score = d_study %>% 
  select(all_of(socialGoodNames)) %>% 
  mutate_all(as.numeric) %>% 
  rowMeans(na.rm = T)

Primary models

Pairwise comparisons: ZSB

# Green Machine vs. Machine

model1 <- d_study %>%
  filter(name != "War Machine Inc.") %>% 
  mutate(name = relevel(as.factor(name), ref = "Machine Inc.")) %>% 
  lm(zeroSumBeliefs_score ~ name, .)

summary(model1)
## 
## Call:
## lm(formula = zeroSumBeliefs_score ~ name, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4350 -1.4087 -0.1587  1.0848  3.8413 
## 
## Coefficients:
##                        Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)              3.1587     0.2175   14.52 <0.0000000000000002 ***
## nameGreen Machine Inc.   0.2763     0.3106    0.89               0.376    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.568 on 100 degrees of freedom
## Multiple R-squared:  0.007852,   Adjusted R-squared:  -0.00207 
## F-statistic: 0.7914 on 1 and 100 DF,  p-value: 0.3758
# War Machine vs. Machine

model2 <- d_study %>%
  filter(name != "Green Machine Inc.") %>% 
  mutate(name = relevel(as.factor(name), ref = "Machine Inc.")) %>% 
  lm(zeroSumBeliefs_score ~ name, .)

summary(model2)
## 
## Call:
## lm(formula = zeroSumBeliefs_score ~ name, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6979 -1.4087 -0.1587  1.3021  3.8413 
## 
## Coefficients:
##                      Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)            3.1587     0.2218  14.240 <0.0000000000000002 ***
## nameWar Machine Inc.   0.5393     0.3202   1.684              0.0953 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.6 on 98 degrees of freedom
## Multiple R-squared:  0.02813,    Adjusted R-squared:  0.01822 
## F-statistic: 2.837 on 1 and 98 DF,  p-value: 0.09531
# Green Machine vs. War Machine

model3 <- d_study %>%
  filter(name != "Machine Inc.") %>% 
  mutate(name = relevel(as.factor(name), ref = "War Machine Inc.")) %>% 
  lm(zeroSumBeliefs_score ~ name, .)

summary(model3)
## 
## Call:
## lm(formula = zeroSumBeliefs_score ~ name, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6979 -1.4350 -0.0729  1.2428  3.5650 
## 
## Coefficients:
##                        Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)              3.6979     0.2321  15.933 <0.0000000000000002 ***
## nameGreen Machine Inc.  -0.2629     0.3249  -0.809                0.42    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.608 on 96 degrees of freedom
## Multiple R-squared:  0.006774,   Adjusted R-squared:  -0.003572 
## F-statistic: 0.6547 on 1 and 96 DF,  p-value: 0.4204

Pairwise comparisons: Social good

# Green Machine vs. Machine

model1 <- d_study %>%
  filter(name != "War Machine Inc.") %>% 
  mutate(name = relevel(as.factor(name), ref = "Machine Inc.")) %>% 
  lm(socialimpact_score ~ name, .)

summary(model1)
## 
## Call:
## lm(formula = socialimpact_score ~ name, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9295 -0.6190  0.0705  0.7067  2.0705 
## 
## Coefficients:
##                        Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)              4.9295     0.1617  30.482 <0.0000000000000002 ***
## nameGreen Machine Inc.   0.3638     0.2310   1.575               0.118    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.166 on 100 degrees of freedom
## Multiple R-squared:  0.02421,    Adjusted R-squared:  0.01445 
## F-statistic: 2.481 on 1 and 100 DF,  p-value: 0.1184
# War Machine vs. Machine

model2 <- d_study %>%
  filter(name != "Green Machine Inc.") %>% 
  mutate(name = relevel(as.factor(name), ref = "Machine Inc.")) %>% 
  lm(socialimpact_score ~ name, .)

summary(model2)
## 
## Call:
## lm(formula = socialimpact_score ~ name, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9295 -0.9295  0.0735  1.0705  3.4097 
## 
## Coefficients:
##                      Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)            4.9295     0.2028  24.302 < 0.0000000000000002 ***
## nameWar Machine Inc.  -1.3392     0.2928  -4.574             0.000014 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.463 on 98 degrees of freedom
## Multiple R-squared:  0.1759, Adjusted R-squared:  0.1675 
## F-statistic: 20.92 on 1 and 98 DF,  p-value: 0.00001401
# Green Machine vs. War Machine

model3 <- d_study %>%
  filter(name != "Machine Inc.") %>% 
  mutate(name = relevel(as.factor(name), ref = "War Machine Inc.")) %>% 
  lm(socialimpact_score ~ name, .)

summary(model3)
## 
## Call:
## lm(formula = socialimpact_score ~ name, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2933 -0.9509  0.2249  0.7340  3.4097 
## 
## Coefficients:
##                        Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)              3.5903     0.2004   17.91 < 0.0000000000000002 ***
## nameGreen Machine Inc.   1.7031     0.2806    6.07         0.0000000255 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.389 on 96 degrees of freedom
## Multiple R-squared:  0.2773, Adjusted R-squared:  0.2698 
## F-statistic: 36.84 on 1 and 96 DF,  p-value: 0.00000002548

Plot

library(ggpubr) # Package for visualizing data
library(see) # Package for visualizing data

my_comparisons <- list(c("Machine Inc.", "Green Machine Inc."), c("War Machine Inc.", "Machine Inc."), c("War Machine Inc.", "Green Machine Inc."))

ggplot(data = d_study,
       mapping = aes(x = name, 
                     y = zeroSumBeliefs_score, 
                     color = name)) +
  # means with confidence intervals 
  geom_violinhalf(position = position_nudge(0.1),
                  #fill = "gray23",
                  alpha = 0.4) +
  geom_point(alpha = 0.3,
             size = 2,
             position = position_jitter(0.1)) +
  stat_summary(fun.data = "mean_cl_boot",
               size = 1,
               geom = "linerange",
               color = "grey50",
               position = position_nudge(x = 0.2)) +
  stat_summary(fun = "mean",
               size = 0.3,
               position = position_nudge(x = 0.2))+
  # individual data points (jittered horizontally)
  theme_bw()+
  theme(legend.position="none")+
  ylab("Zero Sum Belief")+
  stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test")+
  scale_color_manual(values = c("War Machine Inc." = "darkblue", "Green Machine Inc." = "darkgreen", "Machine Inc." = "grey")) 
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_segment()`).

ggplot(data = d_study,
       mapping = aes(x = name, 
                     y = socialimpact_score, 
                     color = name)) +
  # means with confidence intervals 
  geom_violinhalf(position = position_nudge(0.1),
                  #fill = "gray23",
                  alpha = 0.4) +
  geom_point(alpha = 0.3,
             size = 2,
             position = position_jitter(0.1)) +
  stat_summary(fun.data = "mean_cl_boot",
               size = 1,
               geom = "linerange",
               color = "grey50",
               position = position_nudge(x = 0.2)) +
  stat_summary(fun = "mean",
               size = 0.3,
               position = position_nudge(x = 0.2))+
  # individual data points (jittered horizontally)
  theme_bw()+
  theme(legend.position="none")+
  ylab("Social Impact")+
  stat_compare_means(comparisons = my_comparisons, label = "p.signif", method = "t.test")+
  scale_color_manual(values = c("War Machine Inc." = "darkblue", "Green Machine Inc." = "darkgreen", "Machine Inc." = "grey")) 
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_segment()`).