library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(here)
## here() starts at /Users/caoanjie/Desktop/projects/CCRR_analysis/study_2
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(effectsize)
tidy_d <- read_csv(here("data/4_processed/with_human_coded_main.csv"))
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 41284 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): subject, culture, task_name, task_info, trial_info, resp_type, resp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Change Detection

Model

Change detection (linear regression): log(reaction_time) ~ culture * type_of_change + (type_of_change | subject) + (culture | picture)

cd_df <- tidy_d %>% 
  filter(task_name == "CD") %>% 
  mutate(reaction_time = as.numeric(resp), 
         type_of_change = task_info,
         picture = trial_info) %>% 
  select(culture, subject,reaction_time,type_of_change, picture) %>% 
  filter(!is.na(reaction_time))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
#cd_model <- lmer(log(reaction_time) ~ culture * type_of_change + (type_of_change | subject) + (culture | picture),  data = cd_df)

#cd_model <- lmer(log(reaction_time) ~ culture * type_of_change + (type_of_change | subject) ,  data = cd_df)

cd_model <- lmer(log(reaction_time) ~ culture * type_of_change + (1 | subject) ,  data = cd_df)

summary(cd_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(reaction_time) ~ culture * type_of_change + (1 | subject)
##    Data: cd_df
## 
## REML criterion at convergence: 25977.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.8360 -0.6794 -0.0535  0.6175  4.4740 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subject  (Intercept) 0.2218   0.4710  
##  Residual             0.4732   0.6879  
## Number of obs: 11857, groups:  subject, 468
## 
## Fixed effects:
##                                Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                   8.796e+00  3.849e-02 5.192e+02 228.513  < 2e-16
## cultureUS                     1.001e-01  4.859e-02 5.231e+02   2.060 0.039892
## type_of_changefocal           7.093e-02  2.064e-02 1.142e+04   3.436 0.000592
## cultureUS:type_of_changefocal 3.844e-02  2.628e-02 1.143e+04   1.463 0.143551
##                                  
## (Intercept)                   ***
## cultureUS                     *  
## type_of_changefocal           ***
## cultureUS:type_of_changefocal    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cltrUS typ_f_
## cultureUS   -0.792              
## typ_f_chngf -0.245  0.194       
## cltrUS:ty__  0.192 -0.246 -0.785

Visualization

raw_CD <- tidy_d %>% 
  filter(task_name == "CD") %>% 
  mutate(log_rt = log(as.numeric(resp))) 
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
ggplot(data = raw_CD, 
       aes(y = log_rt, x = culture, color = culture)) +
geom_point(alpha = .2, position = position_jitter(width = .1)) + 
  stat_summary(fun.data = "mean_cl_boot", color = "black") +
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("log RT (ms)") + 
xlab("") +
theme_classic() + 
labs(title = "Change Detection") +
theme(plot.title = element_text(hjust = 0.5, size = 8), 
      plot.subtitle = element_text(hjust = 0.5, size = 6), 
      text = element_text(size=8))+
  facet_wrap(~task_info)
## Warning: Removed 80 rows containing non-finite values (stat_summary).
## Warning: Removed 80 rows containing missing values (geom_point).

by item exploration

cn_coded <- read_csv(here("data/annotated/CN/CD_wr.csv"))
## Rows: 5190 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): subject, culture, prompt, response, coder
## dbl (1): correct
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
us_coded <- read_csv(here("data/annotated/US/CD_wr.csv"))
## Rows: 8880 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): subject, culture, prompt, response, coder
## dbl (1): correct
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
coded_df <- bind_rows(cn_coded, us_coded)

coded_df %>% 
  ggplot(aes(x = culture, y = correct)) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~prompt)

# so it's clear that there are some big item level variations
# what if we took out all the ones with cultural differences in accuracy 

c <- coded_df %>% 
 nest_by(prompt) 


c$culture_pred <- lapply(c$data, 
       function(d){
         (glm(correct ~ culture , family = binomial, data = d) %>% 
           summary())$coefficients %>% 
           as.data.frame() %>% 
           rownames_to_column() %>% filter(rowname == "cultureUS") %>% pull(`Pr(>|z|)`)
       }) 
c %>% 
  select(prompt, culture_pred) %>% 
  unnest(culture_pred) %>% 
  left_join(coded_df, by = "prompt") %>% 
  filter(culture_pred < 0.05) %>% 
  ggplot(aes(x = culture, y = correct)) + 
  
  geom_text(aes(label = round(culture_pred, 2))) + 
  stat_summary(fun.data = "mean_cl_boot") +
  facet_wrap(~prompt)

cultural_diff_prompt <- c %>% 
  filter(culture_pred < 0.05) %>% 
  pull(prompt)

raw_CD %>% 
  filter(!(trial_info %in% cultural_diff_prompt)) %>% 
ggplot(
       aes(y = log_rt, x = culture, color = culture)) +
geom_point(alpha = .2, position = position_jitter(width = .1)) + 
  stat_summary(fun.data = "mean_cl_boot", color = "black") +
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("log RT (ms)") + 
xlab("") +
theme_classic() + 
labs(title = "Change Detection") +
theme(plot.title = element_text(hjust = 0.5, size = 8), 
      plot.subtitle = element_text(hjust = 0.5, size = 6), 
      text = element_text(size=8))+
  facet_wrap(~task_info)
## Warning: Removed 50 rows containing non-finite values (stat_summary).
## Warning: Removed 50 rows containing missing values (geom_point).

cd_model_pruned <- lmer(log_rt ~ culture * task_info + (1 | subject) ,  data = raw_CD %>% filter(!(trial_info %in% cultural_diff_prompt)) )

summary(cd_model_pruned)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log_rt ~ culture * task_info + (1 | subject)
##    Data: raw_CD %>% filter(!(trial_info %in% cultural_diff_prompt))
## 
## REML criterion at convergence: 17507.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.1402 -0.6796 -0.0764  0.5986  4.1977 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subject  (Intercept) 0.2089   0.4570  
##  Residual             0.4839   0.6957  
## Number of obs: 7820, groups:  subject, 468
## 
## Fixed effects:
##                            Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)                 8.71410    0.03853  530.91174 226.170  < 2e-16 ***
## cultureUS                   0.11891    0.04863  533.59586   2.445 0.014795 *  
## task_infofocal              0.10255    0.02656 7389.18837   3.860 0.000114 ***
## cultureUS:task_infofocal   -0.04226    0.03367 7394.70876  -1.255 0.209456    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cltrUS tsk_nf
## cultureUS   -0.792              
## task_inffcl -0.266  0.211       
## cltrUS:tsk_  0.210 -0.266 -0.789

Free Description

Model

Free description: (logistic regression) first_mention ~ culture + (1 | subject) + (culture | picture)

mention_df <- tidy_d %>% 
  filter(task_name == "FD", resp_type == "first_mention_focal") %>% 
    mutate(first_mention = as.factor(case_when(
    resp == "1" ~ "focal",
    resp == "0" ~ "background")), 
    scene = trial_info) %>% 
  select(-resp, -task_info, -resp_type, -trial_info)

mention_model <- glmer(first_mention ~ culture + (1 | subject)+(culture | scene), family = binomial, data = mention_df)


#mention_model <- glmer(first_mention ~ culture + (1 | subject) , 
#                   family = binomial, data = mention_df)

mention_model
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: first_mention ~ culture + (1 | subject) + (culture | scene)
##    Data: mention_df
##       AIC       BIC    logLik  deviance  df.resid 
##  2082.526  2119.103 -1035.263  2070.526      3276 
## Random effects:
##  Groups  Name        Std.Dev. Corr 
##  subject (Intercept) 1.5592        
##  scene   (Intercept) 1.0659        
##          cultureUS   0.4595   -0.70
## Number of obs: 3282, groups:  subject, 469; scene, 7
## Fixed Effects:
## (Intercept)    cultureUS  
##       1.117        2.994
summary(mention_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: first_mention ~ culture + (1 | subject) + (culture | scene)
##    Data: mention_df
## 
##      AIC      BIC   logLik deviance df.resid 
##   2082.5   2119.1  -1035.3   2070.5     3276 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.7794  0.0901  0.1366  0.3162  3.2930 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev. Corr 
##  subject (Intercept) 2.4310   1.5592        
##  scene   (Intercept) 1.1362   1.0659        
##          cultureUS   0.2112   0.4595   -0.70
## Number of obs: 3282, groups:  subject, 469; scene, 7
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.1167     0.4290   2.603  0.00924 ** 
## cultureUS     2.9942     0.3042   9.843  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## cultureUS -0.514
summary(mention_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: first_mention ~ culture + (1 | subject) + (culture | scene)
##    Data: mention_df
## 
##      AIC      BIC   logLik deviance df.resid 
##   2082.5   2119.1  -1035.3   2070.5     3276 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.7794  0.0901  0.1366  0.3162  3.2930 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev. Corr 
##  subject (Intercept) 2.4310   1.5592        
##  scene   (Intercept) 1.1362   1.0659        
##          cultureUS   0.2112   0.4595   -0.70
## Number of obs: 3282, groups:  subject, 469; scene, 7
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.1167     0.4290   2.603  0.00924 ** 
## cultureUS     2.9942     0.3042   9.843  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## cultureUS -0.514

Visualization

FD_raw <- tidy_d %>% 
  filter(task_name == "FD") %>% 
  group_by(subject, culture) %>%
  summarise(first_mention = mean(as.numeric(resp))) 
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
FD_raw %>% 
  ggplot(aes(x = culture, y = first_mention, color = culture)) + 
  geom_point(alpha = .2, position = position_jitter(width = .1)) + 
  stat_summary(fun.data = "mean_cl_boot", color = "black") + 
  scale_y_continuous(breaks = seq(0,1,0.5), 
                     labels = {function(x) paste0(as.character(x*100),"%")})+
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("proportion first mention focal") + 
xlab("") +
theme_classic() + 
labs(title = "Free description") +
theme(plot.title = element_text(hjust = 0.5, size = 8), 
      plot.subtitle = element_text(hjust = 0.5, size = 6), 
      text = element_text(size=8))

Causal attribution

Model

Causal attribution (linear regression): rating ~ culture * attribution_type + (attribution_type | subject) + (culture | item)

ca_df <- tidy_d %>% 
  filter(task_name == "CA") %>% 
  mutate(attribution_type = task_info, 
         item = trial_info,
         rating = as.numeric(resp)) %>% 
  select(culture, subject,attribution_type,item, rating)

ca_model <- lmer(rating ~ culture * attribution_type + (attribution_type | subject) + (culture | item),  data = ca_df)

summary(ca_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: rating ~ culture * attribution_type + (attribution_type | subject) +  
##     (culture | item)
##    Data: ca_df
## 
## REML criterion at convergence: 17076.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8871 -0.6313 -0.0996  0.5959  3.5181 
## 
## Random effects:
##  Groups   Name                        Variance Std.Dev. Corr 
##  subject  (Intercept)                 0.94212  0.9706        
##           attribution_typesituational 1.04324  1.0214   -0.72
##  item     (Intercept)                 0.12027  0.3468        
##           cultureUS                   0.08851  0.2975   0.30 
##  Residual                             1.77337  1.3317        
## Number of obs: 4703, groups:  subject, 409; item, 12
## 
## Fixed effects:
##                                       Estimate Std. Error       df t value
## (Intercept)                            3.16954    0.17526 18.14969  18.085
## cultureUS                             -0.07967    0.17218 25.45322  -0.463
## attribution_typesituational           -0.21294    0.23357 14.34566  -0.912
## cultureUS:attribution_typesituational -1.32982    0.22290 17.96075  -5.966
##                                       Pr(>|t|)    
## (Intercept)                           4.66e-13 ***
## cultureUS                                0.648    
## attribution_typesituational              0.377    
## cultureUS:attribution_typesituational 1.22e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cltrUS attrb_
## cultureUS   -0.180              
## attrbtn_typ -0.702  0.086       
## cltrUS:ttr_  0.089 -0.701 -0.077

Visualization

raw_CA <- tidy_d %>% 
  filter(task_name == "CA") %>% 
  mutate(resp = as.numeric(resp) + 1)

ggplot(data = raw_CA, 
       aes(y = resp, x = culture, color = culture)) +
 geom_point(alpha = .2, position = position_jitter(width = .1)) + 
  stat_summary(fun.data = "mean_cl_boot", color = "black")  +
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("LS rating (1-7)") + 
xlab("") +
theme_classic() + 
labs(title = "Causal Attribution") +
theme(plot.title = element_text(hjust = 0.5, size = 8), 
      plot.subtitle = element_text(hjust = 0.5, size = 6), 
      text = element_text(size=8)) + 
  facet_wrap(~task_info)
## Warning: Removed 361 rows containing non-finite values (stat_summary).
## Warning: Removed 361 rows containing missing values (geom_point).

Symbolic Self Inflation

Model

Symbolic self-inflation (linear regression): percent_inflation ~ culture

si_df <- tidy_d %>% 
  filter(task_name == "SSI") %>% 
  filter(resp_type == "task_score_ratio") %>% 
  mutate(score = as.numeric(resp)) %>% 
  select(-resp, -task_info, -trial_info, -resp_type)

si_model <- glm(score ~ culture, family=gaussian, data = si_df)

si_model
## 
## Call:  glm(formula = score ~ culture, family = gaussian, data = si_df)
## 
## Coefficients:
## (Intercept)    cultureUS  
##      1.4392      -0.1268  
## 
## Degrees of Freedom: 416 Total (i.e. Null);  415 Residual
## Null Deviance:       142.4 
## Residual Deviance: 140.9     AIC: 736.8
summary(si_model)
## 
## Call:
## glm(formula = score ~ culture, family = gaussian, data = si_df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1877  -0.3675  -0.1396   0.2042   3.1062  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.43921    0.04591  31.345   <2e-16 ***
## cultureUS   -0.12678    0.05860  -2.163   0.0311 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.3394127)
## 
##     Null deviance: 142.44  on 416  degrees of freedom
## Residual deviance: 140.86  on 415  degrees of freedom
## AIC: 736.81
## 
## Number of Fisher Scoring iterations: 2

Visualization

SSI_raw <- tidy_d %>% 
  filter(task_name == "SSI") %>% 
  mutate(resp = as.numeric(resp)) 


ggplot(data = SSI_raw %>% filter(resp_type == "task_score_ratio"), 
       aes(y = resp, x = culture, color = culture)) +

geom_point(aes(y = resp, color = culture), 
           position = position_jitter(width = .15), size = .5, alpha = 0.8) +
  stat_summary(fun.data = "mean_cl_boot", color = "black")  +

scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("") + 
xlab("") +
theme_classic() + 
labs(title = "Symbolic Self Inflation") +
theme(plot.title = element_text(hjust = 0.5, size = 8), 
      plot.subtitle = element_text(hjust = 0.5, size = 6), 
      text = element_text(size=8)) 

Triads

Model

Taxonomic/thematic similarity task: (logistic regression) choice ~ culture + (1 | subject) + (culture | item)

TD_catch_failed <- tidy_d %>% 
  filter(task_name == "TD") %>% 
  filter(task_info == "catch") %>% 
  filter(resp = FALSE) %>% 
  pull(subject)

triads_d <- tidy_d %>% 
    filter(!subject %in% TD_catch_failed) %>% 
  filter(task_name == "TD") %>% 
  mutate(choice = as.factor(resp), 
         item = trial_info) %>% 
  select(choice, culture, subject, item)
 

triads_model <- glmer(choice ~ culture + (1 | subject) + (culture | item), family = binomial, data = triads_d)
 
summary(triads_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: choice ~ culture + (1 | subject) + (culture | item)
##    Data: triads_d
## 
##      AIC      BIC   logLik deviance df.resid 
##   6370.2   6412.1  -3179.1   6358.2     7967 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.7870 -0.3425  0.1279  0.4031 13.3032 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev. Corr
##  subject (Intercept) 3.0046   1.7334       
##  item    (Intercept) 5.5174   2.3489       
##          cultureUS   0.4674   0.6836   1.00
## Number of obs: 7973, groups:  subject, 469; item, 60
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -0.1052     0.4598  -0.229   0.8191  
## cultureUS     1.9036     0.8782   2.168   0.0302 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## cultureUS -0.524

Visualization

TD_catch_failed <- tidy_d %>% 
  filter(task_name == "TD") %>% 
  filter(task_info == "catch") %>% 
  filter(resp = FALSE) %>% 
  pull(subject)

TD_raw <- tidy_d %>% 
  filter(!subject %in% TD_catch_failed) %>% 
  filter(task_name == "TD") %>% 
  group_by(subject, culture) %>%
  summarise(tax_match = mean(as.logical(resp)))
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
ggplot(data = TD_raw, 
       aes(y = tax_match, x = culture, color = culture)) +
geom_point(aes(y = tax_match, color = culture), 
           position = position_jitter(width = .15), size = .5, alpha = 0.8) +
  stat_summary(fun.data = "mean_cl_boot", color = "black")  +
scale_y_continuous(breaks = seq(0,1,0.5), 
                     labels = {function(x) paste0(as.character(x*100),"%")})+
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("Triads taxonomic match") + 
xlab("") +
theme_classic() + 
labs(title = "Triads") +
theme(plot.title = element_text(hjust = 0.5, size = 8), 
      plot.subtitle = element_text(hjust = 0.5, size = 6), 
      text = element_text(size=8))

Semantic Intuition

Model

Semantic intuition: (logistic regression) choice ~ culture + (1 | subject) + (culture | item)

SeI_df <- tidy_d %>% 
  filter(task_name == "SeI") %>% 
  filter(task_info == "critical") %>% 
  mutate(
    choice_causal = case_when(
      resp == "causal_historical" ~ TRUE, 
      resp == "descriptivist" ~ FALSE
    )
  ) %>% 
  mutate(item = trial_info) %>% 
  select(choice_causal, culture, subject, item)

#SeI_model <- glmer(choice_causal ~ culture + (1 | subject) + (culture | item), family = binomial, data = SeI_df)

SeI_model <- glmer(choice_causal ~ culture + (1 | subject) , family = binomial, data = SeI_df, 
                   control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e5)))

summary(SeI_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: choice_causal ~ culture + (1 | subject)
##    Data: SeI_df
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##      AIC      BIC   logLik deviance df.resid 
##   1086.3   1100.8   -540.1   1080.3      935 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2532 -0.4073  0.2405  0.3685  0.9718 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  subject (Intercept) 7.242    2.691   
## Number of obs: 938, groups:  subject, 469
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.2645     0.2771   0.955     0.34    
## cultureUS     1.7941     0.3988   4.499 6.83e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## cultureUS -0.636

Visualization

SeI_raw <- tidy_d %>% 
  filter(task_name == "SeI") %>% 
  filter(task_info == "critical") %>% 
  mutate(causal_historical_choice = case_when(
    resp == "causal_historical" ~ TRUE,
    resp == "descriptivist" ~ FALSE
  )) %>% 
  group_by(subject, culture) %>%
  summarise(causal_historical_resp = mean(causal_historical_choice))
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
ggplot(data = SeI_raw, 
       aes(y = causal_historical_resp, x = culture, color = culture)) +
geom_point(aes(y = causal_historical_resp, color = culture), 
           position = position_jitter(width = .15), size = .5, alpha = 0.8) +
  stat_summary(fun.data = "mean_cl_boot", color = "black")  +
scale_y_continuous(breaks = seq(0,1,0.5), 
                     labels = {function(x) paste0(as.character(x*100),"%")})+
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("Causal Historical Choice") + 
xlab("") +
theme_classic() + 
labs(title = "Semantic Intuition") +
theme(plot.title = element_text(hjust = 0.5, size = 8), 
      plot.subtitle = element_text(hjust = 0.5, size = 6), 
      text = element_text(size=8))

Ambiguous RMTS

Model

Ambiguous RMTS (logistic regression): choice ~ culture + (trial_num | subject)

rmts_df <- tidy_d %>% 
  filter(task_name == "RMTS") %>% 
  mutate(choice = as.factor(case_when(
    resp == "1" ~ "rel",
    resp == "0" ~ "obj"))
         ) %>%
  group_by(subject) %>% 
  mutate(trial_num = as.factor(row_number())) %>% 
  select(-resp, -task_info, -trial_info, -resp_type)
# model 0: not converging 
#rmts_model <- glmer(choice ~ culture + (trial_num | subject), family = binomial, data = rmts_df)

# model 1: 
rmts_model <- glmer(choice ~ culture + (1 | subject), family = binomial, data = rmts_df)

#rmts_model
summary(rmts_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: choice ~ culture + (1 | subject)
##    Data: rmts_df
## 
##      AIC      BIC   logLik deviance df.resid 
##   1721.6   1738.2   -857.8   1715.6     1873 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6016 -0.1880 -0.1848  0.2487  1.7378 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  subject (Intercept) 16.58    4.071   
## Number of obs: 1876, groups:  subject, 469
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  -1.1874     0.3890  -3.052  0.00227 **
## cultureUS     0.1067     0.4842   0.220  0.82562   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## cultureUS -0.770

Visualization

RMTS_raw <- tidy_d %>% 
  filter(task_name == "RMTS") %>% 
  group_by(subject, culture) %>%
  summarise(relational_choice = mean(as.numeric(resp)))
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
ggplot(data = RMTS_raw, 
       aes(y = relational_choice, x = culture, color = culture)) +
geom_point(aes(y = relational_choice, color = culture), 
           position = position_jitter(width = .15), size = .5, alpha = 0.8) +
stat_summary(fun.data = "mean_cl_boot", color = "black") + 
scale_y_continuous(breaks = seq(0,1,0.5), 
                     labels = {function(x) paste0(as.character(x*100),"%")})+
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("Proportion relational choice") + 
xlab("") +
theme_classic() + 
labs(title = "Ambiguous RMTS") +
theme(plot.title = element_text(hjust = 0.5, size = 8), 
      plot.subtitle = element_text(hjust = 0.5, size = 6), 
      text = element_text(size=8))

Ravens

Model

Raven (logistic regression): acc ~ culture + (1 | subject) + (1 | trial)

rv_df <- tidy_d %>% 
  filter(task_name == "RV") %>% 
  mutate(acc = as.numeric(resp)) %>% 
  group_by(subject) %>% 
  mutate(trial = as.factor(row_number())) %>% 
  select(-resp, -task_info, -trial_info, -resp_type)

rv_model <- glmer(acc ~ culture + (1 | subject) + (culture | trial), family = binomial, data = rv_df)

rv_model
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: acc ~ culture + (1 | subject) + (culture | trial)
##    Data: rv_df
##       AIC       BIC    logLik  deviance  df.resid 
##  5470.205  5510.018 -2729.103  5458.205      5622 
## Random effects:
##  Groups  Name        Std.Dev. Corr 
##  subject (Intercept) 1.689         
##  trial   (Intercept) 1.336         
##          cultureUS   0.568    -0.11
## Number of obs: 5628, groups:  subject, 469; trial, 12
## Fixed Effects:
## (Intercept)    cultureUS  
##       1.734       -1.786
summary(rv_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: acc ~ culture + (1 | subject) + (culture | trial)
##    Data: rv_df
## 
##      AIC      BIC   logLik deviance df.resid 
##   5470.2   5510.0  -2729.1   5458.2     5622 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.2272 -0.4963  0.1863  0.4771 10.8082 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev. Corr 
##  subject (Intercept) 2.8515   1.689         
##  trial   (Intercept) 1.7855   1.336         
##          cultureUS   0.3226   0.568    -0.11
## Number of obs: 5628, groups:  subject, 469; trial, 12
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.7343     0.4140   4.189 2.80e-05 ***
## cultureUS    -1.7862     0.2472  -7.226 4.99e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## cultureUS -0.290

Visualization

RV_raw <- tidy_d %>% 
  filter(task_name == "RV") %>% 
  group_by(subject, culture) %>%
  summarise(RV_resp = mean(as.numeric(resp)))
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
ggplot(data = RV_raw, 
       aes(y = RV_resp, x = culture, color = culture)) +
geom_point(aes(y = RV_resp, color = culture), 
           position = position_jitter(width = .15), size = .5, alpha = 0.8) +
stat_summary(fun.data = "mean_cl_boot", color = "black") + 
scale_y_continuous(breaks = seq(0,1,0.5), 
                     labels = {function(x) paste0(as.character(x*100),"%")})+
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("Ravens proportion correct") + 
xlab("") +
theme_classic() + 
labs(title = "Ravens") +
theme(plot.title = element_text(hjust = 0.5, size = 8), 
      plot.subtitle = element_text(hjust = 0.5, size = 6), 
      text = element_text(size=8))

Meta plots

Partial pooling DIY

helper function

https://stats.stackexchange.com/questions/257985/how-can-i-derive-effect-sizes-in-lme4-and-describe-the-magnitude-of-fixed-effect

get_d_ci_df <- function(d, main_df, task_name_str){
  
  subject_count_df <- main_df %>% 
    filter(task_name == task_name_str) %>% 
    distinct(subject, culture) %>% 
    group_by(culture) %>% 
    count()
  
  var_d <- (sum(subject_count_df$n)/prod(subject_count_df$n)) + 
    (d^2) / (2 * (sum(subject_count_df$n)))
  
  ci_lower <- d - 1.96 * sqrt(var_d)
  ci_upper <- d + 1.96 * sqrt(var_d)
  
  d_ci_df <- tibble(task_name = task_name_str, 
                    d = d, 
                    d_ci_lower = ci_lower, 
                    d_ci_upper = ci_upper)
    
  return (d_ci_df)
}

get_d_partial_pooling <- function(main_df, model, task_name_str){
  
  # task_name_str 
  interaction_effect_task <- c("CD", "CA", "EBB")

  # get variance 
  varcor_df <- summary(model)$varcor %>% 
               as.data.frame() %>% 
               # not entirely clear what this means 
               # but the doc says: second variable (NA for variance parameters)
               filter(is.na(var2)) 
  
  # calculate the pooled variance 
  pooled_var <- sqrt(sum(varcor_df$vcov))
  
  # get estimate for fixed effect
  fixed_effect_df <- summary(model)$coefficients %>% 
    as.data.frame() %>% rownames_to_column("term_name") 
  
  # branching because some cultural effects are main and some are interactions
  if(!(task_name_str %in% interaction_effect_task)){
    estimate <- fixed_effect_df %>% 
      filter(term_name == "cultureUS") %>% 
      pull(Estimate)
  }else{
    if(task_name_str == "EBB"){
       estimate <- fixed_effect_df %>% 
      filter(grepl("cultureUS:contextNC:size_diff", term_name)) %>% 
      pull(Estimate)
    }else{
      estimate <- fixed_effect_df %>% 
      filter(grepl("cultureUS:", term_name)) %>% 
      pull(Estimate)
    }
    
    
  }
  
  d <- estimate / pooled_var
  d_ci_df <- get_d_ci_df(d, main_df, task_name_str)
  
  return (d_ci_df)
  
}

get_d_no_pooling <- function(main_df, task_name_str){
  # needs to make sure it's us - cn to keep it consistent 
  # in complete yet 
  
  if(task_name_str == "SSI"){

    task_df <- main_df %>% 
     filter(resp_type == "task_score_ratio") %>% 
      filter(task_name == task_name_str) %>% 
      mutate(resp = as.numeric(resp)) 

  }else if(task_name_str == "CP"){
    task_df <- main_df %>% 
      filter(task_name == task_name_str)
    
  }else if(task_name_str == "SI"){
    task_df <- main_df %>% 
      filter(task_name == task_name_str) %>% 
      filter(resp_type == "inflation_score_ratio")
  }else if(task_name_str == "CA"){
    # for study 1
    task_df <- main_df %>% 
      # first is for s1, then for s2
      filter(grepl("situation", resp_type) | 
             grepl("situation", trial_info))
  }else if(task_name_str == "HZ"){
    task_df <- main_df %>% 
      filter(task_name == "HZ", resp_type == "hz_height")
  }
  
  
    mean_df <- task_df %>% 
          group_by(culture) %>% 
          summarise(mean_resp = mean(resp, na.rm = TRUE))
    sd <- sd(task_df$resp, na.rm = TRUE)
    
    d = (filter(mean_df, culture == "US")$mean_resp - filter(mean_df, culture == "CN")$mean_resp) / sd
    
    d_var = get_d_ci_df(d, main_df, task_name_str)
    return (d_var)
  
}

Stduy 1 ES

s1_d <- read_csv(here("data/s1_tidy_main.csv"))
## New names:
## * `` -> ...1
## Rows: 37595 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): subject, culture, task_name, task_info, trial_info, resp_type
## dbl (2): ...1, resp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# RMTS 
rmts_df <- s1_d %>% 
  filter(task_name == "RMTS") %>% 
  mutate(choice = as.factor(case_when(
    resp == "1" ~ "rel",
    resp == "0" ~ "obj"))
         ) %>%
  group_by(subject) %>% 
  mutate(trial_num = as.factor(row_number())) %>% 
  select(-resp, -task_info, -trial_info, -resp_type)
# model 0: not converging 
#rmts_model <- glmer(choice ~ culture + (trial_num | subject), family = binomial, data = rmts_df)

# model 1: 
s1_rmts_model <- glmer(choice ~ culture + (1 | subject), family = binomial, data = rmts_df)



# RV
rv_df <- s1_d %>% 
  filter(task_name == "RV") %>% 
  mutate(acc = as.numeric(resp)) %>% 
  group_by(subject) %>% 
  mutate(trial = as.factor(row_number())) %>% 
  select(-resp, -task_info, -trial_info, -resp_type)

s1_rv_model <- glmer(acc ~ culture + (1 | subject) + (culture | trial), family = binomial, data = rv_df)


# CP
cp_df <- s1_d %>% 
  filter(task_name == "CP") %>% 
  mutate(choice = as.factor(case_when(
    resp == "1" ~ "uniq",
    resp == "0" ~ "non_uniq"))
         ) %>% 
  select(-resp, -task_info, -trial_info, -resp_type)

cp_model <- glm(choice ~ culture, 
                   family=binomial(link="logit"),
                  data = cp_df)

# SSI 
ssi_df <- s1_d %>% 
  filter(task_name == "SI") %>% 
  filter(resp_type == "inflation_score_ratio") %>% 
  mutate(score = resp) %>% 
  select(-resp, -task_info, -trial_info, -resp_type)

s1_ssi_model <- glm(score ~ culture, family=gaussian, data = ssi_df)


# CA

ca_df <- s1_d %>% 
  filter(task_name == "CA") %>% 
  mutate(attrib_num = as.numeric(resp),
         attrib_binary = replace(attrib_num, attrib_num > 1, 1),
         trial = trial_info,
         attrib_type = factor(resp_type)) %>% 
  select(-resp, -task_info)

#ca_model <- glmer(attrib_num ~ attrib_type * culture + (attrib_type | subject) + (culture | trial), family=poisson, data = ca_df, control=glmerControl(optimizer="bobyqa"))
#boundary (singular) fit: see ?isSingular

#ca_model_binary <- glmer(attrib_binary ~ attrib_type * culture + (attrib_type | subject) + (culture | trial), family=binomial, data = ca_df, control=glmerControl(optimizer="bobyqa"))
#boundary (singular) fit: see ?isSingular

#ca_model1 <- glmer(attrib_num ~ attrib_type * culture + (attrib_type | subject) + (1 | trial), family=poisson, data = ca_df, control=glmerControl(optimizer="bobyqa"))
#boundary (singular) fit: see ?isSingular

#ca_model2 <- glmer(attrib_num ~ attrib_type * culture + (1 | subject) + (1 | trial), family=poisson, data = ca_df, control=glmerControl(optimizer="bobyqa"))
#boundary (singular) fit: see ?isSingular

#ca_model3 <- glmer(attrib_num ~ attrib_type * culture + (1 | subject), family=poisson, data = ca_df, control=glmerControl(optimizer="bobyqa"))
#boundary (singular) fit: see ?isSingular

s1_ca_model <- glm(attrib_num ~ attrib_type * culture, family=poisson, data = ca_df)



# HZ
HZ_height_df <- s1_d %>% 
  filter(task_name == "HZ", resp_type == "hz_height") %>% 
  mutate(
    height = resp
         ) %>% 
  select(-resp, -task_info, -trial_info)

HZ_height_model <- lm(height ~ culture, 
                      data = HZ_height_df)


ebb_df <- s1_d %>% 
  filter(task_name == "EBB", task_info != "HELPFUL") %>% 
  mutate(correct = as.factor(case_when(
    resp == "1" ~ "correct",
    resp == "0" ~ "incorrect")), 
         context = task_info, 
         size_diff = as.numeric(trial_info), 
         ) %>% 
  select(-resp, -task_info, -trial_info)
  
#full model
#ebb_model <- glmer(correct ~ culture * context * size_diff + (size_diff * context | subject), family = binomial, data = ebb_df, control=glmerControl(optimizer="bobyqa"))
#convergence code 1 from bobyqa: bobyqa -- maximum number of function evaluations exceeded
#boundary (singular) fit: see ?isSingular

#model 2 (if full does not converge)
#ebb_model <- glmer(correct ~ culture * context * size_diff + (context | subject), family = binomial, data = ebb_df, control=glmerControl(optimizer="bobyqa"))
#boundary (singular) fit: see ?isSingular


#model 3
ebb_model <- glmer(correct ~ culture * context * size_diff + ( 1 | subject), family = binomial, data = ebb_df, control=glmerControl(optimizer="bobyqa"))


# FD

s1_mention_df <- s1_d %>% 
  filter(task_name == "FD", resp_type == "first_mention_focal") %>% 
    mutate(first_mention = as.factor(case_when(
    resp == "1" ~ "focal",
    resp == "0" ~ "background")), 
    scene = trial_info) %>% 
  select(-resp, -task_info, -resp_type, -trial_info)

#mention_model <- glmer(first_mention ~ culture + (1 | subject)+(culture | scene), family = binomial, data = mention_df)
#Error: number of observations (=1146) < number of random effects (=1148) for term (scene | subject); the random-effects parameters are probably unidentifiable

s1_mention_model <- glmer(first_mention ~ culture + (1 | subject) , 
                   family = binomial, data = s1_mention_df)

Calculating ES

s1_df_pp <- bind_rows(
  get_d_partial_pooling(s1_d, s1_rmts_model, "RMTS"), 
  get_d_partial_pooling(s1_d, s1_rv_model, "RV"),
  get_d_no_pooling(s1_d, "CP"),
  get_d_no_pooling(s1_d, "SI"),
  get_d_no_pooling(s1_d, "CA"),
  get_d_no_pooling(s1_d, "HZ"),
  get_d_partial_pooling(s1_d, ebb_model, "EBB"),
  get_d_partial_pooling(s1_d, s1_mention_model, "FD")
) %>% 
  mutate(study_num = "Study 1")


s2_df_pp <- bind_rows(
  get_d_partial_pooling(tidy_d, cd_model, "CD"), 
  get_d_partial_pooling(tidy_d, mention_model, "FD"),
  get_d_partial_pooling(tidy_d, ca_model, "CA") , 
  get_d_no_pooling(tidy_d,  "SSI") , 
  get_d_partial_pooling(tidy_d, triads_model, "TD"), 
  get_d_partial_pooling(tidy_d, SeI_model, "SeI"), 
  get_d_partial_pooling(tidy_d, rmts_model, "RMTS"), 
  get_d_partial_pooling(tidy_d, rv_model, "RV")) %>% 
  mutate(study_num = "Study 2")


d_df <- bind_rows(s1_df_pp, s2_df_pp) %>% 
  mutate(
    task_name_print = case_when(
        grepl("RMTS", task_name) ~ "RMTS", 
        grepl("RV", task_name) ~ "Ravens", 
        grepl("SI", task_name) | grepl("SSI", task_name) ~ "Symbolic Self Inflation", 
        grepl("FD", task_name) ~ "Free Description", 
        grepl("CP", task_name) ~ "Uniqueness Preference", 
        grepl("CA", task_name) & study_num == "Study 1" ~ "Children Causal Attribution", 
        grepl("CA", task_name) & study_num == "Study 2" ~ "Adult Causal Attribution", 
        grepl("HZ", task_name) ~ "Horizon", 
        grepl("EBB", task_name) ~ "Ebbinghaus Illusion", 
        grepl("CD", task_name) ~ "Change Detection", 
        grepl("TD", task_name) ~ "Triads", 
        grepl("SeI", task_name) ~ "Semantic Intuition"
    )) %>% 
  mutate(significance =  !(d_ci_lower < 0 & d_ci_upper > 0), 
         advantage = case_when(
           d > 0 & significance ~ "US", 
           d < 0 & significance ~ "CN", 
           TRUE ~ "No Difference"
         ))

d_df %>% 
  arrange(-d) %>% 
  ggplot(aes(x = reorder(task_name_print,d), y = d, 
             ymin = d_ci_lower, ymax = d_ci_upper, 
             color = advantage, 
             shape = significance)) + 
  geom_hline(yintercept = 0, lty = 2) + 
  geom_pointrange(aes( 
    x = reorder(task_name_print,d), y = d, 
             ymin = d_ci_lower, ymax = d_ci_upper, group = study_num
    ), position = position_dodge(width = .6)) + 
   scale_shape_manual(values = c(1, 16), guide = "none") + 
  scale_color_manual(values = c("red", "gray", "blue")) + 
  coord_flip() + 
  theme_classic() + 
  ylab("US-CN difference") + 
  xlab("") + 
  theme(legend.title = element_blank())