library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## here() starts at /Users/caoanjie/Desktop/projects/CCRR_analysis/study_2
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(effectsize)
demog_d <- read_csv(here("data/4_processed/demog_df.csv"))
## Rows: 24877 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): subject, culture, demog_question, demog_response
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
age_range <- demog_d %>%
filter(demog_question == "age") %>%
mutate(demog_response = as.numeric(demog_response)) %>%
group_by(culture) %>%
summarise(
max_age = max(demog_response),
min_age = min(demog_response),
mean_age = mean(demog_response),
sd_age = sd(demog_response)
)
age_df <- demog_d %>%
filter(demog_question == "age") %>%
mutate(demog_response = as.numeric(demog_response)) %>%
filter(culture == "CN" |
culture == "US" &
demog_response < 35) %>%
group_by(culture) %>%
summarise(
max_age = max(demog_response),
min_age = min(demog_response),
mean_age = mean(demog_response),
sd_age = sd(demog_response)
)
match_subject <- demog_d %>%
filter(demog_question == "age") %>%
mutate(demog_response = as.numeric(demog_response)) %>%
filter(culture == "CN" |
culture == "US" &
demog_response < 35) %>%
pull(subject)
tidy_d <- read_csv(here("data/4_processed/with_human_coded_main.csv")) %>%
filter(subject %in% match_subject)
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 41284 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): subject, culture, task_name, task_info, trial_info, resp_type, resp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
binomial_model_to_es <- function(model, task_name){
z <- ((summary(model)$coefficient) %>% as.data.frame())$`z value`[[2]] # predictor culture US
obs <- length(summary(model)$residuals)
if(obs == 0){
obs <- summary(model)$df.residual # this needs to be fixed later
}
es <- z_to_d(z, obs) %>%
as.data.frame() %>%
mutate(task_name = task_name)
return (es)
}
age_df
## # A tibble: 2 × 5
## culture max_age min_age mean_age sd_age
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CN 50 18 22.2 3.99
## 2 US 34 19 27.9 3.97
demog_d %>%
filter(demog_question == "age") %>%
mutate(demog_response = as.numeric(demog_response)) %>%
filter(culture == "CN" |
culture == "US" &
demog_response < 35) %>%
group_by(culture) %>%
count()
## # A tibble: 2 × 2
## # Groups: culture [2]
## culture n
## <chr> <int>
## 1 CN 168
## 2 US 119
Change detection (linear regression): log(reaction_time) ~ culture * type_of_change + (type_of_change | subject) + (culture | picture)
cd_df <- tidy_d %>%
filter(task_name == "CD") %>%
mutate(reaction_time = as.numeric(resp),
type_of_change = task_info,
picture = trial_info) %>%
select(culture, subject,reaction_time,type_of_change, picture) %>%
filter(!is.na(reaction_time))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
#cd_model <- lmer(log(reaction_time) ~ culture * type_of_change + (type_of_change | subject) + (culture | picture), data = cd_df)
#cd_model <- lmer(log(reaction_time) ~ culture * type_of_change + (type_of_change | subject) , data = cd_df)
cd_model <- lmer(log(reaction_time) ~ culture * type_of_change + (1 | subject) , data = cd_df)
summary(cd_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(reaction_time) ~ culture * type_of_change + (1 | subject)
## Data: cd_df
##
## REML criterion at convergence: 16466.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.7906 -0.6784 -0.0697 0.6077 4.4744
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.2536 0.5036
## Residual 0.4769 0.6906
## Number of obs: 7477, groups: subject, 286
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 8.78751 0.04144 318.17086 212.058 < 2e-16
## cultureUS -0.02435 0.06448 317.94796 -0.378 0.705944
## type_of_changefocal 0.07115 0.02103 7209.09538 3.384 0.000719
## cultureUS:type_of_changefocal 0.03289 0.03278 7208.45958 1.003 0.315781
##
## (Intercept) ***
## cultureUS
## type_of_changefocal ***
## cultureUS:type_of_changefocal
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cltrUS typ_f_
## cultureUS -0.643
## typ_f_chngf -0.232 0.149
## cltrUS:ty__ 0.149 -0.232 -0.641
raw_CD <- tidy_d %>%
filter(task_name == "CD") %>%
mutate(log_rt = log(as.numeric(resp)))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
ggplot(data = raw_CD,
aes(y = log_rt, x = culture, color = culture)) +
geom_point(alpha = .2, position = position_jitter(width = .1)) +
stat_summary(fun.data = "mean_cl_boot", color = "black") +
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("log RT (ms)") +
xlab("") +
theme_classic() +
labs(title = "Change Detection") +
theme(plot.title = element_text(hjust = 0.5, size = 8),
plot.subtitle = element_text(hjust = 0.5, size = 6),
text = element_text(size=8))+
facet_wrap(~task_info)
## Warning: Removed 39 rows containing non-finite values (stat_summary).
## Warning: Removed 39 rows containing missing values (geom_point).
cn_coded <- read_csv(here("data/annotated/CN/CD_wr.csv"))
## Rows: 5190 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): subject, culture, prompt, response, coder
## dbl (1): correct
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
us_coded <- read_csv(here("data/annotated/US/CD_wr.csv"))
## Rows: 8880 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): subject, culture, prompt, response, coder
## dbl (1): correct
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
coded_df <- bind_rows(cn_coded, us_coded)
coded_df %>%
ggplot(aes(x = culture, y = correct)) +
stat_summary(fun.data = "mean_cl_boot") +
facet_wrap(~prompt)
# so it's clear that there are some big item level variations
# what if we took out all the ones with cultural differences in accuracy
c <- coded_df %>%
nest_by(prompt)
c$culture_pred <- lapply((coded_df %>%
nest_by(prompt))$data ,
function(d){
(glm(correct ~ culture , family = binomial, data = d) %>%
summary())$coefficients %>%
as.data.frame() %>%
rownames_to_column() %>% filter(rowname == "cultureUS") %>% pull(`Pr(>|z|)`)
})
c %>%
select(prompt, culture_pred) %>%
unnest(culture_pred) %>%
left_join(coded_df, by = "prompt") %>%
filter(culture_pred < 0.05) %>%
ggplot(aes(x = culture, y = correct)) +
geom_text(aes(label = round(culture_pred, 2))) +
stat_summary(fun.data = "mean_cl_boot") +
facet_wrap(~prompt)
cultural_diff_prompt <- c %>%
filter(culture_pred < 0.05) %>%
pull(prompt)
raw_CD %>%
filter(!(trial_info %in% cultural_diff_prompt)) %>%
ggplot(
aes(y = log_rt, x = culture, color = culture)) +
geom_point(alpha = .2, position = position_jitter(width = .1)) +
stat_summary(fun.data = "mean_cl_boot", color = "black") +
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("log RT (ms)") +
xlab("") +
theme_classic() +
labs(title = "Change Detection") +
theme(plot.title = element_text(hjust = 0.5, size = 8),
plot.subtitle = element_text(hjust = 0.5, size = 6),
text = element_text(size=8))+
facet_wrap(~task_info)
## Warning: Removed 28 rows containing non-finite values (stat_summary).
## Warning: Removed 28 rows containing missing values (geom_point).
cd_model_pruned <- lmer(log_rt ~ culture * task_info + (1 | subject) , data = raw_CD %>% filter(!(trial_info %in% cultural_diff_prompt)) )
summary(cd_model_pruned)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log_rt ~ culture * task_info + (1 | subject)
## Data: raw_CD %>% filter(!(trial_info %in% cultural_diff_prompt))
##
## REML criterion at convergence: 11008
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0963 -0.6843 -0.0903 0.5938 4.1830
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.2308 0.4804
## Residual 0.4911 0.7008
## Number of obs: 4875, groups: subject, 286
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 8.70500 0.04079 326.98782 213.390 < 2e-16 ***
## cultureUS -0.01454 0.06338 324.71362 -0.229 0.818708
## task_infofocal 0.10209 0.02718 4611.74233 3.756 0.000175 ***
## cultureUS:task_infofocal -0.02728 0.04238 4613.02494 -0.644 0.519716
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cltrUS tsk_nf
## cultureUS -0.644
## task_inffcl -0.257 0.165
## cltrUS:tsk_ 0.165 -0.252 -0.641
Free description: (logistic regression) first_mention ~ culture + (1 | subject) + (culture | picture)
mention_df <- tidy_d %>%
filter(task_name == "FD", resp_type == "first_mention_focal") %>%
mutate(first_mention = as.factor(case_when(
resp == "1" ~ "focal",
resp == "0" ~ "background")),
scene = trial_info) %>%
select(-resp, -task_info, -resp_type, -trial_info)
mention_model <- glmer(first_mention ~ culture + (1 | subject)+(culture | scene), family = binomial, data = mention_df)
#mention_model <- glmer(first_mention ~ culture + (1 | subject) ,
# family = binomial, data = mention_df)
mention_model
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: first_mention ~ culture + (1 | subject) + (culture | scene)
## Data: mention_df
## AIC BIC logLik deviance df.resid
## 1645.3763 1679.0087 -816.6882 1633.3763 2003
## Random effects:
## Groups Name Std.Dev. Corr
## subject (Intercept) 1.4222
## scene (Intercept) 1.0929
## cultureUS 0.2272 -0.48
## Number of obs: 2009, groups: subject, 287; scene, 7
## Fixed Effects:
## (Intercept) cultureUS
## 1.091 2.580
summary(mention_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: first_mention ~ culture + (1 | subject) + (culture | scene)
## Data: mention_df
##
## AIC BIC logLik deviance df.resid
## 1645.4 1679.0 -816.7 1633.4 2003
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.4003 0.0743 0.1849 0.3971 3.2163
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 2.0228 1.4222
## scene (Intercept) 1.1945 1.0929
## cultureUS 0.0516 0.2272 -0.48
## Number of obs: 2009, groups: subject, 287; scene, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0910 0.4364 2.500 0.0124 *
## cultureUS 2.5798 0.3085 8.362 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## cultureUS -0.244
summary(mention_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: first_mention ~ culture + (1 | subject) + (culture | scene)
## Data: mention_df
##
## AIC BIC logLik deviance df.resid
## 1645.4 1679.0 -816.7 1633.4 2003
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.4003 0.0743 0.1849 0.3971 3.2163
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 2.0228 1.4222
## scene (Intercept) 1.1945 1.0929
## cultureUS 0.0516 0.2272 -0.48
## Number of obs: 2009, groups: subject, 287; scene, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0910 0.4364 2.500 0.0124 *
## cultureUS 2.5798 0.3085 8.362 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## cultureUS -0.244
FD_raw <- tidy_d %>%
filter(task_name == "FD") %>%
group_by(subject, culture) %>%
summarise(first_mention = mean(as.numeric(resp)))
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
FD_raw %>%
ggplot(aes(x = culture, y = first_mention, color = culture)) +
geom_point(alpha = .2, position = position_jitter(width = .1)) +
stat_summary(fun.data = "mean_cl_boot", color = "black") +
scale_y_continuous(breaks = seq(0,1,0.5),
labels = {function(x) paste0(as.character(x*100),"%")})+
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("proportion first mention focal") +
xlab("") +
theme_classic() +
labs(title = "Free description") +
theme(plot.title = element_text(hjust = 0.5, size = 8),
plot.subtitle = element_text(hjust = 0.5, size = 6),
text = element_text(size=8))
Causal attribution (linear regression): rating ~ culture * attribution_type + (attribution_type | subject) + (culture | item)
ca_df <- tidy_d %>%
filter(task_name == "CA") %>%
mutate(attribution_type = task_info,
item = trial_info,
rating = as.numeric(resp)) %>%
select(culture, subject,attribution_type,item, rating)
ca_model <- lmer(rating ~ culture * attribution_type + (attribution_type | subject) + (culture | item), data = ca_df)
summary(ca_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: rating ~ culture * attribution_type + (attribution_type | subject) +
## (culture | item)
## Data: ca_df
##
## REML criterion at convergence: 9728.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9673 -0.6411 -0.0611 0.6260 3.5863
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.9796 0.9897
## attribution_typesituational 0.9697 0.9847 -0.68
## item (Intercept) 0.1236 0.3515
## cultureUS 0.1075 0.3278 0.44
## Residual 1.6752 1.2943
## Number of obs: 2708, groups: subject, 235; item, 12
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 3.1695 0.1773 18.2400 17.874
## cultureUS -0.1933 0.1983 27.0737 -0.975
## attribution_typesituational -0.2060 0.2339 13.8856 -0.881
## cultureUS:attribution_typesituational -1.0881 0.2499 17.3790 -4.354
## Pr(>|t|)
## (Intercept) 5.21e-13 ***
## cultureUS 0.338293
## attribution_typesituational 0.393452
## cultureUS:attribution_typesituational 0.000412 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cltrUS attrb_
## cultureUS -0.067
## attrbtn_typ -0.692 -0.008
## cltrUS:ttr_ -0.008 -0.685 0.059
raw_CA <- tidy_d %>%
filter(task_name == "CA") %>%
mutate(resp = as.numeric(resp) + 1)
ggplot(data = raw_CA,
aes(y = resp, x = culture, color = culture)) +
geom_point(alpha = .2, position = position_jitter(width = .1)) +
stat_summary(fun.data = "mean_cl_boot", color = "black") +
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("LS rating (1-7)") +
xlab("") +
theme_classic() +
labs(title = "Causal Attribution") +
theme(plot.title = element_text(hjust = 0.5, size = 8),
plot.subtitle = element_text(hjust = 0.5, size = 6),
text = element_text(size=8)) +
facet_wrap(~task_info)
## Warning: Removed 148 rows containing non-finite values (stat_summary).
## Warning: Removed 148 rows containing missing values (geom_point).
Symbolic self-inflation (linear regression): percent_inflation ~ culture
si_df <- tidy_d %>%
filter(task_name == "SSI") %>%
filter(resp_type == "task_score_ratio") %>%
mutate(score = as.numeric(resp)) %>%
select(-resp, -task_info, -trial_info, -resp_type)
si_model <- glm(score ~ culture, family=gaussian, data = si_df)
si_model
##
## Call: glm(formula = score ~ culture, family = gaussian, data = si_df)
##
## Coefficients:
## (Intercept) cultureUS
## 1.4526 -0.1548
##
## Degrees of Freedom: 260 Total (i.e. Null); 259 Residual
## Null Deviance: 91.75
## Residual Deviance: 90.25 AIC: 469.5
summary(si_model)
##
## Call:
## glm(formula = score ~ culture, family = gaussian, data = si_df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2011 -0.3861 -0.1309 0.2137 3.0739
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.45259 0.04726 30.735 <2e-16 ***
## cultureUS -0.15483 0.07451 -2.078 0.0387 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.3484453)
##
## Null deviance: 91.752 on 260 degrees of freedom
## Residual deviance: 90.247 on 259 degrees of freedom
## AIC: 469.51
##
## Number of Fisher Scoring iterations: 2
SSI_raw <- tidy_d %>%
filter(task_name == "SSI") %>%
mutate(resp = as.numeric(resp))
ggplot(data = SSI_raw %>% filter(resp_type == "task_score_ratio"),
aes(y = resp, x = culture, color = culture)) +
geom_point(aes(y = resp, color = culture),
position = position_jitter(width = .15), size = .5, alpha = 0.8) +
stat_summary(fun.data = "mean_cl_boot", color = "black") +
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("") +
xlab("") +
theme_classic() +
labs(title = "Symbolic Self Inflation") +
theme(plot.title = element_text(hjust = 0.5, size = 8),
plot.subtitle = element_text(hjust = 0.5, size = 6),
text = element_text(size=8))
Taxonomic/thematic similarity task: (logistic regression) choice ~ culture + (1 | subject) + (culture | item)
TD_catch_failed <- tidy_d %>%
filter(task_name == "TD") %>%
filter(task_info == "catch") %>%
filter(resp = FALSE) %>%
pull(subject)
triads_d <- tidy_d %>%
filter(!subject %in% TD_catch_failed) %>%
filter(task_name == "TD") %>%
mutate(choice = as.factor(resp),
item = trial_info) %>%
select(choice, culture, subject, item)
triads_model <- glmer(choice ~ culture + (1 | subject) + (culture | item), family = binomial, data = triads_d)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
summary(triads_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: choice ~ culture + (1 | subject) + (culture | item)
## Data: triads_d
##
## AIC BIC logLik deviance df.resid
## 4174.5 4213.4 -2081.2 4162.5 4873
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.1049 -0.4257 0.0829 0.4087 14.3208
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 3.0922 1.7585
## item (Intercept) 5.6642 2.3800
## cultureUS 0.2429 0.4929 1.00
## Number of obs: 4879, groups: subject, 287; item, 59
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1473 0.4693 -0.314 0.7536
## cultureUS 1.7413 0.8635 2.016 0.0437 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## cultureUS -0.544
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
TD_catch_failed <- tidy_d %>%
filter(task_name == "TD") %>%
filter(task_info == "catch") %>%
filter(resp = FALSE) %>%
pull(subject)
TD_raw <- tidy_d %>%
filter(!subject %in% TD_catch_failed) %>%
filter(task_name == "TD") %>%
group_by(subject, culture) %>%
summarise(tax_match = mean(as.logical(resp)))
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
ggplot(data = TD_raw,
aes(y = tax_match, x = culture, color = culture)) +
geom_point(aes(y = tax_match, color = culture),
position = position_jitter(width = .15), size = .5, alpha = 0.8) +
stat_summary(fun.data = "mean_cl_boot", color = "black") +
scale_y_continuous(breaks = seq(0,1,0.5),
labels = {function(x) paste0(as.character(x*100),"%")})+
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("Triads taxonomic match") +
xlab("") +
theme_classic() +
labs(title = "Triads") +
theme(plot.title = element_text(hjust = 0.5, size = 8),
plot.subtitle = element_text(hjust = 0.5, size = 6),
text = element_text(size=8))
Semantic intuition: (logistic regression) choice ~ culture + (1 | subject) + (culture | item)
SeI_df <- tidy_d %>%
filter(task_name == "SeI") %>%
filter(task_info == "critical") %>%
mutate(
choice_causal = case_when(
resp == "causal_historical" ~ TRUE,
resp == "descriptivist" ~ FALSE
)
) %>%
mutate(item = trial_info) %>%
select(choice_causal, culture, subject, item)
#SeI_model <- glmer(choice_causal ~ culture + (1 | subject) + (culture | item), family = binomial, data = SeI_df)
SeI_model <- glmer(choice_causal ~ culture + (1 | subject) , family = binomial, data = SeI_df,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e5)))
summary(SeI_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: choice_causal ~ culture + (1 | subject)
## Data: SeI_df
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
##
## AIC BIC logLik deviance df.resid
## 703.8 716.8 -348.9 697.8 571
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.2387 -0.4382 0.2837 0.4005 0.9719
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 5.814 2.411
## Number of obs: 574, groups: subject, 287
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.2227 0.2556 0.871 0.38362
## cultureUS 1.4315 0.4359 3.284 0.00102 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## cultureUS -0.537
SeI_raw <- tidy_d %>%
filter(task_name == "SeI") %>%
filter(task_info == "critical") %>%
mutate(causal_historical_choice = case_when(
resp == "causal_historical" ~ TRUE,
resp == "descriptivist" ~ FALSE
)) %>%
group_by(subject, culture) %>%
summarise(causal_historical_resp = mean(causal_historical_choice))
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
ggplot(data = SeI_raw,
aes(y = causal_historical_resp, x = culture, color = culture)) +
geom_point(aes(y = causal_historical_resp, color = culture),
position = position_jitter(width = .15), size = .5, alpha = 0.8) +
stat_summary(fun.data = "mean_cl_boot", color = "black") +
scale_y_continuous(breaks = seq(0,1,0.5),
labels = {function(x) paste0(as.character(x*100),"%")})+
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("Causal Historical Choice") +
xlab("") +
theme_classic() +
labs(title = "Semantic Intuition") +
theme(plot.title = element_text(hjust = 0.5, size = 8),
plot.subtitle = element_text(hjust = 0.5, size = 6),
text = element_text(size=8))
Ambiguous RMTS (logistic regression): choice ~ culture + (trial_num | subject)
rmts_df <- tidy_d %>%
filter(task_name == "RMTS") %>%
mutate(choice = as.factor(case_when(
resp == "1" ~ "rel",
resp == "0" ~ "obj"))
) %>%
group_by(subject) %>%
mutate(trial_num = as.factor(row_number())) %>%
select(-resp, -task_info, -trial_info, -resp_type)
# model 0: not converging
#rmts_model <- glmer(choice ~ culture + (trial_num | subject), family = binomial, data = rmts_df)
# model 1:
rmts_model <- glmer(choice ~ culture + (1 | subject), family = binomial, data = rmts_df)
#rmts_model
summary(rmts_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: choice ~ culture + (1 | subject)
## Data: rmts_df
##
## AIC BIC logLik deviance df.resid
## 1090.9 1106.0 -542.4 1084.9 1145
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6109 -0.2109 -0.1950 0.2663 1.7367
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 14.37 3.791
## Number of obs: 1148, groups: subject, 287
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.1622 0.3711 -3.131 0.00174 **
## cultureUS 0.4961 0.5673 0.874 0.38190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## cultureUS -0.633
RMTS_raw <- tidy_d %>%
filter(task_name == "RMTS") %>%
group_by(subject, culture) %>%
summarise(relational_choice = mean(as.numeric(resp)))
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
ggplot(data = RMTS_raw,
aes(y = relational_choice, x = culture, color = culture)) +
geom_point(aes(y = relational_choice, color = culture),
position = position_jitter(width = .15), size = .5, alpha = 0.8) +
stat_summary(fun.data = "mean_cl_boot", color = "black") +
scale_y_continuous(breaks = seq(0,1,0.5),
labels = {function(x) paste0(as.character(x*100),"%")})+
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("Proportion relational choice") +
xlab("") +
theme_classic() +
labs(title = "Ambiguous RMTS") +
theme(plot.title = element_text(hjust = 0.5, size = 8),
plot.subtitle = element_text(hjust = 0.5, size = 6),
text = element_text(size=8))
Raven (logistic regression): acc ~ culture + (1 | subject) + (1 | trial)
rv_df <- tidy_d %>%
filter(task_name == "RV") %>%
mutate(acc = as.numeric(resp)) %>%
group_by(subject) %>%
mutate(trial = as.factor(row_number())) %>%
select(-resp, -task_info, -trial_info, -resp_type)
rv_model <- glmer(acc ~ culture + (1 | subject) + (culture | trial), family = binomial, data = rv_df)
rv_model
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: acc ~ culture + (1 | subject) + (culture | trial)
## Data: rv_df
## AIC BIC logLik deviance df.resid
## 3202.019 3238.885 -1595.009 3190.019 3438
## Random effects:
## Groups Name Std.Dev. Corr
## subject (Intercept) 1.7806
## trial (Intercept) 1.3603
## cultureUS 0.6494 -0.04
## Number of obs: 3444, groups: subject, 287; trial, 12
## Fixed Effects:
## (Intercept) cultureUS
## 1.738 -1.529
summary(rv_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: acc ~ culture + (1 | subject) + (culture | trial)
## Data: rv_df
##
## AIC BIC logLik deviance df.resid
## 3202.0 3238.9 -1595.0 3190.0 3438
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.5399 -0.4178 0.1916 0.4482 7.5633
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 3.1704 1.7806
## trial (Intercept) 1.8505 1.3603
## cultureUS 0.4217 0.6494 -0.04
## Number of obs: 3444, groups: subject, 287; trial, 12
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.7375 0.4237 4.100 4.12e-05 ***
## cultureUS -1.5292 0.3039 -5.032 4.84e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## cultureUS -0.221
RV_raw <- tidy_d %>%
filter(task_name == "RV") %>%
group_by(subject, culture) %>%
summarise(RV_resp = mean(as.numeric(resp)))
## `summarise()` has grouped output by 'subject'. You can override using the
## `.groups` argument.
ggplot(data = RV_raw,
aes(y = RV_resp, x = culture, color = culture)) +
geom_point(aes(y = RV_resp, color = culture),
position = position_jitter(width = .15), size = .5, alpha = 0.8) +
stat_summary(fun.data = "mean_cl_boot", color = "black") +
scale_y_continuous(breaks = seq(0,1,0.5),
labels = {function(x) paste0(as.character(x*100),"%")})+
scale_color_manual(values = c("red", "blue"))+
scale_fill_manual(values = c("red", "blue"))+
guides(fill = "none") +
guides(color = "none") +
ylab("Ravens proportion correct") +
xlab("") +
theme_classic() +
labs(title = "Ravens") +
theme(plot.title = element_text(hjust = 0.5, size = 8),
plot.subtitle = element_text(hjust = 0.5, size = 6),
text = element_text(size=8))
s1_d <- read_csv(here("data/s1_tidy_main.csv"))
## New names:
## * `` -> ...1
## Rows: 37595 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): subject, culture, task_name, task_info, trial_info, resp_type
## dbl (2): ...1, resp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# RMTS
rmts_df <- s1_d %>%
filter(task_name == "RMTS") %>%
mutate(choice = as.factor(case_when(
resp == "1" ~ "rel",
resp == "0" ~ "obj"))
) %>%
group_by(subject) %>%
mutate(trial_num = as.factor(row_number())) %>%
select(-resp, -task_info, -trial_info, -resp_type)
# model 0: not converging
#rmts_model <- glmer(choice ~ culture + (trial_num | subject), family = binomial, data = rmts_df)
# model 1:
s1_rmts_model <- glmer(choice ~ culture + (1 | subject), family = binomial, data = rmts_df)
s1_rmts_es <- binomial_model_to_es(s1_rmts_model, "s1_rmts")
# RV
rv_df <- s1_d %>%
filter(task_name == "RV") %>%
mutate(acc = as.numeric(resp)) %>%
group_by(subject) %>%
mutate(trial = as.factor(row_number())) %>%
select(-resp, -task_info, -trial_info, -resp_type)
s1_rv_model <- glmer(acc ~ culture + (1 | subject) + (culture | trial), family = binomial, data = rv_df)
s1_rv_es <- binomial_model_to_es(s1_rv_model, "s1_rv")
# CP
cp_df <- s1_d %>%
filter(task_name == "CP") %>%
mutate(choice = as.factor(case_when(
resp == "1" ~ "uniq",
resp == "0" ~ "non_uniq"))
) %>%
select(-resp, -task_info, -trial_info, -resp_type)
cp_model <- glm(choice ~ culture,
family=binomial(link="logit"),
data = cp_df)
s1_cp_es <- binomial_model_to_es(cp_model, "s1_cp")
# SSI
ssi_df <- s1_d %>%
filter(task_name == "SI") %>%
filter(resp_type == "inflation_score_ratio") %>%
mutate(score = resp) %>%
select(-resp, -task_info, -trial_info, -resp_type)
s1_ssi_model <- glm(score ~ culture, family=gaussian, data = ssi_df)
ssi_t <- (summary(s1_ssi_model)$coefficient %>% as.data.frame())$`t value`[[2]]
ssi_df_error <- (summary(s1_ssi_model)$df.residual)
s1_ssi_es <- t_to_d(ssi_t, ssi_df_error) %>%
as.data.frame() %>%
mutate(task_name = "s1_ssi")
# CA
ca_df <- s1_d %>%
filter(task_name == "CA") %>%
mutate(attrib_num = as.numeric(resp),
attrib_binary = replace(attrib_num, attrib_num > 1, 1),
trial = trial_info,
attrib_type = factor(resp_type)) %>%
select(-resp, -task_info)
#ca_model <- glmer(attrib_num ~ attrib_type * culture + (attrib_type | subject) + (culture | trial), family=poisson, data = ca_df, control=glmerControl(optimizer="bobyqa"))
#boundary (singular) fit: see ?isSingular
#ca_model_binary <- glmer(attrib_binary ~ attrib_type * culture + (attrib_type | subject) + (culture | trial), family=binomial, data = ca_df, control=glmerControl(optimizer="bobyqa"))
#boundary (singular) fit: see ?isSingular
#ca_model1 <- glmer(attrib_num ~ attrib_type * culture + (attrib_type | subject) + (1 | trial), family=poisson, data = ca_df, control=glmerControl(optimizer="bobyqa"))
#boundary (singular) fit: see ?isSingular
#ca_model2 <- glmer(attrib_num ~ attrib_type * culture + (1 | subject) + (1 | trial), family=poisson, data = ca_df, control=glmerControl(optimizer="bobyqa"))
#boundary (singular) fit: see ?isSingular
#ca_model3 <- glmer(attrib_num ~ attrib_type * culture + (1 | subject), family=poisson, data = ca_df, control=glmerControl(optimizer="bobyqa"))
#boundary (singular) fit: see ?isSingular
ca_model4 <- glm(attrib_num ~ attrib_type * culture, family=poisson, data = ca_df)
s1_ca_z <- ((summary(ca_model4)$coefficient) %>% as.data.frame())$`z value`[[4]] # predictor culture US
s1_ca_obs <- summary(ca_model4)$df.residual
s1_ca_es <- z_to_d(s1_ca_z, s1_ca_obs) %>%
as.data.frame() %>%
mutate(task_name = "s1_ca")
# CA
HZ_height_df <- s1_d %>%
filter(task_name == "HZ", resp_type == "hz_height") %>%
mutate(
height = resp
) %>%
select(-resp, -task_info, -trial_info)
HZ_height_model <- lm(height ~ culture,
data = HZ_height_df)
summary(HZ_height_model)
##
## Call:
## lm(formula = height ~ culture, data = HZ_height_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52010 -0.10810 -0.00867 0.09899 0.46375
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.53625 0.01357 39.505 <2e-16 ***
## cultureUS 0.03283 0.01914 1.715 0.0873 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1754 on 334 degrees of freedom
## Multiple R-squared: 0.00873, Adjusted R-squared: 0.005762
## F-statistic: 2.942 on 1 and 334 DF, p-value: 0.08725
hz_df <- (((summary(HZ_height_model))$fstatistic) %>% as.data.frame) %>% rownames_to_column("info")
s1_hz_es <- F_to_d(hz_df$.[[1]], hz_df$.[[2]], hz_df$.[[3]]) %>%
as.data.frame() %>%
mutate(task_name = "s1_hz")
ebb_df <- s1_d %>%
filter(task_name == "EBB", task_info != "HELPFUL") %>%
mutate(correct = as.factor(case_when(
resp == "1" ~ "correct",
resp == "0" ~ "incorrect")),
context = task_info,
size_diff = as.numeric(trial_info),
) %>%
select(-resp, -task_info, -trial_info)
#full model
#ebb_model <- glmer(correct ~ culture * context * size_diff + (size_diff * context | subject), family = binomial, data = ebb_df, control=glmerControl(optimizer="bobyqa"))
#convergence code 1 from bobyqa: bobyqa -- maximum number of function evaluations exceeded
#boundary (singular) fit: see ?isSingular
#model 2 (if full does not converge)
#ebb_model <- glmer(correct ~ culture * context * size_diff + (context | subject), family = binomial, data = ebb_df, control=glmerControl(optimizer="bobyqa"))
#boundary (singular) fit: see ?isSingular
#model 3
ebb_model <- glmer(correct ~ culture * context * size_diff + ( 1 | subject), family = binomial, data = ebb_df, control=glmerControl(optimizer="bobyqa"))
s1_ebb_es <- binomial_model_to_es(ebb_model, "s1_ebb")
# FD
s1_mention_df <- s1_d %>%
filter(task_name == "FD", resp_type == "first_mention_focal") %>%
mutate(first_mention = as.factor(case_when(
resp == "1" ~ "focal",
resp == "0" ~ "background")),
scene = trial_info) %>%
select(-resp, -task_info, -resp_type, -trial_info)
#mention_model <- glmer(first_mention ~ culture + (1 | subject)+(culture | scene), family = binomial, data = mention_df)
#Error: number of observations (=1146) < number of random effects (=1148) for term (scene | subject); the random-effects parameters are probably unidentifiable
s1_mention_model <- glmer(first_mention ~ culture + (1 | subject) ,
family = binomial, data = s1_mention_df)
s1_fd_es <- binomial_model_to_es(s1_mention_model, "s1_fd")
s1_es <- bind_rows(s1_rmts_es, s1_rv_es, s1_cp_es,
s1_ssi_es, s1_ca_es, s1_hz_es, s1_ebb_es,
s1_fd_es)
# change detection
clean_cd_df <- anova(cd_model) %>%
as.data.frame() %>%
janitor::clean_names() %>%
rownames_to_column("predictor") %>%
filter(predictor == "culture:type_of_change")
cd_es <- F_to_d(clean_cd_df$f_value, clean_cd_df$num_df, clean_cd_df$den_df) %>%
as.data.frame() %>%
mutate(task_name = "s2_cd")
# free description
fd_z <- ((summary(mention_model)$coefficient) %>% as.data.frame())$`z value`[[2]] # predictor culture US
fd_obs <- length(summary(mention_model)$residuals)
fd_es <- z_to_d(fd_z, fd_obs) %>%
as.data.frame() %>%
mutate(task_name = "s2_fd")
# causal attribution
clean_ca_df <- anova(ca_model) %>%
as.data.frame() %>%
janitor::clean_names() %>%
rownames_to_column("predictor") %>%
filter(predictor == "culture:attribution_type")
ca_es <- F_to_d(clean_ca_df$f_value, clean_ca_df$num_df, clean_ca_df$den_df) %>%
as.data.frame() %>%
mutate(task_name = "s2_ca")
# symbolic self inflation
ssi_t <- (summary(si_model)$coefficient %>% as.data.frame())$`t value`[[2]]
ssi_df_error <- (summary(si_model)$df.residual)
ssi_es <- t_to_d(ssi_t, ssi_df_error) %>%
as.data.frame() %>%
mutate(task_name = "s2_ssi")
# triads
td_z <- ((summary(triads_model)$coefficient) %>% as.data.frame())$`z value`[[2]] # predictor culture US
td_obs <- length(summary(triads_model)$residuals)
td_es <- z_to_d(td_z, td_obs) %>%
as.data.frame() %>%
mutate(task_name = "s2_td")
# SeI model
sei_z <- ((summary(SeI_model)$coefficient) %>% as.data.frame())$`z value`[[2]] # predictor culture US
sei_obs <- length(summary(SeI_model)$residuals)
sei_es <- z_to_d(sei_z, sei_obs) %>%
as.data.frame() %>%
mutate(task_name = "s2_sei")
# RMTS model
rmts_z <- ((summary(rmts_model)$coefficient) %>% as.data.frame())$`z value`[[2]] # predictor culture US
rmts_obs <- length(summary(rmts_model)$residuals)
rmts_es <- z_to_d(rmts_z, rmts_obs) %>%
as.data.frame() %>%
mutate(task_name = "s2_rmts")
# RV model
rv_z <- ((summary(rv_model)$coefficient) %>% as.data.frame())$`z value`[[2]] # predictor culture US
rv_obs <- length(summary(rv_model)$residuals)
rv_es <- z_to_d(rv_z, rv_obs) %>%
as.data.frame() %>%
mutate(task_name = "s2_rv")
s2_es <- bind_rows(cd_es, fd_es, ca_es, ssi_es, td_es, sei_es, rmts_es, rv_es)
binomial_model_to_es <- function(model, task_name){
z <- ((summary(model)$coefficient) %>% as.data.frame())$`z value`[[2]] # predictor culture US
obs <- length(summary(model)$residuals)
if(obs == 0){
obs <- summary(model)$df.residual # this needs to be fixed later
}
es <- z_to_d(z, obs) %>%
as.data.frame() %>%
mutate(task_name = task_name)
return (es)
}
all_es <- bind_rows(s1_es, s2_es)
all_es %>%
ggplot(aes(x = task_name, y = d,
ymin = CI_low, ymax = CI_high)) +
geom_pointrange() +
coord_flip()