d_clean <- d %>%
  mutate(
    # Recode the numeric Age / Culture indicators into readable labels.
    # Note: `TRUE ~ "NA"` assigns the literal string "NA", not a true missing value.
    age = case_when(
      Age == -1 ~ "kid",
      Age == 1 ~ "adult",
      TRUE ~ "NA"
    ),
    culture = case_when(
      Culture == -1 ~ "US",
      Culture == 1 ~ "CN",
      TRUE ~ "NA"
    )
  ) %>%
  # Give the raw spreadsheet columns descriptive names, one per statement.
  rename(
    participant = Participant,
    control_yes_greg = `Control statements`,
    control_yes_walter = X5,
    control_yes_james = X6,
    control_no_owen = X7,
    control_no_leo = X8,
    control_no_jason = X9,
    control_no_alvin = X10,
    control_no_blaze = X11,
    test_peter = `Critical statements`,
    test_pickes = X13
  ) %>%
  select(participant, age, culture, starts_with("control"), starts_with("test")) %>%
  filter(!is.na(participant))
d_clean
## # A tibble: 164 x 13
## participant age culture control_yes_greg control_yes_walt… control_yes_jam…
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1 kid US 1 1 1
## 2 2 kid US 1 1 1
## 3 3 kid US 1 1 1
## 4 4 kid US 1 1 1
## 5 5 kid US 1 1 1
## 6 6 kid US 1 1 1
## 7 7 kid US 1 0 1
## 8 8 kid US 0 1 0
## 9 9 kid US 1 1 1
## 10 10 kid US 1 1 0
## # … with 154 more rows, and 7 more variables: control_no_owen <chr>,
## # control_no_leo <chr>, control_no_jason <chr>, control_no_alvin <chr>,
## # control_no_blaze <chr>, test_peter <chr>, test_pickes <chr>
# Reshape to one row per participant x statement, and tag each statement
# as a yes-control, a no-control, or a critical (test) item.
d_long <- d_clean %>%
  pivot_longer(cols = c(starts_with("control"), starts_with("test")),
               names_to = "response_type",
               values_to = "response") %>%
  mutate(
    type = case_when(
      grepl("control_yes", response_type) ~ "control_yes",
      grepl("control_no", response_type) ~ "control_no",
      grepl("test", response_type) ~ "test"
    )
  )
d_long
## # A tibble: 1,640 x 6
## participant age culture response_type response type
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1 kid US control_yes_greg 1 control_yes
## 2 1 kid US control_yes_walter 1 control_yes
## 3 1 kid US control_yes_james 1 control_yes
## 4 1 kid US control_no_owen 1 control_no
## 5 1 kid US control_no_leo 1 control_no
## 6 1 kid US control_no_jason 1 control_no
## 7 1 kid US control_no_alvin 1 control_no
## 8 1 kid US control_no_blaze 0 control_no
## 9 1 kid US test_peter 1 test
## 10 1 kid US test_pickes 1 test
## # … with 1,630 more rows
# Proportion of correct responses to the control statements,
# by age group, culture, and control type.
d_long %>%
  group_by(age, culture, type) %>%
  filter(grepl("control", response_type)) %>%
  summarise(
    percentage_of_correct = mean(as.numeric(response), na.rm = TRUE)
  )
## # A tibble: 8 x 4
## # Groups: age, culture [4]
## age culture type percentage_of_correct
## <chr> <chr> <chr> <dbl>
## 1 adult CN control_no 0.987
## 2 adult CN control_yes 0.915
## 3 adult US control_no 0.949
## 4 adult US control_yes 0.929
## 5 kid CN control_no 0.8
## 6 kid CN control_yes 0.8
## 7 kid US control_no 0.845
## 8 kid US control_yes 0.858
Yep, everything was as reported: the control-statement accuracies match the paper.
The model reported in the paper:
Controls.lmer = glmer(Correctness ~ Culture * Age + (1|Participant) + (1|Statement), data = Controls, family = binomial)
# Refit that model on the replication data: culture x age as fixed effects,
# random intercepts for participant and statement (response_type).
glmer(response ~ culture * age + (1|participant) + (1|response_type),
      data = filter(d_long, grepl("control", response_type)) %>%
        mutate(response = as.factor(response)),
      family = "binomial") %>%
  summary()
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: response ~ culture * age + (1 | participant) + (1 | response_type)
## Data:
## filter(d_long, grepl("control", response_type)) %>% mutate(response = as.factor(response))
##
## AIC BIC logLik deviance df.resid
## 804.0 835.1 -396.0 792.0 1306
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.6040 0.1809 0.2272 0.3502 0.8591
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant (Intercept) 0.4094 0.6398
## response_type (Intercept) 0.1503 0.3877
## Number of obs: 1312, groups: participant, 164; response_type, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.4372 0.3354 10.247 < 2e-16 ***
## cultureUS -0.4069 0.3755 -1.084 0.2786
## agekid -1.9018 0.3562 -5.339 9.36e-08 ***
## cultureUS:agekid 0.7991 0.4708 1.697 0.0896 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cltrUS agekid
## cultureUS -0.644
## agekid -0.733 0.606
## cultrUS:gkd 0.530 -0.798 -0.745
The paper reports: "To determine whether there were any cultural differences in the responses to the control statements, a binomial mixed-effects model was constructed using the R programming language, with culture, age and their interaction as fixed effects and participant and statement as random effects. We found a main effect of age (z = 6.274, p < 0.001) but no main effect of culture (z = 0.031, p > 0.1) and no interaction between age and culture (z = 1.697, p > 0.05). Separate analyses of the responses to Yes-controls and No-controls also found no reliable effects or interactions of culture (all p's > 0.1)."
Not so sure where those numbers come from: the model above gives z = -5.34 for age and z = -1.08 for culture, not the reported 6.274 and 0.031 (the interaction z = 1.697 does match).
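One guess, purely a sketch on my part: with treatment coding, the culture and age coefficients above are simple effects at the reference levels, so the paper's larger age z and near-zero culture z could come from a different contrast coding. A minimal check with sum-to-zero contrasts (the helper name d_controls is introduced here, not from the original code):
# Sketch: refit the controls model with sum-to-zero contrasts so the culture
# and age coefficients are main effects averaged over the other factor rather
# than simple effects at the reference levels.
# `d_controls` is a helper name introduced here, not from the original code.
d_controls <- d_long %>%
  filter(grepl("control", response_type)) %>%
  mutate(
    response = as.factor(response),
    culture = factor(culture),
    age = factor(age)
  )
contrasts(d_controls$culture) <- contr.sum(2)
contrasts(d_controls$age) <- contr.sum(2)
glmer(response ~ culture * age + (1|participant) + (1|response_type),
      data = d_controls, family = "binomial") %>%
  summary()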
# The same accuracy summary, now for the critical (test) statements.
d_critic_test <- d_long %>%
  group_by(age, culture, type) %>%
  filter(grepl("test", response_type)) %>%
  summarise(
    percentage_of_correct = mean(as.numeric(response), na.rm = TRUE)
  )
Try the same model on the critical (test) statements.
# Same model structure, fit to the critical (test) statements only.
glmer(response ~ culture * age + (1|participant) + (1|response_type),
      data = filter(d_long, grepl("test", response_type)) %>%
        mutate(response = as.factor(response)),
      family = "binomial") %>%
  summary()
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: response ~ culture * age + (1 | participant) + (1 | response_type)
## Data:
## filter(d_long, grepl("test", response_type)) %>% mutate(response = as.factor(response))
##
## AIC BIC logLik deviance df.resid
## 413.5 436.3 -200.8 401.5 322
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.2168 -0.4599 0.2944 0.4090 1.1934
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant (Intercept) 6.25636 2.5013
## response_type (Intercept) 0.02368 0.1539
## Number of obs: 328, groups: participant, 164; response_type, 2
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1876 0.5107 -0.367 0.7133
## cultureUS 1.5012 0.7542 1.990 0.0465 *
## agekid -0.9327 0.8053 -1.158 0.2467
## cultureUS:agekid 0.4880 1.0886 0.448 0.6540
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cltrUS agekid
## cultureUS -0.666
## agekid -0.591 0.349
## cultrUS:gkd 0.442 -0.594 -0.728
The table looks right, but is there an item-level difference between the two critical statements?
# Same summary as d_critic_test above, reused to draw reference lines
# for the group averages.
critical_d <- d_long %>%
  group_by(age, culture, type) %>%
  filter(grepl("test", response_type)) %>%
  summarise(
    percentage_of_correct = mean(as.numeric(response), na.rm = TRUE)
  )

# Item-level means for the two critical statements, with the group averages
# overlaid as horizontal reference lines.
d_long %>%
  filter(grepl("test", response_type)) %>%
  ggplot(aes(x = culture, y = as.numeric(response), color = response_type)) +
  stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width = .2)) +
  geom_hline(data = critical_d %>%
               mutate(culture_print = case_when(
                 culture == "CN" ~ "Reported Average for China",
                 culture == "US" ~ "Reported Average for US"
               )),
             aes(yintercept = percentage_of_correct, color = culture_print)) +
  facet_wrap(~age) +
  theme_classic() +
  ylab("Proportion of Causal-historical answer")
Maybe the reported average is inflated? Or maybe one of the two statements is just worse than the other.
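To put numbers on what the plot shows, here is a quick per-statement summary (a sketch added here, not part of the original analysis; the column name prop_ch_answer is mine):
# Sketch: proportion of causal-historical answers for each critical statement
# separately, by age group and culture.
d_long %>%
  filter(grepl("test", response_type)) %>%
  group_by(age, culture, response_type) %>%
  summarise(
    prop_ch_answer = mean(as.numeric(response), na.rm = TRUE),
    .groups = "drop"
  )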
Another way to ask the question: are these four groups of participants equally consistent across the two critical statements? (A quick formal check is sketched after the plot below.)
# Score each participant on the two critical statements (0-2 causal-historical
# answers) and classify them as consistent or mixed.
d_long %>%
  filter(grepl("test", response_type)) %>%
  group_by(participant, age, culture) %>%
  summarise(
    total_test_score = sum(as.numeric(response))
  ) %>%
  mutate(
    consistency = case_when(
      total_test_score == 2 ~ "consistent CH answer",
      total_test_score == 0 ~ "consistent D answer",
      total_test_score == 1 ~ "half and half"
    )
  ) %>%
  ungroup() %>%
  group_by(culture, age, consistency) %>%
  summarise(n_subject = n()) %>%
  ggplot(aes(x = consistency, y = n_subject, fill = culture)) +
  geom_col(position = position_dodge()) +
  facet_grid(~ age) +
  ylab("Number of subjects") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = .9, hjust = 1)) +
  xlab("")