df_4yo <- read_excel("Disjunction_Data.xlsx", sheet = "Replication 4yos") %>%
rename(subject_id = `Subject ID`) %>%
filter(!is.na(subject_id)) %>%
mutate(response_yes = case_when(
`Response (Y/N)` == "N" ~ 0,
`Response (Y/N)` == "Y" ~ 1,
.default = NA)) %>%
mutate(Months = as.numeric(Months))
df_5yo <- read_excel("Disjunction_Data.xlsx", sheet = "Replication 5yos") %>%
rename(subject_id = `Subject ID`) %>%
filter(!is.na(subject_id)) %>%
mutate(response_yes = case_when(
`Response (Y/N)` == "N" ~ 0,
`Response (Y/N)` == "Y" ~ 1,
.default = NA)) %>%
mutate(Months = as.numeric(Months))
df_all <- bind_rows(df_4yo, df_5yo)
#can't replicate the number for the critical trials, regardless of how I code the NAs
df_all %>%
group_by(`Trial Type`) %>%
summarise(mean = mean(response_yes, na.rm = T))
## # A tibble: 5 × 2
## `Trial Type` mean
## <chr> <dbl>
## 1 0_Disjunct_True 0.210
## 2 1_Disjunct_True 0.667
## 3 2_Disjunct_True 0.810
## 4 Control_False 0.164
## 5 Control_True 0.918
df_all %>%
mutate(response_yes = ifelse(is.na(response_yes), 0, response_yes)) %>%
group_by(`Trial Type`) %>%
summarise(mean = mean(response_yes, na.rm = T))
## # A tibble: 5 × 2
## `Trial Type` mean
## <chr> <dbl>
## 1 0_Disjunct_True 0.207
## 2 1_Disjunct_True 0.659
## 3 2_Disjunct_True 0.805
## 4 Control_False 0.164
## 5 Control_True 0.903
Plotting the original data:
df_4yo_crit <- df_4yo %>%
filter(`Trial Type` %in% c("1_Disjunct_True", "2_Disjunct_True"))
df_5yo_crit <- df_5yo %>%
filter(`Trial Type` %in% c("1_Disjunct_True", "2_Disjunct_True"))
df_crit <- bind_rows(df_4yo_crit, df_5yo_crit) #41 ppts, I think the original paper counted 'NA' as a ppt lol
df_crit_sum <- df_crit %>%
group_by(`Trial Type`) %>%
summarise(sum_response_true = mean(response_yes, na.rm = T))
ggplot(df_crit,
aes(x = `Trial Type`, y = response_yes)) +
stat_summary(fun.data = "mean_cl_boot",
geom = "pointrange",
position = position_dodge2(0.9)) +
ylim(0,1)
# each participant see 4 1-disjunct-true and 4 2-disjunct-true, so binomial should be out of 8
N_TRIALS = 8
N_TRIALS_PER_TYPE = 4
df_crit_sum <- df_crit %>%
group_by(subject_id) %>%
summarise(sum_response_true = sum(response_yes, na.rm = T)) # treat NAs as 0
ggplot(df_crit_sum,
mapping = aes(x = sum_response_true)) +
geom_histogram(binwidth = 1, color = "black") +
scale_x_continuous(breaks= seq(0, N_TRIALS, 1))
If all kids are at chance:
SAMPLE_N = 10000
N_TRIALS = 8
N_TRIALS_PER_TYPE = 4
df.random_sim_sum <- data.frame(
subject_id = as.factor(rep(1:SAMPLE_N)),
sum_response_true = rbinom(SAMPLE_N, N_TRIALS, 0.5)
)
ggplot(df.random_sim_sum,
mapping = aes(x = sum_response_true)) +
geom_histogram(binwidth = 1, color = "black") +
scale_x_continuous(breaks= seq(0, N_TRIALS, 1))
If k is the proportion of kids that are random, and the rest perseverate (and their first response to a trial type is random):
(SPOILER: The plot uses k that was fitted later on)
perseverate_prob <- (c(0.25, 0.5, 0.25))
perseverate_sums <- c(0, N_TRIALS / 2, N_TRIALS)
get_perseverate_sim <- function(k) { # k is essentially percentage of kids being completely random
if (rbinom(1, 1, k)) {
return(rbinom(1, N_TRIALS, 0.5)) #draw from binomial distribution from 0-8 successes
} else {
return(sample(perseverate_sums, size = 1, replace = T, prob = perseverate_prob))
}
}
df.perseverate_sim_sum <- data.frame(
subject_id = as.factor(rep(1:SAMPLE_N))
)
df.perseverate_sim_sum <- df.perseverate_sim_sum %>%
rowwise() %>%
mutate(sum_response_true = get_perseverate_sim(0.591)) # looks good, just need to replace 1 with fitted k
ggplot(df.perseverate_sim_sum,
mapping = aes(x = sum_response_true)) +
geom_histogram(binwidth = 1, color = "black") +
scale_x_continuous(breaks= seq(0, N_TRIALS, 1))
If k is the proportion of kids that are random, and the rest are inclusive (answer T on all trials):
(SPOILER: The plot uses k that was fitted later on)
get_inclusive_sim <- function(k) { # k is essentially percentage of kids being completely random
if (rbinom(1, 1, k)) {
return(rbinom(1, N_TRIALS, 0.5)) #draw from binomial distribution from 0-8 successes
} else {
return(N_TRIALS)
}
}
df.inclusive_sim_sum <- data.frame(
subject_id = as.factor(rep(1:SAMPLE_N))
)
df.inclusive_sim_sum <- df.inclusive_sim_sum %>%
rowwise() %>%
mutate(sum_response_true = get_inclusive_sim(0.71)) # looks good, just need to replace 1 with fitted k
ggplot(df.inclusive_sim_sum,
mapping = aes(x = sum_response_true)) +
geom_histogram(binwidth = 1, color = "black") +
scale_x_continuous(breaks= seq(0, N_TRIALS, 1))
If k is the proportion of kids that are random, and the rest are conjunctive (only answer T on 2-disjunct-true, and F on 1-disjunct-true):
(NOTE: k fitted in the actual model is 1 i.e. all kids are random. But here we’ll just plot with the k for perseverate_sim to show some sort of difference)
get_conjunctive_sim <- function(k) { # k is essentially percentage of kids being completely random
if (rbinom(1, 1, k)) {
return(rbinom(1, N_TRIALS, 0.5)) #draw from binomial distribution from 0-8 successes
} else {
return(N_TRIALS_PER_TYPE)
}
}
df.conjunctive_sim_sum <- data.frame(
subject_id = as.factor(rep(1:SAMPLE_N))
)
df.conjunctive_sim_sum <- df.conjunctive_sim_sum %>%
rowwise() %>%
mutate(sum_response_true = get_conjunctive_sim(0.591)) # looks good, just need to replace 1 with fitted k
ggplot(df.conjunctive_sim_sum,
mapping = aes(x = sum_response_true)) +
geom_histogram(binwidth = 1, color = "black") +
scale_x_continuous(breaks= seq(0, N_TRIALS, 1))
If k is the proportion of kids that are random, and the rest are exclusive (only answer T on 1-disjunct-true, and F on 2-disjunct-true) – NOTE: THESE MODELS DO NOT CURRENTLY DIFFERENTIATE BETWEEN CONJUNCTIVE AND EXCLUSIVE KIDS! But currently what we care about is whether kids are conjunctive or perseverating so we’ll punt this problem to later.
(NOTE: k fitted in the actual model is 1 i.e. all kids are random. But here we’ll just plot with the k for perseverate_sim to show some sort of difference)
get_exclusive_sim <- function(k) { # k is essentially percentage of kids being completely random
if (rbinom(1, 1, k)) {
return(rbinom(1, N_TRIALS, 0.5)) #draw from binomial distribution from 0-8 successes
} else {
return(N_TRIALS_PER_TYPE)
}
}
df.exclusive_sim_sum <- data.frame(
subject_id = as.factor(rep(1:SAMPLE_N))
)
df.exclusive_sim_sum <- df.exclusive_sim_sum %>%
rowwise() %>%
mutate(sum_response_true = get_exclusive_sim(0.591)) # looks good, just need to replace 1 with fitted k
ggplot(df.exclusive_sim_sum,
mapping = aes(x = sum_response_true)) +
geom_histogram(binwidth = 1, color = "black") +
scale_x_continuous(breaks= seq(0, N_TRIALS, 1))
Last model: If k_random is the proportion of kids that are random, k_perseverate is the proportion that perseverate (and their first response to a trial type is random), and the rest are conjunctive (only answer T on 2-disjunct-true, and F on 1-disjunct-true)
(NOTE: the model fit k_random = k_perseverate = 0.5. So no conjunctive kids at all…)
get_perseverate_conjunctive_sim <- function(k_random, k_perseverate) { # k is essentially percentage of kids being completely random
kid_type = sample(c('random', 'perseverate', 'conjunctive'),
size = 1, replace = T,
prob = c(k_random, k_perseverate, 1-k_random-k_perseverate))
if(kid_type == 'random') {
return(rbinom(1, N_TRIALS, 0.5)) #draw from binomial distribution from 0-8 successes
}
else if (kid_type == 'perseverate') {
return(sample(perseverate_sums, size = 1, replace = T, prob = perseverate_prob))
}
else {
return(N_TRIALS_PER_TYPE) # conjunctive is whatever left
}
}
get_perseverate_conjunctive_sim(0.4, 0.4)
## [1] 3
df.perseverate_conjunctive_sim_sum <- data.frame(
subject_id = as.factor(rep(1:SAMPLE_N))
)
df.perseverate_conjunctive_sim_sum <- df.perseverate_conjunctive_sim_sum %>%
rowwise() %>%
mutate(sum_response_true = get_perseverate_conjunctive_sim(0.5, 0.5)) # no conjunctive kids at all, but this is what the fitted model tell us.
ggplot(df.perseverate_conjunctive_sim_sum,
mapping = aes(x = sum_response_true)) +
geom_histogram(binwidth = 1, color = "black") +
scale_x_continuous(breaks= seq(0, N_TRIALS, 1))
#loglik_random: k_random kids are drawn from a binomial distribution, we do not make any predictions about the other kids
loglik_random <- function(subject_sum, k_random) {
sum(log(k_random * dbinom(subject_sum, N_TRIALS, 0.5))
)
}
#loglik_perseverate: k_random kids are drawn from a binomial distribution, and (1-k) has 0.25 chance of answering T for 0 / N_TRIALS (8), and 0.5 chance of answering T for N_TRIALS_PER_TYPE (4)
loglik_perseverate <- function(subject_sum, k_random) {
sum(
case_when(
subject_sum == 0 | subject_sum == N_TRIALS ~ log(k_random * dbinom(subject_sum, N_TRIALS, 0.5) + (1-k_random) * 0.25),
subject_sum == N_TRIALS_PER_TYPE ~ log(k_random * dbinom(subject_sum, N_TRIALS, 0.5) + (1-k_random) * 0.5),
.default = log(k_random * dbinom(subject_sum, N_TRIALS, 0.5))
)
)
}
# Tried fitting a model where k_random kids answer randomly, k_perseverate kids answer in the perseverating manner, and the rest is undefined, but it doesn't converge correctly
# loglik_perseverate <- function(subject_sum, k_random, k_perseverate) {
# sum(
# case_when(
# subject_sum == 0 | subject_sum == N_TRIALS ~ log(k_random * dbinom(subject_sum, N_TRIALS, 0.5) + k_perseverate * 0.25),
# subject_sum == N_TRIALS_PER_TYPE ~ log(k_random * dbinom(subject_sum, N_TRIALS, 0.5) + k_perseverate * 0.5),
# .default = log(k_random * dbinom(subject_sum, N_TRIALS, 0.5))
# )
# )
# }
#loglik_inclusive: k kids are drawn from a binomial distribution, and (1-k) always answer T for all trials --> N_TRIALS (8)
loglik_inclusive <- function(subject_sum, k) { # kids are either random or conjunctive
sum(
case_when(
subject_sum == N_TRIALS ~ log(k * dbinom(subject_sum, N_TRIALS, 0.5) + (1-k)*1),
.default = log(k * dbinom(subject_sum, N_TRIALS, 0.5))
)
)
}
#loglik_conjunctive: k kids are drawn from a binomial distribution, and (1-k) always answer T for the 2-disjunct-true and F for the 1-disjunct-false --> N_TRIALS_PER_TYPE (4)
loglik_conjunctive <- function(subject_sum, k) { # kids are either random or conjunctive
sum(
case_when(
subject_sum == N_TRIALS_PER_TYPE ~ log(k * dbinom(subject_sum, N_TRIALS, 0.5) + (1-k)*1),
.default = log(k * dbinom(subject_sum, N_TRIALS, 0.5))
)
)
}
#loglik_exclusive: k kids are drawn from a binomial distribution, and (1-k) always answer T for the 1-disjunct-true and F for the 2-disjunct-false --> N_TRIALS_PER_TYPE (4)
#NOTE THAT THIS IS EXACTLY THE SAME AS LOGLIK_CONJUNCTIVE FOR THIS SET OF MODELS...
loglik_exclusive <- function(subject_sum, k) { # kids are either random or conjunctive
sum(
case_when(
subject_sum == N_TRIALS_PER_TYPE ~ log(k * dbinom(subject_sum, N_TRIALS, 0.5) + (1-k)*1),
.default = log(k * dbinom(subject_sum, N_TRIALS, 0.5))
)
)
}
# 3 types of kids: random, perseverate, conjunctive --> doesn't converge correctly
loglik_perseverate_conjunctive <- function(subject_sum, k_random, k_perseverate) {
sum(
case_when(
subject_sum == 0 | subject_sum == N_TRIALS ~ log(k_random * dbinom(subject_sum, N_TRIALS, 0.5) + k_perseverate * 0.25),
subject_sum == N_TRIALS_PER_TYPE ~ log(k_random * dbinom(subject_sum, N_TRIALS, 0.5) + k_perseverate * 0.5 + (1-k_random-k_perseverate) * 1),
.default = log(k_random * dbinom(subject_sum, N_TRIALS, 0.5))
)
)
}
mle_fit_random = function(data) {
nLL = function(k_random_fitted) {
-loglik_random(data$sum_response_true, k_random_fitted)
}
iter = 0
fits = NULL
fit = NULL
while (is.null(fits)) {
try(fit <- summary(mle(nLL, start = list(k_random_fitted = 0.3),
lower = 0, upper = 1)),
TRUE)
iter = iter + 1
if (!is.null(fit)) {
# m2logL is deviance (-2x LL)
fits = c(-0.5*fit@m2logL, length(data$sum_response_true), fit@coef[,"Estimate"])
} else {
if (iter > 500) {
fits = c(-9999, 0, 0)
}
}
}
names(fits) = c("logL", "n", "k_random_fitted")
return(fits)
}
fit_random <- mle_fit_random(df_crit_sum) #1
fit_random
## logL n k_random_fitted
## -126.9487 41.0000 1.0000
mle_fit_perseverate = function(data) {
nLL = function(k_random_fitted) {
-loglik_perseverate(data$sum_response_true,
k_random_fitted)
}
iter = 0
fits = NULL
fit = NULL
while (is.null(fits)) {
try(fit <- summary(mle(nLL, start = list(k_random_fitted = 0.3),
lower = 0, upper = 1)),
TRUE)
iter = iter + 1
if (!is.null(fit)) {
# m2logL is deviance (-2x LL)
fits = c(-0.5*fit@m2logL, length(data$sum_response_true), fit@coef[,"Estimate"])
} else {
if (iter > 500) {
fits = c(-9999, 0, 0)
}
}
}
names(fits) = c("logL", "n", "k_random_fitted")
return(fits)
}
fit_perseverate <- mle_fit_perseverate(df_crit_sum) #0.5912165
fit_perseverate
## logL n k_random_fitted
## -95.3948596 41.0000000 0.5912165
mle_fit_inclusive = function(data) {
nLL = function(k_random_fitted) {
-loglik_inclusive(data$sum_response_true, k_random_fitted)
}
iter = 0
fits = NULL
fit = NULL
while (is.null(fits)) {
try(fit <- summary(mle(nLL, start = list(k_random_fitted = 0.3),
lower = 0, upper = 1)),
TRUE)
iter = iter + 1
if (!is.null(fit)) {
# m2logL is deviance (-2x LL)
fits = c(-0.5*fit@m2logL, length(data$sum_response_true), fit@coef[,"Estimate"])
} else {
if (iter > 500) {
fits = c(-9999, 0, 0)
}
}
}
names(fits) = c("logL", "n", "k_random_fitted")
return(fits)
}
fit_inclusive <- mle_fit_inclusive(df_crit_sum) #0.71
fit_inclusive
## logL n k_random_fitted
## -85.0790176 41.0000000 0.7100902
mle_fit_conjunctive = function(data) {
nLL = function(k_random_fitted) {
-loglik_conjunctive(data$sum_response_true, k_random_fitted)
}
iter = 0
fits = NULL
fit = NULL
while (is.null(fits)) {
try(fit <- summary(mle(nLL, start = list(k_random_fitted = 0.3),
lower = 0, upper = 1)),
TRUE)
iter = iter + 1
if (!is.null(fit)) {
# m2logL is deviance (-2x LL)
fits = c(-0.5*fit@m2logL, length(data$sum_response_true), fit@coef[,"Estimate"])
} else {
if (iter > 500) {
fits = c(-9999, 0, 0)
}
}
}
names(fits) = c("logL", "n", "k_random_fitted")
return(fits)
}
fit_conjunctive <- mle_fit_conjunctive(df_crit_sum) #1
fit_conjunctive
## logL n k_random_fitted
## -126.9487 41.0000 1.0000
Something weird with this model, but for now it converges (with k_random = k_perseverate = 0.5)
#doesn't work
mle_fit_perseverate_conjunctive = function(data) {
nLL = function(k_random_fitted, k_perseverate_fitted) {
-loglik_perseverate_conjunctive(data$sum_response_true,
k_random_fitted, k_perseverate_fitted)
}
iter = 0
fits = NULL
fit = NULL
while (is.null(fits)) {
try(fit <- summary(mle(nLL, start = list(k_random_fitted = 0.1,
k_perseverate_fitted = 0.1),
lower = 0,
upper = 0.5)),
TRUE)
iter = iter + 1
print(iter)
if (!is.null(fit)) {
# m2logL is deviance (-2x LL)
fits = c(-0.5*fit@m2logL, length(data$sum_response_true), fit@coef[,"Estimate"])
} else {
if (iter > 500) {
fits = c(-9999, 0, 0, 0)
}
}
}
names(fits) = c("logL", "n", "k_random_fitted", "k_perseverate_fitted")
return(fits)
}
fit_perseverate_conjunctive <- mle_fit_perseverate_conjunctive(df_crit_sum)
## [1] 1
# Utility function to calculate BIC
# uses k parameters, n observations, log likelihood ll
get_BIC = function(ll, k, n) {
(k * log(n)) - (2 * ll)
}
BIC_random = get_BIC(fit_random['logL'],
k = 1, n = nrow(df_crit_sum))
BIC_perseverate = get_BIC(fit_perseverate['logL'],
k = 1, n = nrow(df_crit_sum))
BIC_inclusive = get_BIC(fit_inclusive['logL'],
k = 1, n = nrow(df_crit_sum))
BIC_conjunctive = get_BIC(fit_conjunctive['logL'],
k = 1, n = nrow(df_crit_sum))
BIC_perseverate_conjunctive = get_BIC(fit_perseverate_conjunctive['logL'],
k = 2, n = nrow(df_crit_sum))
#random + conjunctive is the best model
print(data.frame(BIC_random, BIC_perseverate, BIC_conjunctive, BIC_inclusive, BIC_perseverate_conjunctive))
## BIC_random BIC_perseverate BIC_conjunctive BIC_inclusive
## logL 257.6109 194.5033 257.6109 173.8716
## BIC_perseverate_conjunctive
## logL 199.2613
scale_factor = nrow(df_crit_sum) / SAMPLE_N
ggplot(df_crit_sum, aes(x = sum_response_true)) +
geom_histogram(aes(y = ..count.., fill = 'Real data'), binwidth = 1, color = 'black',
alpha = .9) +
geom_histogram(data = df.random_sim_sum, aes(y = ..count.. * scale_factor, fill = 'Simulated'), color = 'black',
binwidth = 1, alpha = .5) + #simulation - approx next
scale_x_continuous(breaks= seq(0, N_TRIALS, 1)) +
theme(axis.text.x = element_text(hjust = 1, angle = 45),
legend.position = "top") +
scale_fill_manual(name = "Data type",
values = c('Simulated' = "#827f7d", 'Real data' = "#FF7C00")) +
labs(y = "Frequency",
fill = "legend",
title = "Random Model")
ggplot(df_crit_sum, aes(x = sum_response_true)) +
geom_histogram(aes(y = ..count.., fill = 'Real data'), binwidth = 1, color = 'black',
alpha = .9) +
geom_histogram(data = df.perseverate_sim_sum, aes(y = ..count.. * scale_factor, fill = 'Simulated'), color = 'black',
binwidth = 1, alpha = .5) + #simulation - approx next
scale_x_continuous(breaks= seq(0, N_TRIALS, 1)) +
theme(axis.text.x = element_text(hjust = 1, angle = 45),
legend.position = "top") +
scale_fill_manual(name = "Data type",
values = c('Simulated' = "#827f7d", 'Real data' = "#FF7C00")) +
labs(y = "Frequency",
fill = "legend",
title = "Random + Perseverate Model")
ggplot(df_crit_sum, aes(x = sum_response_true)) +
geom_histogram(aes(y = ..count.., fill = 'Real data'), binwidth = 1, color = 'black',
alpha = .9) +
geom_histogram(data = df.inclusive_sim_sum, aes(y = ..count.. * scale_factor, fill = 'Simulated'), color = 'black',
binwidth = 1, alpha = .5) + #simulation - approx next
scale_x_continuous(breaks= seq(0, N_TRIALS, 1)) +
theme(axis.text.x = element_text(hjust = 1, angle = 45),
legend.position = "top") +
scale_fill_manual(name = "Data type",
values = c('Simulated' = "#827f7d", 'Real data' = "#FF7C00")) +
labs(y = "Frequency",
fill = "legend",
title = "Random + Inclusive Model")
ggplot(df_crit_sum, aes(x = sum_response_true)) +
geom_histogram(aes(y = ..count.., fill = 'Real data'), binwidth = 1, color = 'black',
alpha = .9) +
geom_histogram(data = df.conjunctive_sim_sum, aes(y = ..count.. * scale_factor, fill = 'Simulated'), color = 'black',
binwidth = 1, alpha = .5) + #simulation - approx next
scale_x_continuous(breaks= seq(0, N_TRIALS, 1)) +
theme(axis.text.x = element_text(hjust = 1, angle = 45),
legend.position = "top") +
scale_fill_manual(name = "Data type",
values = c('Simulated' = "#827f7d", 'Real data' = "#FF7C00")) +
labs(y = "Frequency",
fill = "legend",
title = "Random + Conjunctive Model")
ggplot(df_crit_sum, aes(x = sum_response_true)) +
geom_histogram(aes(y = ..count.., fill = 'Real data'), binwidth = 1, color = 'black',
alpha = .9) +
geom_histogram(data = df.perseverate_conjunctive_sim_sum, aes(y = ..count.. * scale_factor, fill = 'Simulated'), color = 'black',
binwidth = 1, alpha = .5) + #simulation - approx next
scale_x_continuous(breaks= seq(0, N_TRIALS, 1)) +
theme(axis.text.x = element_text(hjust = 1, angle = 45),
legend.position = "top") +
scale_fill_manual(name = "Data type",
values = c('Simulated' = "#827f7d", 'Real data' = "#FF7C00")) +
labs(y = "Frequency",
fill = "legend",
title = "Random + Perseverate + Conjunctive Model")