df.raw <- read.csv('kids_color.csv')
df <- df.raw %>%
filter(exclude == 0)
# df <- df.raw %>%
# filter(excl_training == 0)
length(unique(df$subjid)) # 33 participants
## [1] 33
summary(df)
## subjid dob dot age
## Length:14388 Length:14388 Length:14388 Min. :2.100
## Class :character Class :character Class :character 1st Qu.:2.438
## Mode :character Mode :character Mode :character Median :2.710
## Mean :2.687
## 3rd Qu.:2.925
## Max. :3.230
##
## sex lang session test.site
## Length:14388 Length:14388 Min. :1.00 Length:14388
## Class :character Class :character 1st Qu.:1.75 Class :character
## Mode :character Mode :character Median :2.50 Mode :character
## Mean :2.50
## 3rd Qu.:3.25
## Max. :4.00
##
## exp order task target
## Length:14388 Min. :1.000 Length:14388 Length:14388
## Class :character 1st Qu.:1.000 Class :character Class :character
## Mode :character Median :2.000 Mode :character Mode :character
## Mean :1.515
## 3rd Qu.:2.000
## Max. :2.000
##
## col_num col_code p_adult response
## Length:14388 Length:14388 Min. :0.000 Length:14388
## Class :character Class :character 1st Qu.:0.000 Class :character
## Mode :character Mode :character Median :0.050 Mode :character
## Mean :0.329
## 3rd Qu.:0.745
## Max. :1.000
## NA's :8052
## select sel_order correct comments
## Length:14388 Length:14388 Length:14388 Length:14388
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## exclude excl_reason excl_training
## Min. :0 Length:14388 Min. :0
## 1st Qu.:0 Class :character 1st Qu.:0
## Median :0 Mode :character Median :0
## Mean :0 Mean :0
## 3rd Qu.:0 3rd Qu.:0
## Max. :0 Max. :0
##
Can only do overextension analysis here…
proximal errors are underestimates:
blue: purple, green
green: blue, yellow
purple: blue, red
yellow: red, green
df.compre <- df %>%
filter(task == "comprehension")
color_code <- tibble(
col_code = c('n3.5', '5y8.14', '5p4.12', '5yr6.14', '5yr4.8', '5r4.14', 'n9.5', '10gy5.12', '5rp7.10', 'n0.5', '10b5.12'),
test_color = c('gray', 'yellow', 'purple', 'orange', 'brown',
'red', 'white', 'green', 'pink', 'black', 'blue'),
)
proximal_colors <- tibble(
target = c('blue', 'green', 'purple', 'yellow'),
proximal_color_1 = c('purple', 'blue', 'blue', 'red'),
proximal_color_2 = c('green', 'yellow', 'red', 'green')
)
#1412 responses
df.compre_response <- df.compre %>%
left_join(., color_code, by = 'col_code') %>%
left_join(., proximal_colors, by = 'target') %>%
filter(select == 1) %>%
group_by(subjid, session, target) %>%
mutate(selected_target = max(correct),
is_overextension = case_when(
correct == 1 ~ NA, # if correct then not an error
#did you select the target correctly somewhere else? if yes then overextension
correct == 0 & selected_target == 1 ~ 1,
.default = 0
)) %>%
mutate(is_proximal = case_when(
correct == 1 ~ NA,
correct == 0 & (test_color %in% c(proximal_color_1, proximal_color_2)) ~ 1,
.default = 0
))
#936 errors in total
get_proximal_errors_binom_comprehension <- function(df.response){
n_errors <- df.response %>%
filter(correct == 0) %>%
nrow()
df.compre_errors_label <- df.response %>%
filter(correct == 0) %>%
#group by which label used, and which stimuli responding to
group_by(target) %>%
summarise(prop = n() / n_errors)
df.compre_errors_referent <- df.response %>%
filter(correct == 0) %>%
#group by which label used, and which stimuli responding to
group_by(test_color) %>%
summarise(prop = n() / n_errors)
get_product_comprehension <- function(label, referent) {
return(df.compre_errors_label[df.compre_errors_label$target == label, ]$prop *
df.compre_errors_referent[df.compre_errors_referent$test_color == referent, ]$prop)
}
base_rate <- proximal_colors %>%
pivot_longer(cols = -c(target)) %>%
select(-name) %>%
rename(referent = value) %>%
rowwise() %>%
mutate(product = get_product_comprehension(target, referent))
overall_prior <- sum(base_rate$product)
binom.test(x=sum(df.response$is_proximal, na.rm = T),n=n_errors,p=overall_prior)
}
get_proximal_errors_binom_comprehension(df.compre_response)
##
## Exact binomial test
##
## data: sum(df.response$is_proximal, na.rm = T) and n_errors
## number of successes = 193, number of trials = 930, p-value = 0.002959
## alternative hypothesis: true probability of success is not equal to 0.1699665
## 95 percent confidence interval:
## 0.1818851 0.2350355
## sample estimates:
## probability of success
## 0.2075269
sum(df.compre_response$is_overextension, na.rm = T) #805 overextension errors, 86%
## [1] 814
sum(df.compre_response$is_proximal, na.rm = T) #193 proximal errors 20%
## [1] 193
df.production <- df %>%
filter(task == "production")
#1412 responses
df.production$target <- trimws(tolower(df.production$target))
df.production$response <- trimws(tolower(df.production$response))
df.production_response <- df.production %>%
left_join(., proximal_colors, by = 'target') %>%
rowwise() %>%
mutate(correct = ifelse(grepl(target, response), 1, 0)) %>%
group_by(subjid, session) %>%
mutate(correct_targets = list(target[correct == 1])) %>%
mutate(is_overextension = case_when(
correct == 1 ~ NA, # if correct then not an error
correct == 0 & response %in% correct_targets ~ 1,
.default = 0
)) %>%
ungroup() %>%
group_by(subjid, session, target) %>%
mutate(is_proximal = case_when(
correct == 1 ~ NA,
correct == 0 & (response %in% c(proximal_color_1, proximal_color_2)) ~ 1,
.default = 0
)) %>%
ungroup()
get_proximal_errors_binom_production <- function(df.response) {
n_errors <- df.response %>%
filter(correct == 0) %>%
nrow()
df.prod_errors_label <- df.response %>%
filter(correct == 0) %>%
#group by which label used, and which stimuli responding to
group_by(response) %>%
summarise(prop = n() / n_errors)
df.prod_errors_referent <- df.response %>%
filter(correct == 0) %>%
#group by which label used, and which stimuli responding to
group_by(target) %>%
summarise(prop = n() / n_errors)
get_product_production <- function(label, referent) {
return(df.prod_errors_label[df.prod_errors_label$response == label, ]$prop *
df.prod_errors_referent[df.prod_errors_referent$target == referent, ]$prop)
}
base_rate_production <- proximal_colors %>%
pivot_longer(cols = -c(target)) %>%
select(-name) %>%
rename(referent = value) %>%
rowwise() %>%
mutate(product = get_product_production(target, referent))
overall_prior <- sum(base_rate_production$product)
return (binom.test(x=sum(df.response$is_proximal, na.rm = T),n=n_errors,p=overall_prior))
}
get_proximal_errors_binom_production(df.production_response)
##
## Exact binomial test
##
## data: sum(df.response$is_proximal, na.rm = T) and n_errors
## number of successes = 21, number of trials = 435, p-value = 0.009795
## alternative hypothesis: true probability of success is not equal to 0.026413
## 95 percent confidence interval:
## 0.03012841 0.07285039
## sample estimates:
## probability of success
## 0.04827586
#when does a kid first start using a response / color word correctly?
df.prod_trajectory <- df.production %>%
filter(task == "production") %>%
mutate(response = tolower(response)) %>%
rowwise() %>%
mutate(correct = ifelse(grepl(target, response), 1, 0)) %>%
filter(!response %in% c('nr', 'no response')) %>%
group_by(subjid, target) %>%
filter(correct == 1) %>%
summarise(first_correct_session = min(session)) %>%
ungroup()
## `summarise()` has grouped output by 'subjid'. You can override using the
## `.groups` argument.
#some sanity check
ggplot(df.prod_trajectory,
aes(x = target, y = first_correct_session,
color = target)) +
stat_summary(fun.data = "mean_cl_boot",
geom = "pointrange")+
geom_jitter(height = 0,
alpha = 0.5,
shape = 21,
size = 3,
aes(fill = target),
color = 'black') +
theme(legend.position = "none") +
labs(x = 'Target color',
y = 'First session with correct production')
With kids who never produced a color word coded as ‘before production’ for all sessions
df.production_response_trajectory <- df.production_response %>%
left_join(., df.prod_trajectory, join_by(subjid, response == target)) %>%
mutate(prod_period_strict = case_when(
is.na(first_correct_session) ~ 'before production',
session < first_correct_session ~ 'before production',
session == first_correct_session ~ 'at production',
session > first_correct_session ~ 'after production'
),
prod_period_within = case_when(
is.na(first_correct_session) ~ "no production",
session < first_correct_session ~ 'before production',
session == first_correct_session ~ 'at production',
session > first_correct_session ~ 'after production'
)) %>%
mutate(prod_period_strict = factor(prod_period_strict,
levels = c("before production",
"at production",
"after production")),
prod_period_within = factor(prod_period_within,
levels = c("no production",
"before production",
"at production",
"after production")))
df.production_trajectory_error <- df.production_response_trajectory %>%
group_by(prod_period_strict, target) %>%
summarise(total_errors = sum(correct == 0, na.rm = T),
sum_overextension = sum(is_overextension, na.rm = T),
prop_overextension = sum_overextension / total_errors,
sum_proximal = sum(is_proximal, na.rm = T),
prop_proximal = sum_proximal / total_errors) %>%
mutate(task = 'production')
## `summarise()` has grouped output by 'prod_period_strict'. You can override
## using the `.groups` argument.
df.compre_response_trajectory <- df.compre_response %>%
left_join(., df.prod_trajectory, join_by(subjid, test_color == target)) %>%
mutate(prod_period_strict = case_when(
is.na(first_correct_session) ~ 'before production',
session < first_correct_session ~ 'before production',
session == first_correct_session ~ 'at production',
session > first_correct_session ~ 'after production'
),
prod_period_within = case_when(
is.na(first_correct_session) ~ "no production",
session < first_correct_session ~ 'before production',
session == first_correct_session ~ 'at production',
session > first_correct_session ~ 'after production'
)) %>%
mutate(prod_period_strict = factor(prod_period_strict, levels = c("before production",
"at production",
"after production")),
prod_period_within = factor(prod_period_within, levels = c("no production",
"before production",
"at production",
"after production")))
df.compre_trajectory_error <- df.compre_response_trajectory %>%
group_by(prod_period_strict, target) %>%
summarise(total_errors = sum(correct == 0, na.rm = T),
sum_overextension = sum(is_overextension, na.rm = T),
prop_overextension = sum_overextension / total_errors,
sum_proximal = sum(is_proximal, na.rm = T),
prop_proximal = sum_proximal / total_errors) %>%
mutate(task = 'comprehension')
## `summarise()` has grouped output by 'prod_period_strict'. You can override
## using the `.groups` argument.
df.error_trajectory <- bind_rows(df.compre_trajectory_error, df.production_trajectory_error) %>%
pivot_longer(cols = c(prop_overextension, prop_proximal,
sum_overextension, sum_proximal))
ggplot(df.error_trajectory %>%
filter(name %in% c("sum_overextension",
"sum_proximal")) %>%
mutate(name = factor(name, labels = c('Extension errors', 'Proximal errors'))),
aes(x = prod_period_strict, y = value,
fill = name)) +
geom_col(position = position_dodge(0.9)) +
facet_grid(~task) +
labs(x = 'Time period relative to first correct production of color word',
y = 'Number of errors',
fill = 'Error type')
ggplot(df.error_trajectory %>%
filter(name %in% c("prop_overextension",
"prop_proximal")) %>%
mutate(name = factor(name, labels = c('Extension errors', 'Proximal errors'))),
aes(x = prod_period_strict, y = value,
fill = name)) +
geom_col(position = position_dodge(0.9)) +
facet_grid(~task) +
labs(x = 'Time period relative to first correct production of color word',
y = 'Proportion of errors',
fill = 'Error type')
ggplot(df.error_trajectory %>%
filter(task == "comprehension") %>%
filter(name %in% c("sum_overextension",
"sum_proximal")) %>%
mutate(name = factor(name, labels = c('Extension errors', 'Proximal errors'))),
aes(x = prod_period_strict, y = value,
fill = name)) +
geom_col(position = position_dodge(0.9)) +
facet_grid(~target) +
labs(x = 'Time period relative to first correct production of color word',
y = 'Number of errors',
fill = 'Error type')
ggplot(df.error_trajectory %>%
filter(task == "comprehension") %>%
filter(name %in% c("prop_overextension",
"prop_proximal")) %>%
mutate(name = factor(name, labels = c('Extension errors', 'Proximal errors'))),
aes(x = prod_period_strict, y = value,
fill = name)) +
geom_col(position = position_dodge(0.9)) +
facet_grid(~target) +
labs(x = 'Time period relative to first correct production of color word',
y = 'Proportion of errors',
fill = 'Error type')
ggplot(df.error_trajectory %>%
filter(task == "production") %>%
filter(name %in% c("sum_overextension",
"sum_proximal")) %>%
mutate(name = factor(name, labels = c('Extension errors', 'Proximal errors'))),
aes(x = prod_period_strict, y = value,
fill = name)) +
geom_col(position = position_dodge(0.9)) +
facet_grid(~target) +
labs(x = 'Time period relative to first correct production of color word',
y = 'Number of errors',
fill = 'Error type')
With kids who never produced a color word coded as ‘no production’ for all sessions
df.production_trajectory_error_within <- df.production_response_trajectory %>%
group_by(prod_period_within, target) %>%
summarise(total_errors = sum(correct == 0, na.rm = T),
sum_overextension = sum(is_overextension, na.rm = T),
prop_overextension = sum_overextension / total_errors,
sum_proximal = sum(is_proximal, na.rm = T),
prop_proximal = sum_proximal / total_errors) %>%
mutate(task = 'production')
## `summarise()` has grouped output by 'prod_period_within'. You can override
## using the `.groups` argument.
df.compre_trajectory_error_within <- df.compre_response_trajectory %>%
group_by(prod_period_within, target) %>%
summarise(total_errors = sum(correct == 0, na.rm = T),
sum_overextension = sum(is_overextension, na.rm = T),
prop_overextension = sum_overextension / total_errors,
sum_proximal = sum(is_proximal, na.rm = T),
prop_proximal = sum_proximal / total_errors) %>%
mutate(task = 'comprehension')
## `summarise()` has grouped output by 'prod_period_within'. You can override
## using the `.groups` argument.
df.error_trajectory_within <- bind_rows(df.compre_trajectory_error_within, df.production_trajectory_error_within) %>%
pivot_longer(cols = c(prop_overextension, prop_proximal,
sum_overextension, sum_proximal))
ggplot(df.error_trajectory_within %>%
filter(name %in% c("sum_overextension",
"sum_proximal")) %>%
mutate(name = factor(name, labels = c('Extension errors', 'Proximal errors'))),
aes(x = prod_period_within, y = value,
fill = name)) +
geom_col(position = position_dodge(0.9)) +
facet_grid(~task) +
labs(x = 'Time period relative to first correct production of color word',
y = 'Number of errors',
fill = 'Error type') +
theme(axis.text.x=element_text(size=12))
ggplot(df.error_trajectory_within %>%
filter(name %in% c("prop_overextension",
"prop_proximal")) %>%
mutate(name = factor(name, labels = c('Extension errors', 'Proximal errors'))),
aes(x = prod_period_within, y = value,
fill = name)) +
geom_col(position = position_dodge(0.9)) +
facet_grid(~task) +
labs(x = 'Time period relative to first correct production of color word',
y = 'Proportion of errors',
fill = 'Error type') +
theme(axis.text.x=element_text(size=12))
ggplot(df.error_trajectory_within %>%
filter(task == "comprehension") %>%
filter(name %in% c("sum_overextension",
"sum_proximal")) %>%
mutate(name = factor(name, labels = c('Extension errors', 'Proximal errors'))),
aes(x = prod_period_within, y = value,
fill = name)) +
geom_col(position = position_dodge(0.9)) +
facet_grid(~target) +
labs(x = 'Time period relative to first correct production of color word',
y = 'Number of errors',
fill = 'Error type')
ggplot(df.error_trajectory_within %>%
filter(task == "comprehension") %>%
filter(name %in% c("prop_overextension",
"prop_proximal")) %>%
mutate(name = factor(name, labels = c('Extension errors', 'Proximal errors'))),
aes(x = prod_period_within, y = value,
fill = name)) +
geom_col(position = position_dodge(0.9)) +
facet_grid(~target) +
labs(x = 'Time period relative to first correct production of color word',
y = 'Proportion of errors',
fill = 'Error type')
ggplot(df.error_trajectory_within %>%
filter(task == "production") %>%
filter(name %in% c("sum_overextension",
"sum_proximal")) %>%
mutate(name = factor(name, labels = c('Extension errors', 'Proximal errors'))),
aes(x = prod_period_within, y = value,
fill = name)) +
geom_col(position = position_dodge(0.9)) +
facet_grid(~target) +
labs(x = 'Time period relative to first correct production of color word',
y = 'Number of errors',
fill = 'Error type')
#let's get comprehension stuff out as well
df.compre_cat_size <- df.compre_response_trajectory %>%
mutate(select = as.numeric(select),
correct = as.numeric(correct)) %>%
group_by(subjid, session, task, target,
first_correct_session, prod_period_within, prod_period_strict) %>%
summarise(compre_n_select = sum(select), # how many was selected per target
compre_include_correct = ifelse(sum(correct, na.rm = T) >= 1, 1, 0),
compre_correct_first = max(ifelse(correct == 1 & sel_order == 1, 1, 0))) # was the target chosen first?
## `summarise()` has grouped output by 'subjid', 'session', 'task', 'target',
## 'first_correct_session', 'prod_period_within'. You can override using the
## `.groups` argument.
df.spectrum_cat_size <- df %>%
filter(task == "spectrum" & col_num != "total") %>%
left_join(., df.prod_trajectory, join_by(subjid, target == target)) %>%
mutate(prod_period_strict = case_when(
is.na(first_correct_session) ~ 'before production',
session < first_correct_session ~ 'before production',
session == first_correct_session ~ 'at production',
session > first_correct_session ~ 'after production'
),
prod_period_within = case_when(
is.na(first_correct_session) ~ "no production",
session < first_correct_session ~ 'before production',
session == first_correct_session ~ 'at production',
session > first_correct_session ~ 'after production'
)) %>%
mutate(prod_period_strict = factor(prod_period_strict, levels = c("before production",
"at production",
"after production")),
prod_period_within = factor(prod_period_within, levels = c("no production", "before production",
"at production",
"after production"))) %>%
mutate(select = as.numeric(select),
correct = as.numeric(correct)) %>%
group_by(subjid, session, task, target,
first_correct_session, prod_period_within, prod_period_strict) %>%
summarise(spectrum_n_select = sum(select, na.rm = T), # how many was selected per target
spectrum_include_correct = ifelse(sum(correct, na.rm = T) > 1, 1, 0)) # does it include the target?
## `summarise()` has grouped output by 'subjid', 'session', 'task', 'target',
## 'first_correct_session', 'prod_period_within'. You can override using the
## `.groups` argument.
df.cat_size <- bind_rows(df.compre_cat_size, df.spectrum_cat_size)
ggplot(df.cat_size,
aes(x = prod_period_strict, y = compre_n_select)) +
geom_violin() +
stat_summary(fun.data = "mean_cl_boot",
geom = "pointrange") +
geom_jitter(height = 0,
alpha = 0.3) +
ylab("Number of squares selected as target") +
xlab("Comprehension task relative to first correct production of color word")
## Warning: Removed 264 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 264 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 264 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggplot(df.cat_size %>%
filter(task == "comprehension"),
aes(x = prod_period_strict, y = compre_n_select,
color = target)) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.5),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.5)) +
stat_summary(fun = "mean",
geom = "line",
aes(group = target),
position = position_dodge(width = 0.5)) +
scale_color_manual(values = c('blue', 'green', 'purple', 'gold')) +
facet_grid(~target) +
theme(legend.position="none") +
ylab("Number of squares selected as target") +
xlab("Comprehension task relative to first production")
ggplot(df.cat_size %>%
filter(task == "comprehension"),
aes(x = prod_period_within, y = compre_n_select,
color = target)) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.5),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.5)) +
stat_summary(fun = "mean",
geom = "line",
aes(group = target),
position = position_dodge(width = 0.5)) +
scale_color_manual(values = c('blue', 'green', 'purple', 'gold')) +
facet_wrap(~target) +
theme(legend.position="none") +
ylab("Number of squares selected as target") +
xlab("Comprehension task relative to first correct production of color word")
ggplot(df.cat_size %>%
filter(task == "comprehension") %>%
filter(!is.na(compre_include_correct)) %>%
mutate(compre_include_correct = ifelse(
compre_include_correct == 0, "did not select target", "selected target"
)),
aes(x = prod_period_strict, y = compre_n_select,
color = target)) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.5),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.5)) +
stat_summary(fun = "mean",
geom = "line",
aes(group = target),
position = position_dodge(width = 0.5)) +
scale_color_manual(values = c('blue', 'green', 'purple', 'gold')) +
facet_grid(compre_include_correct~target) +
theme(legend.position="none") +
ylab("Number of squares selected as target") +
xlab("Comprehension task relative to first correct production of color word")
ggplot(df.cat_size %>%
filter(task == "spectrum"),
aes(x = prod_period_strict, y = spectrum_n_select)) +
geom_violin() +
stat_summary(fun.data = "mean_cl_boot",
geom = "pointrange") +
geom_jitter(height = 0,
alpha = 0.3) +
ylab("Number of squares selected as target") +
xlab("Spectrum task relative to first correct production of color word")
ggplot(df.cat_size %>%
filter(task == "spectrum"),
aes(x = prod_period_within, y = spectrum_n_select)) +
geom_violin() +
stat_summary(fun.data = "mean_cl_boot",
geom = "pointrange") +
geom_jitter(height = 0,
alpha = 0.3) +
ylab("Number of squares selected as target") +
xlab("Spectrum task relative to first correct production of color word")
ggplot(df.cat_size %>%
filter(task == "spectrum"),
aes(x = prod_period_strict, y = spectrum_n_select)) +
geom_violin() +
stat_summary(fun.data = "mean_cl_boot",
geom = "pointrange") +
geom_jitter(height = 0,
alpha = 0.3) +
facet_grid(~target) +
ylab("Number of squares selected as target") +
xlab("Comprehension task relative to first correct production of color word")
ggplot(df.cat_size %>%
filter(task == "spectrum"),
aes(x = prod_period_strict, y = spectrum_n_select,
color = target)) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.5),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.5)) +
stat_summary(fun = "mean",
geom = "line",
aes(group = target),
position = position_dodge(width = 0.5)) +
scale_color_manual(values = c('blue', 'green', 'purple', 'gold')) +
ylab("Number of squares selected as target") +
xlab("Spectrum task relative to first production")
ggplot(df.cat_size %>%
filter(task == "spectrum"),
aes(x = prod_period_within, y = spectrum_n_select,
color = target)) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.5),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.5)) +
stat_summary(fun = "mean",
geom = "line",
aes(group = target),
position = position_dodge(width = 0.5)) +
scale_color_manual(values = c('blue', 'green', 'purple', 'gold')) +
facet_grid(~target) +
theme(legend.position="none") +
ylab("Number of squares selected as target") +
xlab("Spectrum task relative to first correct production of color word")
ggplot(df.cat_size %>%
filter(task == "spectrum") %>%
filter(!is.na(spectrum_include_correct)) %>%
mutate(spectrum_include_correct = ifelse(
spectrum_include_correct == 0, "did not select focal", "selected focal"
)),
aes(x = prod_period_strict, y = spectrum_n_select,
color = target)) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.5),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.5)) +
stat_summary(fun = "mean",
geom = "line",
aes(group = target),
position = position_dodge(width = 0.5)) +
scale_color_manual(values = c('blue', 'green', 'purple', 'gold')) +
facet_grid(spectrum_include_correct~target) +
theme(legend.position="none") +
ylab("Number of squares selected as target") +
xlab("Spectrum task relative to first correct production of color word")
# plot number of unique words produced vs. average category size?
df.unique_production <- df %>%
filter(task == "production") %>%
group_by(subjid, session) %>%
mutate(response = tolower(response)) %>%
filter(!response %in% c('idk', 'nr', 'no response', '3') & !str_detect(response, " ")) %>%
summarise(unique_words_produced = list(unique(response))) %>%
rowwise() %>%
mutate(n_unique_words_produced = length(unique_words_produced)) %>%
mutate(contains_blue = ifelse('blue' %in% unique_words_produced, 1, 0),
contains_green = ifelse('green' %in% unique_words_produced, 1, 0),
contains_purple = ifelse('purple' %in% unique_words_produced, 1, 0),
contains_yellow = ifelse('yellow' %in% unique_words_produced, 1, 0)) %>%
mutate(n_compre_target_produced = sum(across(starts_with("contains"))),
n_spectrum_target_produced = sum(contains_blue, contains_green))
## `summarise()` has grouped output by 'subjid'. You can override using the
## `.groups` argument.
df.unique_production_compre <- left_join(df.unique_production, df.cat_size)
## Joining with `by = join_by(subjid, session)`
df.unique_production_compre_per_ppt <- df.unique_production_compre %>%
group_by(subjid, session, task, n_unique_words_produced,
n_compre_target_produced, n_spectrum_target_produced) %>%
summarise(mean_compre_n_select = mean(compre_n_select),
#calculate a mean of just the categories that has been 'comprehended'
mean_compre_n_select_include_target =
mean(ifelse(compre_include_correct, compre_n_select, NA), na.rm = T),
mean_spectrum_n_select = mean(spectrum_n_select),
#calculate a mean of just the categories that has been 'comprehended'
mean_spectrum_n_select_include_target =
mean(ifelse(spectrum_include_correct, spectrum_n_select, NA), na.rm = T)) %>%
mutate(mean_compre_n_select = ifelse(is.na(mean_compre_n_select), mean_spectrum_n_select, mean_compre_n_select),
mean_compre_n_select_include_target = ifelse(is.na(mean_compre_n_select_include_target), mean_spectrum_n_select_include_target, mean_compre_n_select_include_target)) %>%
rename(mean_select = mean_compre_n_select,
mean_select_include_target = mean_compre_n_select_include_target) %>%
select(-mean_spectrum_n_select,
-mean_spectrum_n_select_include_target)
## `summarise()` has grouped output by 'subjid', 'session', 'task',
## 'n_unique_words_produced', 'n_compre_target_produced'. You can override using
## the `.groups` argument.
ggplot(df.unique_production_compre_per_ppt,
aes(x = n_unique_words_produced, y = mean_select,
fill = task)) +
stat_summary(fun = "mean",
geom = "bar",
aes(fill = task),
position = position_dodge(width = 0.9)
) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.9),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.9)
) +
stat_summary(fun = "mean",
geom = "line",
aes(group = task),
position = position_dodge(width = 0.9)
) +
scale_x_continuous(breaks = seq(0, 12, 1)) +
facet_wrap(~task) +
ylab("Number of squares selected as target (per task per session)") +
xlab("Number of unique color words produced (per session)") +
theme(legend.position = "none")
ggplot(df.unique_production_compre_per_ppt,
aes(x = n_unique_words_produced, y = mean_select_include_target,
fill = task)) +
stat_summary(fun = "mean",
geom = "bar",
aes(fill = task),
position = position_dodge(width = 0.9)
) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.9),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.9)
) +
stat_summary(fun = "mean",
geom = "line",
aes(group = task),
position = position_dodge(width = 0.9)
) +
scale_x_continuous(breaks = seq(0, 12, 1)) +
facet_wrap(~task) +
ylab("Number of squares selected as CORRECT target") +
xlab("Number of unique color words produced (per session)") +
theme(legend.position = "none")
## Warning: Removed 18 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Removed 18 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Removed 18 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Removed 18 rows containing non-finite outside the scale range
## (`stat_summary()`).
#ONLY counting the production of targets from the comprehension task...
ggplot(df.unique_production_compre_per_ppt %>%
filter(task == "comprehension"),
aes(x = n_compre_target_produced, y = mean_select)) +
stat_summary(fun = "mean",
geom = "bar",
aes(fill = task),
position = position_dodge(width = 0.9)
) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.9),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.9)
) +
stat_summary(fun = "mean",
geom = "line",
aes(group = task),
position = position_dodge(width = 0.9)
) +
facet_wrap(~task) +
ylab("Number of squares selected as target (per task per session)") +
xlab("Number of target color words (blue/green/purple/yellow) produced (per session)") +
theme(legend.position = "none")
# only counting production of blue / green
ggplot(df.unique_production_compre_per_ppt %>%
filter(task == "spectrum"),
aes(x = n_spectrum_target_produced, y = mean_select)) +
stat_summary(fun = "mean",
geom = "bar",
aes(fill = task),
position = position_dodge(width = 0.9)
) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.9),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.9)
) +
stat_summary(fun = "mean",
geom = "line",
aes(group = task),
position = position_dodge(width = 0.9)
) +
scale_x_continuous(breaks = seq(0, 4, 1)) +
facet_wrap(~task) +
ylab("Number of squares selected as target (per task per session)") +
xlab("Number of unique color words produced (per session)") +
theme(legend.position = "none")
# okay, but maybe what we really care about is only the categories that they succeeded in the
# comprehension tasks!
ggplot(df.unique_production_compre %>%
filter(task == "comprehension"),
aes(x = n_unique_words_produced, y = compre_n_select,
color = I(target))) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.5),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.5)) +
stat_summary(fun = "mean",
geom = "line",
aes(group = target),
position = position_dodge(width = 0.5)) +
facet_wrap(~target) +
ylab("Number of squares selected as target (per task per session)") +
xlab("Number of comprehension target words (blue, green, purple, yellow) produced (per session)")
ggplot(df.unique_production_compre %>%
filter(task == "spectrum"),
aes(x = n_unique_words_produced, y = spectrum_n_select,
color = I(target))) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.5),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.5)) +
stat_summary(fun = "mean",
geom = "line",
aes(group = target),
position = position_dodge(width = 0.5)) +
scale_color_manual(values = c('blue', 'green', 'purple', 'gold')) +
ylab("Number of squares selected as target") +
xlab("Number of unique color words produced")
df.blue <- df %>%
filter(target == "blue" | target == "green")
df.blue_prod <- df.blue %>%
filter(task == "production")
df.blue_compre <- df.blue %>%
filter(task == "comprehension") %>%
group_by(subjid, target) %>%
mutate(select = as.numeric(select)) %>%
group_by(subjid, session, target) %>%
summarise(compre_n_select = sum(select), # how many was select per target
compre_include_correct = sum(correct, na.rm = T), # does it include the target?
compre_correct_first = max(ifelse(correct == 1 & sel_order == 1, 1, 0))) # was the target chosen first?
df.blue_spectrum <- df.blue %>%
filter(task == "spectrum" & col_num != "total") %>%
group_by(subjid, target) %>%
mutate(select = as.numeric(select)) %>%
group_by(subjid, session, target) %>%
summarise(spectrum_n_select = sum(select, na.rm = T), # how many was select per target
spectrum_include_correct = sum(correct, na.rm = T)) # does it include the target?
df.blue_sum <- left_join(df.blue_prod, df.blue_compre) %>%
left_join(., df.blue_spectrum) %>%
select(-c(col_num, col_code, p_adult, select, sel_order,
comments, exclude, excl_reason))
# let's just look at the answers for blue for now, and we want to just look at kids
# who began producing it during the process
# 6 kids that learned blue over 4 sessions
df.blue_prod_change <- df.blue_sum %>%
filter(target == "blue") %>%
group_by(subjid) %>%
filter(sum(correct) < 4 & sum(correct) > 0) %>%
ungroup()
ggplot(df.blue_prod_change,
aes(x = session, y = compre_n_select, color = subjid)) +
geom_point(height = 0,
aes(color = I(ifelse(correct == 1, 'blue', 'black')))) +
geom_line(aes(group = subjid)) +
facet_wrap(~subjid) +
geom_text(aes(label = ifelse(compre_include_correct > 0, '*', '')),
nudge_x = 0.1,
size = 7)
ggplot(df.blue_prod_change,
aes(x = session, y = compre_n_select, color = subjid)) +
geom_point(height = 0,
aes(color = I(ifelse(correct == 1, 'blue', 'black')))) +
geom_line(aes(group = subjid)) +
facet_wrap(~subjid) +
geom_text(aes(label = ifelse(compre_correct_first > 0, '*', '')),
nudge_x = 0.1,
size = 7)
ggplot(df.blue_prod_change,
aes(x = session, y = spectrum_n_select, color = subjid)) +
geom_point(height = 0,
aes(color = I(ifelse(correct == 1, 'blue', 'black')))) +
geom_line(aes(group = subjid)) +
facet_wrap(~subjid) +
geom_text(aes(label = ifelse(spectrum_include_correct > 0, '*', '')),
nudge_x = 0.1,
size = 7)
# how about green?
# 8 kids that learned blue over 4 sessions
df.green_prod_change <- df.blue_sum %>%
filter(target == "green") %>%
group_by(subjid) %>%
filter(sum(correct) < 4 & sum(correct) > 0) %>%
ungroup()
ggplot(df.green_prod_change,
aes(x = session, y = compre_n_select, color = subjid)) +
geom_point(height = 0,
aes(color = I(ifelse(correct == 1, 'green', 'black')))) +
geom_line(aes(group = subjid)) +
facet_wrap(~subjid) +
geom_text(aes(label = ifelse(compre_include_correct > 0, '*', '')),
nudge_x = 0.1,
size = 7)
ggplot(df.green_prod_change,
aes(x = session, y = spectrum_n_select, color = subjid)) +
geom_point(height = 0,
aes(color = I(ifelse(correct == 1, 'green', 'black')))) +
geom_line(aes(group = subjid)) +
facet_wrap(~subjid) +
geom_text(aes(label = ifelse(spectrum_include_correct > 0, '*', '')),
nudge_x = 0.1,
size = 7)
df.spectrum <- df.learned_words %>% # 76 of this observations....
filter(task == "spectrum") %>%
filter(first_correct_session > 1) %>%
mutate(spectrum_period = case_when(
session < first_correct_session ~ 'before production',
session == first_correct_session ~ 'at production',
session > first_correct_session ~ 'after production'
)) %>%
mutate(spectrum_period = factor(spectrum_period, levels = c('before production', 'at production',
'after production')))
ggplot(df.spectrum,
aes(x = spectrum_period, y = spectrum_n_select)) +
geom_violin() +
stat_summary(fun.data = "mean_cl_boot",
geom = "pointrange")
ggplot(df.spectrum,
aes(x = spectrum_period, y = spectrum_n_select,
color = target)) +
stat_summary(fun.data = "mean_cl_boot",
geom = "errorbar",
position = position_dodge(width = 0.5),
aes(width = 0.2)) +
stat_summary(fun = "mean",
geom = "point",
position = position_dodge(width = 0.5)) +
stat_summary(fun = "mean",
geom = "line",
aes(group = target),
position = position_dodge(width = 0.5)) +
scale_color_manual(values = c('blue', 'green')) +
ylab("Number of squares selected as target") +
xlab("Spectrum task relative to first production")