Question: Does association-type position predict accuracy?
Read in data
# tdcs data
d.all = read.csv("../data/labDataPlusNorms.csv") %>%
rename(condition = electrode,
association.num = mention) %>%
select(1:4,7,9:19,21,22) %>%
mutate(condition = fct_recode(condition, "sham" = "na"))
# turker accuracy data
d.bw = read.csv("../data/backALLfinal_withALLSimilaritiesPlusNorms.csv") %>%
select(cue, labSubj, subjCode, isRight)
Calculate the proportion of turkers for each list that correctly guesed the cue. We then assign that list a 1 if it was guessed correctly more often than the median, and 0 if less.
# add trial-wise accuracy column to tdcs data based on turker data
prop.correct = d.bw %>%
group_by(cue, labSubj) %>%
summarize(prop.turk.correct = sum(isRight)/length(isRight))
median.correct = median(prop.correct$prop.turk.correct)
d.all.bw = left_join(d.all, prop.correct) %>%
mutate(above.median = ifelse(prop.turk.correct > median.correct, 1, 0)) %>%
mutate(above.median = as.factor(above.median)) %>%
filter(!is.na(above.median))
Distributions of word types across association numbers, split by correctness.
cat.dists = d.all.bw %>%
select(-hyponym, -hypernym, -meronymPart,
-meronymSubstance, -taxonomic, -synonym) %>%
gather("category", "present", 6:12) %>%
filter(present == 1, category != "metaphor", category != "holonym")
ggplot(cat.dists, aes(x = association.num,
group = above.median, fill = above.median)) +
geom_histogram(position = "dodge", binwidth =1) +
facet_grid(~category)
ggplot(cat.dists, aes(x = association.num,
group = above.median, fill = above.median)) +
geom_density(alpha = .2) +
facet_grid(~category)
first four associations only
cat.dists %>%
filter(association.num < 5) %>%
ggplot(aes(x = association.num,
group = above.median, fill = above.median)) +
geom_density(alpha = .2) +
facet_wrap(~category)
cat.dists %>%
group_by(cue, labSubj, category, above.median) %>%
summarize(association.num = mean(association.num)) %>%
group_by(category, above.median) %>%
multi_boot_standard(column = "association.num") %>%
ggplot(aes(fill = above.median, y = mean, x = category)) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9))
Overall, lists are more likely to lead to correct guesses of the cue when descriptors and thematic responses appear late.
Does this differ by condition?
ggplot(cat.dists, aes(x = association.num,
group = above.median, fill = above.median)) +
geom_histogram(position = "dodge", binwidth =1) +
facet_grid(condition~category)
ggplot(cat.dists, aes(x = association.num,
group = above.median, fill = above.median)) +
geom_density(alpha = .2) +
facet_grid(condition~category)
cat.dists %>%
group_by(cue, labSubj, category, condition, above.median) %>%
summarize(association.num = mean(association.num)) %>%
group_by(condition, category, above.median) %>%
multi_boot_standard(column = "association.num") %>%
ggplot(aes(fill = above.median, y = mean, x = category)) +
facet_grid(~condition) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Sham has fewer mean number of responses than the critical conditions.
num_responses_by_trial = d.all %>%
rowwise() %>%
mutate(missing = sum(taxInclusive, partsInclusive, thematic, action, descriptor, holonym, metaphor) == 0,
missing = ifelse(missing, 1, 0)) %>%
gather(word_type, value, c(17,18, 19, 11,12,14, 10, 16)) %>%
select(-6:-11) %>%
group_by(labSubj, cue, condition) %>%
filter(value == 1) %>%
summarize(association.num = max(association.num))
mean_n_responses = num_responses_by_trial %>%
group_by(labSubj, condition) %>%
summarize(association.num = mean(association.num)) %>%
group_by(condition) %>%
multi_boot_standard(column = "association.num")
ggplot(mean_n_responses, aes(fill = condition,
y = mean, x = condition)) +
ylab("mean number of responses") +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9))
We thus normalize rank here by number of responses (e.g., 1,2,3,4 -> .25,.5, .75, 1).
# normalized rank by condition and word type
d.all.r = d.all %>%
rowwise() %>%
mutate(missing = sum(taxInclusive, partsInclusive, thematic,
action, descriptor, holonym, metaphor) == 0,
missing = ifelse(missing, 1, 0)) %>%
gather(word_type, value, c(17, 18, 19, 11, 12, 14, 10, 16)) %>%
select(-6:-11) %>%
group_by(condition, labSubj, cue) %>%
filter(value == 1) %>%
mutate(normalized.rank = association.num/(max(association.num) + 1))
As a sanity check, the mean rank response across all conditions is now .5.
# rank by condition
d.all.r %>%
group_by(condition, labSubj, cue) %>%
summarize(normalized.rank = mean(normalized.rank)) %>%
group_by(condition, labSubj) %>%
summarize(normalized.rank = mean(normalized.rank)) %>%
group_by(condition) %>%
multi_boot_standard(column = "normalized.rank") %>%
ggplot(aes(fill = condition, y = mean, x = condition)) +
ylim(0,1) +
ylab("normalized mean rank") +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9))
And here is the normalized mean rank by word_type and condition
d.all.r %>%
group_by(condition, labSubj, cue, word_type) %>%
summarize(normalized.rank = mean(normalized.rank)) %>%
group_by(condition, labSubj, word_type) %>%
summarize(normalized.rank = mean(normalized.rank)) %>%
group_by(condition, word_type) %>%
multi_boot_standard(column = "normalized.rank") %>%
ggplot(aes(fill = condition, y = mean, x = word_type)) +
ylab("normalized mean rank") +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9))