Question: Does association-type position predict accuracy?


Read in data

# tdcs data
d.all = read.csv("../data/labDataPlusNorms.csv") %>%
  rename(condition = electrode,
         association.num = mention) %>%
  select(1:4,7,9:19,21,22) %>%
  mutate(condition = fct_recode(condition, "sham" = "na"))

# turker accuracy data
d.bw = read.csv("../data/backALLfinal_withALLSimilaritiesPlusNorms.csv") %>%  
      select(cue, labSubj, subjCode, isRight)

Median split on lists by accuracy

Calculate the proportion of turkers for each list that correctly guesed the cue. We then assign that list a 1 if it was guessed correctly more often than the median, and 0 if less.

# add trial-wise accuracy column to tdcs data based on turker data
prop.correct = d.bw %>%
  group_by(cue, labSubj) %>% 
  summarize(prop.turk.correct = sum(isRight)/length(isRight))

median.correct = median(prop.correct$prop.turk.correct)

d.all.bw = left_join(d.all, prop.correct) %>%
  mutate(above.median = ifelse(prop.turk.correct > median.correct, 1, 0)) %>%
  mutate(above.median = as.factor(above.median)) %>%
  filter(!is.na(above.median))

Order distributions by word_type

Distributions of word types across association numbers, split by correctness.

cat.dists = d.all.bw %>%
  select(-hyponym, -hypernym, -meronymPart,
         -meronymSubstance, -taxonomic, -synonym) %>% 
  gather("category", "present", 6:12) %>%
  filter(present == 1, category != "metaphor", category != "holonym")

ggplot(cat.dists, aes(x = association.num, 
                      group = above.median, fill = above.median)) +
  geom_histogram(position = "dodge", binwidth =1)  +
  facet_grid(~category)

ggplot(cat.dists, aes(x = association.num, 
                      group = above.median, fill = above.median)) +
  geom_density(alpha = .2) +
  facet_grid(~category)

first four associations only

cat.dists %>%
  filter(association.num < 5) %>%
  ggplot(aes(x = association.num, 
             group = above.median, fill = above.median)) +
  geom_density(alpha = .2) +
  facet_wrap(~category) 

cat.dists %>%
  group_by(cue, labSubj, category, above.median) %>%
  summarize(association.num = mean(association.num)) %>%
  group_by(category, above.median) %>%
  multi_boot_standard(column = "association.num")  %>%
  ggplot(aes(fill = above.median, y = mean, x = category)) +  
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9)) 

Overall, lists are more likely to lead to correct guesses of the cue when descriptors and thematic responses appear late.

Order distributions by word_type and condition

Does this differ by condition?

ggplot(cat.dists, aes(x = association.num, 
                      group = above.median, fill = above.median)) +
  geom_histogram(position = "dodge", binwidth =1)  +
  facet_grid(condition~category)

ggplot(cat.dists, aes(x = association.num, 
                      group = above.median, fill = above.median)) +
  geom_density(alpha = .2) +
  facet_grid(condition~category)

cat.dists %>%
  group_by(cue, labSubj, category, condition, above.median) %>%
  summarize(association.num = mean(association.num)) %>%
  group_by(condition, category, above.median) %>%
  multi_boot_standard(column = "association.num")  %>%
  ggplot(aes(fill = above.median, y = mean, x = category)) +  
  facet_grid(~condition) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9)) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

Position and condition

Sham has fewer mean number of responses than the critical conditions.

num_responses_by_trial = d.all %>%
  rowwise() %>%
  mutate(missing = sum(taxInclusive, partsInclusive, thematic, action, descriptor, holonym, metaphor) == 0,
         missing = ifelse(missing, 1, 0)) %>%
  gather(word_type, value, c(17,18, 19, 11,12,14, 10, 16)) %>%
  select(-6:-11) %>%
  group_by(labSubj, cue, condition) %>%
  filter(value == 1) %>%
  summarize(association.num = max(association.num))

mean_n_responses  = num_responses_by_trial %>%
  group_by(labSubj, condition) %>%
  summarize(association.num = mean(association.num)) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "association.num") 

  ggplot(mean_n_responses, aes(fill = condition, 
                               y = mean, x = condition)) +
  ylab("mean number of responses") +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9)) 

We thus normalize rank here by number of responses (e.g., 1,2,3,4 -> .25,.5, .75, 1).

# normalized rank by condition and word type
d.all.r = d.all %>%
  rowwise() %>%
  mutate(missing = sum(taxInclusive, partsInclusive, thematic, 
                       action, descriptor, holonym, metaphor) == 0,
         missing = ifelse(missing, 1, 0)) %>%
  gather(word_type, value, c(17, 18, 19, 11, 12, 14, 10, 16)) %>%
  select(-6:-11) %>%
  group_by(condition, labSubj, cue) %>%
  filter(value == 1) %>%
  mutate(normalized.rank = association.num/(max(association.num) + 1))

As a sanity check, the mean rank response across all conditions is now .5.

# rank by condition
d.all.r %>%
  group_by(condition, labSubj, cue) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, labSubj) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "normalized.rank")  %>%
  ggplot(aes(fill = condition, y = mean, x = condition)) + 
  ylim(0,1) +
  ylab("normalized mean rank") +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9)) 

And here is the normalized mean rank by word_type and condition

d.all.r %>%
  group_by(condition, labSubj, cue, word_type) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, labSubj, word_type) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, word_type) %>%
  multi_boot_standard(column = "normalized.rank")  %>%
  ggplot(aes(fill = condition, y = mean, x = word_type)) + 
  ylab("normalized mean rank") +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9))