Read in data

# tdcs data
d.all = read.csv("../../data/labDataPlusNorms.csv") %>%
  rename(condition = electrode,
         association.num = mention) %>%
  select(1:4,7,9:19,21,22) %>%
  mutate(condition = fct_recode(condition, "sham" = "na")) %>%
  filter(as.character(target) != as.character(cue)) %>%
  mutate(target= str_trim(target)) 

# turker accuracy data
d.bw.raw = read.csv("../../data/backALLfinal_withALLSimilaritiesPlusNorms.csv")  

d.bw = d.bw.raw %>%
          select(cue, labSubj, subjCode, isRight)

Accuracy by item + condition

Let’s first look at the “raw” accuracy data. The color of the square indicates the proportion of turkers that correctly guessed the cue given the associates of a particular lab subject.

d = d.all %>%
  left_join(d.bw) %>%
  select(1:5, 19,20) %>%
  group_by(cue, labSubj, subjCode)

counts = d %>%
  group_by(labSubj, cue, condition) %>%
  summarise(total = length(isRight),
            correct = sum(isRight, na.rm = T)/total) %>%
  group_by(labSubj) %>%
  mutate(total = sum(correct, na.rm = T)) %>%
  group_by(cue, condition) %>%
  mutate(total_cue = sum(correct, na.rm = T))

ggplot(counts, aes(fct_reorder(labSubj, total),
                   fct_reorder(cue, -total_cue))) + 
  geom_tile(aes(fill = correct), colour = "white") + 
  scale_fill_distiller(palette = "YlGnBu", direction = 1) +
  theme_minimal() +
  facet_grid(~condition, drop = TRUE, scales = "free_x") +
  xlab("participant") +
  ylab("cue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(legend.position = "bottom", legend.key.width = unit(2, "cm"),
        panel.grid = element_blank()) + 
  coord_equal()

Accuracy by item

counts.item = counts %>%
  group_by(cue, condition) %>%
  summarize(correct = mean(correct))

ggplot(counts.item, aes(fct_reorder(cue, -correct), condition)) + 
  geom_tile(aes(fill = correct), colour = "white") + 
  scale_fill_distiller(palette = "YlGnBu", direction = 1) +
  theme_minimal() +
  xlab("cue") +
  ylab("condition") +
  theme(legend.position = "none") +
  coord_flip() 

Predicting accuracy of items

Can we predict variability in item difficulty with frequency and concreteness?

# read in frequency and concreteness
subtlexus.url <- getURL("https://raw.githubusercontent.com/mllewis/RC/master/data/corpus/SUBTLEXus_corpus.txt")
freqs <- read.table(text = subtlexus.url, header=TRUE) %>%
          select(Word,Lg10WF)
concreteness <-read.csv("brysbaert_corpus.csv", header=TRUE) %>%
  select(Word, Conc.M)

counts.item.group = counts.item %>%
  left_join(freqs, by=c("cue"= "Word")) %>%
  left_join(concreteness, by=c("cue"= "Word"))

counts.item.group %>%
  gather(variable, value, -1:-3) %>%
  ggplot(aes(x = value, y = correct, color = condition)) +
  facet_wrap(~variable, scales = "free")+
  geom_point() +
  geom_smooth(method = "lm")

tidy(cor.test(counts.item.group$Lg10WF, counts.item.group$correct)) %>%
  kable()
estimate statistic p.value parameter conf.low conf.high method alternative
0.2381047 2.629 0.0097341 115 0.0591275 0.4022508 Pearson’s product-moment correlation two.sided
tidy(cor.test(counts.item.group$Conc.M, counts.item.group$correct)) %>%
  kable()
estimate statistic p.value parameter conf.low conf.high method alternative
0.1805676 1.968733 0.0513895 115 -0.000998 0.350608 Pearson’s product-moment correlation two.sided

Frequency of the word predicts overall accuracy (higher frequency -> higher accuracy), but this does not differ by condition. Concreteness is marginal.

Look at responses for invididual items

E.g. bicycle is very different for anodal and cathodal

Bicycle:

p.correct = d.bw %>%
  filter(cue == "bicycle") %>%
  group_by(labSubj) %>%
  summarize(propCorrect = sum(isRight)/n(), 
            n = n())
d.all %>%
  group_by(labSubj, cue, condition) %>%
  filter(condition != "sham",
         cue == "bicycle") %>%
  arrange(labSubj, association.num) %>%
  ungroup() %>%
  select(labSubj, cue, target, condition, association.num) %>%
  group_by(condition, labSubj) %>%
  summarize(associates = paste(target,sep="",collapse=",")) %>%
  left_join(p.correct) %>%
  arrange(condition, -propCorrect) %>%
  mutate(propCorrect = round(propCorrect, 2)) %>%
  select(-n) %>%
  as.data.frame() %>%
  kable()
condition labSubj associates propCorrect
anodal PAS12 transportation,blue,wheel,handle,tires,pedal,exercise,useful 0.91
anodal PAS105 two wheel,ride,summer,childhood 0.90
anodal PAS125 cycle,ride,transportation,helmet 0.90
anodal PAS103 basket,mountain,helmet,race 0.76
anodal PAS131 bicycle,bicycle,wheel,transportation,blue,long,pedal 0.69
anodal PAS111 fast,pedal,friend,summer,adventure,wilderness 0.64
anodal PAS115 tires,blue,transportation,exercise,outdoors 0.64
anodal PAS129 transportation,shortcut,summer,sun,eco-friendly,fast,childhood,fun 0.62
anodal PAS117 transportation,beach,helmet,fall,race,hill,summer,wheel 0.58
anodal PAS119 ride,summer,tires,handle 0.50
anodal PAS123 ride,race,mountain,outdoors,active,sun,transportation,fun 0.40
anodal PAS113 mountain,road,family,fun 0.14
anodal PAS101 ride,lake,long legs,blue,rubber 0.00
anodal PAS151 bicycle,sunny days,ride,enjoyable 0.00
cathodal PAS102 helmet,wheel,brake,childhood,street,race,spokes 0.95
cathodal PAS104 ride,pedal,summer,breeze,gears 0.91
cathodal PAS118 bicycle,transportation,cheap,eco-friendly,health,sport,balance,energy 0.64
cathodal PAS122 ride,blue,wheel,childhood,bell 0.58
cathodal PAS132 ride,trip,road,traffic,journey,seat,tandem,brake 0.40
cathodal PAS130 ride,transportation,blue,spring,wheel 0.31
cathodal PAS120 race,exercise,speed,blue 0.30
cathodal PAS107 childhood,neighborhood,friend,connor,exercise,summer,bunker beach,wave pool 0.00
cathodal PAS110 blue,move,transportation,summer 0.00
cathodal PAS112 bicycle,summer,ride,siblings,travel,trip 0.00
cathodal PAS114 rollerblade,skateboard,helmet,outdoors,warm,safe 0.00
cathodal PAS124 suburbs,kid,cities,college 0.00
cathodal PAS133 exercise,outdoors,socializing,race 0.00

It looks like participants in the cathodal condition are doing more idiosyncratic responses in general. In principle, it seems like forward and backward probabilites should capture this.

Conditional probabilitiy

Merge in Nelson norms

# nelson norms
cp.url <- getURL("https://raw.githubusercontent.com/wjhopper/USF-WAN-SS/master/norms.csv")
cps <- read.csv(text = cp.url, header=FALSE) 

names(cps) = c("cue", "target", "FS", "BS")

cps = cps %>%
      mutate(cue =tolower(as.character(cue)),
             target = tolower(as.character(target)))

dcp =  d.all %>%
  left_join(cps) %>%
  select(labSubj, cue, target, condition, association.num, FS, BS)

All cues are present in Nelson norms

cuetdcs = unique(d.all$cue)
cueNelson = unique(cps$cue)

length(intersect(cuetdcs, cueNelson)) # all cues are present in nelson
## [1] 39

Lots of NAs but no difference between critical conditions

dcp %>%
  group_by(condition) %>%
  summarize(na = length(which(is.na(FS))),
            notna = length(which(!is.na(FS)))) %>%
  kable()
condition na notna
anodal 1884 1110
cathodal 1887 1040
sham 1668 924

Replacing missing associate values with zero. Note that BS and FS are skewed. Take the log.

dcp <- dcp %>%
       mutate(log.FS = log(FS),
              log.BS = log(BS),
              FS = ifelse(is.na(FS), 0, FS),
              BS = ifelse(is.na(BS), 0, BS))

dcp %>%
  gather(variable, value, 6:9) %>%
  ggplot(aes(x = value)) +
  facet_wrap(~variable, scales = "free")+
  geom_histogram() +
  theme_bw()

Do conditional probabilities differ by condition?

dcp.item = dcp %>%
  gather(variable, value, 6:9) %>%
  mutate(value = ifelse(is.infinite(value), NA, value)) %>%
  group_by(condition, labSubj, cue, variable) %>%
  summarize(value = mean(value, na.rm = T)) %>%
  group_by(labSubj, condition, variable) %>%
  summarize(value = mean(value, na.rm = T))

dcp.item %>%
  filter(is.finite(value)) %>%
  group_by(condition, variable) %>%
  multi_boot_standard(column = "value", na.rm = T) %>%
   ggplot(aes(x = condition, y = mean, fill = condition)) +
    facet_wrap(~variable, scales = "free") +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    ggtitle("Measures of forward/backward probability") +
    theme_bw() +
    theme(legend.position = "none")

No difference here for either backwards or forward probability.

Entropy analysis

Another approach: Look at the entropy over associates. If cathodal has more idiosyncratic responses, expect more entropy over distribution of associates for each cue.

H.item = d.all %>%
  count(cue, condition, target) %>%
  summarize(H = entropy(n)) 

H.item %>%
  group_by(condition) %>%
  multi_boot_standard(column = "H") %>%
    ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    ggtitle("entropy over associates") +
    theme_bw() +
    theme(legend.position = "none")

H.item %>%
  filter(condition != "sham") %>%
  select(cue, condition, H) %>%
  ungroup() %>%
  spread(condition, H) %>%
  do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE)))   %>%
  kable()
estimate statistic p.value parameter conf.low conf.high method alternative
-0.0496129 -1.989219 0.0539112 38 -0.1001032 0.0008773 Paired t-test two.sided

Marginally reliable difference (p = .054): Cathodal has higher entropy.

Response networks

A third approach: I constructed a network for each cue for each condition. Nodes are the associates given by participants. A link is added between two nodes if a participant listed both associates. Weights are determined by the number of people who listed two associates. If participants are giving more idiosyncratic responses in the cathodal group, we should expect less centrality in the cathodal networks. In the limit, if participants give ONLY idiosnycratic responses, then the network should look like a bunch of separate clusters. Conversely, if everyone gives the same responses, it should look like a hairball.

First, here are the graphs for “bicycle” in the cathodal and anodal conditions.

Modularity setup

# get all pairs
all.pairs = d.all %>%
  #filter(association.num < 6) %>%
  filter(as.character(target) != as.character(cue)) %>%
  group_by(labSubj, cue, condition) %>%
  do(m = data.table(t(combn(.$target, 2)))) %>%
  unnest() %>%
  rename(a1 = V1,
         a2 = V2)

# modularity function
get_modularity <- function(df){
    counts <- count(df, a1, a2) 
    graph <- counts %>%
      select(a1, a2) %>%
      graph_from_data_frame(directed = FALSE) %>%
      simplify() 
    
    E(graph)$weight=counts$n # add in weights
    
    clustering = cluster_leading_eigen(graph, 
                              options = list(maxiter=1000000), 
                              weights = counts$n)
    
    closeness = mean(estimate_closeness(graph, cutoff= 100))
    betweeness = mean(estimate_betweenness(graph, cutoff= 100),  
                      weights = counts$n)
    degree = mean(igraph::degree(graph))

    data.frame(Q = round(clustering$modularity,4), 
               n.groups = round(length(clustering),4),
               closeness  = round(closeness, 4),
               betweeness  = round(betweeness, 4),
               degree = mean(degree))
}
# single item example
test = all.pairs %>%
  filter(cue == "bicycle" & condition == "cathodal") %>%
  filter(a1 != a2)  %>%
    mutate(a1 = str_trim(a1),
           a2 = str_trim(a2)) 

counts.ex <- count(test, a1, a2) 

v.attr = d.all %>%
  filter(as.character(target) != as.character(cue)) %>%
  mutate(target = str_trim(target)) %>%
  filter(cue == "bicycle" & condition == "cathodal") %>%
  distinct(target, .keep_all = T) %>%
  gather(type, value, c(10, 12,14,11,16, 17,18)) %>%
  filter(value == 1) %>%
  select(target, type) 

graph <- counts.ex %>%
      select(a1, a2) %>%
      graph_from_data_frame(directed = FALSE, vertices = v.attr) %>%
      simplify() 
    
E(graph)$weight=counts.ex$n # add in weights

ggplot(ggnetwork(asNetwork(graph)), 
          aes(x = x, y = y, xend = xend, yend = yend)) +
       geom_edges(aes(size = weight), color = "grey50") +
       geom_nodes(aes(color = type)) +
       geom_nodelabel(aes(label =  vertex.names , fill = type), size = 2) +
        ggtitle("cathodal") +
       theme_blank()

  test = all.pairs %>%
  filter(cue == "bicycle" & condition == "anodal") %>%
  filter(a1 != a2)  %>%
    mutate(a1 = str_trim(a1),
             a2 = str_trim(a2)) 

counts.ex <- count(test, a1, a2) 

v.attr = d.all %>%
  filter(as.character(target) != as.character(cue)) %>%
  mutate(target = str_trim(target)) %>%
  filter(cue == "bicycle" & condition == "anodal") %>%
  distinct(target, .keep_all = T) %>%
  gather(type, value, c(10, 12,14,11,16, 17,18)) %>%
  filter(value == 1) %>%
  select(target, type) 

graph <- counts.ex %>%
      select(a1, a2) %>%
      graph_from_data_frame(directed = FALSE, vertices = v.attr) %>%
      simplify() 
    
E(graph)$weight=counts.ex$n # add in weights

ggplot(ggnetwork(asNetwork(graph)), 
          aes(x = x, y = y, xend = xend, yend = yend)) +
       geom_edges(aes(size = weight), color = "grey50") +
       geom_nodes(aes(color = type)) +
       geom_nodelabel(aes(label =  vertex.names , fill = type), size = 2) +
      ggtitle("anodal") +
       theme_blank()

…it looks like the cathodal network is more modular.

Now, let’s do this across all items and quantify modularity, using three measures: Betweenness, mean degree, and modularity.

## get all modularity
modularity = all.pairs %>%
      filter(a1 != a2)  %>%
      mutate(a1 = str_trim(a1),
             a2 = str_trim(a2)) %>%
     group_by(cue, condition) %>%
     do(get_modularity(.))

#  mean betweeness
modularity %>%
    group_by(condition) %>%
    multi_boot_standard(column = "betweeness", na.rm = T) %>%
    ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    theme_bw()  +
    ggtitle("betweeness") +
    theme(legend.position = "none")

modularity %>%
  filter(condition != "sham") %>%
  select(cue, condition, betweeness) %>%
  ungroup() %>%
  spread(condition, betweeness) %>%
  do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE)))   %>%
  kable()
estimate statistic p.value parameter conf.low conf.high method alternative
-2.714844 -3.421774 0.0015021 38 -4.321003 -1.108685 Paired t-test two.sided
#  mean degree
modularity %>%
    group_by(condition) %>%
    multi_boot_standard(column = "degree", na.rm = T) %>%
    ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    theme_bw()  +
    ggtitle("mean degree") +
    theme(legend.position = "none")

modularity %>%
  filter(condition != "sham") %>%
  select(cue, condition, degree) %>%
  ungroup() %>%
  spread(condition, degree) %>%
  do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE)))   %>%
  kable()
estimate statistic p.value parameter conf.low conf.high method alternative
0.3877107 2.541442 0.0152419 38 0.0788784 0.6965431 Paired t-test two.sided
#  clustering
modularity %>%
    group_by(condition) %>%
    multi_boot_standard(column = "Q", na.rm = T) %>%
    ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    ggtitle("modularity") +
    theme_bw() +
    theme(legend.position = "none")

modularity %>%
  filter(condition != "sham") %>%
  select(cue, condition, Q) %>%
  ungroup() %>%
  spread(condition, Q) %>%
  do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE)))   %>%
  kable()
estimate statistic p.value parameter conf.low conf.high method alternative
-0.0350205 -2.870941 0.0066548 38 -0.0597146 -0.0103264 Paired t-test two.sided

Across three different measures, anodal group is more clustered than cathodal group, suggesting they are all sampling from a shared “concept” of the cue.

Note that for two of the measures, sham looks less clustered than cathodal, and I think this is just an artifact of the fact that the sham condition has few associates overall (see below). If you subset to the first 4-5 associates the differences go away, probably because it’s underpowered. Gary suggests controling for number of associates, but I’m not totally sure how to do that with this analysis….

Mean number of associates:

d.all %>%
  group_by(cue, condition, labSubj) %>%
  summarize(n = n()) %>%
  group_by(cue, condition) %>%
  summarize(n = mean(n)) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "n", na.rm = T) %>%
  ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    ggtitle("mean number of associates") +
    theme_bw() +
    theme(legend.position = "none")

Turker errors by condition

Gary: How consistent are the groups in terms of consistency?

If the network analysis is correct, we should expect participants in the anodal condition to not only make more correct guesses, but be wrong in the same way. Here I look at this with three measures: number of incorrect guesses, entropy over guesses and ICC.

Number of incorrect guesses

d.bw2 = d.bw.raw %>%
      select(electrode,  labSubj,  subjCode, cue, 
             turkerGuess, isRight) %>%
        rename(condition = electrode)  %>%     
        mutate(condition = fct_recode(condition, "sham" = "na")) %>%
        mutate(turkerGuess= str_trim(turkerGuess))

d.bw2 %>%
  filter(isRight == 0) %>%
  group_by(condition, cue) %>%
  summarize(n = length(unique(turkerGuess))) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "n", na.rm = T) %>%
  ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    ggtitle("mean number of unique wrong guesses") +
    theme_bw() +
    theme(legend.position = "none")

d.bw2 %>%
  filter(isRight == 0) %>%
  group_by(condition, cue) %>%
  summarize(n = length(unique(turkerGuess))) %>%
  filter(condition != "sham") %>%
  select(cue, condition, n) %>%
  spread(condition, n) %>%
  do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE)))   %>%
  kable()
estimate statistic p.value parameter conf.low conf.high method alternative
-2.974359 -2.447356 0.0191244 38 -5.434677 -0.5140406 Paired t-test two.sided

Entropy over unique wrong guesses

d.bw2 %>%
  filter(isRight == 0) %>%
  count(condition, cue, turkerGuess) %>%
  group_by(cue, condition) %>%
  summarize(H = entropy(n)) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "H", na.rm = T) %>%
  ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    ggtitle("entropy over wrong guesses") +
    theme_bw() +
    theme(legend.position = "none")

m = d.bw2 %>%
  filter(isRight == 0) %>%
  count(condition, cue, turkerGuess) %>%
  group_by(cue, condition) %>%
  summarize(H = entropy(n)) %>%
  filter(condition != "sham") %>%
  select(cue, condition, H) %>%
  spread(condition, H)

tidy(t.test(m$anodal, m$cathodal, paired=TRUE)) %>%
  kable()
estimate statistic p.value parameter conf.low conf.high method alternative
-0.083305 -2.083032 0.0440287 38 -0.1642649 -0.0023451 Paired t-test two.sided

Anodal group has fewer unique wrong guesses and lower entropy over wrong guesses than cathodal group.

Rank order correlation

rank.orders = counts %>%
  select( -total_cue) %>%
  group_by(condition, labSubj) %>%
  arrange(correct) %>%
  mutate(rank.order = 1:n())

ICCs = rank.orders %>%
  select(-correct) %>%
  spread(cue, rank.order) %>%
  group_by(condition) %>%
  do(psych::ICC(select(., -1,-2) %>% t(), missing = FALSE)$results) %>%
  filter(type == "ICC1")

ggplot(ICCs, aes(x = condition, y = ICC, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = `upper bound`, ymin = `lower bound`)) +
    ggtitle("Interaclass correlation") +
    theme_bw() +
    theme(legend.position = "none")

Numerically as predicted, but not reliable.