tDCS network analyses

Read in data

# tdcs data
d.all = read.csv("../../data/labDataPlusNorms.csv") %>%
  rename(condition = electrode,
         association.num = mention) %>%
  select(1:4,7,9:19,21,22) %>%
  mutate(condition = fct_recode(condition, "sham" = "na")) %>%
  filter(as.character(target) != as.character(cue)) %>%
  mutate(target= str_trim(target)) 

# turker accuracy data
d.bw.raw = read.csv("../../data/backALLfinal_withALLSimilaritiesPlusNorms.csv")  

d.bw = d.bw.raw %>%
          select(cue, labSubj, subjCode, isRight)

Accuracy by item + condition

Let’s first look at the “raw” accuracy data. The color of the square indicates the proportion of turkers that correctly guessed the cue given the associates of a particular lab subject.

d = d.all %>%
  left_join(d.bw) %>%
  select(1:5, 19,20) %>%
  group_by(cue, labSubj, subjCode)

counts = d %>%
  group_by(labSubj, cue, condition) %>%
  summarise(total = length(isRight),
            correct = sum(isRight, na.rm = T)/total) %>%
  group_by(labSubj) %>%
  mutate(total = sum(correct, na.rm = T)) %>%
  group_by(cue, condition) %>%
  mutate(total_cue = sum(correct, na.rm = T))

ggplot(counts, aes(fct_reorder(labSubj, total),
                   fct_reorder(cue, -total_cue))) + 
  geom_tile(aes(fill = correct), colour = "white") + 
  scale_fill_distiller(palette = "YlGnBu", direction = 1) +
  theme_minimal() +
  facet_grid(~condition, drop = TRUE, scales = "free_x") +
  xlab("participant") +
  ylab("cue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(legend.position = "bottom", legend.key.width = unit(2, "cm"),
        panel.grid = element_blank()) + 
  coord_equal()

Accuracy by item

counts.item = counts %>%
  group_by(cue, condition) %>%
  summarize(correct = mean(correct))

ggplot(counts.item, aes(fct_reorder(cue, -correct), condition)) + 
  geom_tile(aes(fill = correct), colour = "white") + 
  scale_fill_distiller(palette = "YlGnBu", direction = 1) +
  theme_minimal() +
  xlab("cue") +
  ylab("condition") +
  theme(legend.position = "none") +
  coord_flip()

Predicting accuracy of items

Can we predict variability in item difficulty with frequency and concreteness?

# read in frequency and concreteness
subtlexus.url <- getURL("https://raw.githubusercontent.com/mllewis/RC/master/data/corpus/SUBTLEXus_corpus.txt")
freqs <- read.table(text = subtlexus.url, header=TRUE) %>%
          select(Word,Lg10WF)
concreteness <-read.csv("brysbaert_corpus.csv", header=TRUE) %>%
  select(Word, Conc.M)

counts.item.group = counts.item %>%
  left_join(freqs, by=c("cue"= "Word")) %>%
  left_join(concreteness, by=c("cue"= "Word"))

counts.item.group %>%
  gather(variable, value, -1:-3) %>%
  ggplot(aes(x = value, y = correct, color = condition)) +
  facet_wrap(~variable, scales = "free")+
  geom_point() +
  geom_smooth(method = "lm")

tidy(cor.test(counts.item.group$Lg10WF, counts.item.group$correct)) %>%
  kable()

estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
0.2381047	2.629	0.0097341	115	0.0591275	0.4022508	Pearson’s product-moment correlation	two.sided

tidy(cor.test(counts.item.group$Conc.M, counts.item.group$correct)) %>%
  kable()

estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
0.1805676	1.968733	0.0513895	115	-0.000998	0.350608	Pearson’s product-moment correlation	two.sided

Frequency of the word predicts overall accuracy (higher frequency -> higher accuracy), but this does not differ by condition. Concreteness is marginal.

Look at responses for invididual items

E.g. bicycle is very different for anodal and cathodal

Bicycle:

p.correct = d.bw %>%
  filter(cue == "bicycle") %>%
  group_by(labSubj) %>%
  summarize(propCorrect = sum(isRight)/n(), 
            n = n())
d.all %>%
  group_by(labSubj, cue, condition) %>%
  filter(condition != "sham",
         cue == "bicycle") %>%
  arrange(labSubj, association.num) %>%
  ungroup() %>%
  select(labSubj, cue, target, condition, association.num) %>%
  group_by(condition, labSubj) %>%
  summarize(associates = paste(target,sep="",collapse=",")) %>%
  left_join(p.correct) %>%
  arrange(condition, -propCorrect) %>%
  mutate(propCorrect = round(propCorrect, 2)) %>%
  select(-n) %>%
  as.data.frame() %>%
  kable()

condition	labSubj	associates	propCorrect
anodal	PAS12	transportation,blue,wheel,handle,tires,pedal,exercise,useful	0.91
anodal	PAS105	two wheel,ride,summer,childhood	0.90
anodal	PAS125	cycle,ride,transportation,helmet	0.90
anodal	PAS103	basket,mountain,helmet,race	0.76
anodal	PAS131	bicycle,bicycle,wheel,transportation,blue,long,pedal	0.69
anodal	PAS111	fast,pedal,friend,summer,adventure,wilderness	0.64
anodal	PAS115	tires,blue,transportation,exercise,outdoors	0.64
anodal	PAS129	transportation,shortcut,summer,sun,eco-friendly,fast,childhood,fun	0.62
anodal	PAS117	transportation,beach,helmet,fall,race,hill,summer,wheel	0.58
anodal	PAS119	ride,summer,tires,handle	0.50
anodal	PAS123	ride,race,mountain,outdoors,active,sun,transportation,fun	0.40
anodal	PAS113	mountain,road,family,fun	0.14
anodal	PAS101	ride,lake,long legs,blue,rubber	0.00
anodal	PAS151	bicycle,sunny days,ride,enjoyable	0.00
cathodal	PAS102	helmet,wheel,brake,childhood,street,race,spokes	0.95
cathodal	PAS104	ride,pedal,summer,breeze,gears	0.91
cathodal	PAS118	bicycle,transportation,cheap,eco-friendly,health,sport,balance,energy	0.64
cathodal	PAS122	ride,blue,wheel,childhood,bell	0.58
cathodal	PAS132	ride,trip,road,traffic,journey,seat,tandem,brake	0.40
cathodal	PAS130	ride,transportation,blue,spring,wheel	0.31
cathodal	PAS120	race,exercise,speed,blue	0.30
cathodal	PAS107	childhood,neighborhood,friend,connor,exercise,summer,bunker beach,wave pool	0.00
cathodal	PAS110	blue,move,transportation,summer	0.00
cathodal	PAS112	bicycle,summer,ride,siblings,travel,trip	0.00
cathodal	PAS114	rollerblade,skateboard,helmet,outdoors,warm,safe	0.00
cathodal	PAS124	suburbs,kid,cities,college	0.00
cathodal	PAS133	exercise,outdoors,socializing,race	0.00

It looks like participants in the cathodal condition are doing more idiosyncratic responses in general. In principle, it seems like forward and backward probabilites should capture this.

Conditional probabilitiy

Merge in Nelson norms

# nelson norms
cp.url <- getURL("https://raw.githubusercontent.com/wjhopper/USF-WAN-SS/master/norms.csv")
cps <- read.csv(text = cp.url, header=FALSE) 

names(cps) = c("cue", "target", "FS", "BS")

cps = cps %>%
      mutate(cue =tolower(as.character(cue)),
             target = tolower(as.character(target)))

dcp =  d.all %>%
  left_join(cps) %>%
  select(labSubj, cue, target, condition, association.num, FS, BS)

All cues are present in Nelson norms

cuetdcs = unique(d.all$cue)
cueNelson = unique(cps$cue)

length(intersect(cuetdcs, cueNelson)) # all cues are present in nelson

## [1] 39

Lots of NAs but no difference between critical conditions

dcp %>%
  group_by(condition) %>%
  summarize(na = length(which(is.na(FS))),
            notna = length(which(!is.na(FS)))) %>%
  kable()

condition	na	notna
anodal	1884	1110
cathodal	1887	1040
sham	1668	924

Replacing missing associate values with zero. Note that BS and FS are skewed. Take the log.

dcp <- dcp %>%
       mutate(log.FS = log(FS),
              log.BS = log(BS),
              FS = ifelse(is.na(FS), 0, FS),
              BS = ifelse(is.na(BS), 0, BS))

dcp %>%
  gather(variable, value, 6:9) %>%
  ggplot(aes(x = value)) +
  facet_wrap(~variable, scales = "free")+
  geom_histogram() +
  theme_bw()

Do conditional probabilities differ by condition?

dcp.item = dcp %>%
  gather(variable, value, 6:9) %>%
  mutate(value = ifelse(is.infinite(value), NA, value)) %>%
  group_by(condition, labSubj, cue, variable) %>%
  summarize(value = mean(value, na.rm = T)) %>%
  group_by(labSubj, condition, variable) %>%
  summarize(value = mean(value, na.rm = T))

dcp.item %>%
  filter(is.finite(value)) %>%
  group_by(condition, variable) %>%
  multi_boot_standard(column = "value", na.rm = T) %>%
   ggplot(aes(x = condition, y = mean, fill = condition)) +
    facet_wrap(~variable, scales = "free") +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    ggtitle("Measures of forward/backward probability") +
    theme_bw() +
    theme(legend.position = "none")

No difference here for either backwards or forward probability.

Entropy analysis

Another approach: Look at the entropy over associates. If cathodal has more idiosyncratic responses, expect more entropy over distribution of associates for each cue.

H.item = d.all %>%
  count(cue, condition, target) %>%
  summarize(H = entropy(n)) 

H.item %>%
  group_by(condition) %>%
  multi_boot_standard(column = "H") %>%
    ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    ggtitle("entropy over associates") +
    theme_bw() +
    theme(legend.position = "none")

H.item %>%
  filter(condition != "sham") %>%
  select(cue, condition, H) %>%
  ungroup() %>%
  spread(condition, H) %>%
  do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE)))   %>%
  kable()

estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
-0.0496129	-1.989219	0.0539112	38	-0.1001032	0.0008773	Paired t-test	two.sided

Marginally reliable difference (p = .054): Cathodal has higher entropy.

Response networks

A third approach: I constructed a network for each cue for each condition. Nodes are the associates given by participants. A link is added between two nodes if a participant listed both associates. Weights are determined by the number of people who listed two associates. If participants are giving more idiosyncratic responses in the cathodal group, we should expect less centrality in the cathodal networks. In the limit, if participants give ONLY idiosnycratic responses, then the network should look like a bunch of separate clusters. Conversely, if everyone gives the same responses, it should look like a hairball.

First, here are the graphs for “bicycle” in the cathodal and anodal conditions.

Modularity setup

# get all pairs
all.pairs = d.all %>%
  #filter(association.num < 6) %>%
  filter(as.character(target) != as.character(cue)) %>%
  group_by(labSubj, cue, condition) %>%
  do(m = data.table(t(combn(.$target, 2)))) %>%
  unnest() %>%
  rename(a1 = V1,
         a2 = V2)

# modularity function
get_modularity <- function(df){
    counts <- count(df, a1, a2) 
    graph <- counts %>%
      select(a1, a2) %>%
      graph_from_data_frame(directed = FALSE) %>%
      simplify() 
    
    E(graph)$weight=counts$n # add in weights
    
    clustering = cluster_leading_eigen(graph, 
                              options = list(maxiter=1000000), 
                              weights = counts$n)
    
    closeness = mean(estimate_closeness(graph, cutoff= 100))
    betweeness = mean(estimate_betweenness(graph, cutoff= 100),  
                      weights = counts$n)
    degree = mean(igraph::degree(graph))

    data.frame(Q = round(clustering$modularity,4), 
               n.groups = round(length(clustering),4),
               closeness  = round(closeness, 4),
               betweeness  = round(betweeness, 4),
               degree = mean(degree))
}

# single item example
test = all.pairs %>%
  filter(cue == "bicycle" & condition == "cathodal") %>%
  filter(a1 != a2)  %>%
    mutate(a1 = str_trim(a1),
           a2 = str_trim(a2)) 

counts.ex <- count(test, a1, a2) 

v.attr = d.all %>%
  filter(as.character(target) != as.character(cue)) %>%
  mutate(target = str_trim(target)) %>%
  filter(cue == "bicycle" & condition == "cathodal") %>%
  distinct(target, .keep_all = T) %>%
  gather(type, value, c(10, 12,14,11,16, 17,18)) %>%
  filter(value == 1) %>%
  select(target, type) 

graph <- counts.ex %>%
      select(a1, a2) %>%
      graph_from_data_frame(directed = FALSE, vertices = v.attr) %>%
      simplify() 
    
E(graph)$weight=counts.ex$n # add in weights

ggplot(ggnetwork(asNetwork(graph)), 
          aes(x = x, y = y, xend = xend, yend = yend)) +
       geom_edges(aes(size = weight), color = "grey50") +
       geom_nodes(aes(color = type)) +
       geom_nodelabel(aes(label =  vertex.names , fill = type), size = 2) +
        ggtitle("cathodal") +
       theme_blank()

  test = all.pairs %>%
  filter(cue == "bicycle" & condition == "anodal") %>%
  filter(a1 != a2)  %>%
    mutate(a1 = str_trim(a1),
             a2 = str_trim(a2)) 

counts.ex <- count(test, a1, a2) 

v.attr = d.all %>%
  filter(as.character(target) != as.character(cue)) %>%
  mutate(target = str_trim(target)) %>%
  filter(cue == "bicycle" & condition == "anodal") %>%
  distinct(target, .keep_all = T) %>%
  gather(type, value, c(10, 12,14,11,16, 17,18)) %>%
  filter(value == 1) %>%
  select(target, type) 

graph <- counts.ex %>%
      select(a1, a2) %>%
      graph_from_data_frame(directed = FALSE, vertices = v.attr) %>%
      simplify() 
    
E(graph)$weight=counts.ex$n # add in weights

ggplot(ggnetwork(asNetwork(graph)), 
          aes(x = x, y = y, xend = xend, yend = yend)) +
       geom_edges(aes(size = weight), color = "grey50") +
       geom_nodes(aes(color = type)) +
       geom_nodelabel(aes(label =  vertex.names , fill = type), size = 2) +
      ggtitle("anodal") +
       theme_blank()

…it looks like the cathodal network is more modular.

Now, let’s do this across all items and quantify modularity, using three measures: Betweenness, mean degree, and modularity.

## get all modularity
modularity = all.pairs %>%
      filter(a1 != a2)  %>%
      mutate(a1 = str_trim(a1),
             a2 = str_trim(a2)) %>%
     group_by(cue, condition) %>%
     do(get_modularity(.))

#  mean betweeness
modularity %>%
    group_by(condition) %>%
    multi_boot_standard(column = "betweeness", na.rm = T) %>%
    ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    theme_bw()  +
    ggtitle("betweeness") +
    theme(legend.position = "none")

modularity %>%
  filter(condition != "sham") %>%
  select(cue, condition, betweeness) %>%
  ungroup() %>%
  spread(condition, betweeness) %>%
  do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE)))   %>%
  kable()

estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
-2.714844	-3.421774	0.0015021	38	-4.321003	-1.108685	Paired t-test	two.sided

#  mean degree
modularity %>%
    group_by(condition) %>%
    multi_boot_standard(column = "degree", na.rm = T) %>%
    ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    theme_bw()  +
    ggtitle("mean degree") +
    theme(legend.position = "none")

modularity %>%
  filter(condition != "sham") %>%
  select(cue, condition, degree) %>%
  ungroup() %>%
  spread(condition, degree) %>%
  do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE)))   %>%
  kable()

estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
0.3877107	2.541442	0.0152419	38	0.0788784	0.6965431	Paired t-test	two.sided

#  clustering
modularity %>%
    group_by(condition) %>%
    multi_boot_standard(column = "Q", na.rm = T) %>%
    ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    ggtitle("modularity") +
    theme_bw() +
    theme(legend.position = "none")

modularity %>%
  filter(condition != "sham") %>%
  select(cue, condition, Q) %>%
  ungroup() %>%
  spread(condition, Q) %>%
  do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE)))   %>%
  kable()

estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
-0.0350205	-2.870941	0.0066548	38	-0.0597146	-0.0103264	Paired t-test	two.sided

Across three different measures, anodal group is more clustered than cathodal group, suggesting they are all sampling from a shared “concept” of the cue.

Note that for two of the measures, sham looks less clustered than cathodal, and I think this is just an artifact of the fact that the sham condition has few associates overall (see below). If you subset to the first 4-5 associates the differences go away, probably because it’s underpowered. Gary suggests controling for number of associates, but I’m not totally sure how to do that with this analysis….

Mean number of associates:

d.all %>%
  group_by(cue, condition, labSubj) %>%
  summarize(n = n()) %>%
  group_by(cue, condition) %>%
  summarize(n = mean(n)) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "n", na.rm = T) %>%
  ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    ggtitle("mean number of associates") +
    theme_bw() +
    theme(legend.position = "none")

Turker errors by condition

Gary: How consistent are the groups in terms of consistency?

If the network analysis is correct, we should expect participants in the anodal condition to not only make more correct guesses, but be wrong in the same way. Here I look at this with three measures: number of incorrect guesses, entropy over guesses and ICC.

Number of incorrect guesses

d.bw2 = d.bw.raw %>%
      select(electrode,  labSubj,  subjCode, cue, 
             turkerGuess, isRight) %>%
        rename(condition = electrode)  %>%     
        mutate(condition = fct_recode(condition, "sham" = "na")) %>%
        mutate(turkerGuess= str_trim(turkerGuess))

d.bw2 %>%
  filter(isRight == 0) %>%
  group_by(condition, cue) %>%
  summarize(n = length(unique(turkerGuess))) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "n", na.rm = T) %>%
  ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    ggtitle("mean number of unique wrong guesses") +
    theme_bw() +
    theme(legend.position = "none")

d.bw2 %>%
  filter(isRight == 0) %>%
  group_by(condition, cue) %>%
  summarize(n = length(unique(turkerGuess))) %>%
  filter(condition != "sham") %>%
  select(cue, condition, n) %>%
  spread(condition, n) %>%
  do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE)))   %>%
  kable()

estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
-2.974359	-2.447356	0.0191244	38	-5.434677	-0.5140406	Paired t-test	two.sided

Entropy over unique wrong guesses

d.bw2 %>%
  filter(isRight == 0) %>%
  count(condition, cue, turkerGuess) %>%
  group_by(cue, condition) %>%
  summarize(H = entropy(n)) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "H", na.rm = T) %>%
  ggplot(aes(x = condition, y = mean, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
    ggtitle("entropy over wrong guesses") +
    theme_bw() +
    theme(legend.position = "none")

m = d.bw2 %>%
  filter(isRight == 0) %>%
  count(condition, cue, turkerGuess) %>%
  group_by(cue, condition) %>%
  summarize(H = entropy(n)) %>%
  filter(condition != "sham") %>%
  select(cue, condition, H) %>%
  spread(condition, H)

tidy(t.test(m$anodal, m$cathodal, paired=TRUE)) %>%
  kable()

estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
-0.083305	-2.083032	0.0440287	38	-0.1642649	-0.0023451	Paired t-test	two.sided

Anodal group has fewer unique wrong guesses and lower entropy over wrong guesses than cathodal group.

Rank order correlation

rank.orders = counts %>%
  select( -total_cue) %>%
  group_by(condition, labSubj) %>%
  arrange(correct) %>%
  mutate(rank.order = 1:n())

ICCs = rank.orders %>%
  select(-correct) %>%
  spread(cue, rank.order) %>%
  group_by(condition) %>%
  do(psych::ICC(select(., -1,-2) %>% t(), missing = FALSE)$results) %>%
  filter(type == "ICC1")

ggplot(ICCs, aes(x = condition, y = ICC, fill = condition)) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_linerange(aes(ymax = `upper bound`, ymin = `lower bound`)) +
    ggtitle("Interaclass correlation") +
    theme_bw() +
    theme(legend.position = "none")

Numerically as predicted, but not reliable.