Task: Participants were presented with 39 pictures cues and asked to produce 4-8 associates. Particpants were either given anodal (up-regulating), cathodal (down-regulating), or sham tdcs. In a different experiement, turkers were given the list of associates produced by the the participants in the tdcs experiment and asked to guess the picture.

Question: Does tDCS condition affect (1) the type of associates participant produce at the word level and (2) the relationship between associates? Secondly, do these measures differ as a function of accuracy in identification by turkers later?


Setup

Read in data

# tdcs data
d = read.csv("../data/labDataPlusNorms.csv") %>%
  rename(condition = electrode,
         association.num = mention) %>%
  select(condition, labSubj, cue, target, association.num) 


# turker accuracy data
d.bw = read.csv("../data/backALLfinal_withALLSimilaritiesPlusNorms.csv") %>%  
      rename(condition = electrode) %>%
      select(condition, cue, labSubj, subjCode, isRight)

Add trial-wise accuracy column to tdcs data based on turker data

prop.correct = d.bw %>%
  group_by(cue, labSubj) %>% 
  summarize(prop.turk.correct = sum(isRight)/length(isRight), 
            condition = condition[1])

median.correct = prop.correct %>%
  group_by(condition) %>%
  summarize(median.correct = median(prop.turk.correct))

d = left_join(d, prop.correct) %>%
    left_join(median.correct) %>%
    mutate(above.median = ifelse(prop.turk.correct >= median.correct, 1, 0),
           above.median = as.factor(above.median)) %>%
    mutate(condition = as.factor(condition),
         condition = fct_relevel(condition, "na", "anodal", "cathodal"))

Note that 1 cue for one participant is missing from the turk dataset labSubj == “PAS152” & cue == “fish”).

Sanity checks

d %>%
  group_by(association.num, condition) %>%
  summarize(count = n()) %>%
  ggplot(aes(x= association.num, y = count, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") 

Each participant gave 4 associations minimally, up to 8 total.

Individual association-level analyses

Frequency

subtlexus.url <- getURL("https://raw.githubusercontent.com/mllewis/RC/master/data/corpus/SUBTLEXus_corpus.txt")
freqs <- read.table(text = subtlexus.url, header = TRUE) %>%
          select(Word, Lg10WF)

d = left_join(d, freqs, by = c("target" = "Word"))  %>%
  rename(target.log.freq = Lg10WF)

d %>%
  group_by(condition) %>%
  summarize(freq = mean(target.log.freq, na.rm = T)) %>%
  kable(caption = "all associates")
all associates
condition freq
na 3.220996
anodal 3.216995
cathodal 3.221148
d %>%
  group_by(condition, association.num) %>%
  summarize(freq = mean(target.log.freq, na.rm = T)) %>%
  ggplot(aes(x= association.num, y = freq, color = condition)) +
  geom_line() 

No evience for difference associate frequency as a function of condition.

As a function of correctness

d %>%
  group_by(condition, above.median, labSubj) %>%
  filter(!is.na(above.median)) %>% # 4 missing trials (see note above)
  summarize(freq = mean(target.log.freq, na.rm = T)) %>%
  group_by(condition, above.median) %>%
  multi_boot_standard(column = "freq", na.rm = T) %>%
  ggplot(aes(fill = above.median, y = mean, x = condition, group = above.median)) +
  ylab("target log frequency") +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9)) 

d %>%
  group_by(condition, association.num, above.median) %>%
  summarize(freq = mean(target.log.freq, na.rm = T)) %>%
  ggplot(aes(x= association.num, y = freq, color = condition, 
             linetype = as.factor(above.median))) +
  geom_line() +
  ylim(3,3.5)

lm(target.log.freq ~ above.median * condition, d) %>%
  summary() %>%
  tidy %>%
  kable()
term estimate std.error statistic p.value
(Intercept) 3.2517607 0.0226848 143.3454469 0.0000000
above.median1 -0.0590326 0.0311175 -1.8970885 0.0578529
conditionanodal -0.0059520 0.0305713 -0.1946915 0.8456395
conditioncathodal 0.0093970 0.0309418 0.3037003 0.7613643
above.median1:conditionanodal 0.0027387 0.0422950 0.0647528 0.9483724
above.median1:conditioncathodal -0.0172213 0.0425702 -0.4045388 0.6858276

Marginal effect of correctness on frequency: lower frequency associates more likely to be correct.

Lexical class

d.pos = left_join(d, parts_of_speech, by=c("target" = "word"))

d.pos %>%
  group_by(condition, pos) %>%
  summarise (n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  ggplot(aes(x= pos, y = freq, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
 theme(axis.text.x = element_text(angle = 60, hjust = 1))

As a function of correctness

d.pos %>%
  mutate(above.median = as.factor(above.median)) %>%
  group_by(above.median, condition, pos) %>%
  filter(!is.na(above.median)) %>% # 4 missing trials (see note above)
  summarise (n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  ggplot(aes(y = freq, x = condition, fill = above.median)) +
  facet_wrap(~pos) +
  geom_bar(stat = "identity", position = "dodge") 

Collapse verb/adjective and noun

d.pos = d.pos %>%
  mutate(pos = as.factor(pos),
         pos2 = fct_collapse(pos,
         noun = c("Noun", "Plural"),
         verbadj = c("Verb (intransitive)", "Verb (transitive)",
                    "Verb (usu participle)", "Adjective"),
         other = c("Adverb", "Interjection", "Definite Article",
                   "Preposition", "Pronoun")))

d.pos %>%
  group_by(labSubj, condition, pos2) %>%
  filter(pos2 != "other")  %>% 
  summarise (n = n()) %>%
  mutate(prop = n / sum(n)) %>%
  group_by(condition, pos2) %>%
  multi_boot_standard(column = "prop")  %>%
  ggplot(aes(y = mean, fill = condition, x = pos2)) +
    geom_bar(stat = "identity", position = "dodge")  +
    geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9)) 

van = d.pos %>%
  group_by(labSubj, association.num, condition, pos2) %>%
  filter(association.num < 5)  %>%
  filter(pos2 != "other")  %>% # 4 missing trials (see note above)
  summarize(n = n()) %>%
  mutate(prop = n / sum(n)) %>%
  group_by(association.num, condition, pos2) %>%
  multi_boot_standard(column = "prop") 

van %>%
  ggplot(aes(x = association.num, group = condition)) +
  facet_wrap(~pos2) +
  geom_line(aes(color = condition, y = mean)) +
  geom_ribbon(aes(ymax = ci_upper, ymin=ci_lower), alpha = 0.1)

lm(mean ~ condition, d = filter(van, pos2 == "verbadj")) %>%
  summary() %>%
  tidy %>%
  kable()
term estimate std.error statistic p.value
(Intercept) 0.5966597 0.0077407 77.0808796 0.0000000
conditionanodal 0.0179675 0.0109470 1.6413168 0.1351496
conditioncathodal -0.0060462 0.0109470 -0.5523146 0.5941813

Suggestive of more verb/adjectives in the anodal condition, but difference not reliable.

Word length

d %>%
  mutate(nchars = nchar(as.character(target))) %>%
  group_by(cue, labSubj, condition) %>%
  summarise (nchars = mean(nchars)) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "nchars", na.rm = T) %>%
  ggplot(aes(x= condition, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9))

As a function of correctness

d %>%
  mutate(nchars = nchar(as.character(target))) %>%
  group_by(cue, labSubj, condition) %>%
  summarise (nchars = mean(nchars),
             above.median = above.median[1]) %>%
  group_by(condition, above.median) %>%
  filter(!is.na(above.median)) %>% # 4 missing trials (see note above)
  multi_boot_standard(column = "nchars", na.rm = T) %>%
  ggplot(aes(x= condition, y = mean, fill = condition)) +
  facet_grid(~above.median) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9)) 

Correct guess are more likelty to have long associates anodal group.

Associate-set level analyses

Build model distinguishing response profiles in three different conditions, and as a function accuracy in the turk data.

Conditional Probabilities

Compare forward transitiona probabilites across condition using the SWOW data.

cp.data = read.csv("../data/all_cond_probs_SWOW.csv") 

cues = d %>%
 group_by(cue) %>%
  slice(1) %>%
  select(cue) 

cp.data %>%
    mutate(log.trans.prob = log(trans.prob),
          cue =unlist(lapply(strsplit(as.character(bigram)," "),function(x) x[1]))) %>%
  filter(cue %in% as.character(cues$cue)) %>%
  filter(pair %in% c("cue_a1", "cue_a2", "cue_a3")) %>%
  group_by(cue) %>%
  summarize(n = n()) %>%
  kable()
cue n
alligator 10
carrot 13
grasshopper 12
lime 11
lizard 10
motorcycle 9
wrench 6
zebra 12

Not much cue data for the words in the current dataset as cues in the SWOW data.

But, can average across different pairs

cp.collapsed = cp.data %>%
  mutate(log.trans.prob = log(trans.prob)) %>%
  group_by(bigram) %>%
  summarize(log.trans.prob = mean(log.trans.prob))

d = d %>%
  mutate(bigram = paste(cue, target)) %>%
  left_join(cp.collapsed)

d %>%
  group_by(condition) %>%
  multi_boot_standard(column = "log.trans.prob", na.rm = T) %>%
  ggplot(aes(x= condition, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9))

d %>%
  filter(!is.na(above.median)) %>%
  group_by(condition, above.median) %>%
  multi_boot_standard(column = "log.trans.prob", na.rm = T) %>%
  ggplot(aes(x= condition, group = above.median, y = mean, fill = above.median)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9))

Across all condition, correct guess are more likely to have lower transitional probability.

d %>%
  group_by(labSubj, cue, condition) %>%
  filter(!is.na(log.trans.prob)) %>%
  do(tidy(lm(log.trans.prob ~ association.num, data = .))) %>%
  filter(term == "association.num") %>%
  select(estimate) %>%
  group_by(condition, labSubj) %>%
  summarize(estimate = mean(estimate)) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "estimate") %>%
  ggplot(aes(fill = condition, y = mean, x = condition)) +
    geom_bar(stat = "identity", position = "dodge") +
    geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9)) +
    theme(axis.text.x = element_text(angle = 60, hjust = 1))

No difference in slope of forward transitional probability across associaties.

Thematic Class Distribution

Overall distribution

d.all = read.csv("../data/labDataPlusNorms.csv") %>%
  rename(condition = electrode,
         association.num = mention) %>%
  select(1:4,7,9:19)

# this is the plot in the paper
d.all %>%
  gather(word_type, value, 6:16) %>%
  group_by(condition, word_type) %>%
  summarise(n= sum(value)) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup() %>%
  mutate(word_type = as.factor(word_type),
         word_type = fct_relevel(word_type, "thematic", "taxonomic", "meronymPart",
                                 "holonym", "action", "descriptor", "metaphor"),
         condition = fct_relevel(condition, "anodal", "na", "cathodal")) %>%
  ggplot(aes(y = freq, fill = condition, x = word_type)) +
  geom_bar(stat = "identity", position = "dodge") +
    theme(axis.text.x = element_text(angle = 60, hjust = 1))

d.all %>%
  gather(word_type, value, 6:16) %>%
  group_by(condition, word_type) %>%
  summarise(n = sum(value)) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup() %>%
  mutate(word_type = as.factor(word_type),
         word_type = fct_reorder(word_type, -freq),
         condition = fct_relevel(condition, "anodal", "na", "cathodal")) %>%
  ggplot(aes(y = freq, fill = condition, x = word_type)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

(Why is the particular subset of word_types shown in the paper?)

Multivariate model

Do the distribution of counts differ between the two groups?

d.wide = d.all %>%
  left_join(prop.correct) %>%
  left_join(median.correct) %>% 
  #filter(association.num < 5) %>%
  mutate(above.median = ifelse(prop.turk.correct >= median.correct, 1, 0)) %>%
  gather(word_type, value, 6:16) %>%
  group_by(labSubj, word_type, cue, condition, above.median) %>%
  summarize(n = sum(value),
            prop.turk.correct = prop.turk.correct[1]) %>%
  spread(word_type,n) %>%
  ungroup() %>%
  mutate(condition = fct_rev(condition)) %>%
  select(thematic, action, descriptor, meronymPart, hypernym, condition, 
         above.median, prop.turk.correct)

### MODEL: predicting counts as a function of condition
gdm.reg <- MGLMreg(as.matrix(d.wide[,1:5]) ~ d.wide$condition, dist="DM")

gdm.reg$coefficients %>%
  cbind(p = gdm.reg$wald.p) %>%
  as.data.frame(.) %>%
  mutate(coefficient = rownames(gdm.reg$coefficients),
         sig = ifelse(p<.05, "*", "")) %>%
  kable()
thematic action descriptor meronymPart hypernym p coefficient sig
1.2818464 0.8673766 0.8003291 0.6651433 -0.3358082 0.0000000 (Intercept) *
0.5631802 0.2854607 0.0544377 0.4639664 0.5476637 0.0000003 d.wide$conditioncathodal *
-0.0347578 -0.0501214 -0.2921893 -0.3822954 0.0681553 0.0004274 d.wide$conditionanodal *

Model parameters

# extract model parameters
se = gdm.reg$SE %>%
    data.frame(row.names = NULL) %>%
    mutate(condition = rownames(gdm.reg$SE)) %>%
    gather(word_type, value, 1:5) %>%
    mutate(condition = fct_recode(condition,"sham" = "(Intercept)",
                                      "anodal" = "d.wide$conditionanodal",
                                      "cathodal" = "d.wide$conditioncathodal")) %>%
    rename(se = value)

coeffs = gdm.reg$coefficients  %>% 
        data.frame(row.names = NULL) %>%
        mutate(condition = rownames(gdm.reg$coefficients)) %>%
        gather(word_type, value, 1:5) %>%
        mutate(condition = fct_recode(condition,"sham" = "(Intercept)",
                                      "anodal" = "d.wide$conditionanodal",
                                      "cathodal" = "d.wide$conditioncathodal")) %>%
        group_by(word_type) %>% 
        left_join(filter(., condition == "sham") %>% select(-condition),
                  by = "word_type") %>%
        mutate(beta = ifelse(condition != "sham", value.x + value.y, value.x)) %>%
        select(-value.x, -value.y) %>%
        left_join(se) %>%
        mutate(upper.ci = beta + 1.96 * se,
              lower.ci = beta - 1.96 * se)

coeffs %>%
  ungroup() %>%
  mutate(word_type = fct_reorder(word_type, -beta)) %>%
  ggplot(aes(fill= condition, y = beta, x = word_type)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = upper.ci, ymin = lower.ci),
                 position = position_dodge(width = .9)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

Yes– There’s a reliable effect of condition here: different conditions have different distributions of items.

### MODEL: predicting counts as a function of condition and correctioness
gdm.reg <- MGLMreg(as.matrix(d.wide[,1:5]) ~ d.wide$condition * d.wide$above.median, dist="DM")

gdm.reg$coefficients %>%
  cbind(p = gdm.reg$wald.p) %>%
  as.data.frame(.) %>%
  mutate(coefficient = rownames(gdm.reg$coefficients),
         sig = ifelse(p<.05, "*", "")) %>%
  kable()
thematic action descriptor meronymPart hypernym p coefficient sig
1.1994460 0.6175739 0.5978260 0.1931666 -0.6741256 0.0000000 (Intercept) *
0.9933984 0.6371586 0.4009836 0.8606985 0.9341421 0.0000161 d.wide$conditioncathodal *
-0.1621917 0.0028429 -0.2237132 -0.3754325 -0.0011458 0.2205497 d.wide$conditionanodal
0.3340896 0.6682611 0.5837346 1.0739081 0.8320109 0.0000006 d.wide$above.median *
-0.7385850 -0.5753498 -0.5692883 -0.6707855 -0.6501842 0.6710049 d.wide\(conditioncathodal:d.wide\)above.median
0.1606802 -0.1968044 -0.2385344 -0.1101378 0.0276672 0.2436137 d.wide\(conditionanodal:d.wide\)above.median

Frequency trajectories

Is the slope of the frequency of associates at the trial level different across conditions?

d %>%
  group_by(labSubj, cue, condition) %>%
  filter(association.num < 5) %>%
  filter(!is.na(target.log.freq)) %>%
  do(tidy(lm(target.log.freq ~ association.num, data = .))) %>%
  filter(term == "association.num") %>%
  select(estimate) %>%
  group_by(condition, labSubj) %>%
  summarize(estimate = mean(estimate)) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "estimate") %>%
  ggplot(aes(fill = condition, y = mean, x = condition)) +
    geom_bar(stat = "identity", position = "dodge") +
    geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9)) +
    theme(axis.text.x = element_text(angle = 60, hjust = 1))

d %>%
  group_by(labSubj, cue, condition, above.median) %>%
  filter(association.num < 5) %>%
  filter(!is.na(target.log.freq)) %>%
  do(tidy(lm(target.log.freq ~ association.num, data = .))) %>%
  filter(term == "association.num") %>%
  select(estimate) %>%
  group_by(condition, labSubj, above.median) %>%
  summarize(estimate = mean(estimate)) %>%
  group_by(condition, above.median) %>%
  multi_boot_standard(column = "estimate") %>%
  filter(!is.na(above.median)) %>%
  ggplot(aes(fill= above.median, y = mean, x = condition)) +
    geom_bar(stat = "identity", position = "dodge") +
    geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
                 position = position_dodge(width = .9)) +
    theme(axis.text.x = element_text(angle = 60, hjust = 1))

Gary’s Analysis

d.wide = d.all %>%
  gather(word_type, value, 6:16) %>%
  group_by(labSubj, word_type, cue, condition) %>%
  summarize(n = sum(value)) %>%
  spread(word_type,n) %>%
  ungroup() %>%
  mutate(condition = fct_rev(condition)) %>%
  select(thematic, action, descriptor, meronymPart, hypernym, condition, cue, labSubj)

d.bw = left_join(d.bw, d.wide)

head(d.bw)
##   condition       cue labSubj       subjCode isRight thematic action
## 1    anodal alligator  PAS101 A19IWME4DCIU1H       1        4      0
## 2    anodal alligator  PAS101 A1FUBGE9CHDHU6       1        4      0
## 3    anodal alligator  PAS101 A1GV0UZU0T2ORS       1        4      0
## 4    anodal alligator  PAS101 A1IS0218RVG60K       1        4      0
## 5    anodal alligator  PAS101 A2E2UARVOVDR9Y       1        4      0
## 6    anodal alligator  PAS101  A1WDUICI1FG9F       1        4      0
##   descriptor meronymPart hypernym
## 1          0           1        1
## 2          0           1        1
## 3          0           1        1
## 4          0           1        1
## 5          0           1        1
## 6          0           1        1
sham.weights = glmer(isRight~ thematic + action + descriptor + meronymPart + hypernym+(1|subjCode)+(1|cue) +(1|labSubj), filter(d.bw, condition == "na"), family="binomial")

summary(sham.weights)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## isRight ~ thematic + action + descriptor + meronymPart + hypernym +  
##     (1 | subjCode) + (1 | cue) + (1 | labSubj)
##    Data: filter(d.bw, condition == "na")
## 
##      AIC      BIC   logLik deviance df.resid 
##   7419.4   7480.2  -3700.7   7401.4     6337 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3419 -0.6966 -0.3776  0.7655  6.5428 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subjCode (Intercept) 0.2990   0.5468  
##  cue      (Intercept) 0.7331   0.8562  
##  labSubj  (Intercept) 0.3204   0.5661  
## Number of obs: 6346, groups:  subjCode, 525; cue, 39; labSubj, 15
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.78026    0.25336  -3.080  0.00207 ** 
## thematic    -0.11095    0.04352  -2.550  0.01078 *  
## action       0.13690    0.04527   3.024  0.00249 ** 
## descriptor  -0.05081    0.04354  -1.167  0.24318    
## meronymPart  0.32235    0.05136   6.276 3.47e-10 ***
## hypernym     0.48064    0.07622   6.306 2.86e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) themtc action dscrpt mrnymP
## thematic    -0.497                            
## action      -0.446  0.463                     
## descriptor  -0.443  0.503  0.497              
## meronymPart -0.435  0.519  0.445  0.425       
## hypernym    -0.247  0.255  0.249  0.209  0.206

Gary, I think this is the anlysis you were suggesting, but I’m not sure how to compare the other groups to the baseline condition. Do you have thoughts about this?

Order Analyses

position.counts = d.all %>%
  filter(association.num < 5)   %>%
  select(thematic, action, descriptor, meronymPart, hypernym, 
         condition, association.num, cue, labSubj) %>%
  gather(word_type, value, 1:5) %>%
  mutate(type = paste0(word_type, association.num)) %>%
  select(-association.num, -word_type)  %>%
  group_by(labSubj, type, cue, condition) %>%
  summarize(n = sum(value)) %>%
  spread(type, n)

d.bw = left_join(d.bw, position.counts)

sham.weights = glmer(isRight~ +(1|subjCode)+(1|cue) +(1|labSubj), filter(d.bw, condition == "na"), family="binomial")


# do pca
pca.d.wide = position.counts %>%
            ungroup() %>%
            filter(condition == "anodal") %>%
            select(-1:-3) %>%
            prcomp(scale = TRUE, tol = .8)
# look at variance explained
fviz_eig(pca.d.wide, addlabels = TRUE, 
               linecolor ="red", geom = "line") +
  theme_bw()

barplot(pca.d.wide$sdev/pca.d.wide$sdev[1])

# plot loadings
pca.d.wide <- cbind(names(position.counts)[c(-1,-2,-3)],
                    gather(as.data.frame(pca.d.wide$rotation), 
                           pc, value))
names(pca.d.wide)[1] = "variable"
 
pca.d.wide$variable = factor(pca.d.wide$variable, levels = pca.d.wide$variable) 

# order variables
ggplot(pca.d.wide) +
  geom_bar(aes(x = variable, y = value), stat="identity") +
  facet_wrap(~pc) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))  + 
       scale_fill_manual(name = "Magnitude", 
                       values = c("grey","red1")) +
  ggtitle("PCA loadings")