Task: Participants were presented with 39 pictures cues and asked to produce 4-8 associates. Particpants were either given anodal (up-regulating), cathodal (down-regulating), or sham tdcs. In a different experiement, turkers were given the list of associates produced by the the participants in the tdcs experiment and asked to guess the picture.
Question: Does tDCS condition affect (1) the type of associates participant produce at the word level and (2) the relationship between associates? Secondly, do these measures differ as a function of accuracy in identification by turkers later?
Read in data
# tdcs data
d = read.csv("../data/labDataPlusNorms.csv") %>%
rename(condition = electrode,
association.num = mention) %>%
select(condition, labSubj, cue, target, association.num)
# turker accuracy data
d.bw = read.csv("../data/backALLfinal_withALLSimilaritiesPlusNorms.csv") %>%
rename(condition = electrode) %>%
select(condition, cue, labSubj, subjCode, isRight)
Add trial-wise accuracy column to tdcs data based on turker data
prop.correct = d.bw %>%
group_by(cue, labSubj) %>%
summarize(prop.turk.correct = sum(isRight)/length(isRight),
condition = condition[1])
median.correct = prop.correct %>%
group_by(condition) %>%
summarize(median.correct = median(prop.turk.correct))
d = left_join(d, prop.correct) %>%
left_join(median.correct) %>%
mutate(above.median = ifelse(prop.turk.correct >= median.correct, 1, 0),
above.median = as.factor(above.median)) %>%
mutate(condition = as.factor(condition),
condition = fct_relevel(condition, "na", "anodal", "cathodal"))
Note that 1 cue for one participant is missing from the turk dataset labSubj == “PAS152” & cue == “fish”).
Sanity checks
d %>%
group_by(association.num, condition) %>%
summarize(count = n()) %>%
ggplot(aes(x= association.num, y = count, fill = condition)) +
geom_bar(stat = "identity", position = "dodge")
Each participant gave 4 associations minimally, up to 8 total.
subtlexus.url <- getURL("https://raw.githubusercontent.com/mllewis/RC/master/data/corpus/SUBTLEXus_corpus.txt")
freqs <- read.table(text = subtlexus.url, header = TRUE) %>%
select(Word, Lg10WF)
d = left_join(d, freqs, by = c("target" = "Word")) %>%
rename(target.log.freq = Lg10WF)
d %>%
group_by(condition) %>%
summarize(freq = mean(target.log.freq, na.rm = T)) %>%
kable(caption = "all associates")
condition | freq |
---|---|
na | 3.220996 |
anodal | 3.216995 |
cathodal | 3.221148 |
d %>%
group_by(condition, association.num) %>%
summarize(freq = mean(target.log.freq, na.rm = T)) %>%
ggplot(aes(x= association.num, y = freq, color = condition)) +
geom_line()
No evience for difference associate frequency as a function of condition.
d %>%
group_by(condition, above.median, labSubj) %>%
filter(!is.na(above.median)) %>% # 4 missing trials (see note above)
summarize(freq = mean(target.log.freq, na.rm = T)) %>%
group_by(condition, above.median) %>%
multi_boot_standard(column = "freq", na.rm = T) %>%
ggplot(aes(fill = above.median, y = mean, x = condition, group = above.median)) +
ylab("target log frequency") +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9))
d %>%
group_by(condition, association.num, above.median) %>%
summarize(freq = mean(target.log.freq, na.rm = T)) %>%
ggplot(aes(x= association.num, y = freq, color = condition,
linetype = as.factor(above.median))) +
geom_line() +
ylim(3,3.5)
lm(target.log.freq ~ above.median * condition, d) %>%
summary() %>%
tidy %>%
kable()
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 3.2517607 | 0.0226848 | 143.3454469 | 0.0000000 |
above.median1 | -0.0590326 | 0.0311175 | -1.8970885 | 0.0578529 |
conditionanodal | -0.0059520 | 0.0305713 | -0.1946915 | 0.8456395 |
conditioncathodal | 0.0093970 | 0.0309418 | 0.3037003 | 0.7613643 |
above.median1:conditionanodal | 0.0027387 | 0.0422950 | 0.0647528 | 0.9483724 |
above.median1:conditioncathodal | -0.0172213 | 0.0425702 | -0.4045388 | 0.6858276 |
Marginal effect of correctness on frequency: lower frequency associates more likely to be correct.
d.pos = left_join(d, parts_of_speech, by=c("target" = "word"))
d.pos %>%
group_by(condition, pos) %>%
summarise (n = n()) %>%
mutate(freq = n / sum(n)) %>%
ggplot(aes(x= pos, y = freq, fill = condition)) +
geom_bar(stat = "identity", position = "dodge") +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
d.pos %>%
mutate(above.median = as.factor(above.median)) %>%
group_by(above.median, condition, pos) %>%
filter(!is.na(above.median)) %>% # 4 missing trials (see note above)
summarise (n = n()) %>%
mutate(freq = n / sum(n)) %>%
ggplot(aes(y = freq, x = condition, fill = above.median)) +
facet_wrap(~pos) +
geom_bar(stat = "identity", position = "dodge")
Collapse verb/adjective and noun
d.pos = d.pos %>%
mutate(pos = as.factor(pos),
pos2 = fct_collapse(pos,
noun = c("Noun", "Plural"),
verbadj = c("Verb (intransitive)", "Verb (transitive)",
"Verb (usu participle)", "Adjective"),
other = c("Adverb", "Interjection", "Definite Article",
"Preposition", "Pronoun")))
d.pos %>%
group_by(labSubj, condition, pos2) %>%
filter(pos2 != "other") %>%
summarise (n = n()) %>%
mutate(prop = n / sum(n)) %>%
group_by(condition, pos2) %>%
multi_boot_standard(column = "prop") %>%
ggplot(aes(y = mean, fill = condition, x = pos2)) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9))
van = d.pos %>%
group_by(labSubj, association.num, condition, pos2) %>%
filter(association.num < 5) %>%
filter(pos2 != "other") %>% # 4 missing trials (see note above)
summarize(n = n()) %>%
mutate(prop = n / sum(n)) %>%
group_by(association.num, condition, pos2) %>%
multi_boot_standard(column = "prop")
van %>%
ggplot(aes(x = association.num, group = condition)) +
facet_wrap(~pos2) +
geom_line(aes(color = condition, y = mean)) +
geom_ribbon(aes(ymax = ci_upper, ymin=ci_lower), alpha = 0.1)
lm(mean ~ condition, d = filter(van, pos2 == "verbadj")) %>%
summary() %>%
tidy %>%
kable()
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 0.5966597 | 0.0077407 | 77.0808796 | 0.0000000 |
conditionanodal | 0.0179675 | 0.0109470 | 1.6413168 | 0.1351496 |
conditioncathodal | -0.0060462 | 0.0109470 | -0.5523146 | 0.5941813 |
Suggestive of more verb/adjectives in the anodal condition, but difference not reliable.
d %>%
mutate(nchars = nchar(as.character(target))) %>%
group_by(cue, labSubj, condition) %>%
summarise (nchars = mean(nchars)) %>%
group_by(condition) %>%
multi_boot_standard(column = "nchars", na.rm = T) %>%
ggplot(aes(x= condition, y = mean, fill = condition)) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9))
d %>%
mutate(nchars = nchar(as.character(target))) %>%
group_by(cue, labSubj, condition) %>%
summarise (nchars = mean(nchars),
above.median = above.median[1]) %>%
group_by(condition, above.median) %>%
filter(!is.na(above.median)) %>% # 4 missing trials (see note above)
multi_boot_standard(column = "nchars", na.rm = T) %>%
ggplot(aes(x= condition, y = mean, fill = condition)) +
facet_grid(~above.median) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9))
Correct guess are more likelty to have long associates anodal group.
Build model distinguishing response profiles in three different conditions, and as a function accuracy in the turk data.
Compare forward transitiona probabilites across condition using the SWOW data.
cp.data = read.csv("../data/all_cond_probs_SWOW.csv")
cues = d %>%
group_by(cue) %>%
slice(1) %>%
select(cue)
cp.data %>%
mutate(log.trans.prob = log(trans.prob),
cue =unlist(lapply(strsplit(as.character(bigram)," "),function(x) x[1]))) %>%
filter(cue %in% as.character(cues$cue)) %>%
filter(pair %in% c("cue_a1", "cue_a2", "cue_a3")) %>%
group_by(cue) %>%
summarize(n = n()) %>%
kable()
cue | n |
---|---|
alligator | 10 |
carrot | 13 |
grasshopper | 12 |
lime | 11 |
lizard | 10 |
motorcycle | 9 |
wrench | 6 |
zebra | 12 |
Not much cue data for the words in the current dataset as cues in the SWOW data.
But, can average across different pairs
cp.collapsed = cp.data %>%
mutate(log.trans.prob = log(trans.prob)) %>%
group_by(bigram) %>%
summarize(log.trans.prob = mean(log.trans.prob))
d = d %>%
mutate(bigram = paste(cue, target)) %>%
left_join(cp.collapsed)
d %>%
group_by(condition) %>%
multi_boot_standard(column = "log.trans.prob", na.rm = T) %>%
ggplot(aes(x= condition, y = mean, fill = condition)) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9))
d %>%
filter(!is.na(above.median)) %>%
group_by(condition, above.median) %>%
multi_boot_standard(column = "log.trans.prob", na.rm = T) %>%
ggplot(aes(x= condition, group = above.median, y = mean, fill = above.median)) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9))
Across all condition, correct guess are more likely to have lower transitional probability.
d %>%
group_by(labSubj, cue, condition) %>%
filter(!is.na(log.trans.prob)) %>%
do(tidy(lm(log.trans.prob ~ association.num, data = .))) %>%
filter(term == "association.num") %>%
select(estimate) %>%
group_by(condition, labSubj) %>%
summarize(estimate = mean(estimate)) %>%
group_by(condition) %>%
multi_boot_standard(column = "estimate") %>%
ggplot(aes(fill = condition, y = mean, x = condition)) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9)) +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
No difference in slope of forward transitional probability across associaties.
d.all = read.csv("../data/labDataPlusNorms.csv") %>%
rename(condition = electrode,
association.num = mention) %>%
select(1:4,7,9:19)
# this is the plot in the paper
d.all %>%
gather(word_type, value, 6:16) %>%
group_by(condition, word_type) %>%
summarise(n= sum(value)) %>%
mutate(freq = n / sum(n)) %>%
ungroup() %>%
mutate(word_type = as.factor(word_type),
word_type = fct_relevel(word_type, "thematic", "taxonomic", "meronymPart",
"holonym", "action", "descriptor", "metaphor"),
condition = fct_relevel(condition, "anodal", "na", "cathodal")) %>%
ggplot(aes(y = freq, fill = condition, x = word_type)) +
geom_bar(stat = "identity", position = "dodge") +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
d.all %>%
gather(word_type, value, 6:16) %>%
group_by(condition, word_type) %>%
summarise(n = sum(value)) %>%
mutate(freq = n / sum(n)) %>%
ungroup() %>%
mutate(word_type = as.factor(word_type),
word_type = fct_reorder(word_type, -freq),
condition = fct_relevel(condition, "anodal", "na", "cathodal")) %>%
ggplot(aes(y = freq, fill = condition, x = word_type)) +
geom_bar(stat = "identity", position = "dodge") +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
(Why is the particular subset of word_types shown in the paper?)
Do the distribution of counts differ between the two groups?
d.wide = d.all %>%
left_join(prop.correct) %>%
left_join(median.correct) %>%
#filter(association.num < 5) %>%
mutate(above.median = ifelse(prop.turk.correct >= median.correct, 1, 0)) %>%
gather(word_type, value, 6:16) %>%
group_by(labSubj, word_type, cue, condition, above.median) %>%
summarize(n = sum(value),
prop.turk.correct = prop.turk.correct[1]) %>%
spread(word_type,n) %>%
ungroup() %>%
mutate(condition = fct_rev(condition)) %>%
select(thematic, action, descriptor, meronymPart, hypernym, condition,
above.median, prop.turk.correct)
### MODEL: predicting counts as a function of condition
gdm.reg <- MGLMreg(as.matrix(d.wide[,1:5]) ~ d.wide$condition, dist="DM")
gdm.reg$coefficients %>%
cbind(p = gdm.reg$wald.p) %>%
as.data.frame(.) %>%
mutate(coefficient = rownames(gdm.reg$coefficients),
sig = ifelse(p<.05, "*", "")) %>%
kable()
thematic | action | descriptor | meronymPart | hypernym | p | coefficient | sig |
---|---|---|---|---|---|---|---|
1.2818464 | 0.8673766 | 0.8003291 | 0.6651433 | -0.3358082 | 0.0000000 | (Intercept) | * |
0.5631802 | 0.2854607 | 0.0544377 | 0.4639664 | 0.5476637 | 0.0000003 | d.wide$conditioncathodal | * |
-0.0347578 | -0.0501214 | -0.2921893 | -0.3822954 | 0.0681553 | 0.0004274 | d.wide$conditionanodal | * |
Model parameters
# extract model parameters
se = gdm.reg$SE %>%
data.frame(row.names = NULL) %>%
mutate(condition = rownames(gdm.reg$SE)) %>%
gather(word_type, value, 1:5) %>%
mutate(condition = fct_recode(condition,"sham" = "(Intercept)",
"anodal" = "d.wide$conditionanodal",
"cathodal" = "d.wide$conditioncathodal")) %>%
rename(se = value)
coeffs = gdm.reg$coefficients %>%
data.frame(row.names = NULL) %>%
mutate(condition = rownames(gdm.reg$coefficients)) %>%
gather(word_type, value, 1:5) %>%
mutate(condition = fct_recode(condition,"sham" = "(Intercept)",
"anodal" = "d.wide$conditionanodal",
"cathodal" = "d.wide$conditioncathodal")) %>%
group_by(word_type) %>%
left_join(filter(., condition == "sham") %>% select(-condition),
by = "word_type") %>%
mutate(beta = ifelse(condition != "sham", value.x + value.y, value.x)) %>%
select(-value.x, -value.y) %>%
left_join(se) %>%
mutate(upper.ci = beta + 1.96 * se,
lower.ci = beta - 1.96 * se)
coeffs %>%
ungroup() %>%
mutate(word_type = fct_reorder(word_type, -beta)) %>%
ggplot(aes(fill= condition, y = beta, x = word_type)) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = upper.ci, ymin = lower.ci),
position = position_dodge(width = .9)) +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
Yes– There’s a reliable effect of condition here: different conditions have different distributions of items.
### MODEL: predicting counts as a function of condition and correctioness
gdm.reg <- MGLMreg(as.matrix(d.wide[,1:5]) ~ d.wide$condition * d.wide$above.median, dist="DM")
gdm.reg$coefficients %>%
cbind(p = gdm.reg$wald.p) %>%
as.data.frame(.) %>%
mutate(coefficient = rownames(gdm.reg$coefficients),
sig = ifelse(p<.05, "*", "")) %>%
kable()
thematic | action | descriptor | meronymPart | hypernym | p | coefficient | sig |
---|---|---|---|---|---|---|---|
1.1994460 | 0.6175739 | 0.5978260 | 0.1931666 | -0.6741256 | 0.0000000 | (Intercept) | * |
0.9933984 | 0.6371586 | 0.4009836 | 0.8606985 | 0.9341421 | 0.0000161 | d.wide$conditioncathodal | * |
-0.1621917 | 0.0028429 | -0.2237132 | -0.3754325 | -0.0011458 | 0.2205497 | d.wide$conditionanodal | |
0.3340896 | 0.6682611 | 0.5837346 | 1.0739081 | 0.8320109 | 0.0000006 | d.wide$above.median | * |
-0.7385850 | -0.5753498 | -0.5692883 | -0.6707855 | -0.6501842 | 0.6710049 | d.wide\(conditioncathodal:d.wide\)above.median | |
0.1606802 | -0.1968044 | -0.2385344 | -0.1101378 | 0.0276672 | 0.2436137 | d.wide\(conditionanodal:d.wide\)above.median |
Is the slope of the frequency of associates at the trial level different across conditions?
d %>%
group_by(labSubj, cue, condition) %>%
filter(association.num < 5) %>%
filter(!is.na(target.log.freq)) %>%
do(tidy(lm(target.log.freq ~ association.num, data = .))) %>%
filter(term == "association.num") %>%
select(estimate) %>%
group_by(condition, labSubj) %>%
summarize(estimate = mean(estimate)) %>%
group_by(condition) %>%
multi_boot_standard(column = "estimate") %>%
ggplot(aes(fill = condition, y = mean, x = condition)) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9)) +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
d %>%
group_by(labSubj, cue, condition, above.median) %>%
filter(association.num < 5) %>%
filter(!is.na(target.log.freq)) %>%
do(tidy(lm(target.log.freq ~ association.num, data = .))) %>%
filter(term == "association.num") %>%
select(estimate) %>%
group_by(condition, labSubj, above.median) %>%
summarize(estimate = mean(estimate)) %>%
group_by(condition, above.median) %>%
multi_boot_standard(column = "estimate") %>%
filter(!is.na(above.median)) %>%
ggplot(aes(fill= above.median, y = mean, x = condition)) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymax = ci_upper, ymin=ci_lower),
position = position_dodge(width = .9)) +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
d.wide = d.all %>%
gather(word_type, value, 6:16) %>%
group_by(labSubj, word_type, cue, condition) %>%
summarize(n = sum(value)) %>%
spread(word_type,n) %>%
ungroup() %>%
mutate(condition = fct_rev(condition)) %>%
select(thematic, action, descriptor, meronymPart, hypernym, condition, cue, labSubj)
d.bw = left_join(d.bw, d.wide)
head(d.bw)
## condition cue labSubj subjCode isRight thematic action
## 1 anodal alligator PAS101 A19IWME4DCIU1H 1 4 0
## 2 anodal alligator PAS101 A1FUBGE9CHDHU6 1 4 0
## 3 anodal alligator PAS101 A1GV0UZU0T2ORS 1 4 0
## 4 anodal alligator PAS101 A1IS0218RVG60K 1 4 0
## 5 anodal alligator PAS101 A2E2UARVOVDR9Y 1 4 0
## 6 anodal alligator PAS101 A1WDUICI1FG9F 1 4 0
## descriptor meronymPart hypernym
## 1 0 1 1
## 2 0 1 1
## 3 0 1 1
## 4 0 1 1
## 5 0 1 1
## 6 0 1 1
sham.weights = glmer(isRight~ thematic + action + descriptor + meronymPart + hypernym+(1|subjCode)+(1|cue) +(1|labSubj), filter(d.bw, condition == "na"), family="binomial")
summary(sham.weights)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## isRight ~ thematic + action + descriptor + meronymPart + hypernym +
## (1 | subjCode) + (1 | cue) + (1 | labSubj)
## Data: filter(d.bw, condition == "na")
##
## AIC BIC logLik deviance df.resid
## 7419.4 7480.2 -3700.7 7401.4 6337
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3419 -0.6966 -0.3776 0.7655 6.5428
##
## Random effects:
## Groups Name Variance Std.Dev.
## subjCode (Intercept) 0.2990 0.5468
## cue (Intercept) 0.7331 0.8562
## labSubj (Intercept) 0.3204 0.5661
## Number of obs: 6346, groups: subjCode, 525; cue, 39; labSubj, 15
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.78026 0.25336 -3.080 0.00207 **
## thematic -0.11095 0.04352 -2.550 0.01078 *
## action 0.13690 0.04527 3.024 0.00249 **
## descriptor -0.05081 0.04354 -1.167 0.24318
## meronymPart 0.32235 0.05136 6.276 3.47e-10 ***
## hypernym 0.48064 0.07622 6.306 2.86e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) themtc action dscrpt mrnymP
## thematic -0.497
## action -0.446 0.463
## descriptor -0.443 0.503 0.497
## meronymPart -0.435 0.519 0.445 0.425
## hypernym -0.247 0.255 0.249 0.209 0.206
Gary, I think this is the anlysis you were suggesting, but I’m not sure how to compare the other groups to the baseline condition. Do you have thoughts about this?
position.counts = d.all %>%
filter(association.num < 5) %>%
select(thematic, action, descriptor, meronymPart, hypernym,
condition, association.num, cue, labSubj) %>%
gather(word_type, value, 1:5) %>%
mutate(type = paste0(word_type, association.num)) %>%
select(-association.num, -word_type) %>%
group_by(labSubj, type, cue, condition) %>%
summarize(n = sum(value)) %>%
spread(type, n)
d.bw = left_join(d.bw, position.counts)
sham.weights = glmer(isRight~ +(1|subjCode)+(1|cue) +(1|labSubj), filter(d.bw, condition == "na"), family="binomial")
# do pca
pca.d.wide = position.counts %>%
ungroup() %>%
filter(condition == "anodal") %>%
select(-1:-3) %>%
prcomp(scale = TRUE, tol = .8)
# look at variance explained
fviz_eig(pca.d.wide, addlabels = TRUE,
linecolor ="red", geom = "line") +
theme_bw()
barplot(pca.d.wide$sdev/pca.d.wide$sdev[1])
# plot loadings
pca.d.wide <- cbind(names(position.counts)[c(-1,-2,-3)],
gather(as.data.frame(pca.d.wide$rotation),
pc, value))
names(pca.d.wide)[1] = "variable"
pca.d.wide$variable = factor(pca.d.wide$variable, levels = pca.d.wide$variable)
# order variables
ggplot(pca.d.wide) +
geom_bar(aes(x = variable, y = value), stat="identity") +
facet_wrap(~pc) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
scale_fill_manual(name = "Magnitude",
values = c("grey","red1")) +
ggtitle("PCA loadings")