Read in data
# tdcs data
d.all = read.csv("../../data/labDataPlusNorms.csv") %>%
rename(condition = electrode,
association.num = mention) %>%
select(1:4,7,9:19,21,22) %>%
mutate(condition = fct_recode(condition, "sham" = "na")) %>%
filter(as.character(target) != as.character(cue)) %>%
mutate(target= str_trim(target))
# turker accuracy data
d.bw.raw = read.csv("../../data/backALLfinal_withALLSimilaritiesPlusNorms.csv")
d.bw = d.bw.raw %>%
select(cue, labSubj, subjCode, isRight)
Let’s first look at the “raw” accuracy data. The color of the square indicates the proportion of turkers that correctly guessed the cue given the associates of a particular lab subject.
d = d.all %>%
left_join(d.bw) %>%
select(1:5, 19,20) %>%
group_by(cue, labSubj, subjCode)
counts = d %>%
group_by(labSubj, cue, condition) %>%
summarise(total = length(isRight),
correct = sum(isRight, na.rm = T)/total) %>%
group_by(labSubj) %>%
mutate(total = sum(correct, na.rm = T)) %>%
group_by(cue, condition) %>%
mutate(total_cue = sum(correct, na.rm = T))
ggplot(counts, aes(fct_reorder(labSubj, total),
fct_reorder(cue, -total_cue))) +
geom_tile(aes(fill = correct), colour = "white") +
scale_fill_distiller(palette = "YlGnBu", direction = 1) +
theme_minimal() +
facet_grid(~condition, drop = TRUE, scales = "free_x") +
xlab("participant") +
ylab("cue") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position = "bottom", legend.key.width = unit(2, "cm"),
panel.grid = element_blank()) +
coord_equal()
Accuracy by item
counts.item = counts %>%
group_by(cue, condition) %>%
summarize(correct = mean(correct))
ggplot(counts.item, aes(fct_reorder(cue, -correct), condition)) +
geom_tile(aes(fill = correct), colour = "white") +
scale_fill_distiller(palette = "YlGnBu", direction = 1) +
theme_minimal() +
xlab("cue") +
ylab("condition") +
theme(legend.position = "none") +
coord_flip()
Can we predict variability in item difficulty with frequency and concreteness?
# read in frequency and concreteness
subtlexus.url <- getURL("https://raw.githubusercontent.com/mllewis/RC/master/data/corpus/SUBTLEXus_corpus.txt")
freqs <- read.table(text = subtlexus.url, header=TRUE) %>%
select(Word,Lg10WF)
concreteness <-read.csv("brysbaert_corpus.csv", header=TRUE) %>%
select(Word, Conc.M)
counts.item.group = counts.item %>%
left_join(freqs, by=c("cue"= "Word")) %>%
left_join(concreteness, by=c("cue"= "Word"))
counts.item.group %>%
gather(variable, value, -1:-3) %>%
ggplot(aes(x = value, y = correct, color = condition)) +
facet_wrap(~variable, scales = "free")+
geom_point() +
geom_smooth(method = "lm")
tidy(cor.test(counts.item.group$Lg10WF, counts.item.group$correct)) %>%
kable()
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| 0.2381047 | 2.629 | 0.0097341 | 115 | 0.0591275 | 0.4022508 | Pearson’s product-moment correlation | two.sided |
tidy(cor.test(counts.item.group$Conc.M, counts.item.group$correct)) %>%
kable()
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| 0.1805676 | 1.968733 | 0.0513895 | 115 | -0.000998 | 0.350608 | Pearson’s product-moment correlation | two.sided |
Frequency of the word predicts overall accuracy (higher frequency -> higher accuracy), but this does not differ by condition. Concreteness is marginal.
E.g. bicycle is very different for anodal and cathodal
Bicycle:
p.correct = d.bw %>%
filter(cue == "bicycle") %>%
group_by(labSubj) %>%
summarize(propCorrect = sum(isRight)/n(),
n = n())
d.all %>%
group_by(labSubj, cue, condition) %>%
filter(condition != "sham",
cue == "bicycle") %>%
arrange(labSubj, association.num) %>%
ungroup() %>%
select(labSubj, cue, target, condition, association.num) %>%
group_by(condition, labSubj) %>%
summarize(associates = paste(target,sep="",collapse=",")) %>%
left_join(p.correct) %>%
arrange(condition, -propCorrect) %>%
mutate(propCorrect = round(propCorrect, 2)) %>%
select(-n) %>%
as.data.frame() %>%
kable()
| condition | labSubj | associates | propCorrect |
|---|---|---|---|
| anodal | PAS12 | transportation,blue,wheel,handle,tires,pedal,exercise,useful | 0.91 |
| anodal | PAS105 | two wheel,ride,summer,childhood | 0.90 |
| anodal | PAS125 | cycle,ride,transportation,helmet | 0.90 |
| anodal | PAS103 | basket,mountain,helmet,race | 0.76 |
| anodal | PAS131 | bicycle,bicycle,wheel,transportation,blue,long,pedal | 0.69 |
| anodal | PAS111 | fast,pedal,friend,summer,adventure,wilderness | 0.64 |
| anodal | PAS115 | tires,blue,transportation,exercise,outdoors | 0.64 |
| anodal | PAS129 | transportation,shortcut,summer,sun,eco-friendly,fast,childhood,fun | 0.62 |
| anodal | PAS117 | transportation,beach,helmet,fall,race,hill,summer,wheel | 0.58 |
| anodal | PAS119 | ride,summer,tires,handle | 0.50 |
| anodal | PAS123 | ride,race,mountain,outdoors,active,sun,transportation,fun | 0.40 |
| anodal | PAS113 | mountain,road,family,fun | 0.14 |
| anodal | PAS101 | ride,lake,long legs,blue,rubber | 0.00 |
| anodal | PAS151 | bicycle,sunny days,ride,enjoyable | 0.00 |
| cathodal | PAS102 | helmet,wheel,brake,childhood,street,race,spokes | 0.95 |
| cathodal | PAS104 | ride,pedal,summer,breeze,gears | 0.91 |
| cathodal | PAS118 | bicycle,transportation,cheap,eco-friendly,health,sport,balance,energy | 0.64 |
| cathodal | PAS122 | ride,blue,wheel,childhood,bell | 0.58 |
| cathodal | PAS132 | ride,trip,road,traffic,journey,seat,tandem,brake | 0.40 |
| cathodal | PAS130 | ride,transportation,blue,spring,wheel | 0.31 |
| cathodal | PAS120 | race,exercise,speed,blue | 0.30 |
| cathodal | PAS107 | childhood,neighborhood,friend,connor,exercise,summer,bunker beach,wave pool | 0.00 |
| cathodal | PAS110 | blue,move,transportation,summer | 0.00 |
| cathodal | PAS112 | bicycle,summer,ride,siblings,travel,trip | 0.00 |
| cathodal | PAS114 | rollerblade,skateboard,helmet,outdoors,warm,safe | 0.00 |
| cathodal | PAS124 | suburbs,kid,cities,college | 0.00 |
| cathodal | PAS133 | exercise,outdoors,socializing,race | 0.00 |
It looks like participants in the cathodal condition are doing more idiosyncratic responses in general. In principle, it seems like forward and backward probabilites should capture this.
Merge in Nelson norms
# nelson norms
cp.url <- getURL("https://raw.githubusercontent.com/wjhopper/USF-WAN-SS/master/norms.csv")
cps <- read.csv(text = cp.url, header=FALSE)
names(cps) = c("cue", "target", "FS", "BS")
cps = cps %>%
mutate(cue =tolower(as.character(cue)),
target = tolower(as.character(target)))
dcp = d.all %>%
left_join(cps) %>%
select(labSubj, cue, target, condition, association.num, FS, BS)
All cues are present in Nelson norms
cuetdcs = unique(d.all$cue)
cueNelson = unique(cps$cue)
length(intersect(cuetdcs, cueNelson)) # all cues are present in nelson
## [1] 39
Lots of NAs but no difference between critical conditions
dcp %>%
group_by(condition) %>%
summarize(na = length(which(is.na(FS))),
notna = length(which(!is.na(FS)))) %>%
kable()
| condition | na | notna |
|---|---|---|
| anodal | 1884 | 1110 |
| cathodal | 1887 | 1040 |
| sham | 1668 | 924 |
Replacing missing associate values with zero. Note that BS and FS are skewed. Take the log.
dcp <- dcp %>%
mutate(log.FS = log(FS),
log.BS = log(BS),
FS = ifelse(is.na(FS), 0, FS),
BS = ifelse(is.na(BS), 0, BS))
dcp %>%
gather(variable, value, 6:9) %>%
ggplot(aes(x = value)) +
facet_wrap(~variable, scales = "free")+
geom_histogram() +
theme_bw()
Do conditional probabilities differ by condition?
dcp.item = dcp %>%
gather(variable, value, 6:9) %>%
mutate(value = ifelse(is.infinite(value), NA, value)) %>%
group_by(condition, labSubj, cue, variable) %>%
summarize(value = mean(value, na.rm = T)) %>%
group_by(labSubj, condition, variable) %>%
summarize(value = mean(value, na.rm = T))
dcp.item %>%
filter(is.finite(value)) %>%
group_by(condition, variable) %>%
multi_boot_standard(column = "value", na.rm = T) %>%
ggplot(aes(x = condition, y = mean, fill = condition)) +
facet_wrap(~variable, scales = "free") +
geom_bar(position = "dodge", stat = "identity") +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
ggtitle("Measures of forward/backward probability") +
theme_bw() +
theme(legend.position = "none")
No difference here for either backwards or forward probability.
Another approach: Look at the entropy over associates. If cathodal has more idiosyncratic responses, expect more entropy over distribution of associates for each cue.
H.item = d.all %>%
count(cue, condition, target) %>%
summarize(H = entropy(n))
H.item %>%
group_by(condition) %>%
multi_boot_standard(column = "H") %>%
ggplot(aes(x = condition, y = mean, fill = condition)) +
geom_bar(position = "dodge", stat = "identity") +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
ggtitle("entropy over associates") +
theme_bw() +
theme(legend.position = "none")
H.item %>%
filter(condition != "sham") %>%
select(cue, condition, H) %>%
ungroup() %>%
spread(condition, H) %>%
do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE))) %>%
kable()
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| -0.0496129 | -1.989219 | 0.0539112 | 38 | -0.1001032 | 0.0008773 | Paired t-test | two.sided |
Marginally reliable difference (p = .054): Cathodal has higher entropy.
A third approach: I constructed a network for each cue for each condition. Nodes are the associates given by participants. A link is added between two nodes if a participant listed both associates. Weights are determined by the number of people who listed two associates. If participants are giving more idiosyncratic responses in the cathodal group, we should expect less centrality in the cathodal networks. In the limit, if participants give ONLY idiosnycratic responses, then the network should look like a bunch of separate clusters. Conversely, if everyone gives the same responses, it should look like a hairball.
First, here are the graphs for “bicycle” in the cathodal and anodal conditions.
Modularity setup
# get all pairs
all.pairs = d.all %>%
#filter(association.num < 6) %>%
filter(as.character(target) != as.character(cue)) %>%
group_by(labSubj, cue, condition) %>%
do(m = data.table(t(combn(.$target, 2)))) %>%
unnest() %>%
rename(a1 = V1,
a2 = V2)
# modularity function
get_modularity <- function(df){
counts <- count(df, a1, a2)
graph <- counts %>%
select(a1, a2) %>%
graph_from_data_frame(directed = FALSE) %>%
simplify()
E(graph)$weight=counts$n # add in weights
clustering = cluster_leading_eigen(graph,
options = list(maxiter=1000000),
weights = counts$n)
closeness = mean(estimate_closeness(graph, cutoff= 100))
betweeness = mean(estimate_betweenness(graph, cutoff= 100),
weights = counts$n)
degree = mean(igraph::degree(graph))
data.frame(Q = round(clustering$modularity,4),
n.groups = round(length(clustering),4),
closeness = round(closeness, 4),
betweeness = round(betweeness, 4),
degree = mean(degree))
}
# single item example
test = all.pairs %>%
filter(cue == "bicycle" & condition == "cathodal") %>%
filter(a1 != a2) %>%
mutate(a1 = str_trim(a1),
a2 = str_trim(a2))
counts.ex <- count(test, a1, a2)
v.attr = d.all %>%
filter(as.character(target) != as.character(cue)) %>%
mutate(target = str_trim(target)) %>%
filter(cue == "bicycle" & condition == "cathodal") %>%
distinct(target, .keep_all = T) %>%
gather(type, value, c(10, 12,14,11,16, 17,18)) %>%
filter(value == 1) %>%
select(target, type)
graph <- counts.ex %>%
select(a1, a2) %>%
graph_from_data_frame(directed = FALSE, vertices = v.attr) %>%
simplify()
E(graph)$weight=counts.ex$n # add in weights
ggplot(ggnetwork(asNetwork(graph)),
aes(x = x, y = y, xend = xend, yend = yend)) +
geom_edges(aes(size = weight), color = "grey50") +
geom_nodes(aes(color = type)) +
geom_nodelabel(aes(label = vertex.names , fill = type), size = 2) +
ggtitle("cathodal") +
theme_blank()
test = all.pairs %>%
filter(cue == "bicycle" & condition == "anodal") %>%
filter(a1 != a2) %>%
mutate(a1 = str_trim(a1),
a2 = str_trim(a2))
counts.ex <- count(test, a1, a2)
v.attr = d.all %>%
filter(as.character(target) != as.character(cue)) %>%
mutate(target = str_trim(target)) %>%
filter(cue == "bicycle" & condition == "anodal") %>%
distinct(target, .keep_all = T) %>%
gather(type, value, c(10, 12,14,11,16, 17,18)) %>%
filter(value == 1) %>%
select(target, type)
graph <- counts.ex %>%
select(a1, a2) %>%
graph_from_data_frame(directed = FALSE, vertices = v.attr) %>%
simplify()
E(graph)$weight=counts.ex$n # add in weights
ggplot(ggnetwork(asNetwork(graph)),
aes(x = x, y = y, xend = xend, yend = yend)) +
geom_edges(aes(size = weight), color = "grey50") +
geom_nodes(aes(color = type)) +
geom_nodelabel(aes(label = vertex.names , fill = type), size = 2) +
ggtitle("anodal") +
theme_blank()
…it looks like the cathodal network is more modular.
Now, let’s do this across all items and quantify modularity, using three measures: Betweenness, mean degree, and modularity.
## get all modularity
modularity = all.pairs %>%
filter(a1 != a2) %>%
mutate(a1 = str_trim(a1),
a2 = str_trim(a2)) %>%
group_by(cue, condition) %>%
do(get_modularity(.))
# mean betweeness
modularity %>%
group_by(condition) %>%
multi_boot_standard(column = "betweeness", na.rm = T) %>%
ggplot(aes(x = condition, y = mean, fill = condition)) +
geom_bar(position = "dodge", stat = "identity") +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme_bw() +
ggtitle("betweeness") +
theme(legend.position = "none")
modularity %>%
filter(condition != "sham") %>%
select(cue, condition, betweeness) %>%
ungroup() %>%
spread(condition, betweeness) %>%
do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE))) %>%
kable()
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| -2.714844 | -3.421774 | 0.0015021 | 38 | -4.321003 | -1.108685 | Paired t-test | two.sided |
# mean degree
modularity %>%
group_by(condition) %>%
multi_boot_standard(column = "degree", na.rm = T) %>%
ggplot(aes(x = condition, y = mean, fill = condition)) +
geom_bar(position = "dodge", stat = "identity") +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme_bw() +
ggtitle("mean degree") +
theme(legend.position = "none")
modularity %>%
filter(condition != "sham") %>%
select(cue, condition, degree) %>%
ungroup() %>%
spread(condition, degree) %>%
do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE))) %>%
kable()
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| 0.3877107 | 2.541442 | 0.0152419 | 38 | 0.0788784 | 0.6965431 | Paired t-test | two.sided |
# clustering
modularity %>%
group_by(condition) %>%
multi_boot_standard(column = "Q", na.rm = T) %>%
ggplot(aes(x = condition, y = mean, fill = condition)) +
geom_bar(position = "dodge", stat = "identity") +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
ggtitle("modularity") +
theme_bw() +
theme(legend.position = "none")
modularity %>%
filter(condition != "sham") %>%
select(cue, condition, Q) %>%
ungroup() %>%
spread(condition, Q) %>%
do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE))) %>%
kable()
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| -0.0350205 | -2.870941 | 0.0066548 | 38 | -0.0597146 | -0.0103264 | Paired t-test | two.sided |
Across three different measures, anodal group is more clustered than cathodal group, suggesting they are all sampling from a shared “concept” of the cue.
Note that for two of the measures, sham looks less clustered than cathodal, and I think this is just an artifact of the fact that the sham condition has few associates overall (see below). If you subset to the first 4-5 associates the differences go away, probably because it’s underpowered. Gary suggests controling for number of associates, but I’m not totally sure how to do that with this analysis….
Mean number of associates:
d.all %>%
group_by(cue, condition, labSubj) %>%
summarize(n = n()) %>%
group_by(cue, condition) %>%
summarize(n = mean(n)) %>%
group_by(condition) %>%
multi_boot_standard(column = "n", na.rm = T) %>%
ggplot(aes(x = condition, y = mean, fill = condition)) +
geom_bar(position = "dodge", stat = "identity") +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
ggtitle("mean number of associates") +
theme_bw() +
theme(legend.position = "none")
Gary: How consistent are the groups in terms of consistency?
If the network analysis is correct, we should expect participants in the anodal condition to not only make more correct guesses, but be wrong in the same way. Here I look at this with three measures: number of incorrect guesses, entropy over guesses and ICC.
d.bw2 = d.bw.raw %>%
select(electrode, labSubj, subjCode, cue,
turkerGuess, isRight) %>%
rename(condition = electrode) %>%
mutate(condition = fct_recode(condition, "sham" = "na")) %>%
mutate(turkerGuess= str_trim(turkerGuess))
d.bw2 %>%
filter(isRight == 0) %>%
group_by(condition, cue) %>%
summarize(n = length(unique(turkerGuess))) %>%
group_by(condition) %>%
multi_boot_standard(column = "n", na.rm = T) %>%
ggplot(aes(x = condition, y = mean, fill = condition)) +
geom_bar(position = "dodge", stat = "identity") +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
ggtitle("mean number of unique wrong guesses") +
theme_bw() +
theme(legend.position = "none")
d.bw2 %>%
filter(isRight == 0) %>%
group_by(condition, cue) %>%
summarize(n = length(unique(turkerGuess))) %>%
filter(condition != "sham") %>%
select(cue, condition, n) %>%
spread(condition, n) %>%
do(tidy(t.test(.$anodal, .$cathodal, paired=TRUE))) %>%
kable()
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| -2.974359 | -2.447356 | 0.0191244 | 38 | -5.434677 | -0.5140406 | Paired t-test | two.sided |
d.bw2 %>%
filter(isRight == 0) %>%
count(condition, cue, turkerGuess) %>%
group_by(cue, condition) %>%
summarize(H = entropy(n)) %>%
group_by(condition) %>%
multi_boot_standard(column = "H", na.rm = T) %>%
ggplot(aes(x = condition, y = mean, fill = condition)) +
geom_bar(position = "dodge", stat = "identity") +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
ggtitle("entropy over wrong guesses") +
theme_bw() +
theme(legend.position = "none")
m = d.bw2 %>%
filter(isRight == 0) %>%
count(condition, cue, turkerGuess) %>%
group_by(cue, condition) %>%
summarize(H = entropy(n)) %>%
filter(condition != "sham") %>%
select(cue, condition, H) %>%
spread(condition, H)
tidy(t.test(m$anodal, m$cathodal, paired=TRUE)) %>%
kable()
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| -0.083305 | -2.083032 | 0.0440287 | 38 | -0.1642649 | -0.0023451 | Paired t-test | two.sided |
Anodal group has fewer unique wrong guesses and lower entropy over wrong guesses than cathodal group.
rank.orders = counts %>%
select( -total_cue) %>%
group_by(condition, labSubj) %>%
arrange(correct) %>%
mutate(rank.order = 1:n())
ICCs = rank.orders %>%
select(-correct) %>%
spread(cue, rank.order) %>%
group_by(condition) %>%
do(psych::ICC(select(., -1,-2) %>% t(), missing = FALSE)$results) %>%
filter(type == "ICC1")
ggplot(ICCs, aes(x = condition, y = ICC, fill = condition)) +
geom_bar(position = "dodge", stat = "identity") +
geom_linerange(aes(ymax = `upper bound`, ymin = `lower bound`)) +
ggtitle("Interaclass correlation") +
theme_bw() +
theme(legend.position = "none")
Numerically as predicted, but not reliable.