Question: Does association-type position predict accuracy?
Read in data
# tdcs data
d.all = read.csv("../data/labDataPlusNorms.csv") %>%
  rename(condition = electrode,
         association.num = mention) %>%
  select(1:4, 7, 9:19, 21, 22) %>%
  mutate(condition = fct_recode(condition, "sham" = "na"))
# turker accuracy data
d.bw = read.csv("../data/backALLfinal_withALLSimilaritiesPlusNorms.csv") %>%
  select(cue, labSubj, subjCode, isRight)
Calculate the proportion of turkers for each list that correctly guessed the cue. We then assign a list a 1 if its proportion correct is above the median across lists, and 0 otherwise.
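For intuition, here is a toy example of this median split with made-up proportions (not part of the analysis pipeline). Note that lists exactly at the median are coded 0, since the comparison is strict:
# hypothetical proportions correct for four lists
prop <- c(.2, .5, .5, .8)
ifelse(prop > median(prop), 1, 0)  # 0 0 0 1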
# add trial-wise accuracy column to tdcs data based on turker data
prop.correct = d.bw %>%
  group_by(cue, labSubj) %>%
  summarize(prop.turk.correct = sum(isRight) / length(isRight))

median.correct = median(prop.correct$prop.turk.correct)

d.all.bw = left_join(d.all, prop.correct) %>%
  mutate(above.median = ifelse(prop.turk.correct > median.correct, 1, 0)) %>%
  mutate(above.median = as.factor(above.median)) %>%
  filter(!is.na(above.median))
The sham condition has a lower mean number of responses than the critical conditions.
num_responses_by_trial = d.all %>%
  rowwise() %>%
  mutate(missing = sum(taxInclusive, partsInclusive, thematic, action,
                       descriptor, holonym, metaphor) == 0,
         missing = ifelse(missing, 1, 0)) %>%
  gather(word_type, value, c(17, 18, 19, 11, 12, 14, 10, 16)) %>%
  select(-6:-11) %>%
  group_by(labSubj, cue, condition) %>%
  filter(value == 1) %>%
  summarize(association.num = max(association.num))

mean_n_responses = num_responses_by_trial %>%
  group_by(labSubj, condition) %>%
  summarize(association.num = mean(association.num)) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "association.num")

ggplot(mean_n_responses, aes(fill = condition,
                             y = mean, x = condition)) +
  ylab("mean number of responses") +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin = ci_lower),
                 position = position_dodge(width = .9))
We thus normalize rank by the number of responses plus one (e.g., ranks 1, 2, 3, 4 become .2, .4, .6, .8).
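A quick toy illustration of this normalization (not part of the analysis pipeline): a complete list of four responses maps to values symmetric around .5.
# toy example: a trial with four responses, ranked 1 through 4
ranks <- 1:4
normalized <- ranks / (max(ranks) + 1)
normalized        # 0.2 0.4 0.6 0.8
mean(normalized)  # 0.5, which is why the sanity check below expects .5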
# normalized rank by condition and word type
d.all.r = d.all %>%
  rowwise() %>%
  mutate(missing = sum(taxInclusive, partsInclusive, thematic,
                       action, descriptor, holonym, metaphor) == 0,
         missing = ifelse(missing, 1, 0)) %>%
  gather(word_type, value, c(17, 18, 19, 11, 12, 14, 10, 16)) %>%
  select(-6:-11) %>%
  group_by(condition, labSubj, cue) %>%
  filter(value == 1) %>%
  mutate(normalized.rank = association.num / (max(association.num) + 1))
As a sanity check, the mean normalized rank across all conditions is now .5.
# rank by condition
d.all.r %>%
  group_by(condition, labSubj, cue) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, labSubj) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition) %>%
  multi_boot_standard(column = "normalized.rank") %>%
  ggplot(aes(fill = condition, y = mean, x = condition)) +
  ylim(0, 1) +
  ylab("normalized mean rank") +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin = ci_lower),
                 position = position_dodge(width = .9))
Here is the normalized mean rank by word_type and condition:
d.all.r %>%
  filter(condition != "sham") %>%
  group_by(cue, labSubj, word_type, condition) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, labSubj, word_type) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  ggplot(aes(x = normalized.rank,
             group = condition, fill = condition)) +
  geom_density(alpha = .2) +
  facet_wrap(~word_type)
d.all.r %>%
  group_by(cue, labSubj, word_type, condition) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, labSubj, word_type) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, word_type) %>%
  multi_boot_standard(column = "normalized.rank") %>%
  ggplot(aes(fill = condition, y = mean, x = word_type)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin = ci_lower),
                 position = position_dodge(width = .9)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Split by accuracy
d.all.bw.r = left_join(d.all, prop.correct) %>%
  mutate(above.median = ifelse(prop.turk.correct > median.correct, 1, 0)) %>%
  mutate(above.median = as.factor(above.median)) %>%
  filter(!is.na(above.median)) %>%
  rowwise() %>%
  mutate(missing = sum(taxInclusive, partsInclusive, thematic,
                       action, descriptor, holonym, metaphor) == 0,
         missing = ifelse(missing, 1, 0)) %>%
  gather(word_type, value, c(12, 14, 10, 16, 21, 18, 17, 11)) %>%
  select(-6:-12) %>%
  group_by(condition, labSubj, cue) %>%
  filter(value == 1) %>%
  mutate(normalized.rank = association.num / (max(association.num) + 1))
d.all.bw.r %>%
  group_by(cue, labSubj, word_type, condition, above.median) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, labSubj, word_type, above.median) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, word_type, above.median) %>%
  multi_boot_standard(column = "normalized.rank") %>%
  ggplot(aes(fill = above.median, y = mean, x = word_type)) +
  facet_wrap(~condition) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin = ci_lower),
                 position = position_dodge(width = .9)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Calculate the effect size (d) in the sham condition, for each word type, between the above-median and below-median accuracy groups on mean normalized rank. This is a measure of how much the position of items of that type matters for accuracy.
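For reference, mes() from the compute.es package returns a standardized mean difference (d) and its confidence interval given two groups' means, SDs, and ns. A minimal sketch with made-up numbers (here a positive d would mean the above-median-accuracy lists place that word type later on average):
# hypothetical values for one word type: above-median group vs. below-median group
toy.es = mes(m.1 = .55, m.2 = .45, sd.1 = .15, sd.2 = .15,
             n.1 = 20, n.2 = 20, verbose = F)
toy.es$d                   # standardized mean difference
c(toy.es$l.d, toy.es$u.d)  # its confidence interval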
ns = d.all.bw.r %>%
  filter(condition == "sham") %>%
  group_by(cue, labSubj, word_type, condition, above.median) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, labSubj, word_type, above.median) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, word_type, above.median) %>%
  summarize(n = n()) %>%
  spread(above.median, n) %>%
  rename(n.low = `0`,
         n.high = `1`)

sds = d.all.bw.r %>%
  filter(condition == "sham") %>%
  group_by(cue, labSubj, word_type, condition, above.median) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, labSubj, word_type, above.median) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, word_type, above.median) %>%
  summarize(normalized.rank = sd(normalized.rank)) %>%
  spread(above.median, normalized.rank) %>%
  rename(sd.low = `0`,
         sd.high = `1`)
esr = d.all.bw.r %>%
  filter(condition == "sham") %>%
  group_by(cue, labSubj, word_type, condition, above.median) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, labSubj, word_type, above.median) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  group_by(condition, word_type, above.median) %>%
  summarize(normalized.rank = mean(normalized.rank)) %>%
  spread(above.median, normalized.rank) %>%
  rename(M.low = `0`,
         M.high = `1`) %>%
  left_join(ns) %>%
  left_join(sds) %>%
  ungroup()

es = mes(esr$M.high, esr$M.low, esr$sd.high, esr$sd.low,
         esr$n.high, esr$n.low, verbose = F)

esr$d = es$d
esr$l.d = es$l.d
esr$u.d = es$u.d
# esr$var = es$var.d
# esr$w.d = esr$d / esr$var

esr = mutate(esr, sham.direction = ifelse(d < 0, -1, 1))
ggplot(esr, aes(x = word_type, y = d)) +
  geom_point(color = "red") +
  geom_linerange(aes(ymax = u.d, ymin = l.d),
                 position = position_dodge(width = .9), color = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  coord_flip() +
  geom_hline(yintercept = 0, linetype = 2) +
  ggtitle("effect size on accuracy for sham group")
Now, ask whether the mean position of each associate type for a cue is above or below the group mean position for that type. Then, score cues accordingly.
score2: for each cue, compare the mean rank of each associate type to the overall group mean. If the deviation is in the same direction as the sham effect size, score that type abs(effect size) * number of items; otherwise, score it -abs(effect size) * number of items. Then sum across all types for that cue: this is the overall score for that list.
score1 is a weighted version of score2 that additionally multiplies by the distance from the group mean. A toy worked example follows.
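Here is a hypothetical worked example of the scoring rule for a single associate type on one list (numbers are made up; in the pipeline below these per-type scores are summed within each cue):
# suppose a list has 3 thematic associates with mean normalized rank .40,
# the group mean for thematic is .55, and the sham effect size is d = -.30
# (i.e., above-median-accuracy sham lists place thematic items earlier)
d.type = -.30
n.items = 3
dif = .55 - .40                             # group mean - this list's mean
actual.direction = ifelse(dif > 0, -1, 1)   # -1: this list places the type earlier than average
sham.direction = ifelse(d.type < 0, -1, 1)  # -1: earlier goes with higher accuracy in sham
score2 = ifelse(sham.direction == actual.direction,
                abs(d.type) * n.items, -abs(d.type) * n.items)  # 0.9
score1 = score2 * abs(dif)                  # 0.135, weighted by distance from the group mean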
pos.means = d.all.bw.r %>%
  filter(condition != "sham") %>%
  group_by(word_type, condition, cue, labSubj) %>%
  summarize(mean.normalized.rank = mean(normalized.rank)) %>%
  group_by(word_type, condition, cue) %>%
  summarize(mean.normalized.rank = mean(mean.normalized.rank)) %>%
  group_by(word_type) %>%
  summarize(group.mean.normalized.rank = mean(mean.normalized.rank))

# trial-level scores (one row per subject x cue)
ls.ms = d.all.bw.r %>%
  filter(condition != "sham") %>%
  group_by(word_type, condition, cue, labSubj) %>%
  summarize(mean.normalized.rank = mean(normalized.rank, na.rm = TRUE),
            n = sum(value)) %>%
  left_join(pos.means) %>%
  select(labSubj, condition, cue, word_type, mean.normalized.rank,
         group.mean.normalized.rank, n) %>%
  left_join(esr %>% select(word_type, d, sham.direction)) %>%
  mutate(dif = group.mean.normalized.rank - mean.normalized.rank,
         actual.direction = ifelse(dif > 0, -1, 1)) %>%
  mutate(score1 = ifelse(sham.direction == actual.direction,
                         abs(d) * n * abs(dif), -abs(d) * n * abs(dif)),
         score2 = ifelse(sham.direction == actual.direction,
                         abs(d) * n, -abs(d) * n)) %>%
  group_by(labSubj, condition, cue) %>%
  summarize(trial.score1 = sum(score1, na.rm = TRUE),
            trial.score2 = sum(score2, na.rm = TRUE))
# subject-level scores (average trial scores within each subject)
cue.ms = ls.ms %>%
  group_by(labSubj, condition) %>%
  summarize(subj.score1 = mean(trial.score1, na.rm = TRUE),
            subj.score2 = mean(trial.score2, na.rm = TRUE)) %>%
  gather(model, value, 3:4)

condition.ms = cue.ms %>%
  group_by(condition, model) %>%
  multi_boot_standard(column = "value")

ggplot(condition.ms, aes(fill = condition, y = mean, x = condition)) +
  facet_grid(~model) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin = ci_lower),
                 position = position_dodge(width = .9)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

cue.ms %>%
  ungroup() %>%
  select(condition, model, value) %>%
  group_by(model) %>%
  do(tidy(t.test(value ~ condition, data = .))) %>%
  kable()
model | estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
---|---|---|---|---|---|---|---|---|---|---|
subj.score1 | 0.038893 | 0.0405577 | 0.0016647 | 1.219321 | 0.2347058 | 23.72620 | -0.0269799 | 0.1047658 | Welch Two Sample t-test | two.sided |
subj.score2 | 0.283563 | 0.1284341 | -0.1551290 | 1.413419 | 0.1698677 | 24.99178 | -0.1296326 | 0.6967587 | Welch Two Sample t-test | two.sided |
tidy(lmer(trial.score1 ~ condition + (1|labSubj) + (1|cue), ls.ms))
## term estimate std.error statistic group
## 1 (Intercept) 0.04063329 0.02556794 1.589228 fixed
## 2 conditioncathodal -0.03890733 0.03193238 -1.218429 fixed
## 3 sd_(Intercept).cue 0.07539074 NA NA cue
## 4 sd_(Intercept).labSubj 0.06886952 NA NA labSubj
## 5 sd_Observation.Residual 0.30365212 NA NA Residual
tidy(lmer(trial.score2 ~ condition + (1|labSubj) + (1|cue), ls.ms))
## term estimate std.error statistic group
## 1 (Intercept) 0.1288405 0.1672982 0.7701249 fixed
## 2 conditioncathodal -0.2806677 0.2014492 -1.3932431 fixed
## 3 sd_(Intercept).cue 0.5513298 NA NA cue
## 4 sd_(Intercept).labSubj 0.3960204 NA NA labSubj
## 5 sd_Observation.Residual 2.2137144 NA NA Residual
The effect is in the expected direction (the anodal group scores higher), but it is not statistically significant.
Next, predict accuracy at the trial level and use beta weights instead of effect sizes.
First, use the sham group to estimate beta weights for the different word types.
pos.means = d.all.bw.r %>%
  group_by(word_type, condition, cue, labSubj) %>%
  summarize(mean.normalized.rank = mean(normalized.rank)) %>%
  spread(word_type, mean.normalized.rank)

d.bw.rank = d.bw %>%
  left_join(pos.means)

sham.model = glmer(isRight ~ scale(thematic) + scale(action) + scale(descriptor) +
                     scale(partsInclusive) + (1 | subjCode) + (1 | cue) + (1 | labSubj),
                   filter(d.bw.rank, condition == "sham"),
                   family = "binomial")
sham.model
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isRight ~ scale(thematic) + scale(action) + scale(descriptor) +
## scale(partsInclusive) + (1 | subjCode) + (1 | cue) + (1 |
## labSubj)
## Data: filter(d.bw.rank, condition == "sham")
## AIC BIC logLik deviance df.resid
## 621.9700 655.3769 -302.9850 605.9700 473
## Random effects:
## Groups Name Std.Dev.
## subjCode (Intercept) 2.862e-05
## cue (Intercept) 8.494e-01
## labSubj (Intercept) 3.843e-01
## Number of obs: 481, groups: subjCode, 340; cue, 22; labSubj, 12
## Fixed Effects:
## (Intercept) scale(thematic) scale(action)
## -0.2254 0.1044 -0.2648
## scale(descriptor) scale(partsInclusive)
## -0.1785 0.2591
sham.weights = sham.model %>%
  tidy() %>%
  select(term, estimate) %>%
  mutate(word_type = unlist(lapply(strsplit(term, "\\(|\\)"),
                                   function(x) x[2]))) %>%
  filter(word_type != "Intercept") %>%
  mutate(sham.direction = ifelse(estimate < 0, -1, 1))
kable(sham.weights)
term | estimate | word_type | sham.direction |
---|---|---|---|
scale(thematic) | 0.1043741 | thematic | 1 |
scale(action) | -0.2648485 | action | -1 |
scale(descriptor) | -0.1784625 | descriptor | -1 |
scale(partsInclusive) | 0.2590947 | partsInclusive | 1 |
Now, as before, ask whether the mean position of each associate type for a cue is above or below the group mean position for that type, and score cues accordingly, this time using the sham beta weights in place of the effect sizes.
pos.means = d.all.bw.r %>%
  filter(condition != "sham") %>%
  group_by(word_type, condition, cue, labSubj) %>%
  summarize(mean.normalized.rank = mean(normalized.rank)) %>%
  group_by(word_type, condition, cue) %>%
  summarize(mean.normalized.rank = mean(mean.normalized.rank)) %>%
  group_by(word_type) %>%
  summarize(group.mean.normalized.rank = mean(mean.normalized.rank))

# trial-level scores, now weighted by the sham beta weights
cue.ms = d.all.bw.r %>%
  filter(condition != "sham") %>%
  group_by(word_type, condition, cue, labSubj) %>%
  summarize(mean.normalized.rank = mean(normalized.rank, na.rm = TRUE),
            n = sum(value)) %>%
  left_join(pos.means) %>%
  select(labSubj, condition, cue, word_type, mean.normalized.rank,
         group.mean.normalized.rank, n) %>%
  left_join(sham.weights %>% select(word_type, estimate, sham.direction)) %>%
  mutate(dif = group.mean.normalized.rank - mean.normalized.rank,
         actual.direction = ifelse(dif > 0, -1, 1)) %>%
  mutate(score1 = ifelse(sham.direction == actual.direction,
                         abs(estimate) * n * abs(dif), -abs(estimate) * n * abs(dif)),
         score2 = ifelse(sham.direction == actual.direction,
                         abs(estimate) * n, -abs(estimate) * n)) %>%
  group_by(labSubj, condition, cue) %>%
  summarize(trial.score1 = sum(score1, na.rm = TRUE),
            trial.score2 = sum(score2, na.rm = TRUE))

# subject-level scores
ls.ms = cue.ms %>%
  group_by(labSubj, condition) %>%
  summarize(subj.score1 = mean(trial.score1, na.rm = TRUE),
            subj.score2 = mean(trial.score2, na.rm = TRUE)) %>%
  gather(model, value, 3:4)

condition.ms = ls.ms %>%
  group_by(condition, model) %>%
  multi_boot_standard(column = "value", na.rm = TRUE)

ggplot(condition.ms, aes(fill = condition, y = mean, x = condition)) +
  facet_grid(~model) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(aes(ymax = ci_upper, ymin = ci_lower),
                 position = position_dodge(width = .9)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
tidy(lmer(trial.score1 ~ condition + (1|labSubj) + (1|cue), cue.ms))
## term estimate std.error statistic group
## 1 (Intercept) 0.01469243 0.00876715 1.675850 fixed
## 2 conditioncathodal -0.01497195 0.01105911 -1.353812 fixed
## 3 sd_(Intercept).cue 0.02490500 NA NA cue
## 4 sd_(Intercept).labSubj 0.02469618 NA NA labSubj
## 5 sd_Observation.Residual 0.09736134 NA NA Residual
tidy(lmer(trial.score2 ~ condition + (1|labSubj) + (1|cue), cue.ms))
## term estimate std.error statistic group
## 1 (Intercept) 0.10299021 0.04743629 2.171127 fixed
## 2 conditioncathodal -0.08203292 0.05522763 -1.485360 fixed
## 3 sd_(Intercept).cue 0.16908206 NA NA cue
## 4 sd_(Intercept).labSubj 0.10399821 NA NA labSubj
## 5 sd_Observation.Residual 0.63701242 NA NA Residual
Again, the effect is in the expected direction, but not significant.
# spot-check the trial-level scores for one subject and cue
cue.ms %>%
  filter(labSubj == "PAS113" & cue == "ball") %>%
  as.data.frame()