Are semantic associations related to individuals differences? Namely: self-report measures of visual, linguistic and spatial processing preferences.
The task: participants given cue and asked to produce 4 associates.
Read in and summarize data.
d.raw = read.csv("../data/assocData_complete.csv", header = TRUE)%>%
select(workerId, cue, associate, num, forwardStrength, backwardStrength, visual, spatial, word, Dom_PoS_SUBTLEX, Lg10WF)
py.measures = read.csv("ranked_LV/all_associate_stringsP.csv")
d.raw = left_join(d.raw, py.measures) %>%
select(-a1, -a2, -a3, -a4)
mean.associates = d.raw %>%
group_by(workerId) %>%
summarise(n = n()) %>%
summarise(mean = mean(n))
There are 65 participants. The mean number of associates per participants is 156.
measures = d.raw %>%
group_by(workerId) %>%
slice(1) %>%
ungroup() %>%
select(workerId, visual, spatial, word)
measures %>%
gather("measure", "value", 2:4) %>%
ggplot(aes(x = value, fill = measure)) +
geom_density(alpha = .4) +
facet_grid(~ measure) +
theme_bw() +
theme(legend.position ="none")
Visual and spatial correlation
kable(tidy(cor.test(measures$visual, measures$spatial)))
| estimate | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|
| 0.1928055 | 1.55961 | 0.1238619 | 63 | -0.0536148 | 0.4170909 |
Visual and word correlation
kable(tidy(cor.test(measures$visual, measures$word)))
| estimate | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|
| 0.0018503 | 0.0146864 | 0.9883287 | 63 | -0.2421581 | 0.2456386 |
Word and spatial correlation
kable(tidy(cor.test(measures$word, measures$spatial)))
| estimate | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|
| -0.0206234 | -0.1637279 | 0.8704695 | 63 | -0.2631986 | 0.2244045 |
Measures are uncorrelated. Now on to the key analyses….
Are there differences in agreement with the average partiicpant, by processing preference? This is the numWordMatch measure taken from the rankedComparision.py script.
overall.word.matches = d.raw%>%
group_by(workerId) %>%
summarize(mean.numWordMatch = mean(numWordMatch),
visual = mean(visual),
spatial = mean(spatial),
word = mean(word)) %>%
gather("measure", "value", 3:5)
ggplot(overall.word.matches, aes(y = mean.numWordMatch, x = value)) +
facet_grid(.~measure) +
geom_point() +
geom_smooth(method = "lm") +
ggtitle("Number of matched words") +
theme_bw()
Within a given word/visual bin, and given the same cue, what proportion/number of matches are there? Two measures: The raw number of counts (Counted) and proportion two randomly sampled participants give the same associate (Sampled).
d.raw$word.bin = as.factor(ifelse(d.raw$word > median(measures$word), 1, 0))
d.raw$visual.bin = as.factor(ifelse(d.raw$visual > median(measures$visual), 1, 0))
d.raw$word.quant = quantcut(d.raw$word)
d.raw$visual.quant = quantcut(d.raw$visual)
get_num_matchesW <- function(data, this.cue, this.word.bin, this.num, this.associate){
all_associates = filter(data,
cue == this.cue & word.bin == this.word.bin
& num == this.num)$associate
num_matches = sum(all_associates == this.associate)
num_matches
}
same.responses = d.raw %>%
rowwise() %>%
mutate(num_same = get_num_matchesW(d.raw, cue, word.bin, num, associate))
same.responses %>%
group_by(word.bin, num) %>%
multi_boot_standard(column = "num_same", na.rm = T) %>%
ggplot(aes(x = num, y = mean, group = word.bin,
color = word.bin)) +
geom_pointrange(aes(ymax = ci_upper, ymin = ci_lower)) +
xlab("Associate") +
ylab("Num matches") +
geom_point() +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Stats – collapsing across num.
d.word.stats = same.responses %>%
group_by(workerId, word.bin) %>%
summarise(num_same = mean(num_same))
kable(tidy(t.test(filter(d.word.stats, word.bin == 1)$num_same,
filter(d.word.stats, word.bin == 0)$num_same)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 0.2724359 | 3.387821 | 3.115385 | 2.617174 | 0.0112349 | 59.24685 | 0.0641595 | 0.4807123 |
Quantiles
get_num_matchesWQ <- function(data, this.cue, this.word.quant, this.num, this.associate){
all_associates = filter(data,
cue == this.cue & word.quant== this.word.quant
& num == this.num)$associate
num_matches = sum(all_associates == this.associate)
num_matches
}
same.responses = d.raw %>%
rowwise() %>%
mutate(num_same = get_num_matchesWQ(d.raw, cue, word.quant, num, associate))
same.responses %>%
group_by(word.quant) %>%
multi_boot_standard(column = "num_same", na.rm = T) %>%
ggplot(aes(x = word.quant, y = mean,
color = word.quant)) +
geom_pointrange(aes(ymax = ci_upper, ymin = ci_lower)) +
xlab("Word quantile") +
ylab("Num matches") +
geom_point() +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
get_num_matchesV <- function(data, this.cue, this.visual.bin,
this.num, this.associate){
all_associates = filter(data,
cue == this.cue & visual.bin == this.visual.bin
& num == this.num)$associate
num_matches = sum(all_associates == this.associate)
num_matches
}
same.responses = d.raw %>%
rowwise() %>%
mutate(num_same = get_num_matchesV(d.raw, cue, visual.bin, num, associate))
same.responses %>%
group_by(visual.bin, num) %>%
multi_boot_standard(column = "num_same", na.rm = T) %>%
ggplot(aes(x = num, y = mean, group = visual.bin,
color = visual.bin)) +
geom_pointrange(aes(ymax = ci_upper, ymin = ci_lower)) +
xlab("Associate") +
ylab("Num matches") +
geom_point() +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Stats – collapsing across num.
d.visual.stats = same.responses %>%
group_by(workerId, visual.bin) %>%
summarise(num_same = mean(num_same))
kable(tidy(t.test(filter(d.visual.stats, visual.bin == 1)$num_same,
filter(d.visual.stats, visual.bin == 0)$num_same)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 0.0774704 | 3.275434 | 3.197964 | 0.7183286 | 0.4753458 | 59.93184 | -0.1382631 | 0.293204 |
So, there is a difference for prop same for word (more word, more agreement), but not for visual.
What about the two “high” groups? Comparing high word to high visual. There is no difference there.
kable(tidy(t.test(filter(d.visual.stats, visual.bin == 1)$num_same,
filter(d.word.stats, word.bin == 1)$num_same)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| -0.1123863 | 3.275434 | 3.387821 | -1.241645 | 0.2191316 | 60.86193 | -0.2933886 | 0.0686161 |
Here I calculated the probability that two participants sampled from the same word bin and given the same cue will produce the same associate. To do this, we randomly sample 2 participants x N_PAIR_SAMPLED for each cue with an education level. We then take the proportion agreement across samples, and then average across cues within an education level.
Do people who are higher on word produce more similiar responses to one another?
one_sample <- function(associate) {
function(k) {
sample.is = sample(1:length(associate), 2, replace = T)
associate[sample.is[1]] == associate[sample.is[2]]
}
}
all_samples <- function(associate, n_pair_samples) {
sample_values <- 1:n_pair_samples %>%
map(one_sample(associate)) %>%
unlist()
data.frame(prop = sum(sample_values)/length(sample_values))
}
N_PAIR_SAMPLED = 100
sim.responses = d.raw %>%
filter(associate != "NA") %>%
group_by(cue, word.bin, num) %>%
mutate(prop_similar = unlist(all_samples(associate,
N_PAIR_SAMPLED)))
sim.responses %>%
group_by(word.bin, num) %>%
multi_boot_standard(column = "prop_similar", na.rm = T) %>%
ggplot(aes(x = num, y = mean, group = word.bin,
color = word.bin)) +
geom_pointrange(aes(ymax = ci_upper, ymin = ci_lower)) +
xlab("Associate") +
ylab("Proportion Same") +
geom_point() +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Quantiles
sim.responses = d.raw %>%
filter(associate != "NA") %>%
group_by(cue, word.quant, num) %>%
mutate(prop_similar = unlist(all_samples(associate,
N_PAIR_SAMPLED)))
sim.responses %>%
group_by(word.quant) %>%
multi_boot_standard(column = "prop_similar", na.rm = T) %>%
ggplot(aes(x = word.quant, y = mean,
color = word.quant)) +
geom_pointrange(aes(ymax = ci_upper, ymin = ci_lower)) +
xlab("Associate") +
ylab("Proportion Same") +
geom_point() +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Do people who are higher on visual produce less similiar responses to one another?
d.raw$visual.bin = as.factor(ifelse(d.raw$visual > median(measures$visual), 1, 0))
N_PAIR_SAMPLED = 100
sim.responses = d.raw %>%
filter(associate != "NA") %>%
group_by(cue, visual.bin, num) %>%
mutate(prop_similar = unlist(all_samples(associate,
N_PAIR_SAMPLED)))
sim.responses %>%
group_by(visual.bin, num) %>%
multi_boot_standard(column = "prop_similar", na.rm = T) %>%
ggplot(aes(x = num, y = mean, group = visual.bin,
color = visual.bin)) +
geom_pointrange(aes(ymax = ci_upper, ymin = ci_lower)) +
xlab("Associate") +
ylab("Proportion Same") +
geom_point() +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Overall, this measure looks like the counted measure.
Using the by-word procedure you emailed.
# spread associates
d.raw.wide = d.raw %>%
select(workerId, cue, num, associate, word.bin, visual.bin, workerId) %>%
spread(num, associate) %>%
rename(a1 = `1`, a2 = `2`, a3 = `3`, a4 = `4`)
# write.csv(select(d.raw.wide, workerId, cue, a1, a2, a3, a4), "all_associate_strings.csv") # for rankedComparision.py
d.raw.wide$all = paste(d.raw.wide$a1,
d.raw.wide$a2, d.raw.wide$a3, d.raw.wide$a4, " ")
d.raw.wide$all= gsub("NA","", d.raw.wide$all)
get_edit_distance <- function(this.cue, this.workerId) {
this.string = filter(d.raw.wide,
cue == this.cue & workerId == this.workerId)$all
all.strings = filter(d.raw.wide,
cue == this.cue & workerId != this.workerId)$all
this.string.int = hashr::hash(strsplit(this.string,"[[:blank:]]+"))
all.strings.int = hashr::hash(strsplit(all.strings,"[[:blank:]]+"))
edits = unlist(lapply(all.strings.int,
function(x) seq_dist(x, this.string.int)))
mean(edits, na.rm = T)
}
word.edits = d.raw.wide %>%
group_by(cue, workerId) %>%
summarize(edit_distance = get_edit_distance(cue[1], workerId[1])) %>%
select(cue, workerId, edit_distance) %>%
group_by(workerId) %>% # collapse across trials for each worker
summarise(mean.edit.distance = mean(edit_distance, na.rm = T))
word.edits.full = left_join(word.edits, d.raw %>% ungroup() %>% group_by(workerId) %>% slice(1) %>% ungroup() %>% select(spatial, visual, word, workerId, word.bin, visual.bin), by = "workerId")
word.edits.long = select(word.edits.full, workerId, visual, spatial, word, mean.edit.distance) %>%
gather("measure", "value", 2:4)
ggplot(word.edits.long, aes(y= mean.edit.distance, x = value)) +
facet_grid(.~measure) +
geom_point() +
geom_smooth(method = "lm") +
theme_bw()
kable(word.edits.long %>%
group_by(measure) %>%
do(tidy(cor.test(.$mean.edit.distance, .$value))))
| measure | estimate | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|---|
| spatial | -0.0644754 | -0.5128243 | 0.6098677 | 63 | -0.3036002 | 0.1822904 |
| visual | -0.1451754 | -1.1646320 | 0.2485592 | 63 | -0.3757692 | 0.1023477 |
| word | -0.0176941 | -0.1404649 | 0.8887408 | 63 | -0.2604692 | 0.2271854 |
There is not a significant correlation between edit distance and any of the processing preferences.
kable(tidy(t.test(filter(word.edits.full, word.bin == 1)$mean.edit.distance,
filter(word.edits.full, visual.bin == 1)$mean.edit.distance)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| -0.0036752 | 3.712891 | 3.716566 | -0.1857602 | 0.8532517 | 60.75032 | -0.0432408 | 0.0358903 |
No difference in edit distance for high word and visual groups.
kable(tidy(t.test(filter(word.edits.full, word.bin == 0)$mean.edit.distance,
filter(word.edits.full, word.bin == 1)$mean.edit.distance)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 0.0263285 | 3.739219 | 3.712891 | 1.315183 | 0.1932291 | 62.81698 | -0.0136783 | 0.0663353 |
No difference in edit distance between high and low word groups.
kable(tidy(lm(mean.edit.distance~visual+ spatial + word, data = word.edits.full)))
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 3.7262574 | 0.0102047 | 365.1500461 | 0.0000000 |
| visual | -0.0144586 | 0.0135385 | -1.0679658 | 0.2897442 |
| spatial | -0.0037199 | 0.0125341 | -0.2967822 | 0.7676410 |
| word | -0.0018809 | 0.0130628 | -0.1439900 | 0.8859831 |
Predicting edit distance with all the individual differences, none are significant predictors.
This is the measure taken from the rankedComparision.py script writted by Gary.
word.edits.full.ranked = d.raw %>%
group_by(workerId) %>% # collapse across trials for each worker
summarise(mean.rankDiff = mean(rankDiff, na.rm = T),
mean.numWordMatch = mean(numWordMatch, na.rm = T)) %>%
left_join(d.raw %>% ungroup() %>%group_by(workerId) %>% slice(1)%>% ungroup() %>% select(workerId, word.bin, visual.bin, word, visual, spatial), by = "workerId")
word.edits.long.ranked = select(word.edits.full.ranked, workerId, visual,
spatial, word, mean.numWordMatch, mean.rankDiff) %>%
gather("measure", "value", 2:4)
ggplot(word.edits.long.ranked, aes(y = mean.rankDiff, x = value)) +
facet_grid(.~measure) +
geom_point() +
geom_smooth(method = "lm") +
ggtitle("Rank edit difference") +
theme_bw()
kable(word.edits.long.ranked %>%
group_by(measure) %>%
do(tidy(cor.test(.$mean.rankDiff, .$value))))
| measure | estimate | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|---|
| spatial | -0.1141937 | -0.9123523 | 0.3650615 | 63 | -0.3483895 | 0.1334215 |
| visual | -0.2232166 | -1.8175867 | 0.0738831 | 63 | -0.4429979 | 0.0218733 |
| word | -0.0722010 | -0.5745775 | 0.5676233 | 63 | -0.3106300 | 0.1747758 |
There is not a significant correlation between edit distance and any of the processing preferences.
kable(tidy(t.test(filter(word.edits.full.ranked, word.bin == 1)$mean.rankDiff,
filter(word.edits.full.ranked, visual.bin == 1)$mean.rankDiff)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| -0.0015179 | 2.493409 | 2.494927 | -0.1582802 | 0.8747659 | 60.18372 | -0.0206996 | 0.0176638 |
No difference in edit distance for high word and visual groups.
kable(tidy(t.test(filter(word.edits.full.ranked, word.bin == 0)$mean.rankDiff,
filter(word.edits.full.ranked, word.bin == 1)$mean.rankDiff)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 0.0190235 | 2.512433 | 2.493409 | 1.646336 | 0.1053344 | 55.6191 | -0.0041276 | 0.0421746 |
No difference in edit distance between high and low word groups.
kable(tidy(lm(mean.rankDiff~visual+ spatial + word, data = word.edits.full.ranked)))
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 2.5030672 | 0.0058460 | 428.1675984 | 0.0000000 |
| visual | -0.0127867 | 0.0077558 | -1.6486556 | 0.1043597 |
| spatial | -0.0042854 | 0.0071804 | -0.5968203 | 0.5528361 |
| word | -0.0044232 | 0.0074833 | -0.5910723 | 0.5566559 |
Predicting edit distance with all the individual differences, none are significant predictors.
Using Lg10WF.
d.raw %>%
group_by(workerId, word.bin, num) %>%
summarize(log.freq = mean(Lg10WF, na.rm = T)) %>%
group_by(word.bin, num) %>%
multi_boot_standard(column = "log.freq", na.rm = T) %>%
ggplot(aes(x = num, y = mean, group = word.bin,
color = word.bin)) +
geom_pointrange(aes(ymax = ci_upper, ymin = ci_lower)) +
xlab("Associate") +
ylab("log frequency") +
geom_point() +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
FS.by.participant = d.raw %>%
group_by(workerId, word.bin) %>%
summarize(forwardStrength = mean(forwardStrength, na.rm = T))
FS.by.participant %>%
group_by(word.bin) %>%
multi_boot_standard(column = "forwardStrength", na.rm = T) %>%
ggplot(aes(x = word.bin, y = mean, group = word.bin,
fill = word.bin)) +
xlab("Associate") +
ylab("Forward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(t.test(filter(FS.by.participant, word.bin == 1)$forwardStrength,
filter(FS.by.participant, word.bin == 0)$forwardStrength)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 0.0088468 | 0.1456551 | 0.1368083 | 1.89681 | 0.0627037 | 59.57996 | -0.000484 | 0.0181776 |
summary(lmer(forwardStrength~word+num+(num|workerId)+(1|cue),data=d.raw))
## Linear mixed model fit by REML ['lmerMod']
## Formula: forwardStrength ~ word + num + (num | workerId) + (1 | cue)
## Data: d.raw
##
## REML criterion at convergence: -3984.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9723 -0.5354 -0.1128 0.3927 4.3775
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 4.706e-04 0.021693
## num 4.811e-05 0.006936 -1.00
## cue (Intercept) 8.298e-03 0.091091
## Residual 2.388e-02 0.154536
## Number of obs: 4650, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.228973 0.015692 14.592
## word 0.003569 0.003085 1.157
## num -0.037061 0.002237 -16.571
##
## Correlation of Fixed Effects:
## (Intr) word
## word 0.000
## num -0.333 0.008
This is marginal.
Quantiles
FS.by.participant = d.raw %>%
group_by(workerId, word.quant) %>%
summarize(forwardStrength = mean(forwardStrength, na.rm = T))
FS.by.participant %>%
group_by(word.quant) %>%
multi_boot_standard(column = "forwardStrength", na.rm = T) %>%
ggplot(aes(x = word.quant, y = mean, group = word.quant,
fill = word.quant)) +
xlab("Word Quantile") +
ylab("Forward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Continuous
FS.by.participant = d.raw %>%
group_by(workerId, word) %>%
summarize(forwardStrength = mean(forwardStrength, na.rm = T))
FS.by.participant %>%
ggplot(aes(x = word, y = forwardStrength)) +
xlab("Word Quantile") +
ylab("Forward strength") +
geom_point() +
geom_smooth(method = "lm") +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(cor.test(FS.by.participant$forwardStrength, FS.by.participant$word)))
| estimate | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|
| 0.2010658 | 1.629182 | 0.1082652 | 63 | -0.0450422 | 0.424164 |
FS.by.participant = d.raw %>%
group_by(workerId, visual.bin) %>%
summarize(forwardStrength = mean(forwardStrength, na.rm = T))
FS.by.participant %>%
group_by(visual.bin) %>%
multi_boot_standard(column = "forwardStrength", na.rm = T) %>%
ggplot(aes(x = visual.bin, y = mean, group = visual.bin,
fill = visual.bin)) +
xlab("Associate") +
ylab("Forward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(t.test(filter(FS.by.participant, visual.bin == 1)$forwardStrength,
filter(FS.by.participant, visual.bin == 0)$forwardStrength)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 0.0032322 | 0.1428543 | 0.1396221 | 0.6783044 | 0.5000751 | 62.68405 | -0.0062911 | 0.0127555 |
summary(lmer(forwardStrength~visual+num+(num|workerId)+(1|cue),data=d.raw))
## Linear mixed model fit by REML ['lmerMod']
## Formula: forwardStrength ~ visual + num + (num | workerId) + (1 | cue)
## Data: d.raw
##
## REML criterion at convergence: -3983.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9773 -0.5340 -0.1135 0.3926 4.3726
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 0.0005216 0.022838
## num 0.0000519 0.007204 -1.00
## cue (Intercept) 0.0082980 0.091093
## Residual 0.0238787 0.154527
## Number of obs: 4650, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.2289642 0.0157177 14.567
## visual -0.0009031 0.0032081 -0.282
## num -0.0370787 0.0022495 -16.483
##
## Correlation of Fixed Effects:
## (Intr) visual
## visual -0.006
## num -0.336 0.005
Quantiles
FS.by.participant = d.raw %>%
group_by(workerId, visual.quant) %>%
summarize(forwardStrength = mean(forwardStrength, na.rm = T))
FS.by.participant %>%
group_by(visual.quant) %>%
multi_boot_standard(column = "forwardStrength", na.rm = T) %>%
ggplot(aes(x = visual.quant, y = mean, group = visual.quant,
fill = visual.quant)) +
xlab("Associate") +
ylab("Forward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Forward strengths are skewed. Let’s try log transforming.
ggplot(d.raw, aes(x = log(forwardStrength))) +
geom_histogram() +
theme_bw()
d.raw$log.forwardStrength = log(d.raw$forwardStrength)
FS.by.participant = d.raw %>%
group_by(workerId, word.bin) %>%
summarize(log.forwardStrength = mean(log.forwardStrength, na.rm = T))
FS.by.participant %>%
group_by(word.bin) %>%
multi_boot_standard(column = "log.forwardStrength", na.rm = T) %>%
ggplot(aes(x = word.bin, y = mean, group = word.bin,
fill = word.bin)) +
xlab("Associate") +
ylab("Log Forward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(t.test(filter(FS.by.participant, word.bin == 1)$log.forwardStrength,
filter(FS.by.participant, word.bin == 0)$log.forwardStrength)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| 0.0694542 | -2.610892 | -2.680347 | 2.013825 | 0.0487148 | 57.50959 | 0.000405 | 0.1385035 | Welch Two Sample t-test | two.sided |
summary(lmer(log.forwardStrength~word+num+(num|workerId)+(1|cue),data=d.raw))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.forwardStrength ~ word + num + (num | workerId) + (1 | cue)
## Data: d.raw
##
## REML criterion at convergence: 13697.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6884 -0.7391 -0.0161 0.7117 3.3780
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 0.022936 0.15145
## num 0.001868 0.04322 -1.00
## cue (Intercept) 0.203911 0.45157
## Residual 1.077435 1.03800
## Number of obs: 4650, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -2.04561 0.08224 -24.873
## word 0.03100 0.02143 1.447
## num -0.26474 0.01486 -17.814
##
## Correlation of Fixed Effects:
## (Intr) word
## word 0.000
## num -0.428 0.008
This is marginal.
Quantiles
FS.by.participant = d.raw %>%
group_by(workerId, word.quant) %>%
summarize(log.forwardStrength = mean(log.forwardStrength, na.rm = T))
FS.by.participant %>%
group_by(word.quant) %>%
multi_boot_standard(column = "log.forwardStrength", na.rm = T) %>%
ggplot(aes(x = word.quant, y = mean, group = word.quant,
fill = word.quant)) +
xlab("Word Quantile") +
ylab("Forward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Continuous
FS.by.participant = d.raw %>%
group_by(workerId, word) %>%
summarize(log.forwardStrength = mean(log.forwardStrength, na.rm = T))
FS.by.participant %>%
ggplot(aes(x = word, y = log.forwardStrength)) +
xlab("Word Quantile") +
ylab("Forward strength") +
geom_point() +
geom_smooth(method = "lm") +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(cor.test(FS.by.participant$log.forwardStrength, FS.by.participant$word)))
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| 0.2400805 | 1.962992 | 0.0540657 | 63 | -0.0040561 | 0.4572076 | Pearson’s product-moment correlation | two.sided |
FS.by.participant = d.raw %>%
group_by(workerId, visual.bin) %>%
summarize(log.forwardStrength = mean(log.forwardStrength, na.rm = T))
FS.by.participant %>%
group_by(visual.bin) %>%
multi_boot_standard(column = "log.forwardStrength", na.rm = T) %>%
ggplot(aes(x = visual.bin, y = mean, group = visual.bin,
fill = visual.bin)) +
xlab("Associate") +
ylab("Forward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(t.test(filter(FS.by.participant, visual.bin == 1)$log.forwardStrength,
filter(FS.by.participant, visual.bin == 0)$log.forwardStrength)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| 0.0359891 | -2.627329 | -2.663318 | 1.030719 | 0.306895 | 58.78184 | -0.0338841 | 0.1058623 | Welch Two Sample t-test | two.sided |
summary(lmer(log.forwardStrength~visual+num+(num|workerId)+(1|cue),data=d.raw))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.forwardStrength ~ visual + num + (num | workerId) + (1 |
## cue)
## Data: d.raw
##
## REML criterion at convergence: 13699.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6939 -0.7352 -0.0206 0.7135 3.3691
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 0.026763 0.16359
## num 0.002104 0.04587 -1.00
## cue (Intercept) 0.204127 0.45180
## Residual 1.077159 1.03786
## Number of obs: 4650, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -2.04596 0.08264 -24.759
## visual -0.00464 0.02244 -0.207
## num -0.26485 0.01498 -17.678
##
## Correlation of Fixed Effects:
## (Intr) visual
## visual -0.007
## num -0.434 0.004
Quantiles
FS.by.participant = d.raw %>%
group_by(workerId, visual.quant) %>%
summarize(log.forwardStrength = mean(log.forwardStrength, na.rm = T))
FS.by.participant %>%
group_by(visual.quant) %>%
multi_boot_standard(column = "log.forwardStrength", na.rm = T) %>%
ggplot(aes(x = visual.quant, y = mean, group = visual.quant,
fill = visual.quant)) +
xlab("Associate") +
ylab("Forward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
swow.FS = read.csv("SWOW_CP/mean_SWOW_CP.csv")
swow.FS.weights = read.csv("SWOW_CP/SWOW_associate_weights.csv")
d.raw = d.raw %>%
mutate(bigram = paste(cue, associate)) %>%
left_join(swow.FS) %>% # merge in cps
left_join(swow.FS.weights, by=c("num" = "associate")) %>% # merge in weights
mutate(swow.weighted.FS = trans.prob * weights) # get weighted cp
ggplot(d.raw, aes(x = swow.weighted.FS)) +
geom_histogram() +
theme_bw() +
ggtitle("weighted forward strengths")
FS.by.participant = d.raw %>%
group_by(workerId, word.bin) %>%
summarize(forwardStrengths.swow = mean(swow.weighted.FS, na.rm = T))
FS.by.participant %>%
group_by(word.bin) %>%
multi_boot_standard(column = "forwardStrengths.swow", na.rm = T) %>%
ggplot(aes(x = word.bin, y = mean, group = word.bin,
fill = word.bin)) +
xlab("Associate") +
ylab("Forward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(t.test(filter(FS.by.participant, word.bin == 1)$forwardStrengths.swow,
filter(FS.by.participant, word.bin == 0)$forwardStrengths.swow)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| 0.0002752 | 0.0039151 | 0.0036399 | 2.339964 | 0.0231573 | 52.02398 | 3.92e-05 | 0.0005111 | Welch Two Sample t-test | two.sided |
summary(lmer(swow.weighted.FS~word+num+(num|workerId)+(1|cue),data=d.raw)) # log values don't converge here - but this may be what we want?s
## Linear mixed model fit by REML ['lmerMod']
## Formula: swow.weighted.FS ~ word + num + (num | workerId) + (1 | cue)
## Data: d.raw
##
## REML criterion at convergence: -62071.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7584 -0.6163 -0.0690 0.4064 4.1859
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 1.858e-06 0.0013631
## num 1.689e-07 0.0004109 -1.00
## cue (Intercept) 3.689e-06 0.0019208
## Residual 2.655e-05 0.0051526
## Number of obs: 8094, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.192e-02 3.762e-04 31.68
## word 7.449e-05 8.504e-05 0.88
## num -3.407e-03 7.232e-05 -47.11
##
## Correlation of Fixed Effects:
## (Intr) word
## word -0.001
## num -0.548 0.006
Quantiles
FS.by.participant = d.raw %>%
group_by(workerId, word.quant) %>%
summarize(forwardStrengths.swow = mean(swow.weighted.FS, na.rm = T))
FS.by.participant %>%
group_by(word.quant) %>%
multi_boot_standard(column = "forwardStrengths.swow", na.rm = T) %>%
ggplot(aes(x = word.quant, y = mean, group = word.quant,
fill = word.quant)) +
xlab("Word Quantile") +
ylab("Forward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Continuous
FS.by.participant = d.raw %>%
group_by(workerId, word) %>%
summarize(forwardStrength = mean(swow.weighted.FS, na.rm = T))
FS.by.participant %>%
ggplot(aes(x = word, y = forwardStrength)) +
xlab("Word") +
ylab("Forward strength") +
geom_point() +
geom_smooth(method = "lm") +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(cor.test(FS.by.participant$forwardStrength, FS.by.participant$word)))
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| 0.2256263 | 1.838255 | 0.0707401 | 63 | -0.0193369 | 0.4450351 | Pearson’s product-moment correlation | two.sided |
FS.by.participant = d.raw %>%
group_by(workerId, visual.bin) %>%
summarize(forwardStrengths.swow = mean(swow.weighted.FS, na.rm = T))
FS.by.participant %>%
group_by(visual.bin) %>%
multi_boot_standard(column = "forwardStrengths.swow", na.rm = T) %>%
ggplot(aes(x = visual.bin, y = mean, group = visual.bin,
fill = visual.bin)) +
xlab("Associate") +
ylab("Forward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(t.test(filter(FS.by.participant, visual.bin == 1)$forwardStrengths.swow,
filter(FS.by.participant, visual.bin == 0)$forwardStrengths.swow)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| 0.0001258 | 0.0038412 | 0.0037154 | 1.033532 | 0.3053198 | 62.78353 | -0.0001174 | 0.0003689 | Welch Two Sample t-test | two.sided |
summary(lmer(swow.weighted.FS~visual+num+(num|workerId)+(1|cue),data=d.raw))
## Linear mixed model fit by REML ['lmerMod']
## Formula: swow.weighted.FS ~ visual + num + (num | workerId) + (1 | cue)
## Data: d.raw
##
## REML criterion at convergence: -62071.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7499 -0.6138 -0.0707 0.4034 4.1868
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 1.892e-06 0.0013754
## num 1.707e-07 0.0004131 -1.00
## cue (Intercept) 3.689e-06 0.0019208
## Residual 2.655e-05 0.0051524
## Number of obs: 8094, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.192e-02 3.768e-04 31.62
## visual 5.474e-05 8.794e-05 0.62
## num -3.407e-03 7.250e-05 -46.99
##
## Correlation of Fixed Effects:
## (Intr) visual
## visual 0.000
## num -0.550 -0.005
Quantiles
FS.by.participant = d.raw %>%
group_by(workerId, visual.quant) %>%
summarize(forwardStrengths.swow = mean(swow.weighted.FS, na.rm = T))
FS.by.participant %>%
group_by(visual.quant) %>%
multi_boot_standard(column = "forwardStrengths.swow", na.rm = T) %>%
ggplot(aes(x = visual.quant, y = mean, group = visual.quant,
fill = visual.quant)) +
xlab("Visual Quantile") +
ylab("Forward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Continuous
FS.by.participant = d.raw %>%
group_by(workerId, visual) %>%
summarize(forwardStrength = mean(swow.weighted.FS, na.rm = T))
FS.by.participant %>%
ggplot(aes(x = visual, y = forwardStrength)) +
xlab("Visual") +
ylab("Forward strength") +
geom_point() +
geom_smooth(method = "lm") +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(cor.test(FS.by.participant$forwardStrength, FS.by.participant$visual)))
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| 0.0638193 | 0.5075848 | 0.613517 | 63 | -0.1829271 | 0.3030021 | Pearson’s product-moment correlation | two.sided |
Data are skewed, let’s log-transform and replot.
d.raw$log.swow.weighted.FS = log(d.raw$swow.weighted.FS)
ggplot(d.raw, aes(x = log.swow.weighted.FS)) +
geom_histogram() +
theme_bw() +
ggtitle("weighted forward strengths")
FS.by.participant = d.raw %>%
group_by(workerId, word.bin) %>%
summarize(forwardStrengths.swow = mean(log.swow.weighted.FS, na.rm = T))
FS.by.participant %>%
group_by(word.bin) %>%
multi_boot_standard(column = "forwardStrengths.swow", na.rm = T) %>%
ggplot(aes(x = word.bin, y = mean, group = word.bin,
fill = word.bin)) +
xlab("Associate") +
ylab("Forward strength") +
#geom_bar(position = "dodge", stat = "identity") +
ylim(-7,-5)+
geom_pointrange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(t.test(filter(FS.by.participant, word.bin == 1)$forwardStrengths.swow,
filter(FS.by.participant, word.bin == 0)$forwardStrengths.swow)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| 0.0936953 | -6.395849 | -6.489545 | 2.081713 | 0.0423024 | 52.05855 | 0.0033811 | 0.1840095 | Welch Two Sample t-test | two.sided |
summary(lmer(log.swow.weighted.FS~word+num+(num|workerId)+(1|cue),data=d.raw)) # log values don't converge here - but this may be what we want?s
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.swow.weighted.FS ~ word + num + (num | workerId) + (1 | cue)
## Data: d.raw
##
## REML criterion at convergence: 20164.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.81224 -0.72345 0.09215 0.80781 2.17407
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 0.000000 0.00000
## num 0.004743 0.06887 NaN
## cue (Intercept) 0.124581 0.35296
## Residual 1.387895 1.17809
## Number of obs: 6314, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -3.82581 0.06821 -56.09
## word 0.05208 0.02685 1.94
## num -1.35626 0.02012 -67.40
##
## Correlation of Fixed Effects:
## (Intr) word
## word 0.004
## num -0.467 -0.003
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
Quantiles
FS.by.participant = d.raw %>%
group_by(workerId, word.quant) %>%
summarize(forwardStrengths.swow = mean(log.swow.weighted.FS, na.rm = T))
FS.by.participant %>%
group_by(word.quant) %>%
multi_boot_standard(column = "forwardStrengths.swow", na.rm = T) %>%
ggplot(aes(x = word.quant, y = mean, group = word.quant,
fill = word.quant)) +
xlab("Word Quantile") +
ylab("Forward strength") +
#geom_bar(position = "dodge", stat = "identity") +
ylim(-7,-5)+
geom_pointrange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Continuous
FS.by.participant = d.raw %>%
group_by(workerId, word) %>%
summarize(forwardStrength = mean(log.swow.weighted.FS, na.rm = T))
FS.by.participant %>%
ggplot(aes(x = word, y = forwardStrength)) +
xlab("Word") +
ylab("Forward strength") +
geom_point() +
geom_smooth(method = "lm") +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(cor.test(FS.by.participant$forwardStrength, FS.by.participant$word)))
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| 0.1795606 | 1.448765 | 0.1523625 | 63 | -0.0672853 | 0.4056925 | Pearson’s product-moment correlation | two.sided |
FS.by.participant = d.raw %>%
group_by(workerId, visual.bin) %>%
summarize(forwardStrengths.swow = mean(log.swow.weighted.FS, na.rm = T))
FS.by.participant %>%
group_by(visual.bin) %>%
multi_boot_standard(column = "forwardStrengths.swow", na.rm = T) %>%
ggplot(aes(x = visual.bin, y = mean, group = visual.bin,
fill = visual.bin)) +
xlab("Associate") +
ylab("Forward strength") +
#geom_bar(position = "dodge", stat = "identity") +
ylim(-7,-5)+
geom_pointrange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(t.test(filter(FS.by.participant, visual.bin == 1)$forwardStrengths.swow,
filter(FS.by.participant, visual.bin == 0)$forwardStrengths.swow)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| 0.0583484 | -6.412897 | -6.471246 | 1.279863 | 0.2055876 | 59.18273 | -0.0328702 | 0.1495671 | Welch Two Sample t-test | two.sided |
summary(lmer(log.swow.weighted.FS~visual+num+(num|workerId)+(1|cue),data=d.raw))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.swow.weighted.FS ~ visual + num + (num | workerId) + (1 |
## cue)
## Data: d.raw
##
## REML criterion at convergence: 20166.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7956 -0.7225 0.0915 0.8093 2.1782
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 0.000000 0.00000
## num 0.004616 0.06794 NaN
## cue (Intercept) 0.124609 0.35300
## Residual 1.388505 1.17835
## Number of obs: 6314, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -3.82652 0.06822 -56.09
## visual 0.03897 0.02734 1.43
## num -1.35608 0.02008 -67.54
##
## Correlation of Fixed Effects:
## (Intr) visual
## visual -0.002
## num -0.468 0.000
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
Quantiles
FS.by.participant = d.raw %>%
group_by(workerId, visual.quant) %>%
summarize(forwardStrengths.swow = mean(log.swow.weighted.FS, na.rm = T))
FS.by.participant %>%
group_by(visual.quant) %>%
multi_boot_standard(column = "forwardStrengths.swow", na.rm = T) %>%
ggplot(aes(x = visual.quant, y = mean, group = visual.quant,
fill = visual.quant)) +
xlab("Visual Quantile") +
ylab("Forward strength") +
#geom_bar(position = "dodge", stat = "identity") +
ylim(-7,-5)+
geom_pointrange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Continuous
FS.by.participant = d.raw %>%
group_by(workerId, visual) %>%
summarize(forwardStrength = mean(log.swow.weighted.FS, na.rm = T))
FS.by.participant %>%
ggplot(aes(x = visual, y = forwardStrength)) +
xlab("Visual") +
ylab("Forward strength") +
geom_point() +
geom_smooth(method = "lm") +
theme_bw(base_size = 15) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(cor.test(FS.by.participant$forwardStrength, FS.by.participant$visual)))
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| 0.1198526 | 0.9582078 | 0.3416209 | 63 | -0.1277818 | 0.3534206 | Pearson’s product-moment correlation | two.sided |
BS.by.participant = d.raw %>%
group_by(workerId, word.bin) %>%
summarize(backwardStrength = mean(backwardStrength, na.rm = T))
BS.by.participant %>%
group_by(word.bin) %>%
multi_boot_standard(column = "backwardStrength", na.rm = T) %>%
ggplot(aes(x = word.bin, y = mean, group = word.bin,
fill = word.bin)) +
xlab("Associate") +
ylab("Backward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(t.test(filter(BS.by.participant, word.bin == 1)$backwardStrength,
filter(BS.by.participant, word.bin == 0)$backwardStrength)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| -0.0034444 | 0.1200617 | 0.1235062 | -0.5128392 | 0.6098637 | 62.77598 | -0.016867 | 0.0099782 | Welch Two Sample t-test | two.sided |
summary(lmer(backwardStrength~word+num+(num|workerId)+(1|cue),data=d.raw))
## Linear mixed model fit by REML ['lmerMod']
## Formula: backwardStrength ~ word + num + (num | workerId) + (1 | cue)
## Data: d.raw
##
## REML criterion at convergence: -2980.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3118 -0.4587 -0.1067 0.1687 4.9219
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 6.400e-04 0.025299
## num 8.219e-05 0.009066 -0.90
## cue (Intercept) 1.900e-02 0.137855
## Residual 2.769e-02 0.166390
## Number of obs: 4273, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.146964 0.023066 6.371
## word -0.003025 0.003767 -0.803
## num -0.007831 0.002589 -3.024
##
## Correlation of Fixed Effects:
## (Intr) word
## word 0.001
## num -0.258 0.003
BS.by.participant = d.raw %>%
group_by(workerId, visual.bin) %>%
summarize(backwardStrength = mean(backwardStrength, na.rm = T))
BS.by.participant %>%
group_by(visual.bin) %>%
multi_boot_standard(column = "backwardStrength", na.rm = T) %>%
ggplot(aes(x = visual.bin, y = mean, group = visual.bin,
fill = visual.bin)) +
xlab("Associate") +
ylab("Backward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(t.test(filter(BS.by.participant, visual.bin == 1)$backwardStrength,
filter(BS.by.participant, visual.bin == 0)$backwardStrength)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| 0.0045679 | 0.1241998 | 0.1196319 | 0.6802049 | 0.4988805 | 62.65541 | -0.0088534 | 0.0179893 | Welch Two Sample t-test | two.sided |
summary(lmer(backwardStrength~visual+num+(num|workerId)+(1|cue),data=d.raw))
## Linear mixed model fit by REML ['lmerMod']
## Formula: backwardStrength ~ visual + num + (num | workerId) + (1 | cue)
## Data: d.raw
##
## REML criterion at convergence: -2984
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2686 -0.4621 -0.1077 0.1720 4.9286
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 5.948e-04 0.024388
## num 8.398e-05 0.009164 -0.91
## cue (Intercept) 1.899e-02 0.137820
## Residual 2.768e-02 0.166376
## Number of obs: 4273, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.146792 0.023046 6.370
## visual 0.007619 0.003773 2.019
## num -0.007785 0.002595 -3.001
##
## Correlation of Fixed Effects:
## (Intr) visual
## visual -0.004
## num -0.257 0.007
d.raw$log.backwardStrength = log(d.raw$backwardStrength)
BS.by.participant = d.raw %>%
filter(is.finite(log.backwardStrength )) %>%
group_by(workerId, word.bin) %>%
summarize(log.backwardStrength = mean(log.backwardStrength, na.rm = T))
BS.by.participant %>%
group_by(word.bin) %>%
multi_boot_standard(column = "log.backwardStrength", na.rm = T) %>%
ggplot(aes(x = word.bin, y = mean, group = word.bin,
fill = word.bin)) +
xlab("Associate") +
ylab("Backward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(t.test(filter(BS.by.participant, word.bin == 1)$log.backwardStrength,
filter(BS.by.participant, word.bin == 0)$log.backwardStrength)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| -0.0356786 | -2.141023 | -2.105344 | -0.5409371 | 0.5904694 | 62.64453 | -0.167498 | 0.0961407 | Welch Two Sample t-test | two.sided |
BS.by.participant = d.raw %>%
filter(is.finite(log.backwardStrength )) %>%
group_by(workerId, visual.bin) %>%
summarize(log.backwardStrength = mean(log.backwardStrength, na.rm = T))
BS.by.participant %>%
group_by(visual.bin) %>%
multi_boot_standard(column = "log.backwardStrength", na.rm = T) %>%
ggplot(aes(x = visual.bin, y = mean, group = visual.bin,
fill = visual.bin)) +
xlab("Associate") +
ylab("Backward strength") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
kable(tidy(t.test(filter(BS.by.participant, visual.bin == 1)$log.backwardStrength,
filter(BS.by.participant, visual.bin == 0)$log.backwardStrength)))
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| 0.0294428 | -2.107508 | -2.136951 | 0.4484982 | 0.6553421 | 62.60762 | -0.1017594 | 0.160645 | Welch Two Sample t-test | two.sided |
summary(lmer(backwardStrength~visual+num+(num|workerId)+(1|cue),data=d.raw))
## Linear mixed model fit by REML ['lmerMod']
## Formula: backwardStrength ~ visual + num + (num | workerId) + (1 | cue)
## Data: d.raw
##
## REML criterion at convergence: -2984
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2686 -0.4621 -0.1077 0.1720 4.9286
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 5.948e-04 0.024388
## num 8.398e-05 0.009164 -0.91
## cue (Intercept) 1.899e-02 0.137820
## Residual 2.768e-02 0.166376
## Number of obs: 4273, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.146792 0.023046 6.370
## visual 0.007619 0.003773 2.019
## num -0.007785 0.002595 -3.001
##
## Correlation of Fixed Effects:
## (Intr) visual
## visual -0.004
## num -0.257 0.007
i.e. How similar is a participant to other high/low word participants?
#write.csv(select(d.raw.wide, workerId, cue, a1, a2, a3, a4, word.bin, visual.bin), "all_associate_strings_bins.csv", row.names=FALSE) # for semantic similarity.py
#freqs = select(d.raw, associate, Lg10WF) %>%
# mutate(Lg10WF = ifelse(is.na(Lg10WF), 0, Lg10WF)) %>%
# unique()write.csv(freqs, "freqs.csv", row.names=FALSE)
py.sim.measures = read.csv("semantic_similarity/all_associate_strings_bins_subtlexus_P_word.csv")
sim.long.word = py.sim.measures %>%
gather("measure", "word_name", 3:6) %>%
left_join(d.raw %>% group_by(workerId) %>% slice(1) %>% ungroup () %>% select(visual, word, workerId)) %>%
left_join(d.raw %>% select(associate, cue, workerId, num))
sim.by.participant.word = sim.long.word %>%
group_by(workerId, word.bin, word) %>%
summarize(sim.word.bin = mean(sim.word.bin, na.rm = T)) %>%
ungroup() %>%
mutate(word.bin = as.factor(word.bin))
sim.by.participant.word %>%
group_by(word.bin) %>%
multi_boot_standard(column = "sim.word.bin", na.rm = T) %>%
ggplot(aes(x = word.bin, y = mean, group = word.bin,
fill = word.bin)) +
xlab("Word bin") +
ylab("Overall similarity") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position ="none")
t.test:
word.t.test = tidy(t.test(filter(sim.by.participant.word, word.bin == 1)$sim.word.bin,
filter(sim.by.participant.word, word.bin == 0)$sim.word.bin))
kable(word.t.test)
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| 0.0211509 | 0.4586593 | 0.4375084 | 3.12636 | 0.0027021 | 61.49797 | 0.007625 | 0.0346768 | Welch Two Sample t-test | two.sided |
Effect size:
es_all = tes(word.t.test$statistic, nrow(filter(sim.by.participant.word, word.bin == 1)), nrow(filter(sim.by.participant.word, word.bin == 0)), verbose = F)
es_all$d
## [1] 0.78
summary(lmer(sim.word.bin~word + visual + num + (num|workerId) + (1|cue), data=sim.long.word))
## Linear mixed model fit by REML ['lmerMod']
## Formula: sim.word.bin ~ word + visual + num + (num | workerId) + (1 |
## cue)
## Data: sim.long.word
##
## REML criterion at convergence: -92907.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.4994 -0.6164 0.1016 0.7244 2.7114
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 8.172e-04 2.859e-02
## num 6.361e-18 2.522e-09 1.00
## cue (Intercept) 3.642e-03 6.035e-02
## Residual 5.842e-03 7.643e-02
## Number of obs: 40560, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.479e-01 1.034e-02 43.34
## word 5.478e-03 4.564e-03 1.20
## visual 7.117e-03 4.642e-03 1.53
## num 1.902e-15 3.395e-04 0.00
##
## Correlation of Fixed Effects:
## (Intr) word visual
## word 0.000
## visual 0.000 -0.002
## num -0.082 0.000 0.000
summary(glmer(sim.word.bin~word.bin + visual.bin + num + (num|workerId) + (1|cue), data=sim.long.word))
## Linear mixed model fit by REML ['lmerMod']
## Formula: sim.word.bin ~ word.bin + visual.bin + num + (num | workerId) +
## (1 | cue)
## Data: sim.long.word
##
## REML criterion at convergence: -92915.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.5002 -0.6160 0.1014 0.7249 2.7095
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 7.401e-04 2.720e-02
## num 3.027e-17 5.501e-09 -1.00
## cue (Intercept) 3.642e-03 6.035e-02
## Residual 5.842e-03 7.643e-02
## Number of obs: 40560, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.344e-01 1.130e-02 38.44
## word.bin 2.125e-02 6.793e-03 3.13
## visual.bin 6.362e-03 6.799e-03 0.94
## num -3.140e-16 3.395e-04 0.00
##
## Correlation of Fixed Effects:
## (Intr) wrd.bn vsl.bn
## word.bin -0.301
## visual.bin -0.292 0.016
## num -0.075 0.000 0.000
py.sim.measures.visual = read.csv("semantic_similarity/all_associate_strings_bins_subtlexus_P_visual.csv")
sim.long.vis = py.sim.measures.visual %>%
gather("measure", "associate", 3:6) %>%
left_join(d.raw %>% group_by(workerId) %>% slice(1) %>% ungroup () %>% select(visual, workerId, word)) %>%
left_join(d.raw %>% select(associate, cue, workerId, num))
sim.by.participant.vis = sim.long.vis %>%
group_by(workerId, visual.bin) %>%
summarize(sim.visual.bin = mean(sim.visual.bin, na.rm = T)) %>%
ungroup() %>%
mutate(visual.bin = as.factor(visual.bin))
sim.by.participant.vis %>%
group_by(visual.bin) %>%
multi_boot_standard(column = "sim.visual.bin", na.rm = T) %>%
ggplot(aes(x = visual.bin, y = mean, group = visual.bin,
fill = visual.bin)) +
xlab("Visual bin") +
ylab("Overall similarity") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position ="none")
t.test:
visual.t.test = tidy(t.test(filter(sim.by.participant.vis, visual.bin == 1)$sim.visual.bin,
filter(sim.by.participant.vis, visual.bin == 0)$sim.visual.bin))
kable(visual.t.test)
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| 0.0158635 | 0.4557792 | 0.4399157 | 2.301396 | 0.0247314 | 62.21466 | 0.0020856 | 0.0296414 | Welch Two Sample t-test | two.sided |
effect size:
es_all = tes(visual.t.test$statistic, nrow(filter(sim.by.participant.vis, visual.bin == 1)), nrow(filter(sim.by.participant.vis, visual.bin == 0)), verbose = F)
es_all$d
## [1] 0.57
summary(lmer(sim.visual.bin ~ visual + word + num + (num|workerId) + (1|cue), data = sim.long.vis))
## Linear mixed model fit by REML ['lmerMod']
## Formula: sim.visual.bin ~ visual + word + num + (num | workerId) + (1 |
## cue)
## Data: sim.long.vis
##
## REML criterion at convergence: -22937.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.2922 -0.6091 0.1018 0.7087 2.4636
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 7.489e-04 2.737e-02
## num 1.422e-17 3.771e-09 -0.90
## cue (Intercept) 3.614e-03 6.012e-02
## Residual 5.846e-03 7.646e-02
## Number of obs: 10140, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.475e-01 1.038e-02 43.13
## visual 1.114e-02 4.528e-03 2.46
## word -1.038e-03 4.451e-03 -0.23
## num -2.230e-15 6.791e-04 0.00
##
## Correlation of Fixed Effects:
## (Intr) visual word
## visual 0.000
## word 0.000 -0.002
## num -0.164 0.000 0.000
summary(glmer(sim.visual.bin ~ visual.bin + word.bin + num + (num|workerId) + (1|cue), data=sim.long.vis))
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## sim.visual.bin ~ visual.bin + word.bin + num + (num | workerId) +
## (1 | cue)
## Data: sim.long.vis
##
## REML criterion at convergence: -22940
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.2947 -0.6071 0.1017 0.7087 2.4582
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 7.361e-04 2.713e-02
## num 3.465e-16 1.861e-08 -1.00
## cue (Intercept) 3.614e-03 6.012e-02
## Residual 5.846e-03 7.646e-02
## Number of obs: 10140, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.351e-01 1.141e-02 38.13
## visual.bin 1.602e-02 6.908e-03 2.32
## word.bin 9.665e-03 6.901e-03 1.40
## num -3.450e-15 6.791e-04 0.00
##
## Correlation of Fixed Effects:
## (Intr) vsl.bn wrd.bn
## visual.bin -0.294
## word.bin -0.302 0.016
## num -0.149 0.000 0.000
t-tests are reliable for both word and visual, but effect size is bigger for word.
Predicting word.binned similaritiy with both bins, word but not visual bin is significant. Continuous analysis not reliable.
Predicting visual.binned similarity with visual is a siginficant predictor in both binned and continuous analysis.
py.sim.measures.byword = read.csv("semantic_similarity/all_associate_strings_bins_subtlexus_P_word_associate_wise.csv")
sim.long.word = py.sim.measures.byword %>%
gather("measure", "word_name", 3:6) %>%
left_join(d.raw %>% group_by(workerId) %>% slice(1) %>% ungroup () %>% select(visual, word, workerId)) %>%
left_join(d.raw %>% select(associate, cue, workerId, num))
sim.by.participant.word = sim.long.word %>%
group_by(workerId, word.bin) %>%
summarize(sim.word.bin = mean(sim.word.bin, na.rm = T)) %>%
ungroup() %>%
mutate(word.bin = as.factor(word.bin))
sim.by.participant.word %>%
group_by(word.bin) %>%
multi_boot_standard(column = "sim.word.bin", na.rm = T) %>%
ggplot(aes(x = word.bin, y = mean, group = word.bin,
fill = word.bin)) +
xlab("Word bin") +
ylab("Overall similarity") +
geom_bar(position = "dodge", stat = "identity") +
theme_bw(base_size = 15) +
geom_linerange(aes(ymax = ci_upper, ymin = ci_lower)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position ="none")
t.test:
word.t.test = tidy(t.test(filter(sim.by.participant.word, word.bin == 1)$sim.word.bin,
filter(sim.by.participant.word, word.bin == 0)$sim.word.bin))
kable(word.t.test)
| estimate | estimate1 | estimate2 | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|---|
| 0.0022976 | 0.4378299 | 0.4355323 | 0.3072799 | 0.75965 | 62.61635 | -0.0126462 | 0.0172413 | Welch Two Sample t-test | two.sided |
Effect size:
es_all = tes(word.t.test$statistic, nrow(filter(sim.by.participant.word, word.bin == 1)), nrow(filter(sim.by.participant.word, word.bin == 0)), verbose = F)
es_all$d
## [1] 0.08
summary(lmer(sim.word.bin~word + visual + num + (num|workerId) + (1|cue), data=sim.long.word))
## Linear mixed model fit by REML ['lmerMod']
## Formula: sim.word.bin ~ word + visual + num + (num | workerId) + (1 |
## cue)
## Data: sim.long.word
##
## REML criterion at convergence: -81439.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9972 -0.6403 -0.0072 0.6381 5.4906
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 8.367e-04 2.893e-02
## num 1.656e-18 1.287e-09 -1.00
## cue (Intercept) 4.779e-03 6.913e-02
## Residual 7.748e-03 8.802e-02
## Number of obs: 40544, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.367e-01 1.169e-02 37.37
## word -7.719e-04 4.626e-03 -0.17
## visual 1.127e-02 4.705e-03 2.40
## num -7.859e-13 3.910e-04 0.00
##
## Correlation of Fixed Effects:
## (Intr) word visual
## word 0.000
## visual 0.000 -0.002
## num -0.084 0.000 0.000
summary(glmer(sim.word.bin~word.bin + visual.bin + num + (num|workerId) + (1|cue), data=sim.long.word))
## Linear mixed model fit by REML ['lmerMod']
## Formula: sim.word.bin ~ word.bin + visual.bin + num + (num | workerId) +
## (1 | cue)
## Data: sim.long.word
##
## REML criterion at convergence: -81439
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9995 -0.6396 -0.0065 0.6377 5.4909
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 8.688e-04 2.948e-02
## num 2.061e-17 4.540e-09 -1.00
## cue (Intercept) 4.780e-03 6.913e-02
## Residual 7.748e-03 8.802e-02
## Number of obs: 40544, groups: workerId, 65; cue, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.291e-01 1.277e-02 33.61
## word.bin 2.601e-03 7.366e-03 0.35
## visual.bin 1.317e-02 7.373e-03 1.79
## num -2.829e-12 3.910e-04 0.00
##
## Correlation of Fixed Effects:
## (Intr) wrd.bn vsl.bn
## word.bin -0.288
## visual.bin -0.280 0.016
## num -0.077 0.000 0.000