# Load the raw association data (presumably one row per trial: a cue plus the
# three responses a participant gave to it -- confirm against the CSV).
d <- read.csv("/Documents/GRADUATE_SCHOOL/Projects/SEMSOC/data/associations_ppdetails_en_05_01_2015.csv")

# Build the cleaned table:
#   1. stack the three response columns (positions 7:9 of `d`) into long form,
#   2. turn the "x" missing-response placeholder into a real missing value,
#   3. spread back to wide form and shorten the response column names.
# NOTE: spread() places the re-created response columns after all remaining
# columns, so a1..a3 are not guaranteed to sit at positions 7:9 of `d.clean`.
d.clean <- d %>%
  gather("association", "word", 7:9) %>%
  mutate(
    word = as.character(word),
    # Only a response that consists solely of "x" is a missing word. Use a
    # true NA (not the string "NA") so is.na() works downstream, and leave
    # responses that merely contain a standalone "x" (e.g. "x ray") intact.
    word = ifelse(grepl("^\\s*x\\s*$", word), NA_character_, word)
  ) %>%
  spread("association", "word") %>%
  rename(
    a1 = asso1Clean,
    a2 = asso2Clean,
    a3 = asso3Clean
  )
These data are from the Small World of Words project. In the task, participants are given a cue and are asked to generate 3 associates. Each participant completes 15-19 trials. We have 73256 participants. There are 10050 distinct cues.
Number of participants per cue.
# Count the rows of d.clean that belong to each cue.
word.summary <- summarise(group_by(d.clean, cue), n = n())

# Bar per cue, ordered from most to least frequent; the dashed red line marks
# the mean count across cues. Cue labels are hidden because there are far too
# many to print on the x axis.
ggplot(data = word.summary, mapping = aes(x = reorder(cue, -n), y = n)) +
  geom_bar(stat = "identity") +
  geom_hline(
    yintercept = mean(word.summary$n),
    color = "red",
    linetype = "longdash"
  ) +
  labs(x = "distinct cue words", y = "n trials") +
  theme_bw(base_size = 12) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "none"
  )

# Distribution of the per-cue counts.
ggplot(data = word.summary, mapping = aes(x = n)) +
  theme_bw() +
  geom_histogram()
Number of unique associates per cue.
# Number of distinct associates produced for each cue.
associate.counts.per.cue <- d.clean %>%
  # Select the response columns by name: after the gather()/spread() round
  # trip that builds d.clean they need not be at positions 7:9 any more.
  gather("associate_type", "associate", a1:a3) %>%
  select(cue, associate) %>%
  # A missing response is not an associate; drop it whether it is coded as a
  # real NA or as the literal string "NA".
  filter(!is.na(associate), associate != "NA") %>%
  distinct(cue, associate) %>%
  group_by(cue) %>%
  summarise(n = n())

# Distribution of the number of distinct associates per cue.
ggplot(associate.counts.per.cue, aes(x = n)) +
  geom_histogram() +
  theme_bw()
So, for each cue, we have a mean of 106.8882587 responses from participants and 124.259801 unique words.
Sample distributions of associations, for particular cues.
# How often each associate was given in response to each cue.
# The result stays grouped by `cue` (summarise() only peels off the last
# grouping variable), exactly as before.
associate.counts <- d.clean %>%
  # Select the response columns by name: after the gather()/spread() round
  # trip that builds d.clean they need not be at positions 7:9 any more.
  gather("associate_type", "associate", a1:a3) %>%
  select(cue, associate) %>%
  # A missing response is not an associate; drop it whether it is coded as a
  # real NA or as the literal string "NA".
  filter(!is.na(associate), associate != "NA") %>%
  group_by(cue, associate) %>%
  summarise(n = n())

# Histogram of associate frequencies for a single cue.
#
# counts:     table with columns `cue`, `associate` and `n`.
# target.cue: the cue word whose associates are plotted.
# title:      plot title; defaults to "Cue: <target.cue>".
# Returns the ggplot object (printed automatically when called at top level).
plot_associate_counts <- function(counts, target.cue,
                                  title = paste0("Cue: ", target.cue)) {
  ggplot(filter(counts, cue == target.cue), aes(x = n)) +
    geom_histogram() +
    ggtitle(title) +
    theme_bw()
}

plot_associate_counts(associate.counts, "abacus", title = "Cue: Abacus")
plot_associate_counts(associate.counts, "zucchini")
plot_associate_counts(associate.counts, "abandon")
plot_associate_counts(associate.counts, "abbreviation")