Wk 3 Exercise 7:
Here, I will recount my journey plotting the gender_bck
dataset.
Load packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Import data
# Read the word-association data, give every row an id, and rename the
# count/strength columns to friendlier names.
swow <- "swow.csv" %>%
  read_tsv() %>%                # the file is tab-delimited despite its .csv name
  mutate(id = row_number()) %>% # add the "id" column (unlike 1:n(), safe on zero rows)
  rename(
    n_response = R1,            # nicer name for the response count
    n_total = N,                # nicer name for the total cue presentations
    strength = R1.Strength      # nicer name for the estimated response strength
  )
## Rows: 483636 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): cue, response
## dbl (3): R1, N, R1.Strength
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Words associated with “man” and “woman”
(copied and pasted from Exercise 7)
# Forward associations for "woman": rows where "woman" is the cue and the
# response was given more than once.
woman_fwd <- swow %>%
  filter(cue == "woman" & n_response > 1) %>%
  select(cue, response, strength, id) %>%
  mutate(
    rank = rank(desc(strength)), # strongest association gets the lowest rank
    type = "forward",            # direction of the association
    word = "woman",              # the word being associated to
    associate = response         # forward direction: the response is the associate
  )
# Backward associations for "woman": rows where "woman" is the response and
# that response was given more than once, ordered from strongest to weakest.
woman_bck <- swow %>%
  filter(response == "woman" & n_response > 1) %>%
  select(cue, response, strength, id) %>%
  arrange(desc(strength)) %>%
  mutate(
    rank = rank(desc(strength)), # strongest association gets the lowest rank
    type = "backward",           # direction of the association
    word = "woman",              # the word being associated to
    associate = cue              # backward direction: the cue is the associate
  )
# Forward associations for "man": rows where "man" is the cue and the
# response was given more than once.
man_fwd <- swow %>%
  filter(cue == "man" & n_response > 1) %>%
  select(-c(n_response, n_total)) %>% # drop the two count columns
  mutate(
    rank = rank(desc(strength)),      # rank of the association
    type = "forward",                 # direction of the association
    word = "man",                     # word being "associated to"
    associate = response              # word that is the "associate of"
  )
# Backward associations for "man": rows where "man" is the response and that
# response was given more than once, ordered from strongest to weakest.
man_bck <- swow %>%
  filter(response == "man" & n_response > 1) %>%
  select(!starts_with("n_")) %>% # keep everything except the "n_" columns
  arrange(desc(strength)) %>%
  mutate(
    rank = rank(desc(strength)), # rank of the association
    type = "backward",           # direction of the association
    word = "man",                # word being "associated to"
    associate = cue              # word that is the "associate of"
  )
Combine the data sets
# Stack the four association tables, keep the columns from `id` through
# `associate`, and drop rows whose associate is itself "man" or "woman".
gender <- list(woman_fwd, woman_bck, man_fwd, man_bck) %>%
  bind_rows() %>%
  select(id:associate) %>%
  filter(associate != "man" & associate != "woman")
Create and plot gender_bck
# One row per backward associate, with a column of ranks for each word
# ("woman", "man"). Ranks are inverted (1 / rank) so stronger associations
# score higher; an associate missing for one word scores 0 there.
# `diff` is the woman score minus the man score.
gender_bck <- gender %>%
  filter(type == "backward") %>%
  pivot_wider(id_cols = associate, names_from = word, values_from = rank) %>%
  mutate(
    across(c(woman, man), \(r) replace_na(1 / r, 0)),
    diff = woman - man
  ) %>%
  arrange(diff)
Here is how my initial graph looked (a total mess!)
# Horizontal bar chart of `diff` for every backward associate, with the bars
# ordered by `diff`.
picture_bck <- gender_bck %>%
  ggplot(aes(x = reorder(associate, diff), y = diff)) +
  geom_col() +
  coord_flip() # flip the axes so the associate labels read horizontally
plot(picture_bck)

Here is how I fixed this:
Based on Danielle’s image of the graph we were supposed to achieve,
I kept only the backward associations whose diff value is greater than that of
“girl” or less than that of “mister”.
# Keep only the associates at either extreme of `diff`. The two cut-offs are
# hard-coded; the accompanying text describes them as the diff values of
# "girl" (positive side) and "mister" (negative side).
gender_bck_less <- gender_bck %>%
  filter(diff < -0.05128205 | diff > 0.051282051)
This is the plot I achieved:
# Same horizontal bar chart as before, drawn from the trimmed-down table.
picture_bck_less <- gender_bck_less %>%
  ggplot(aes(x = reorder(associate, diff), y = diff)) +
  geom_col() +
  coord_flip() # flip the axes so the associate labels read horizontally
plot(picture_bck_less)

This looks correct! But I knew there had to be a more
straightforward way of getting here…
To avoid plotting all 477 words in the backward-association
condition, I should have filtered the gender data set to keep only the top
20 words associated with “man” and “woman”.
# Keep the top 20 associations for each word/direction combination.
# `rank <= 20` (rather than `rank < 20`) is needed so that the 20th-ranked
# association is included. rank() averages ties by default, so ranks can be
# fractional (e.g. 19.5); those are kept as well.
gender_filtered <- gender %>%
  filter(rank <= 20)
Let’s try the original code again, this time with
gender_filtered
# Rebuild the backward-association comparison table from the rank-filtered
# data: one row per associate, inverse-rank scores for "woman" and "man"
# (0 when the associate is absent for that word), and their difference.
gender_bck_filtered <- gender_filtered %>%
  filter(type == "backward") %>%
  pivot_wider(id_cols = associate, names_from = word, values_from = rank) %>%
  mutate(
    woman = replace_na(1 / woman, 0),
    man = replace_na(1 / man, 0)
  ) %>%
  mutate(diff = woman - man) %>%
  arrange(diff)
Final plot
# Final horizontal bar chart: `diff` for each remaining backward associate,
# with the bars ordered by `diff`.
picture_bck_filtered <- gender_bck_filtered %>%
  ggplot(aes(x = reorder(associate, diff), y = diff)) +
  geom_col() +
  coord_flip() # flip the axes so the associate labels read horizontally
plot(picture_bck_filtered)

Hooray!