Wk 3 Exercise 7:

Here, I will recount my journey plotting the gender_bck dataset.

Load packages

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Import data

# Read the word-association data and give its columns friendlier names.
# Despite the ".csv" extension the file is tab-delimited, hence read_tsv().
swow <- "swow.csv" %>%
  read_tsv() %>%
  # Row identifier. row_number() is used rather than 1:n() because 1:n()
  # evaluates to c(1, 0) on a zero-row table, which makes mutate() fail.
  mutate(id = row_number()) %>%
  rename(
    n_response = R1,        # response count
    n_total = N,            # total cue presentations
    strength = R1.Strength  # estimated response strength
  )
## Rows: 483636 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): cue, response
## dbl (3): R1, N, R1.Strength
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Words associated with “man” and “woman”

(copied and pasted from Exercise 7)

# Forward associates of "woman": responses given to the cue "woman",
# keeping only those with a response count above 1.
woman_fwd <- swow %>%
  filter(cue == "woman" & n_response > 1) %>%
  select(cue, response, strength, id) %>%
  mutate(
    rank = rank(desc(strength)),  # strongest association gets the lowest rank
    type = "forward",             # direction of the association
    word = "woman",               # the word being associated to
    associate = response          # forward direction: the response is the associate
  )

# Backward associates of "woman": cues that produced "woman" as the
# response, keeping only those with a response count above 1.
woman_bck <- swow %>%
  filter(response == "woman" & n_response > 1) %>%
  arrange(desc(strength)) %>%     # strongest associations first
  select(cue, response, strength, id) %>%
  mutate(
    rank = rank(desc(strength)),  # strongest association gets the lowest rank
    type = "backward",            # direction of the association
    word = "woman",               # the word being associated to
    associate = cue               # backward direction: the cue is the associate
  )

# Forward associates of "man": responses given to the cue "man",
# keeping only those with a response count above 1.
man_fwd <- swow %>%
  filter(cue == "man" & n_response > 1) %>%
  select(-c(n_response, n_total)) %>%  # drop the two count columns
  mutate(
    rank = rank(desc(strength)),  # strongest association gets the lowest rank
    type = "forward",             # direction of the association
    word = "man",                 # the word being associated to
    associate = response          # forward direction: the response is the associate
  )

# Backward associates of "man": cues that produced "man" as the
# response, keeping only those with a response count above 1.
man_bck <- swow %>%
  filter(response == "man" & n_response > 1) %>%
  arrange(desc(strength)) %>%     # strongest associations first
  select(!starts_with("n_")) %>%  # drop every column whose name starts with "n_"
  mutate(
    rank = rank(desc(strength)),  # strongest association gets the lowest rank
    type = "backward",            # direction of the association
    word = "man",                 # the word being associated to
    associate = cue               # backward direction: the cue is the associate
  )

Combine the data sets

# Stack the four association tables into one long table, keep the columns
# from id through associate, and drop rows whose associate is itself
# "man" or "woman".
gender <- bind_rows(woman_fwd, woman_bck, man_fwd, man_bck) %>%
  select(id, rank, type, word, associate) %>%
  filter(associate != "man" & associate != "woman")

Create and plot gender_bck

# One row per backward associate, with a reciprocal-rank score for each
# word and the difference between the two scores.
gender_bck <- gender %>%
  filter(type == "backward") %>%
  pivot_wider(id_cols = associate, names_from = word, values_from = rank) %>%
  mutate(
    # Reciprocal rank; an associate with no rank for a word scores 0.
    woman = replace_na(1 / woman, 0),
    man = replace_na(1 / man, 0)
  ) %>%
  mutate(diff = woman - man) %>%  # positive: scores higher for "woman"
  arrange(diff)

Here is how my initial graph looked (a total mess!)

# Horizontal bar chart of diff for every backward associate,
# with the bars ordered by diff.
picture_bck <- gender_bck %>%
  ggplot(aes(x = reorder(associate, diff), y = diff)) +
  geom_col() +
  coord_flip()  # put the associates on the vertical axis

plot(picture_bck)

Here is how I fixed this:

Based on Danielle’s image of the graph we were supposed to achieve, I kept only the backward associations whose diff value is greater than that of “girl” or less than that of “mister”.

# Keep only the associates at either extreme of diff.
# NOTE(review): the cut-offs are hand-typed, truncated decimals, presumably
# copied from the printed diff values of "girl" and "mister" - confirm.
gender_bck_less <- gender_bck %>%
  filter(diff < -0.05128205 | diff > 0.051282051)

This is the plot I achieved:

# Same horizontal bar chart, restricted to the associates kept in
# gender_bck_less.
picture_bck_less <- gender_bck_less %>%
  ggplot(aes(x = reorder(associate, diff), y = diff)) +
  geom_col() +
  coord_flip()  # put the associates on the vertical axis

plot(picture_bck_less)

This looks correct! But I knew there had to be a more straightforward way of getting here…

To avoid plotting all 477 associates in the backward association condition, I should have filtered the gender data set to keep only roughly the top 20 words (those with a rank below 20) associated with “man” and “woman”.

# Keep only rows whose rank is strictly below 20.
gender_filtered <- gender %>%
  filter(rank < 20)

Let’s try the original code again, this time with gender_filtered

# Same reshaping as for gender_bck, starting from the rank-filtered table:
# one row per backward associate, a reciprocal-rank score for each word,
# and the difference between the two scores.
gender_bck_filtered <- gender_filtered %>%
  filter(type == "backward") %>%
  pivot_wider(id_cols = associate, names_from = word, values_from = rank) %>%
  mutate(
    # Reciprocal rank; an associate with no rank for a word scores 0.
    woman = replace_na(1 / woman, 0),
    man = replace_na(1 / man, 0)
  ) %>%
  mutate(diff = woman - man) %>%  # positive: scores higher for "woman"
  arrange(diff)

Final plot

# Final horizontal bar chart, built from the rank-filtered backward
# associates.
picture_bck_filtered <- gender_bck_filtered %>%
  ggplot(aes(x = reorder(associate, diff), y = diff)) +
  geom_col() +
  coord_flip()  # put the associates on the vertical axis

plot(picture_bck_filtered)

Hooray!