Data description

CLEANED_RESPONSES_DF <- here("data/processed/character_norming/exp1/exp1_response_data.csv")
cleaned_responses_with_norms <- read_csv(CLEANED_RESPONSES_DF) %>%
    mutate(gender_group = fct_relevel(gender_group, "male-biased", "neutral")) 

Average number of judgments per book:

n_judgments <- cleaned_responses_with_norms %>%
  distinct(book_id, participant_id) %>%
  group_by(book_id) %>%
  count() %>%
  arrange(n) 

#kable(n_judgments)
mean(n_judgments$n)
## [1] 10.13333

There were about 4 words per question on average.

cleaned_responses_with_norms %>%
  group_by(participant_id, book_id, character_name,  question_type) %>%
  count() %>%
  group_by(question_type) %>%
  multi_boot_standard(col = "n") %>%
  kable()
question_type    ci_lower   ci_upper       mean
activity         3.903866   4.076577   3.990991
description      3.569069   3.746321   3.659159

Remove responses with the wrong part of speech or with 35 or more characters.

cleaned_responses_with_norms_filtered <- cleaned_responses_with_norms %>%
  filter(correct_pos %in% c("action", "description"))  %>%
  mutate(nchar = nchar(raw_response)) %>%
  filter(nchar < 35) # remove responses 35 chars or more (tend to be full sentences)

n_wrong_type <- nrow(cleaned_responses_with_norms) - nrow(cleaned_responses_with_norms_filtered)

206 responses were removed for being the wrong part of speech or too long.

After lemmatizing, here is the number of words (tokens) we have human judgments for:

cleaned_responses_with_norms_filtered %>%
  mutate(missing_human = is.na(human_gender_estimate_us)) %>%
  count(question_type, missing_human) %>%
  kable()
question_type   missing_human      n
activity        FALSE           2428
activity        TRUE              76
description     FALSE           2139
description     TRUE             246

After lemmatizing, here is the number of word types we have human judgments for:

cleaned_responses_with_norms_filtered %>%
  distinct(word_tidy_lemma, human_gender_estimate_us, question_type) %>%
  mutate(missing_human = is.na(human_gender_estimate_us)) %>%
  count(question_type, missing_human) %>%
  kable()
question_type   missing_human     n
activity        FALSE           312
activity        TRUE             76
description     FALSE           376
description     TRUE            246

Note that lemmatizing improves coverage, and our norms have better coverage than the Glasgow norms.
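
A rough sketch of how that coverage claim could be quantified, assuming a hypothetical glasgow_norms data frame with one row per word and a word column (the name is illustrative, not the actual Glasgow norms file):

# Share of lemmatized response types covered by our norms vs. a (hypothetical)
# glasgow_norms word list
cleaned_responses_with_norms_filtered %>%
  distinct(word_tidy_lemma, human_gender_estimate_us) %>%
  summarize(our_coverage     = mean(!is.na(human_gender_estimate_us)),
            glasgow_coverage = mean(word_tidy_lemma %in% glasgow_norms$word))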

Group means from human judgments

by_group_means <-  cleaned_responses_with_norms_filtered %>%
  mutate(gender_group = fct_relevel(gender_group, "male-biased", "neutral")) %>%
  filter(!is.na(human_gender_estimate_us)) %>%
  group_by(book_id, gender_group, question_type, participant_id) %>%
  summarize(mean_gender = mean(human_gender_estimate_us)) %>%
  group_by(book_id, gender_group, question_type) %>%
  summarize(mean_gender = mean(mean_gender)) %>%
  group_by(gender_group, question_type) %>%
  langcog::multi_boot_standard(col = "mean_gender")

ggplot(by_group_means, aes(x = gender_group, y = mean)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  #geom_bar(stat = "identity") +
  ylab("Human judgment of word female bias") +
  facet_wrap(~question_type) +
  theme_classic(base_size = 14)

Mixed-effects models:

lmer(human_gender_estimate_us ~  gender_group+ (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered %>% filter(question_type == "activity")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: human_gender_estimate_us ~ gender_group + (1 | book_id) + (1 |  
##     participant_id)
##    Data: cleaned_responses_with_norms_filtered %>% filter(question_type ==  
##     "activity")
## 
## REML criterion at convergence: 3455.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6672 -0.6274 -0.0273  0.5448  4.0229 
## 
## Random effects:
##  Groups         Name        Variance  Std.Dev.
##  participant_id (Intercept) 0.0004283 0.0207  
##  book_id        (Intercept) 0.0197927 0.1407  
##  Residual                   0.2342666 0.4840  
## Number of obs: 2428, groups:  participant_id, 150; book_id, 45
## 
## Fixed effects:
##                           Estimate Std. Error t value
## (Intercept)                2.99132    0.04044  73.978
## gender_groupneutral        0.02340    0.05763   0.406
## gender_groupfemale-biased  0.17627    0.05722   3.081
## 
## Correlation of Fixed Effects:
##             (Intr) gndr_g
## gndr_grpntr -0.701       
## gndr_grpfm- -0.706  0.495
lmer(human_gender_estimate_us ~  gender_group+ (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered %>% filter(question_type == "description")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: human_gender_estimate_us ~ gender_group + (1 | book_id) + (1 |  
##     participant_id)
##    Data: cleaned_responses_with_norms_filtered %>% filter(question_type ==  
##     "description")
## 
## REML criterion at convergence: 4604.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3162 -0.7368  0.0277  0.6952  2.8929 
## 
## Random effects:
##  Groups         Name        Variance Std.Dev.
##  participant_id (Intercept) 0.009943 0.09971 
##  book_id        (Intercept) 0.044869 0.21182 
##  Residual                   0.477149 0.69076 
## Number of obs: 2139, groups:  participant_id, 151; book_id, 45
## 
## Fixed effects:
##                           Estimate Std. Error t value
## (Intercept)                2.92654    0.06213  47.103
## gender_groupneutral        0.21220    0.08805   2.410
## gender_groupfemale-biased  0.42373    0.08700   4.871
## 
## Correlation of Fixed Effects:
##             (Intr) gndr_g
## gndr_grpntr -0.697       
## gndr_grpfm- -0.704  0.497

Estimates by participant gender

There is no interaction with participant gender.

META_DF <- here("data/processed/character_norming/exp1/exp1_meta_data.csv")
meta_df <- read_csv(META_DF) %>%
  rename(participant_gender = gender)

Participants by gender:

meta_df %>%
  count(participant_gender)
## # A tibble: 3 x 2
##   participant_gender     n
##   <chr>              <int>
## 1 female                81
## 2 male                  65
## 3 <NA>                   6
cleaned_responses_with_norms_filtered_with_gender <- cleaned_responses_with_norms_filtered %>%
  left_join(meta_df) 

by_group_means_p_gender <-  cleaned_responses_with_norms_filtered_with_gender %>%
  filter(!is.na(participant_gender)) %>%
  mutate(gender_group = fct_relevel(gender_group, "male-biased", "neutral")) %>%
  filter(!is.na(human_gender_estimate_us)) %>%
  group_by(book_id, gender_group, question_type, participant_id, participant_gender) %>%
  summarize(mean_gender = mean(human_gender_estimate_us)) %>%
  group_by(book_id, gender_group, question_type, participant_gender) %>%
  summarize(mean_gender = mean(mean_gender)) %>%
  group_by(gender_group, question_type, participant_gender) %>%
  langcog::multi_boot_standard(col = "mean_gender")

ggplot(by_group_means_p_gender, aes(x = gender_group, y = mean, color = participant_gender, group = participant_gender)) +
  geom_line() +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  #geom_bar(stat = "identity") +
  ylab("Human judgment of word female bias") +
  facet_wrap(~question_type) +
  theme_classic(base_size = 14)

Mixed-effects models:

lmer(human_gender_estimate_us ~  gender_group*participant_gender+ (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered_with_gender %>% filter(question_type == "activity")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: human_gender_estimate_us ~ gender_group * participant_gender +  
##     (1 | book_id) + (1 | participant_id)
##    Data: 
## cleaned_responses_with_norms_filtered_with_gender %>% filter(question_type ==  
##     "activity")
## 
## REML criterion at convergence: 3365.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6963 -0.6105 -0.0184  0.5627  3.9562 
## 
## Random effects:
##  Groups         Name        Variance  Std.Dev.
##  participant_id (Intercept) 0.0001815 0.01347 
##  book_id        (Intercept) 0.0189265 0.13757 
##  Residual                   0.2344407 0.48419 
## Number of obs: 2357, groups:  participant_id, 144; book_id, 45
## 
## Fixed effects:
##                                                  Estimate Std. Error t value
## (Intercept)                                       3.00036    0.04257  70.476
## gender_groupneutral                               0.04227    0.06092   0.694
## gender_groupfemale-biased                         0.15449    0.06011   2.570
## participant_gendermale                           -0.01926    0.03494  -0.551
## gender_groupneutral:participant_gendermale       -0.03632    0.05316  -0.683
## gender_groupfemale-biased:participant_gendermale  0.04643    0.05046   0.920
## 
## Correlation of Fixed Effects:
##             (Intr) gndr_g gndr_- prtcp_ gnd_:_
## gndr_grpntr -0.698                            
## gndr_grpfm- -0.708  0.495                     
## prtcpnt_gnd -0.358  0.250  0.253              
## gndr_grpn:_  0.235 -0.359 -0.166 -0.656       
## gndr_grp-:_  0.247 -0.173 -0.348 -0.690  0.454
lmer(human_gender_estimate_us ~  gender_group*participant_gender+ (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered_with_gender %>% filter(question_type == "description")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: human_gender_estimate_us ~ gender_group * participant_gender +  
##     (1 | book_id) + (1 | participant_id)
##    Data: 
## cleaned_responses_with_norms_filtered_with_gender %>% filter(question_type ==  
##     "description")
## 
## REML criterion at convergence: 4439.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3489 -0.7194  0.0395  0.6925  2.9032 
## 
## Random effects:
##  Groups         Name        Variance Std.Dev.
##  participant_id (Intercept) 0.009407 0.09699 
##  book_id        (Intercept) 0.042615 0.20643 
##  Residual                   0.476883 0.69057 
## Number of obs: 2060, groups:  participant_id, 145; book_id, 45
## 
## Fixed effects:
##                                                   Estimate Std. Error t value
## (Intercept)                                       2.914174   0.066406  43.884
## gender_groupneutral                               0.208142   0.093794   2.219
## gender_groupfemale-biased                         0.477448   0.092352   5.170
## participant_gendermale                            0.009057   0.058924   0.154
## gender_groupneutral:participant_gendermale        0.027778   0.084558   0.329
## gender_groupfemale-biased:participant_gendermale -0.122331   0.080393  -1.522
## 
## Correlation of Fixed Effects:
##             (Intr) gndr_g gndr_- prtcp_ gnd_:_
## gndr_grpntr -0.695                            
## gndr_grpfm- -0.704  0.500                     
## prtcpnt_gnd -0.394  0.266  0.266              
## gndr_grpn:_  0.260 -0.381 -0.187 -0.660       
## gndr_grp-:_  0.271 -0.195 -0.376 -0.685  0.482
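
As a sanity check on the no-interaction claim above, one could compare models with and without the interaction term via a likelihood-ratio test. This is only a sketch, not part of the original analysis; it reuses the objects defined above (shown here for the activity data) and fits with ML for the comparison.

# Likelihood-ratio (chi-square) test of the gender_group x participant_gender interaction
lmer_full <- lmer(human_gender_estimate_us ~ gender_group * participant_gender +
                    (1 | book_id) + (1 | participant_id),
                  data = cleaned_responses_with_norms_filtered_with_gender %>%
                    filter(question_type == "activity"),
                  REML = FALSE)
lmer_no_int <- update(lmer_full, . ~ . - gender_group:participant_gender)
anova(lmer_no_int, lmer_full)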

Estimates by book familiarity

For description, the effect is bigger for people who are not familiar with the text (i.e., people less likely to take the pictures into account).

FAM_DF <- here("data/processed/character_norming/exp1/exp1_familiarity_data.csv")
fam_df <- read_csv(FAM_DF) %>%
  rename(familiarity = response)

cleaned_responses_with_norms_filtered_fam <- cleaned_responses_with_norms_filtered %>%
  left_join(fam_df)

by_group_means_fam <-  cleaned_responses_with_norms_filtered_fam %>%
  mutate(gender_group = fct_relevel(gender_group, "male-biased", "neutral")) %>%
  filter(!is.na(human_gender_estimate_us)) %>%
  group_by(book_id, gender_group, question_type, participant_id, familiarity) %>%
  summarize(mean_gender = mean(human_gender_estimate_us)) %>%
  group_by(book_id, gender_group, question_type, familiarity) %>%
  summarize(mean_gender = mean(mean_gender)) %>%
  group_by(gender_group, question_type, familiarity) %>%
  langcog::multi_boot_standard(col = "mean_gender")

ggplot(by_group_means_fam, aes(x = gender_group, y = mean, color = familiarity, group = familiarity)) +
  geom_line() +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  ylab("Human judgment of word female bias") +
  #geom_bar(stat = "identity") +
  facet_wrap(~question_type) +
  theme_classic(base_size = 14)

lmer(human_gender_estimate_us ~  gender_group*familiarity+ (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered_fam %>% filter(question_type == "activity")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: human_gender_estimate_us ~ gender_group * familiarity + (1 |  
##     book_id) + (1 | participant_id)
##    Data: cleaned_responses_with_norms_filtered_fam %>% filter(question_type ==  
##     "activity")
## 
## REML criterion at convergence: 3469.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6643 -0.6237 -0.0294  0.5448  4.0167 
## 
## Random effects:
##  Groups         Name        Variance  Std.Dev.
##  participant_id (Intercept) 0.0005712 0.0239  
##  book_id        (Intercept) 0.0198384 0.1408  
##  Residual                   0.2344157 0.4842  
## Number of obs: 2428, groups:  participant_id, 150; book_id, 45
## 
## Fixed effects:
##                                                    Estimate Std. Error t value
## (Intercept)                                        2.983179   0.048301  61.762
## gender_groupneutral                                0.027992   0.069312   0.404
## gender_groupfemale-biased                          0.187553   0.069159   2.712
## familiaritynot familiar                            0.011615   0.037787   0.307
## gender_groupneutral:familiaritynot familiar       -0.006126   0.057503  -0.107
## gender_groupfemale-biased:familiaritynot familiar -0.015962   0.054169  -0.295
## 
## Correlation of Fixed Effects:
##             (Intr) gndr_g gndr_- fmlrtf gnd_:f
## gndr_grpntr -0.695                            
## gndr_grpfm- -0.696  0.485                     
## fmlrtyntfml -0.545  0.378  0.378              
## gndr_grpn:f  0.357 -0.554 -0.248 -0.655       
## gndr_grp-:f  0.377 -0.262 -0.560 -0.693  0.455
lmer(human_gender_estimate_us ~  gender_group*familiarity+ (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered_fam %>% filter(question_type == "description")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: human_gender_estimate_us ~ gender_group * familiarity + (1 |  
##     book_id) + (1 | participant_id)
##    Data: cleaned_responses_with_norms_filtered_fam %>% filter(question_type ==  
##     "description")
## 
## REML criterion at convergence: 4609.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2963 -0.7250  0.0323  0.7049  2.7617 
## 
## Random effects:
##  Groups         Name        Variance Std.Dev.
##  participant_id (Intercept) 0.009931 0.09966 
##  book_id        (Intercept) 0.044148 0.21012 
##  Residual                   0.476740 0.69046 
## Number of obs: 2139, groups:  participant_id, 151; book_id, 45
## 
## Fixed effects:
##                                                   Estimate Std. Error t value
## (Intercept)                                        3.01300    0.07551  39.901
## gender_groupneutral                                0.16091    0.10729   1.500
## gender_groupfemale-biased                          0.30034    0.10580   2.839
## familiaritynot familiar                           -0.12518    0.06275  -1.995
## gender_groupneutral:familiaritynot familiar        0.07300    0.09192   0.794
## gender_groupfemale-biased:familiaritynot familiar  0.17607    0.08612   2.045
## 
## Correlation of Fixed Effects:
##             (Intr) gndr_g gndr_- fmlrtf gnd_:f
## gndr_grpntr -0.691                            
## gndr_grpfm- -0.695  0.489                     
## fmlrtyntfml -0.576  0.395  0.394              
## gndr_grpn:f  0.383 -0.579 -0.270 -0.666       
## gndr_grp-:f  0.401 -0.281 -0.577 -0.702  0.481

Gender estimates from wiki model

The pattern here is the same as for the human judgments, but weaker.

WIKI_WORD_DF <- here("data/processed/character_norming/exp1/response_embedding_gender_scores.csv")

wiki_word <- read_csv(WIKI_WORD_DF)
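
The CSV provides a precomputed gender score (male_score) per word. For intuition only, a common way to derive such a score is to compare a word's embedding to a gender direction (e.g., "he" vs. "she"); the sketch below assumes a hypothetical wiki_vectors matrix of word vectors and is not necessarily how the file above was generated.

# Illustrative only: relative similarity to "he" vs. "she" in the embedding space.
# `wiki_vectors` is a hypothetical matrix of word vectors (rownames = words).
cosine_sim <- function(a, b) sum(a * b) / (sqrt(sum(a^2)) * sqrt(sum(b^2)))

male_score_sketch <- function(word, wiki_vectors) {
  cosine_sim(wiki_vectors[word, ], wiki_vectors["he", ]) -
    cosine_sim(wiki_vectors[word, ], wiki_vectors["she", ])
}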

cleaned_responses_with_norms_filtered_wiki <- cleaned_responses_with_norms_filtered %>%
  left_join(wiki_word, by = c("word_tidy_lemma" = "word"))
by_group_means_wiki <-  cleaned_responses_with_norms_filtered_wiki %>%
  mutate(gender_group = fct_relevel(gender_group, "male-biased", "neutral")) %>%
  filter(!is.na(male_score)) %>%
  group_by(book_id, gender_group, question_type, participant_id) %>%
  summarize(male_score = mean(-male_score)) %>% # negate so higher = more female-associated
  group_by(book_id, gender_group, question_type) %>%
  summarize(male_score = mean(male_score)) %>%
  group_by(gender_group, question_type) %>%
  langcog::multi_boot_standard(col = "male_score")

ggplot(by_group_means_wiki, aes(x = gender_group, y = mean)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  ylab("female embedding score") +
  #geom_bar(stat = "identity") +
  facet_wrap(~question_type) +
  theme_classic(base_size = 14)

Mixed-effects models:

lmer(male_score ~  gender_group+ (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered_wiki %>% filter(question_type == "activity")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: male_score ~ gender_group + (1 | book_id) + (1 | participant_id)
##    Data: 
## cleaned_responses_with_norms_filtered_wiki %>% filter(question_type ==  
##     "activity")
## 
## REML criterion at convergence: -10378.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.6139 -0.5416  0.0990  0.5520  2.7778 
## 
## Random effects:
##  Groups         Name        Variance  Std.Dev.
##  participant_id (Intercept) 0.000e+00 0.000000
##  book_id        (Intercept) 8.297e-05 0.009109
##  Residual                   8.877e-04 0.029794
## Number of obs: 2503, groups:  participant_id, 150; book_id, 45
## 
## Fixed effects:
##                            Estimate Std. Error t value
## (Intercept)                0.001570   0.002585   0.607
## gender_groupneutral        0.001370   0.003682   0.372
## gender_groupfemale-biased -0.005752   0.003658  -1.572
## 
## Correlation of Fixed Effects:
##             (Intr) gndr_g
## gndr_grpntr -0.702       
## gndr_grpfm- -0.707  0.496
## convergence code: 0
## boundary (singular) fit: see ?isSingular
lmer(male_score ~  gender_group+ (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered_wiki %>% filter(question_type == "description")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: male_score ~ gender_group + (1 | book_id) + (1 | participant_id)
##    Data: 
## cleaned_responses_with_norms_filtered_wiki %>% filter(question_type ==  
##     "description")
## 
## REML criterion at convergence: -9255.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8642 -0.4314  0.0795  0.6057  3.0634 
## 
## Random effects:
##  Groups         Name        Variance  Std.Dev.
##  participant_id (Intercept) 2.461e-05 0.004961
##  book_id        (Intercept) 3.705e-05 0.006087
##  Residual                   1.151e-03 0.033927
## Number of obs: 2384, groups:  participant_id, 152; book_id, 45
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)                0.0008024  0.0020794   0.386
## gender_groupneutral       -0.0071561  0.0029312  -2.441
## gender_groupfemale-biased -0.0140291  0.0028826  -4.867
## 
## Correlation of Fixed Effects:
##             (Intr) gndr_g
## gndr_grpntr -0.689       
## gndr_grpfm- -0.699  0.497
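
Note the singular fit flagged for the activity model above (the participant_id variance is estimated at essentially zero). If one wanted to avoid the singularity, a possible refit that simply drops that random intercept is sketched below; this is not part of the original analysis.

# Sketch: refit the activity embedding model without the near-zero participant random intercept
lmer(male_score ~ gender_group + (1 | book_id),
     data = cleaned_responses_with_norms_filtered_wiki %>%
       filter(question_type == "activity")) %>%
  summary()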