CLEANED_RESPONSES_DF <- here("data/processed/character_norming/exp1/exp1_response_data.csv")
cleaned_responses_with_norms <- read_csv(CLEANED_RESPONSES_DF) %>%
  mutate(gender_group = fct_relevel(gender_group, "male-biased", "neutral"))
Average number of judgments per book:
n_judgments <- cleaned_responses_with_norms %>%
  distinct(book_id, participant_id) %>%
  group_by(book_id) %>%
  count() %>%
  arrange(n)
#kable(n_judgments)
mean(n_judgments$n)
## [1] 10.13333
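The commented-out kable(n_judgments) call above would print the full per-book table; a minimal sketch of the spread around this mean, using the same table:
# Spread of judgment counts across books (same n_judgments table as above)
range(n_judgments$n)
sd(n_judgments$n)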
Participants gave about 4 words per question on average:
cleaned_responses_with_norms %>%
  group_by(participant_id, book_id, character_name, question_type) %>%
  count() %>%
  group_by(question_type) %>%
  multi_boot_standard(col = "n") %>%
  kable()
| question_type | ci_lower | ci_upper | mean |
|---|---|---|---|
| activity | 3.903866 | 4.076577 | 3.990991 |
| description | 3.569069 | 3.746321 | 3.659159 |
Remove responses with the wrong part of speech or with 35 or more characters (these tend to be full sentences).
cleaned_responses_with_norms_filtered <- cleaned_responses_with_norms %>%
  filter(correct_pos %in% c("action", "description")) %>%
  mutate(nchar = nchar(raw_response)) %>%
  filter(nchar < 35) # remove responses 35 chars or more (tend to be full sentences)
n_wrong_type <- nrow(cleaned_responses_with_norms) - nrow(cleaned_responses_with_norms_filtered)
206 responses were removed for being the wrong part of speech or too long.
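As a sanity check on that count, a minimal sketch that breaks the exclusions down by reason, mirroring the filters above (a response can meet both criteria; the breakdown itself is not reported here):
# Breakdown of excluded responses: wrong part of speech vs. 35+ characters
cleaned_responses_with_norms %>%
  mutate(nchar = nchar(raw_response)) %>%
  filter(!(correct_pos %in% c("action", "description")) | nchar >= 35) %>%
  count(wrong_pos = !(correct_pos %in% c("action", "description")),
        too_long = nchar >= 35)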
After lemmatizing, here is how many words (tokens) we have human judgments for:
cleaned_responses_with_norms_filtered %>%
  mutate(missing_human = is.na(human_gender_estimate_us)) %>%
  count(question_type, missing_human) %>%
  kable()
| question_type | missing_human | n |
|---|---|---|
| activity | FALSE | 2428 |
| activity | TRUE | 76 |
| description | FALSE | 2139 |
| description | TRUE | 246 |
After lemmatizing, here is how many word types we have human judgments for:
cleaned_responses_with_norms_filtered %>%
  distinct(word_tidy_lemma, human_gender_estimate_us, question_type) %>%
  mutate(missing_human = is.na(human_gender_estimate_us)) %>%
  count(question_type, missing_human) %>%
  kable()
| question_type | missing_human | n |
|---|---|---|
| activity | FALSE | 312 |
| activity | TRUE | 76 |
| description | FALSE | 376 |
| description | TRUE | 246 |
Note that lemmatizing helps, and our norms have better coverage than the Glasgow norms.
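One way that coverage comparison could be computed, as a rough sketch; the glasgow_norms data frame (one row per word, with a word column) is an assumption and is not loaded in this document:
# Proportion of response types covered by our norms vs. the (assumed) Glasgow norms
cleaned_responses_with_norms_filtered %>%
  distinct(word_tidy_lemma, human_gender_estimate_us) %>%
  summarize(our_coverage = mean(!is.na(human_gender_estimate_us)),
            glasgow_coverage = mean(word_tidy_lemma %in% glasgow_norms$word))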
by_group_means <- cleaned_responses_with_norms_filtered %>%
  mutate(gender_group = fct_relevel(gender_group, "male-biased", "neutral")) %>%
  filter(!is.na(human_gender_estimate_us)) %>%
  group_by(book_id, gender_group, question_type, participant_id) %>%
  summarize(mean_gender = mean(human_gender_estimate_us)) %>%
  group_by(book_id, gender_group, question_type) %>%
  summarize(mean_gender = mean(mean_gender)) %>%
  group_by(gender_group, question_type) %>%
  langcog::multi_boot_standard(col = "mean_gender")
ggplot(by_group_means, aes(x = gender_group, y = mean)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  #geom_bar(stat = "identity") +
  ylab("Human judgment of word female bias") +
  facet_wrap(~question_type) +
  theme_classic(base_size = 14)
Mixed-effects models:
lmer(human_gender_estimate_us ~ gender_group + (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered %>% filter(question_type == "activity")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: human_gender_estimate_us ~ gender_group + (1 | book_id) + (1 |
## participant_id)
## Data: cleaned_responses_with_norms_filtered %>% filter(question_type ==
## "activity")
##
## REML criterion at convergence: 3455.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6672 -0.6274 -0.0273 0.5448 4.0229
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.0004283 0.0207
## book_id (Intercept) 0.0197927 0.1407
## Residual 0.2342666 0.4840
## Number of obs: 2428, groups: participant_id, 150; book_id, 45
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.99132 0.04044 73.978
## gender_groupneutral 0.02340 0.05763 0.406
## gender_groupfemale-biased 0.17627 0.05722 3.081
##
## Correlation of Fixed Effects:
## (Intr) gndr_g
## gndr_grpntr -0.701
## gndr_grpfm- -0.706 0.495
lmer(human_gender_estimate_us ~ gender_group + (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered %>% filter(question_type == "description")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: human_gender_estimate_us ~ gender_group + (1 | book_id) + (1 |
## participant_id)
## Data: cleaned_responses_with_norms_filtered %>% filter(question_type ==
## "description")
##
## REML criterion at convergence: 4604.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3162 -0.7368 0.0277 0.6952 2.8929
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.009943 0.09971
## book_id (Intercept) 0.044869 0.21182
## Residual 0.477149 0.69076
## Number of obs: 2139, groups: participant_id, 151; book_id, 45
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.92654 0.06213 47.103
## gender_groupneutral 0.21220 0.08805 2.410
## gender_groupfemale-biased 0.42373 0.08700 4.871
##
## Correlation of Fixed Effects:
## (Intr) gndr_g
## gndr_grpntr -0.697
## gndr_grpfm- -0.704 0.497
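The lmerMod summaries above report t values only. If p-values are wanted, a minimal sketch refitting the description model with lmerTest (assuming the package is available; Satterthwaite approximation by default):
# Same description model, with Satterthwaite df and p-values for the fixed effects
library(lmerTest)
lmerTest::lmer(human_gender_estimate_us ~ gender_group + (1|book_id) + (1|participant_id),
               data = cleaned_responses_with_norms_filtered %>%
                 filter(question_type == "description")) %>%
  summary()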
There is no interaction with participant gender.
META_DF <- here("data/processed/character_norming/exp1/exp1_meta_data.csv")
meta_df <- read_csv(META_DF) %>%
  rename(participant_gender = gender)
Participants by gender:
meta_df %>%
  count(participant_gender)
## # A tibble: 3 x 2
## participant_gender n
## <chr> <int>
## 1 female 81
## 2 male 65
## 3 <NA> 6
cleaned_responses_with_norms_filtered_with_gender <- cleaned_responses_with_norms_filtered %>%
  left_join(meta_df)
by_group_means_p_gender <- cleaned_responses_with_norms_filtered_with_gender %>%
  filter(!is.na(participant_gender)) %>%
  mutate(gender_group = fct_relevel(gender_group, "male-biased", "neutral")) %>%
  filter(!is.na(human_gender_estimate_us)) %>%
  group_by(book_id, gender_group, question_type, participant_id, participant_gender) %>%
  summarize(mean_gender = mean(human_gender_estimate_us)) %>%
  group_by(book_id, gender_group, question_type, participant_gender) %>%
  summarize(mean_gender = mean(mean_gender)) %>%
  group_by(gender_group, question_type, participant_gender) %>%
  langcog::multi_boot_standard(col = "mean_gender")
ggplot(by_group_means_p_gender, aes(x = gender_group, y = mean, color = participant_gender, group = participant_gender)) +
  geom_line() +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  #geom_bar(stat = "identity") +
  ylab("Human judgment of word female bias") +
  facet_wrap(~question_type) +
  theme_classic(base_size = 14)
Mixed-effects models:
lmer(human_gender_estimate_us ~ gender_group * participant_gender + (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered_with_gender %>% filter(question_type == "activity")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: human_gender_estimate_us ~ gender_group * participant_gender +
## (1 | book_id) + (1 | participant_id)
## Data:
## cleaned_responses_with_norms_filtered_with_gender %>% filter(question_type ==
## "activity")
##
## REML criterion at convergence: 3365.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6963 -0.6105 -0.0184 0.5627 3.9562
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.0001815 0.01347
## book_id (Intercept) 0.0189265 0.13757
## Residual 0.2344407 0.48419
## Number of obs: 2357, groups: participant_id, 144; book_id, 45
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.00036 0.04257 70.476
## gender_groupneutral 0.04227 0.06092 0.694
## gender_groupfemale-biased 0.15449 0.06011 2.570
## participant_gendermale -0.01926 0.03494 -0.551
## gender_groupneutral:participant_gendermale -0.03632 0.05316 -0.683
## gender_groupfemale-biased:participant_gendermale 0.04643 0.05046 0.920
##
## Correlation of Fixed Effects:
## (Intr) gndr_g gndr_- prtcp_ gnd_:_
## gndr_grpntr -0.698
## gndr_grpfm- -0.708 0.495
## prtcpnt_gnd -0.358 0.250 0.253
## gndr_grpn:_ 0.235 -0.359 -0.166 -0.656
## gndr_grp-:_ 0.247 -0.173 -0.348 -0.690 0.454
lmer(human_gender_estimate_us ~ gender_group * participant_gender + (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered_with_gender %>% filter(question_type == "description")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: human_gender_estimate_us ~ gender_group * participant_gender +
## (1 | book_id) + (1 | participant_id)
## Data:
## cleaned_responses_with_norms_filtered_with_gender %>% filter(question_type ==
## "description")
##
## REML criterion at convergence: 4439.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3489 -0.7194 0.0395 0.6925 2.9032
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.009407 0.09699
## book_id (Intercept) 0.042615 0.20643
## Residual 0.476883 0.69057
## Number of obs: 2060, groups: participant_id, 145; book_id, 45
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.914174 0.066406 43.884
## gender_groupneutral 0.208142 0.093794 2.219
## gender_groupfemale-biased 0.477448 0.092352 5.170
## participant_gendermale 0.009057 0.058924 0.154
## gender_groupneutral:participant_gendermale 0.027778 0.084558 0.329
## gender_groupfemale-biased:participant_gendermale -0.122331 0.080393 -1.522
##
## Correlation of Fixed Effects:
## (Intr) gndr_g gndr_- prtcp_ gnd_:_
## gndr_grpntr -0.695
## gndr_grpfm- -0.704 0.500
## prtcpnt_gnd -0.394 0.266 0.266
## gndr_grpn:_ 0.260 -0.381 -0.187 -0.660
## gndr_grp-:_ 0.271 -0.195 -0.376 -0.685 0.482
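A more direct way to back the no-interaction claim is a likelihood-ratio test comparing models with and without the interaction; a hedged sketch for the description data (the desc_gender_data name is introduced here; models refit with ML for the comparison):
# LRT for the gender_group x participant_gender interaction (description trials)
desc_gender_data <- cleaned_responses_with_norms_filtered_with_gender %>%
  filter(question_type == "description", !is.na(participant_gender))
m_full <- lmer(human_gender_estimate_us ~ gender_group * participant_gender +
                 (1|book_id) + (1|participant_id),
               data = desc_gender_data, REML = FALSE)
m_reduced <- lmer(human_gender_estimate_us ~ gender_group + participant_gender +
                    (1|book_id) + (1|participant_id),
                  data = desc_gender_data, REML = FALSE)
anova(m_reduced, m_full)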
For descriptions, the effect is larger for participants who are not familiar with the text (i.e., those less likely to take the pictures into account).
FAM_DF <- here("data/processed/character_norming/exp1/exp1_familiarity_data.csv")
fam_df <- read_csv(FAM_DF) %>%
  rename(familiarity = response)
cleaned_responses_with_norms_filtered_fam <- cleaned_responses_with_norms_filtered %>%
  left_join(fam_df)
by_group_means_fam <- cleaned_responses_with_norms_filtered_fam %>%
  mutate(gender_group = fct_relevel(gender_group, "male-biased", "neutral")) %>%
  filter(!is.na(human_gender_estimate_us)) %>%
  group_by(book_id, gender_group, question_type, participant_id, familiarity) %>%
  summarize(mean_gender = mean(human_gender_estimate_us)) %>%
  group_by(book_id, gender_group, question_type, familiarity) %>%
  summarize(mean_gender = mean(mean_gender)) %>%
  group_by(gender_group, question_type, familiarity) %>%
  langcog::multi_boot_standard(col = "mean_gender")
ggplot(by_group_means_fam, aes(x = gender_group, y = mean, color = familiarity, group = familiarity)) +
  geom_line() +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  ylab("Human judgment of word female bias") +
  #geom_bar(stat = "identity") +
  facet_wrap(~question_type) +
  theme_classic(base_size = 14)
lmer(human_gender_estimate_us ~ gender_group * familiarity + (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered_fam %>% filter(question_type == "activity")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: human_gender_estimate_us ~ gender_group * familiarity + (1 |
## book_id) + (1 | participant_id)
## Data: cleaned_responses_with_norms_filtered_fam %>% filter(question_type ==
## "activity")
##
## REML criterion at convergence: 3469.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6643 -0.6237 -0.0294 0.5448 4.0167
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.0005712 0.0239
## book_id (Intercept) 0.0198384 0.1408
## Residual 0.2344157 0.4842
## Number of obs: 2428, groups: participant_id, 150; book_id, 45
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.983179 0.048301 61.762
## gender_groupneutral 0.027992 0.069312 0.404
## gender_groupfemale-biased 0.187553 0.069159 2.712
## familiaritynot familiar 0.011615 0.037787 0.307
## gender_groupneutral:familiaritynot familiar -0.006126 0.057503 -0.107
## gender_groupfemale-biased:familiaritynot familiar -0.015962 0.054169 -0.295
##
## Correlation of Fixed Effects:
## (Intr) gndr_g gndr_- fmlrtf gnd_:f
## gndr_grpntr -0.695
## gndr_grpfm- -0.696 0.485
## fmlrtyntfml -0.545 0.378 0.378
## gndr_grpn:f 0.357 -0.554 -0.248 -0.655
## gndr_grp-:f 0.377 -0.262 -0.560 -0.693 0.455
lmer(human_gender_estimate_us ~ gender_group * familiarity + (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered_fam %>% filter(question_type == "description")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: human_gender_estimate_us ~ gender_group * familiarity + (1 |
## book_id) + (1 | participant_id)
## Data: cleaned_responses_with_norms_filtered_fam %>% filter(question_type ==
## "description")
##
## REML criterion at convergence: 4609.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2963 -0.7250 0.0323 0.7049 2.7617
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.009931 0.09966
## book_id (Intercept) 0.044148 0.21012
## Residual 0.476740 0.69046
## Number of obs: 2139, groups: participant_id, 151; book_id, 45
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.01300 0.07551 39.901
## gender_groupneutral 0.16091 0.10729 1.500
## gender_groupfemale-biased 0.30034 0.10580 2.839
## familiaritynot familiar -0.12518 0.06275 -1.995
## gender_groupneutral:familiaritynot familiar 0.07300 0.09192 0.794
## gender_groupfemale-biased:familiaritynot familiar 0.17607 0.08612 2.045
##
## Correlation of Fixed Effects:
## (Intr) gndr_g gndr_- fmlrtf gnd_:f
## gndr_grpntr -0.691
## gndr_grpfm- -0.695 0.489
## fmlrtyntfml -0.576 0.395 0.394
## gndr_grpn:f 0.383 -0.579 -0.270 -0.666
## gndr_grp-:f 0.401 -0.281 -0.577 -0.702 0.481
The pattern for the word-embedding scores is the same as for the human judgments, but weaker.
WIKI_WORD_DF <- here("data/processed/character_norming/exp1/response_embedding_gender_scores.csv")
wiki_word <- read_csv(WIKI_WORD_DF)
cleaned_responses_with_norms_filtered_wiki <- cleaned_responses_with_norms_filtered %>%
  left_join(wiki_word, by = c("word_tidy_lemma" = "word"))
by_group_means_wiki <- cleaned_responses_with_norms_filtered_wiki %>%
  mutate(gender_group = fct_relevel(gender_group, "male-biased", "neutral")) %>%
  filter(!is.na(male_score)) %>%
  group_by(book_id, gender_group, question_type, participant_id) %>%
  summarize(male_score = mean(-male_score)) %>%
  group_by(book_id, gender_group, question_type) %>%
  summarize(male_score = mean(male_score)) %>%
  group_by(gender_group, question_type) %>%
  langcog::multi_boot_standard(col = "male_score")
ggplot(by_group_means_wiki, aes(x = gender_group, y = mean)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  ylab("female embedding score") +
  #geom_bar(stat = "identity") +
  facet_wrap(~question_type) +
  theme_classic(base_size = 14)
Mixed-effects models:
lmer(male_score ~ gender_group + (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered_wiki %>% filter(question_type == "activity")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: male_score ~ gender_group + (1 | book_id) + (1 | participant_id)
## Data:
## cleaned_responses_with_norms_filtered_wiki %>% filter(question_type ==
## "activity")
##
## REML criterion at convergence: -10378.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.6139 -0.5416 0.0990 0.5520 2.7778
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.000e+00 0.000000
## book_id (Intercept) 8.297e-05 0.009109
## Residual 8.877e-04 0.029794
## Number of obs: 2503, groups: participant_id, 150; book_id, 45
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.001570 0.002585 0.607
## gender_groupneutral 0.001370 0.003682 0.372
## gender_groupfemale-biased -0.005752 0.003658 -1.572
##
## Correlation of Fixed Effects:
## (Intr) gndr_g
## gndr_grpntr -0.702
## gndr_grpfm- -0.707 0.496
## convergence code: 0
## boundary (singular) fit: see ?isSingular
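The singular fit reflects the participant_id variance being estimated at zero; a minimal sketch of the same activity model with that term dropped (the fixed effects should be essentially unchanged):
# Refit without the zero-variance participant random intercept
lmer(male_score ~ gender_group + (1|book_id),
     data = cleaned_responses_with_norms_filtered_wiki %>%
       filter(question_type == "activity")) %>%
  summary()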
lmer(male_score ~ gender_group + (1|book_id) + (1|participant_id),
     data = cleaned_responses_with_norms_filtered_wiki %>% filter(question_type == "description")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: male_score ~ gender_group + (1 | book_id) + (1 | participant_id)
## Data:
## cleaned_responses_with_norms_filtered_wiki %>% filter(question_type ==
## "description")
##
## REML criterion at convergence: -9255.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8642 -0.4314 0.0795 0.6057 3.0634
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 2.461e-05 0.004961
## book_id (Intercept) 3.705e-05 0.006087
## Residual 1.151e-03 0.033927
## Number of obs: 2384, groups: participant_id, 152; book_id, 45
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.0008024 0.0020794 0.386
## gender_groupneutral -0.0071561 0.0029312 -2.441
## gender_groupfemale-biased -0.0140291 0.0028826 -4.867
##
## Correlation of Fixed Effects:
## (Intr) gndr_g
## gndr_grpntr -0.689
## gndr_grpfm- -0.699 0.497
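To quantify the "same pattern but weaker" claim at the word level, a minimal sketch correlating the human norms with the (sign-flipped) embedding score across the types that have both (the correlation itself is not reported here):
# Type-level agreement between human gender judgments and embedding scores
cleaned_responses_with_norms_filtered_wiki %>%
  distinct(word_tidy_lemma, human_gender_estimate_us, male_score) %>%
  filter(!is.na(human_gender_estimate_us), !is.na(male_score)) %>%
  summarize(r = cor(human_gender_estimate_us, -male_score))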