# Column layout shared by both rater files. "X" and "X2" are placeholder
# names for two columns that are dropped immediately after reading.
RATER_COL_NAMES <- c("word", "X", "animacy_animate", "animacy_inanimate",
                     "animacy_both", "animacy_dk", "X2",
                     "humanness_human", "humanness_nonhuman",
                     "humanness_both", "humanness_dk")

# Read one rater's CSV (skipping its first two lines), drop the placeholder
# columns, and tag every row with the given rater id.
read_rater <- function(path, id) {
  read_csv(path, skip = 2, col_names = RATER_COL_NAMES) %>%
    select(-X, -X2) %>%
    mutate(rater_id = id)
}

# Keep word, rater_id and the columns whose names contain `measure_name`,
# then stack those columns into long (measure, value) pairs.
gather_measure <- function(rater_df, measure_name) {
  rater_df %>%
    select(word, rater_id, contains(measure_name)) %>%
    gather("measure", "value", -1:-2)
}

p1 <- here("data/raw/other_norms/animacy_rater1.csv")
rater1 <- read_rater(p1, 1)
rater1_animacy <- gather_measure(rater1, "animacy")
rater1_humanness <- gather_measure(rater1, "humanness")

p2 <- here("data/raw/other_norms/animacy_rater2.csv")
rater2 <- read_rater(p2, 2)
rater2_animacy <- gather_measure(rater2, "animacy")
rater2_humanness <- gather_measure(rater2, "humanness")
# Stack the four long tables, keep only the rows where value == 1, and split
# names such as "animacy_animate" into a measure part and a value part.
animacy_data <- bind_rows(rater1_animacy, rater1_humanness,
                          rater2_animacy, rater2_humanness) %>%
  filter(value == 1) %>%
  select(-value) %>%
  separate(measure, into = c("measure", "value"), sep = "_")
# One row per (word, measure): collect every value recorded for that pair and
# count how many distinct values there are.
# NOTE(review): n_vals == 1 would also result if only a single response
# exists for a pair -- confirm every word was rated by both raters.
tidy_data <- animacy_data %>%
  mutate(word = str_replace_all(word, '\"', "")) %>%  # strip double quotes
  group_by(word, measure) %>%
  summarize(values = list(value)) %>%
  mutate(n_vals = map_dbl(values, function(v) length(unique(v))))
The raters disagree (n_vals = 2) on a large share of the words: about 42% for animacy and 48% for humanness.
# Tabulate how many words have one vs. two distinct values, per measure.
kable(count(ungroup(tidy_data), measure, n_vals))
| measure | n_vals | n |
|---|---|---|
| animacy | 1 | 1323 |
| animacy | 2 | 957 |
| humanness | 1 | 1182 |
| humanness | 2 | 1098 |
Let’s look at the words with only one distinct value (n_vals = 1), i.e., the words on which the raters did not disagree.
# Keep the (word, measure) pairs with a single distinct value and unpack that
# value from the list column into a plain character column.
data_agreed <- tidy_data %>%
  ungroup() %>%
  filter(n_vals == 1) %>%
  mutate(value = map_chr(values, ~ pluck(.x, 1))) %>%
  select(-values, -n_vals)
# Tabulate the single-valued words by measure and value.
kable(count(data_agreed, measure, value))
| measure | value | n |
|---|---|---|
| animacy | animate | 762 |
| animacy | both | 16 |
| animacy | inanimate | 545 |
| humanness | both | 286 |
| humanness | human | 596 |
| humanness | nonhuman | 300 |
# Load the demographics responses for the gender-rating task.
RATING_DEMO_PATH <- here("data/processed/words/gender_ratings_demographics.csv")
rating_demos <- read_csv(RATING_DEMO_PATH)

# Subject ids whose response_str is 0 on the "native_english" or "native"
# question; these subjects are excluded below.
non_native_participants <- rating_demos %>%
  filter(question_name %in% c("native_english", "native"),
         response_str == 0) %>%
  pull(subj_id)

# Demographics with the excluded subjects removed.
rating_demos_ex <- rating_demos %>%
  filter(!subj_id %in% non_native_participants)
# Number of responses per answer to the "gender" question.
gender_data <- count(
  filter(rating_demos_ex, question_name == "gender"),
  response_str
)
# Median of the "age" responses. response_str is converted to numeric first;
# values that cannot be converted become NA and are ignored by the median.
age_median <- rating_demos_ex %>%
  filter(question_name == "age") %>%
  mutate(response_str = as.numeric(response_str)) %>%
  summarize(median_age = median(response_str, na.rm = TRUE))
## TH
# "education" responses, with the answer stored as a factor column named edu.
edu_data <- rating_demos_ex %>%
  filter(question_name == "education") %>%
  rename(edu = response_str) %>%
  mutate(edu = as.factor(edu)) %>%
  select(-question_name)
# Load the word ratings with explicit column types (subj_id and word as
# character, rating as number) and drop the excluded subjects.
RATING_RATINGS_PATH <- here("data/processed/words/gender_ratings.csv")
ratings <- read_csv(
  RATING_RATINGS_PATH,
  col_names = TRUE,
  col_types = cols(subj_id = "c", word = "c", rating = "n")
) %>%
  filter(!subj_id %in% non_native_participants)
# Smallest and largest number of rating rows contributed by any one subject.
N_ratings_per_subject <- summarize(
  count(ratings, subj_id),
  min = min(n),
  max = max(n)
)
# Mean rating for each word, averaged over all remaining rating rows.
by_word_means <- ungroup(
  summarize(group_by(ratings, word), mean_rating = mean(rating))
)
# Attach each word's mean rating to the single-valued animacy/humanness
# words. The key is stated explicitly so the join cannot silently change if
# either table later gains another shared column; words without ratings keep
# an NA mean_rating.
words_with_ratings <- data_agreed %>%
  left_join(by_word_means, by = "word")
# Density of by-word mean ratings, one panel per value (rows) and measure
# (columns), with a dashed vertical reference line at 3.
ggplot(words_with_ratings, aes(x = mean_rating)) +
  geom_density(aes(fill = value), alpha = .2) +
  facet_grid(value ~ measure, drop = TRUE, scales = "free_x") +
  # xintercept stays inside aes() so the line is drawn from the faceted data,
  # exactly as before.
  geom_vline(aes(xintercept = 3), linetype = 2) +
  theme_classic()
# Summarise mean_rating within each (measure, value) cell using
# multi_boot_standard, ignoring words that have no rating. The plot below
# reads the mean, ci_lower and ci_upper columns from the result.
gender_rating_by_word_type <- words_with_ratings %>%
  group_by(measure, value) %>%
  multi_boot_standard(col = "mean_rating", na.rm = TRUE)
# Mean rating with its interval for each value, one panel per measure, with a
# dashed horizontal reference line at 3.
ggplot(gender_rating_by_word_type, aes(x = value, y = mean)) +
  facet_wrap(~ measure, drop = TRUE, scales = "free_x") +
  geom_hline(aes(yintercept = 3), linetype = 2) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  theme_classic()