# Item key lookup: read the key file and keep only the two columns used
# further down (num_item_id and uni_lemma).
KEY_PATH <- here("analyses/11_hypernyms/data/item_key.csv")
key_df <- select(read_csv(KEY_PATH), num_item_id, uni_lemma)
# Load the raw ratings, attach the item key, and standardise `resp` within
# each subjCode group (mean 0, sd 1 per group).
BY_WORD <- here("data/ratings.csv")
word_df <- read_csv(BY_WORD) %>%
  # No `by` is given, so the join uses every column name shared with key_df.
  left_join(key_df) %>%
  group_by(subjCode) %>%
  # scale() returns a one-column matrix; as.numeric() keeps `resp` an
  # ordinary numeric vector instead of a matrix column.
  mutate(resp = as.numeric(scale(resp))) %>%
  # Drop the per-subject grouping so it cannot leak into later steps.
  ungroup()
# One row per item: mean standardised rating and number of ratings,
# sorted from lowest to highest mean rating.
by_word_tidy <- word_df %>%
  group_by(uni_lemma, num_item_id) %>%
  summarize(
    mean_rating = mean(resp),
    n = n()
  ) %>%
  # summarize() only drops the last grouping level, which would leave the
  # table grouped by uni_lemma; remove the grouping explicitly.
  ungroup() %>%
  arrange(mean_rating)

# Interactive table of the per-item summary.
DT::datatable(by_word_tidy)
# Hypernym/hyponym table, built in three explicit steps: read, recode the
# uni_lemma labels, then attach the item key.
HYPER <- here("analyses/11_hypernyms/data/wordbank_to_wordnet_with_hypernyms_hyponyms.csv")
hyper_tidy <- read_csv(HYPER)

# Recode uni_lemma: the four listed entries take their wordnet_lemma value,
# "dry" becomes "dry (action)", and everything else is left unchanged.
hyper_tidy <- mutate(
  hyper_tidy,
  uni_lemma = case_when(
    uni_lemma %in% c("TV", "daddy*", "feet", "call (on phone)") ~ wordnet_lemma,
    uni_lemma == "dry" ~ "dry (action)",
    TRUE ~ uni_lemma
  )
)

# No `by` is given, so the join uses every column name shared with key_df.
hyper_tidy <- left_join(hyper_tidy, key_df)
# Combine the hypernym table with the per-item mean ratings and derive the
# model variables.
hyper <- hyper_tidy %>%
  # Both tables carry uni_lemma but only num_item_id is the join key, so the
  # result holds uni_lemma.x and uni_lemma.y.
  left_join(by_word_tidy, by = "num_item_id") %>%
  # Standardise within each wordnet_PoS group.
  group_by(wordnet_PoS) %>%
  mutate(
    # scale() returns a one-column matrix; as.numeric() stores plain numeric
    # vectors so the columns behave normally in cor.test()/lm().
    hypernyms_scaled = as.numeric(scale(hypernyms)),
    mean_rating_scaled = as.numeric(scale(mean_rating)),
    # log1p(x) is log(1 + x); the +1 keeps zero counts finite.
    log_hyponyms_all = log1p(hyponyms_all)
  ) %>%
  # Drop the grouping so later steps operate on the full table.
  ungroup()
# Scatter of mean rating against hypernyms, points coloured by wordnet_PoS,
# with a single linear fit across all points.
ggplot(data = hyper, mapping = aes(x = hypernyms, y = mean_rating)) +
  geom_point(mapping = aes(colour = wordnet_PoS)) +
  geom_smooth(method = "lm")
##
## Pearson's product-moment correlation
##
## data: hyper$hypernyms and hyper$mean_rating
## t = -12.321, df = 448, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5690704 -0.4306877
## sample estimates:
## cor
## -0.5030968
##
## Call:
## lm(formula = mean_rating ~ hypernyms_all + log_hyponyms_all,
## data = hyper)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.81796 -0.27210 -0.01673 0.24336 1.20522
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.085278 0.049799 1.712 0.0875 .
## hypernyms_all -0.061889 0.005006 -12.364 <2e-16 ***
## log_hyponyms_all 0.122163 0.012086 10.108 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.392 on 447 degrees of freedom
## (23 observations deleted due to missingness)
## Multiple R-squared: 0.377, Adjusted R-squared: 0.3742
## F-statistic: 135.2 on 2 and 447 DF, p-value: < 2.2e-16
# Per-item summary of `resp` from langcog::multi_boot_standard(), joined to
# the hypernym table by num_item_id. The plot that follows reads the `mean`,
# `ci_lower` and `ci_upper` columns of this result.
by_word_tidy_error_bars <- left_join(
  langcog::multi_boot_standard(
    group_by(word_df, uni_lemma, num_item_id),
    col = "resp"
  ),
  hyper_tidy,
  by = "num_item_id"
)
# Per-item means with interval bars against hypernyms, coloured by
# wordnet_PoS, plus a linear fit.
# The jitter is applied as a layer position on the point ranges only, so the
# regression line is fitted to the real hypernyms values and the x axis is
# labelled "hypernyms" rather than the jitter() expression.
ggplot(by_word_tidy_error_bars, aes(x = hypernyms, y = mean)) +
  geom_pointrange(
    aes(ymin = ci_lower, ymax = ci_upper, color = wordnet_PoS),
    # width = 0.4 reproduces jitter(x, 2) when the smallest gap between
    # distinct hypernyms values is 1 (assumed integer counts; confirm).
    # height = 0 leaves the means and interval bounds untouched.
    position = position_jitter(width = 0.4, height = 0)
  ) +
  geom_smooth(method = "lm")
Nouns:
##
## Pearson's product-moment correlation
##
## data: nouns_only$hypernyms and nouns_only$mean_rating
## t = -8.6449, df = 363, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4948546 -0.3242973
## sample estimates:
## cor
## -0.413193
Verbs:
##
## Pearson's product-moment correlation
##
## data: verbs_only$hypernyms and verbs_only$mean_rating
## t = -1.824, df = 83, p-value = 0.07175
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.39299872 0.01754026
## sample estimates:
## cor
## -0.1963177
##
## Pearson's product-moment correlation
##
## data: hyper$log_hyponyms_all and hyper$mean_rating
## t = 10.472, df = 471, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3584727 0.5049626
## sample estimates:
## cor
## 0.4345876
Nouns:
##
## Pearson's product-moment correlation
##
## data: nouns_only$log_hyponyms_all and nouns_only$mean_rating
## t = 9.7543, df = 365, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3695562 0.5323143
## sample estimates:
## cor
## 0.4547235
Verbs:
##
## Pearson's product-moment correlation
##
## data: verbs_only$log_hyponyms_all and verbs_only$mean_rating
## t = 4.2605, df = 104, p-value = 4.49e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2101923 0.5367752
## sample estimates:
## cor
## 0.3854913