Word norms

Hypernyms
Hyponyms

library(knitr)

# Global knitr chunk options for the whole report.
# Written with TRUE/FALSE rather than T/F: T and F are ordinary variables
# that can be reassigned, whereas TRUE and FALSE are reserved words.
opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE,
               error = FALSE, cache = FALSE, tidy = FALSE)

library(tidyverse)
library(here)

# Every ggplot in the report uses the classic theme with a base font size of 10.
theme_set(theme_classic(base_size = 10))

# Item key: only the numeric item id and its uni_lemma are kept.
KEY_PATH <- here("analyses/11_hypernyms/data/item_key.csv")
key_df <- select(read_csv(KEY_PATH), num_item_id, uni_lemma)

# Trial-level ratings, annotated with the item key and z-scored per rater.
BY_WORD <- here("data/ratings.csv")
word_df <- read_csv(BY_WORD) %>%
  # Joins on whatever columns the ratings file shares with key_df.
  # NOTE(review): the ratings schema is not visible here; consider an explicit
  # `by =` once the shared key column(s) are confirmed.
  left_join(key_df) %>%
  group_by(subjCode) %>%
  # scale() returns a one-column matrix; as.numeric() keeps `resp` a plain
  # numeric vector so later summaries operate on an ordinary column.
  mutate(resp = as.numeric(scale(resp))) %>%
  # Drop the per-rater grouping so it does not leak into later steps.
  ungroup()

# One row per item: mean of the per-rater standardized ratings and the number
# of ratings contributing to it, sorted from lowest to highest mean.
by_word_tidy <- word_df %>%
  group_by(uni_lemma, num_item_id) %>%
  summarize(mean_rating = mean(resp),
            n = n()) %>%
  # summarize() only peels off the last grouping variable; ungroup explicitly
  # so the table is not left grouped by uni_lemma.
  ungroup() %>%
  arrange(mean_rating)

# Interactive, searchable table of the item means.
DT::datatable(by_word_tidy)

Show entries

Search:

	uni_lemma	num_item_id	mean_rating	n
1	green beans	114	-1.60312875604291	10
2	choo choo	2	-1.51778545033412	10
3	rocking chair	282	-1.16249561218804	10
4	rooster	47	-1.11979730157977	11
5	cockadoodledoo	3	-1.08419920871701	10
6	pattycake	390	-1.07785547655788	11
7	tissue/kleenex	253	-1.07078969311122	10
8	woof woof	11	-1.06765678886406	10
9	carrots	96	-1.05419107775642	11
10	toothbrush	254	-1.0351279090813	10

Showing 1 to 10 of 680 entries

Previous 1 2 3 4 5 … 68 Next

Hypernyms

# WordNet hypernym/hyponym counts per Wordbank item, with a handful of
# uni_lemma values rewritten before the item key is attached.
HYPER <- here("analyses/11_hypernyms/data/wordbank_to_wordnet_with_hypernyms_hyponyms.csv")
hyper_tidy <- read_csv(HYPER) %>%
  mutate(
    uni_lemma = case_when(
      # "dry" is relabelled as "dry (action)".
      uni_lemma == "dry" ~ "dry (action)",
      # These four lemmas take their WordNet lemma instead.
      uni_lemma %in% c("TV",  "daddy*", "feet", "call (on phone)") ~ wordnet_lemma,
      # Everything else is left unchanged.
      TRUE ~ uni_lemma
    )
  ) %>%
  # Joins on the columns shared with key_df.
  # NOTE(review): presumably uni_lemma, given the recoding above - confirm.
  left_join(key_df)


# Analysis table: WordNet measures joined to the per-item mean ratings.
# Because both inputs carry uni_lemma and the join is by num_item_id only,
# the result holds uni_lemma.x / uni_lemma.y.
hyper <- hyper_tidy %>%
  left_join(by_word_tidy, by = "num_item_id") %>%
  group_by(wordnet_PoS) %>%
  # Standardize within part of speech. scale() returns a one-column matrix,
  # so as.numeric() is used to store plain numeric vectors.
  mutate(hypernyms_scaled = as.numeric(scale(hypernyms)),
         mean_rating_scaled = as.numeric(scale(mean_rating)),
         # +1 so that items with zero hyponyms stay finite on the log scale.
         log_hyponyms_all = log(hyponyms_all + 1)) %>%
  # Drop the part-of-speech grouping so the pooled analyses below start
  # from an ungrouped table.
  ungroup()

# Mean rating against hypernym count, points coloured by part of speech,
# with one linear fit across all items.
ggplot(hyper, aes(hypernyms, mean_rating)) +
  geom_point(aes(colour = wordnet_PoS)) +
  geom_smooth(method = "lm")

# Pearson correlation over all items, pooled across parts of speech.
cor.test(hyper$hypernyms, hyper$mean_rating)

## 
##  Pearson's product-moment correlation
## 
## data:  hyper$hypernyms and hyper$mean_rating
## t = -12.321, df = 448, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5690704 -0.4306877
## sample estimates:
##        cor 
## -0.5030968

# Linear model of mean rating on hypernym count and log hyponym count.
# NOTE(review): this model uses `hypernyms_all`, while the plot and
# correlation above use `hypernyms` - confirm which measure is intended.
summary(
  lm(mean_rating ~ hypernyms_all + log_hyponyms_all, data = hyper)
)

## 
## Call:
## lm(formula = mean_rating ~ hypernyms_all + log_hyponyms_all, 
##     data = hyper)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81796 -0.27210 -0.01673  0.24336  1.20522 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.085278   0.049799   1.712   0.0875 .  
## hypernyms_all    -0.061889   0.005006 -12.364   <2e-16 ***
## log_hyponyms_all  0.122163   0.012086  10.108   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.392 on 447 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.377,  Adjusted R-squared:  0.3742 
## F-statistic: 135.2 on 2 and 447 DF,  p-value: < 2.2e-16

# Per-item summary of `resp` from langcog::multi_boot_standard, joined to the
# WordNet measures. The plot below reads `mean`, `ci_lower` and `ci_upper`
# from this table.
by_word_tidy_error_bars <- group_by(word_df, uni_lemma, num_item_id) %>%
  langcog::multi_boot_standard(col = "resp") %>%
  left_join(hyper_tidy, by = "num_item_id")



# Item means with their confidence intervals against hypernym count.
# The horizontal jitter is applied to the point-range layer only, so the
# linear fit is estimated on the actual hypernym counts and the x axis is
# labelled "hypernyms" rather than "jitter(hypernyms, 2)".
# width = 0.4 matches jitter(x, 2) when the smallest gap between distinct x
# values is 1 - NOTE(review): assumes integer hypernym counts, confirm.
# height = 0 leaves the means and interval bounds untouched.
ggplot(by_word_tidy_error_bars, aes(x = hypernyms, y = mean)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper, color = wordnet_PoS),
                  position = position_jitter(width = 0.4, height = 0)) +
  #geom_point(aes(color = wordnet_PoS)) + 
  geom_smooth(method = "lm")

Nouns:

# Restrict to items whose WordNet part of speech is "n".
nouns_only <- filter(hyper, wordnet_PoS == "n")

# Hypernym count vs. mean rating, nouns only.
cor.test(nouns_only$hypernyms, nouns_only$mean_rating)

## 
##  Pearson's product-moment correlation
## 
## data:  nouns_only$hypernyms and nouns_only$mean_rating
## t = -8.6449, df = 363, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4948546 -0.3242973
## sample estimates:
##       cor 
## -0.413193

Verbs:

# Restrict to items whose WordNet part of speech is "v".
verbs_only <- filter(hyper, wordnet_PoS == "v")

# Hypernym count vs. mean rating, verbs only.
cor.test(verbs_only$hypernyms, verbs_only$mean_rating)

## 
##  Pearson's product-moment correlation
## 
## data:  verbs_only$hypernyms and verbs_only$mean_rating
## t = -1.824, df = 83, p-value = 0.07175
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.39299872  0.01754026
## sample estimates:
##        cor 
## -0.1963177

Hyponyms

# Mean rating against log(hyponyms_all + 1), points coloured by part of
# speech, with one linear fit across all items.
ggplot(hyper, aes(log_hyponyms_all, mean_rating)) +
  geom_point(aes(colour = wordnet_PoS)) +
  geom_smooth(method = "lm")

# Pearson correlation over all items, pooled across parts of speech.
cor.test(hyper$log_hyponyms_all, hyper$mean_rating)

## 
##  Pearson's product-moment correlation
## 
## data:  hyper$log_hyponyms_all and hyper$mean_rating
## t = 10.472, df = 471, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3584727 0.5049626
## sample estimates:
##       cor 
## 0.4345876

Nouns:

# Noun subset (wordnet_PoS == "n"), recomputed from `hyper` for this section.
nouns_only <- filter(hyper, wordnet_PoS == "n")

# Log hyponym count vs. mean rating, nouns only.
cor.test(nouns_only$log_hyponyms_all, nouns_only$mean_rating)

## 
##  Pearson's product-moment correlation
## 
## data:  nouns_only$log_hyponyms_all and nouns_only$mean_rating
## t = 9.7543, df = 365, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3695562 0.5323143
## sample estimates:
##       cor 
## 0.4547235

Verbs:

# Verb subset (wordnet_PoS == "v"), recomputed from `hyper` for this section.
verbs_only <- filter(hyper, wordnet_PoS == "v")

# Log hyponym count vs. mean rating, verbs only.
cor.test(verbs_only$log_hyponyms_all, verbs_only$mean_rating)

## 
##  Pearson's product-moment correlation
## 
## data:  verbs_only$log_hyponyms_all and verbs_only$mean_rating
## t = 4.2605, df = 104, p-value = 4.49e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2101923 0.5367752
## sample estimates:
##       cor 
## 0.3854913

Word norms

2019-08-13

Hypernyms

Hyponyms