light-light words only.
# Load the raw ratings file and build one row per (word pair, subject) for
# the pairs whose two category columns (C1, C2) are both "Light".
BEDNY_DATA <- here("data/raw/datalong_CBSAMT.csv")
bedny_data <- read_csv(BEDNY_DATA)

sim_ratings <- bedny_data %>%
  filter(C1 == "Light", C2 == "Light") %>%
  # Keep the two word columns (V1, V2) plus every column whose name contains
  # an underscore; those are treated as the per-subject rating columns.
  select(contains("V"), contains("_")) %>%
  rename(word1 = V1, word2 = V2) %>%
  gather("subject_id", "raw_similarity", -word1, -word2) %>%
  # Standardize and min-max rescale within each subject.
  group_by(subject_id) %>%
  mutate(
    # scale() returns a one-column matrix; as.numeric() keeps both derived
    # columns as plain numeric vectors instead of matrix columns.
    scale_similarity = as.numeric(scale(raw_similarity)),
    # NOTE(review): min()/max() are called without na.rm, so a subject with
    # any missing rating gets NA for all of its norm_similarity values.
    # Confirm that dropping such subjects entirely is intended.
    norm_similarity = (scale_similarity - min(scale_similarity)) /
      (max(scale_similarity) - min(scale_similarity))
  ) %>%
  # Drop the per-subject grouping so it does not leak into later steps.
  ungroup() %>%
  # Derive the participant group from the prefix embedded in the subject id.
  mutate(
    group_type = case_when(
      str_detect(subject_id, "CB_") ~ "CB",
      str_detect(subject_id, "S_") ~ "S",
      str_detect(subject_id, "AMT_") ~ "AMT"
    )
  )
# Average the normalized ratings across subjects for every
# (group_type, word1, word2) combination, ignoring missing ratings.
mean_ratings <- sim_ratings %>%
  filter(!is.na(norm_similarity)) %>%
  group_by(group_type, word1, word2) %>%
  summarize(similarity = mean(norm_similarity)) %>%
  # summarize() only peels off the last grouping variable; ungroup explicitly
  # so the joined and reshaped tables built from this one are not grouped.
  ungroup()
# Read the language-based word-word table and attach it to the mean ratings.
LANGUAGE_DISTANCES <- here("data/processed/bedny_2019_lang_distances.csv")
long_word_word_dists <- read_csv(LANGUAGE_DISTANCES)

# No `by` is supplied, so the join uses every column name the two tables share.
# NOTE(review): presumably that is word1/word2 -- confirm against the CSV.
all_data <- left_join(mean_ratings, long_word_word_dists)
# Scatter of mean human similarity against language similarity with a linear
# fit, drawn in one panel per group_type.
ggplot(
  data = all_data,
  mapping = aes(x = language_similarity, y = similarity)
) +
  geom_point() +
  # geom_text(aes(label = word1)) +
  geom_smooth(method = "lm") +
  facet_grid(. ~ group_type) +
  labs(y = "human similarity") +
  theme_classic()
# Spearman correlation between human similarity and language similarity,
# computed separately for each group_type and returned as one tidy row each.
all_data %>%
  group_by(group_type) %>%
  nest() %>%
  mutate(
    temp = map(data, function(d) {
      tidy(cor.test(d$similarity, d$language_similarity, method = "spearman"))
    })
  ) %>%
  select(-data) %>%
  # Name the list-column explicitly: a bare unnest() is deprecated and warns
  # in tidyr >= 1.0, while unnest(temp) works in both old and new versions.
  unnest(temp)
## # A tibble: 3 x 6
## group_type estimate statistic p.value method alternative
## <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 AMT 0.451 105997. 1.41e-6 Spearman's rank co… two.sided
## 2 CB 0.530 90588. 5.89e-9 Spearman's rank co… two.sided
## 3 S 0.470 102340. 4.36e-7 Spearman's rank co… two.sided
# Wide table with one row per word pair and one similarity column per
# remaining group (CB and S); the AMT rows and the language measure are dropped.
cb_s <- spread(
  select(
    filter(all_data, group_type != "AMT"),
    -language_similarity
  ),
  key = group_type,
  value = similarity
)
# Mean similarity in the S group plotted against the CB group, one point per
# word pair, with a linear fit.
ggplot(data = cb_s, mapping = aes(x = CB, y = S)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "Sighted vs. Blind") +
  theme_classic()
# Pearson correlation between the S and CB mean similarities across word pairs.
cor.test(x = cb_s$S, y = cb_s$CB, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: cb_s$S and cb_s$CB
## t = 21.231, df = 103, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8591094 0.9326100
## sample estimates:
## cor
## 0.9022147
# Same per-group scatterplot as for the full item set, after removing every
# word pair in which either member is "blink".
all_data %>%
  filter(word1 != "blink" & word2 != "blink") %>%
  ggplot(mapping = aes(x = language_similarity, y = similarity)) +
  geom_point() +
  # geom_text(aes(label = word1)) +
  geom_smooth(method = "lm") +
  facet_grid(. ~ group_type) +
  labs(y = "human similarity") +
  theme_classic()
# Per-group Spearman correlation between human and language similarity,
# excluding every word pair in which either member is "blink".
# The second condition previously repeated `word1`, so pairs with "blink" in
# the word2 position were not removed; it now tests `word2`, matching the
# filter used for the corresponding plot.
# NOTE(review): the printed results recorded after this chunk were produced
# with the earlier filter and should be regenerated.
all_data %>%
  filter(word1 != "blink", word2 != "blink") %>%
  group_by(group_type) %>%
  nest() %>%
  mutate(
    temp = map(data, function(d) {
      tidy(cor.test(d$similarity, d$language_similarity, method = "spearman"))
    })
  ) %>%
  select(-data) %>%
  # Name the list-column explicitly: a bare unnest() is deprecated and warns
  # in tidyr >= 1.0, while unnest(temp) works in both old and new versions.
  unnest(temp)
## # A tibble: 3 x 6
## group_type estimate statistic p.value method alternative
## <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 AMT 0.530 60980. 5.51e- 8 Spearman's rank corre… two.sided
## 2 CB 0.626 48485. 2.42e-11 Spearman's rank corre… two.sided
## 3 S 0.586 53715. 8.39e-10 Spearman's rank corre… two.sided
ITEMS: flash, shine, blaze, twinkle, glow, flare, blink, glitter, flicker, sparkle, shimmer, glimmer, gleam, glint, glisten