# Setup: knitr chunk defaults, package loading, and the global ggplot theme.
library(knitr)
# Spell out TRUE/FALSE: `T` and `F` are ordinary variables that can be
# reassigned, whereas TRUE/FALSE are reserved words.
opts_chunk$set(
  echo = TRUE, message = FALSE, warning = FALSE,
  error = FALSE, cache = FALSE, tidy = FALSE
)
library(tidyverse)
library(feather)
library(langcog)
library(broom)
library(lme4)
library(data.table)
theme_set(theme_classic(base_size = 10))

# Get t-values ----
# Read the per-word coefficient table, keeping only the word and its t-value
# (column `t`). Words are lower-cased; presumably this is so they match the
# vocabulary of the embedding models they are filtered against later in the
# script (confirm).
t_values <- read_csv("data/word_coeffs_log_mtld_t2.csv") %>%
  # filter(n_know >= 10) %>%
  select(word, t) %>%
  mutate(word = tolower(word))

# Plot childes-trained model ----
# Load the CHILDES-trained word2vec embeddings. The code below relies on a
# `word` column and drops the first column before running t-SNE, so the file
# is presumably laid out as `word` followed by numeric embedding dimensions
# (TODO confirm).
word2vec_model_childes <- read_csv(
  "/Users/mollylewis/Documents/research/Projects/1_in_progress/VOCAB_SEEDS/analyses/3_kid_vocabs/3_train_childes_model/childes_adult_w2v.txt"
)

# Keep only the embeddings of words that have a t-value.
crit_model <- word2vec_model_childes %>%
  filter(word %in% t_values$word)

# Run t-SNE on everything except the first column.
# NOTE(review): Rtsne is stochastic; call set.seed() before this line if the
# layout needs to be reproducible across runs.
tsne_outF <- Rtsne::Rtsne(crit_model[, -1])

# Name the two t-SNE coordinates and re-attach the word labels (rows are in
# the same order as `crit_model`).
tsne_dimsF <- tsne_outF$Y %>%
  as.data.frame() %>%
  rename(
    tsne_X = V1,
    tsne_Y = V2
  ) %>%
  bind_cols(word = crit_model$word)

# Attach the t-values and flag words whose t exceeds 1.2 (t_bin = 1, else 0).
# The join key is given explicitly so dplyr does not have to infer it.
tsne_dims <- tsne_dimsF %>%
  left_join(t_values, by = "word") %>%
  mutate(
    t_bin = ifelse(t > 1.2, 1, 0),
    t_bin = as.factor(t_bin)
  )

# All words as points, coloured by t_bin.
ggplot(tsne_dims, aes(x = tsne_X, y = tsne_Y, color = t_bin)) +
  # scale_color_continuous(low = "white", high = "red") +
  geom_point()

# Only the flagged words (t_bin == 1), drawn as text labels.
tsne_dims %>%
  filter(t_bin == 1) %>%
  ggplot(aes(x = tsne_X, y = tsne_Y, color = t_bin)) +
  geom_text(aes(label = word), size = 2)

# Plot wikipedia-trained model ----
# Location of the Wikipedia-trained word vectors.
MODEL_PATH <- "/Users/mollylewis/Documents/research/Projects/1_in_progress/VOCAB_SEEDS/analyses/0_exploration/wiki.en.vec"

# Read the vectors file: the first line is skipped, quote handling is
# disabled, and the columns are named `target_word`, V2, ..., V301.
wiki_model <- fread(
  MODEL_PATH,
  header = FALSE,
  skip = 1,
  quote = "",
  encoding = "UTF-8",
  data.table = TRUE,
  # paste0() is vectorised, so no lapply()/unlist() wrapper is needed.
  col.names = c("target_word", paste0("V", 2:301))
)

# Keep only the embeddings of words that have a t-value.
crit_model <- wiki_model %>%
  filter(target_word %in% t_values$word)

# Run t-SNE on everything except the first (word) column.
# NOTE(review): Rtsne is stochastic; call set.seed() before this line if the
# layout needs to be reproducible across runs.
tsne_outF <- Rtsne::Rtsne(crit_model[, -1])

# Name the two t-SNE coordinates and re-attach the word labels (rows are in
# the same order as `crit_model`). The label column is called `word` here so
# it can be joined against `t_values`.
tsne_dimsF <- tsne_outF$Y %>%
  as.data.frame() %>%
  rename(
    tsne_X = V1,
    tsne_Y = V2
  ) %>%
  bind_cols(word = crit_model$target_word)

# Attach the t-values and flag words whose t exceeds 1.2 (t_bin = 1, else 0).
# The join key is given explicitly so dplyr does not have to infer it.
tsne_dims <- tsne_dimsF %>%
  left_join(t_values, by = "word") %>%
  mutate(
    t_bin = ifelse(t > 1.2, 1, 0),
    t_bin = as.factor(t_bin)
  )

# All words as points, coloured by t_bin.
ggplot(tsne_dims, aes(x = tsne_X, y = tsne_Y, color = t_bin)) +
  # scale_color_continuous(low = "white", high = "red") +
  geom_point()

# Only the flagged words (t_bin == 1), drawn as text labels.
tsne_dims %>%
  filter(t_bin == 1) %>%
  ggplot(aes(x = tsne_X, y = tsne_Y, color = t_bin)) +
  geom_text(aes(label = word), size = 2)