library(knitr)
# Global knitr chunk options: echo the code, suppress messages and warnings,
# and turn the error, cache and tidy options off.
# Spelled with TRUE/FALSE rather than T/F, because T and F are ordinary
# variables that can be reassigned.
opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE,
               error = FALSE, cache = FALSE, tidy = FALSE)
library(tidyverse)
library(langcog)
library(corrplot)
library(feather)
library(broom)
# Default ggplot2 theme for the script: classic theme with a 10pt base size.
theme_set(theme_classic(base_size = 10))

# Read the information-density data and keep a subset of its columns.
# Columns are chosen by position: the positive indices (2, 3, 5, 29:69) build
# the selection and the negative indices remove entries from it, so -3 and -5
# cancel the 3 and 5 listed at the start.
# NOTE(review): positional selection depends on the CSV column order staying
# fixed -- consider selecting by name.
df <- read_csv("../data/information_density.csv") %>%
  select(2, 3, 5, 29:69, -45, -35, -40, -38,
         -42, -43, -54, -68, -62, -33, -31,
         -65, -36, -5, -32, -61, -3, -69, -66) %>%
  # Keep rows with -.2 < lang_mean_n_resid < .1 and rhsw > -1.5
  # (presumably the outlier exclusion the result notes refer to -- confirm).
  filter(lang_mean_n_resid < .1 & lang_mean_n_resid > -.2) %>%
  filter(rhsw > -1.5)
# Copy of df without any column whose name begins with "r_".
df_no_corrs <- select(df, -starts_with("r_"))
# Correlation matrix over all remaining columns; each pair of columns uses
# whichever rows are complete for that pair.
corr_mat <- df_no_corrs %>%
  cor(use = "pairwise.complete.obs")
# Matrix of p-values for the pairwise correlation tests, taken from the `p`
# element of the cor.mtest() result.
# NOTE(review): `use` is forwarded through `...`; presumably the underlying
# cor.test() ignores it -- confirm against the corrplot documentation.
p.mat <- cor.mtest(
  df_no_corrs,
  conf.level = .95,
  use = "pairwise.complete.obs"
)[["p"]]
# 100-step colour ramp interpolated red -> white -> blue and then reversed,
# so the vector runs from blue through white to red.
cols <- local({
  ramp <- colorRampPalette(c("red", "white", "blue"))
  rev(ramp(100))
})
# Colour-coded plot of the full correlation matrix, using the `cols` palette
# and hierarchical-clustering order. Coefficients are requested in black,
# insig = "blank" with sig.level = .05 asks for pairs whose p-value in p.mat
# is above .05 to be blanked, and the diagonal is omitted.
corrplot(corr_mat, method = "color", col = cols,
         type = "full", order = "hclust", number.cex = .7,
         addCoef.col = "black", insig = "blank",
         p.mat = p.mat, sig.level = .05,
         tl.col = "black", tl.srt = 90,
         diag = FALSE)

# Plot every other measure against lang_mean_n_resid, one facet per measure,
# each with its own axis scales and a linear fit.
# The x variable is excluded from the reshape by name rather than by position
# (-6), so the plot does not depend on the column order of df_no_corrs.
df_no_corrs %>%
  gather("measure", "value", -lang_mean_n_resid) %>%
  ggplot(aes(x = lang_mean_n_resid, y = value)) +
  geom_smooth(method = "lm") +
  geom_point(size = .2) +
  facet_wrap(~measure, scales = "free") +
  theme_classic()

# Excluding outliers, lang_mean_n_resid is positively correlated with
# semantic H (.74) and Hsum (.44).
# Plot every other measure against lang_cv_n_resid, one facet per measure,
# each with its own axis scales and a linear fit.
# The x variable is excluded from the reshape by name rather than by position
# (-5), so the plot does not depend on the column order of df_no_corrs.
df_no_corrs %>%
  gather("measure", "value", -lang_cv_n_resid) %>%
  ggplot(aes(x = lang_cv_n_resid, y = value)) +
  geom_smooth(method = "lm") +
  geom_point(size = .2) +
  facet_wrap(~measure, scales = "free") +
  theme_classic()

# Excluding outliers, lang_cv_n_resid is positively correlated with
# entropy_words (.51) and rhsw (.39).