library(knitr)

# Global knitr chunk defaults for this document. Logical values are spelled
# out as TRUE/FALSE because the shorthands T and F are ordinary variables
# that can be reassigned.
opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  error = FALSE,
  cache = FALSE,
  tidy = FALSE
)

library(tidyverse)
library(langcog)
library(corrplot)
library(feather)
library(broom)

# Default ggplot2 theme for every plot in this document.
theme_set(theme_classic(base_size = 10))

# Load the information-density measures and keep a positional subset of the
# columns. Columns 3 and 5 are listed and then excluded again, so the result
# is column 2 plus the part of 29:69 that is not excluded below.
df <- read_csv("../data/information_density.csv") %>%
  select(
    2, 3, 5, 29:69,
    -3, -5,
    -31, -32, -33, -35, -36, -38,
    -40, -42, -43, -45, -54,
    -61, -62, -65, -66, -68, -69
  ) %>%
  # Keep rows strictly inside (-.2, .1) on lang_mean_n_resid and with
  # rhsw above -1.5; rows where any condition is NA are dropped.
  filter(
    lang_mean_n_resid > -.2,
    lang_mean_n_resid < .1,
    rhsw > -1.5
  )

# Copy of df without any column whose name starts with "r_"
# (presumably precomputed correlation columns -- confirm against the CSV).
df_no_corrs <- select(df, -starts_with("r_"))

# Correlation matrix over all remaining columns (cor()'s default method),
# using every pair of observations that is complete for the two columns.
corr_mat <- df_no_corrs %>%
  cor(use = "pairwise.complete.obs")

# Matrix of p-values for the same column pairs, taken from the `p` element
# of cor.mtest()'s result.
# NOTE(review): `use` is forwarded through cor.mtest()'s `...`; confirm that
# it has any effect on the underlying test.
p.mat <- cor.mtest(
  df_no_corrs,
  conf.level = .95,
  use = "pairwise.complete.obs"
)[["p"]]

# 100-step diverging palette: the red -> white -> blue ramp is reversed, so
# the first colour is blue and the last is red.
cols <- rev(
  colorRampPalette(colors = c("red", "white", "blue"))(n = 100)
)

# Correlation heatmap of corr_mat. Arguments are grouped by purpose; all but
# the first are named, so their order does not matter.
corrplot(
  corr_mat,
  # Layout: coloured cells, full matrix without the diagonal, variables
  # ordered by hierarchical clustering.
  method = "color",
  type = "full",
  diag = FALSE,
  order = "hclust",
  col = cols,
  # Coefficient labels.
  addCoef.col = "black",
  number.cex = .7,
  # Blank out cells whose p-value exceeds the .05 level.
  p.mat = p.mat,
  sig.level = .05,
  insig = "blank",
  # Axis text labels.
  tl.col = "black",
  tl.srt = 90
)

lang_mean_n_resid

# Scatter every other measure against lang_mean_n_resid, one free-scaled
# facet per measure, with a linear fit drawn underneath the points.
df_no_corrs %>%
  # Exclude the x variable by name instead of by position (-6), so the
  # reshape keeps working if the upstream column selection changes.
  gather(key = "measure", value = "value", -lang_mean_n_resid) %>%
  ggplot(aes(x = lang_mean_n_resid, y = value)) +
  geom_smooth(method = "lm") +
  geom_point(size = .2) +
  facet_wrap(~measure, scales = "free") +
  theme_classic()

Excluding outliers, lang_mean_n_resid is positively correlated with semantic H (r = .74) and Hsum (r = .44).

lang_cv_n_resid

# Scatter every other measure against lang_cv_n_resid, one free-scaled
# facet per measure, with a linear fit drawn underneath the points.
df_no_corrs %>%
  # Exclude the x variable by name instead of by position (-5), so the
  # reshape keeps working if the upstream column selection changes.
  gather(key = "measure", value = "value", -lang_cv_n_resid) %>%
  ggplot(aes(x = lang_cv_n_resid, y = value)) +
  geom_smooth(method = "lm") +
  geom_point(size = .2) +
  facet_wrap(~measure, scales = "free") +
  theme_classic()

Excluding outliers, lang_cv_n_resid is positively correlated with entropy_words (r = .51) and rhsw (r = .39).