Load packages

library(tidyverse)
library(lubridate)
library(langcog)
library(skimr) #devtools::install_github("hadley/colformat") #devtools::install_github("ropenscilabs/skimr")
library(corrr)
library(knitr)
library(feather)

opts_chunk$set(echo = T, message = F, warning = F, 
               error = F, cache = F, tidy = F)

theme_set(theme_minimal())

Experimental data

Read in participant-wise data.

file_1 <- "../../data-flurry/summary-07-12-17.csv"
file_2 <- "../../data-flurry/summary-12-9-16.csv"
# file_3 <- "data-flurry/summary-2-7-17.csv" # this one is missing, but we may want it? (N doesn't match email - but maybe that's including excluded participants?)

mss <- read_csv(file_1) %>%
          rbind(read_csv(file_2)) %>%
          rename(sub_id = SubjectID) %>%
          mutate_at(vars(sub_id), funs(as.factor))

Summarize data

skim(mss)
## Numeric Variables
## # A tibble: 5 x 13
##           var    type missing complete     n      mean          sd
##         <chr>   <chr>   <dbl>    <dbl> <dbl>     <dbl>       <dbl>
## 1         age numeric       0      139   139  3.123309  0.55097067
## 2      ME_acc numeric       1      138   139 81.652682 16.79279517
## 3    ME_count integer       1      138   139 18.971014  0.34050261
## 4   vocab_acc numeric       0      139   139 65.835903 15.40588604
## 5 vocab_count integer       1      138   139 20.007246  0.08512565
## # ... with 6 more variables: min <dbl>, `25% quantile` <dbl>,
## #   median <dbl>, `75% quantile` <dbl>, max <dbl>, hist <chr>
## 
## Factor Variables
## # A tibble: 1 x 7
##      var   type complete missing     n n_unique
##    <chr>  <chr>    <dbl>   <dbl> <dbl>    <dbl>
## 1 sub_id factor      139       0   139      139
## # ... with 1 more variables: stat <chr>
NUM_PARTICIPANTS <-  nrow(mss)

There are 139 unique participants.

One participant has no data. Remove them.

mss_c <- filter(mss, !is.na(ME_acc))
mss_c %>%
  gather("measure", "value", 2:4) %>%
  ggplot(aes(x = value, fill = measure)) +
  geom_histogram() +
  facet_wrap(~measure, scales = "free") +
  theme(legend.position = "none") 

Developmental plots

Age vs. Vocab

ggplot(mss_c, aes(x = age, y = vocab_acc)) +
  geom_point() +
  geom_smooth(method = "lm")

Age vs. ME

ggplot(mss_c, aes(x = age, y = ME_acc)) +
  geom_point() +
  geom_smooth(method = "lm")

Vocab vs. ME

ggplot(mss_c, aes(x = vocab_acc, y = ME_acc)) +
  geom_point() +
  geom_smooth(method = "lm")

Stats

Correlations

mss_c %>%
  select(age, vocab_acc, ME_acc)  %>%
  correlate() %>%
  shave() %>%
  kable()
rowname age vocab_acc ME_acc
age NA NA NA
vocab_acc 0.3803345 NA NA
ME_acc 0.2487739 0.5387383 NA

Partial Correlations

ME and vocab, controlling for age

ppcor::pcor.test(mss_c$ME_acc, mss_c$vocab_acc, mss_c$age) %>%
  kable()
estimate p.value statistic n gp Method
0.4957962 0 6.633318 138 1 pearson

Age and vocab, controlling for ME

ppcor::pcor.test(mss_c$age, mss_c$vocab_acc, mss_c$ME_acc) %>%
  kable()
estimate p.value statistic n gp Method
0.3018559 0.0003374 3.678854 138 1 pearson

MA data

Read in ME meta-analysis data in Metalab

ma_raw <- read_feather("../data/mutual_exclusivity_metalab") %>%
  select(1, 3, 6, 9, 11, 12, 18, 20, 49, 52, 53, 95, 96) %>%
  mutate_if(is.character, as.factor)

Summarize data

skim(ma_raw)
## Numeric Variables
## # A tibble: 7 x 13
##                        var    type missing complete     n        mean
##                      <chr>   <chr>   <dbl>    <dbl> <dbl>       <dbl>
## 1                   d_calc numeric       0       60    60   1.0837422
## 2               d_var_calc numeric       0       60    60   0.1583856
## 3                 expt_num numeric       0       60    60   1.3333333
## 4               mean_age_1 numeric       0       60    60 842.4536217
## 5 mean_comprehension_vocab numeric      48       12    60 324.8058333
## 6    mean_production_vocab numeric      41       19    60 173.4921053
## 7                      n_1 numeric       0       60    60  18.8333333
## # ... with 7 more variables: sd <dbl>, min <dbl>, `25% quantile` <dbl>,
## #   median <dbl>, `75% quantile` <dbl>, max <dbl>, hist <chr>
## 
## Factor Variables
## # A tibble: 6 x 7
##                 var   type complete missing     n n_unique
##               <chr>  <chr>    <dbl>   <dbl> <dbl>    <dbl>
## 1 dependent_measure factor       60       0    60        2
## 2            method factor       60       0    60        1
## 3   object_stimulus factor       60       0    60        3
## 4     response_mode factor       60       0    60        2
## 5        short_cite factor       60       0    60       19
## 6          study_ID factor       60       0    60       20
## # ... with 1 more variables: stat <chr>

Developmental plots

mean_month <- 365.2425 / 12.0

ma_c <- ma_raw %>%
  mutate(mean_age = mean_age_1/mean_month)

Age vs. Vocab

ma_c %>%
  filter(!is.na(mean_production_vocab),
           mean_age < 36) %>%
  ggplot(aes(x = mean_age, y = mean_production_vocab)) +
  geom_point(aes(size = n_1)) +
  geom_smooth(method = "lm", formula = y ~ log(x)) +
  xlab("Mean age (months)") +
  ylab("Mean CDI productive vocabulary") +
  theme(legend.position = "none")

Age vs. ME

ma_c %>%
ggplot(aes(x = mean_age, y = d_calc)) +
  geom_point(aes(size = n_1)) +
  geom_smooth(method = "lm", formula = y ~ log(x)) +
  xlab("mean age (months)") +
  ylab("Estimated effect size (d)") +
  ggtitle("all MA data - age") +
  theme(legend.position = "none")

Age vs. ME subsetted

ma_c %>%
  filter(!is.na(mean_production_vocab),
           mean_age < 36) %>%
ggplot(aes(x = mean_age, y = d_calc)) +
  geom_point(aes(size = n_1)) +
  geom_smooth(method = "lm", formula = y ~ log(x)) +
  xlab("mean age (months)") +
  ylab("Estimated effect size (d)") +
  ggtitle("production MA data - age") +
  theme(legend.position = "none")

Vocab vs. ME

ma_c %>%
  filter(!is.na(mean_production_vocab),
           mean_age < 36) %>%
  ggplot(aes(x = mean_production_vocab, y = d_calc)) +
  geom_point(aes(size = n_1)) +
  geom_smooth(method = "lm", formula = y ~ log(x)) +
  xlab("Mean CDI productive vocabulary") +
  ylab("Estimated effect size (d)") +
  ggtitle("production MA data - vocab") +
  theme(legend.position = "none")

Stats

Correlations

ma_c %>%
  filter(!is.na(mean_production_vocab),
           mean_age < 36)  %>%
  select(mean_age, mean_production_vocab, d_calc)  %>%
  correlate() %>%
  shave() %>%
  kable()
rowname mean_age mean_production_vocab d_calc
mean_age NA NA NA
mean_production_vocab 0.9578729 NA NA
d_calc 0.7013107 0.71578 NA

Partial Correlations

ME and vocab, controlling for age

ma_sub <- ma_c %>%
  filter(!is.na(mean_production_vocab),
           mean_age < 36)

ppcor::pcor.test(ma_sub$d_calc, ma_sub$mean_production_vocab, ma_sub$mean_age) %>%
  kable()
estimate p.value statistic n gp Method
0.2149863 0.4073031 0.8525742 18 1 pearson

Age and vocab, controlling for ME

ppcor::pcor.test(ma_sub$mean_age, ma_sub$mean_production_vocab, ma_sub$d_calc)%>%
  kable()
estimate p.value statistic n gp Method
0.9157966 2e-07 8.830905 18 1 pearson

TO DO: Get meta-analytics estimates