Experimental data
- Developmental plots
- Stats
  - Correlations
  - Partial Correlations
MA data
- Developmental plots
- Stats
  - Correlations
  - Partial Correlations

Load packages

library(tidyverse)
library(lubridate)
library(langcog)
library(skimr) #devtools::install_github("hadley/colformat") #devtools::install_github("ropenscilabs/skimr")
library(corrr)
library(knitr)
library(feather)

opts_chunk$set(echo = T, message = F, warning = F, 
               error = F, cache = F, tidy = F)

theme_set(theme_minimal())

Experimental data

Read in participant-wise data.

file_1 <- "../../data-flurry/summary-07-12-17.csv"
file_2 <- "../../data-flurry/summary-12-9-16.csv"
# file_3 <- "data-flurry/summary-2-7-17.csv" # this one is missing, but we may want it? (N doesn't match email - but maybe that's including excluded participants?)

mss <- read_csv(file_1) %>%
          rbind(read_csv(file_2)) %>%
          rename(sub_id = SubjectID) %>%
          mutate_at(vars(sub_id), funs(as.factor))

Summarize data

skim(mss)

## Numeric Variables
## # A tibble: 5 x 13
##           var    type missing complete     n      mean          sd
##         <chr>   <chr>   <dbl>    <dbl> <dbl>     <dbl>       <dbl>
## 1         age numeric       0      139   139  3.123309  0.55097067
## 2      ME_acc numeric       1      138   139 81.652682 16.79279517
## 3    ME_count integer       1      138   139 18.971014  0.34050261
## 4   vocab_acc numeric       0      139   139 65.835903 15.40588604
## 5 vocab_count integer       1      138   139 20.007246  0.08512565
## # ... with 6 more variables: min <dbl>, `25% quantile` <dbl>,
## #   median <dbl>, `75% quantile` <dbl>, max <dbl>, hist <chr>
## 
## Factor Variables
## # A tibble: 1 x 7
##      var   type complete missing     n n_unique
##    <chr>  <chr>    <dbl>   <dbl> <dbl>    <dbl>
## 1 sub_id factor      139       0   139      139
## # ... with 1 more variables: stat <chr>

NUM_PARTICIPANTS <-  nrow(mss)

There are 139 unique participants.

One participant has no data. Remove them.

mss_c <- filter(mss, !is.na(ME_acc))

mss_c %>%
  gather("measure", "value", 2:4) %>%
  ggplot(aes(x = value, fill = measure)) +
  geom_histogram() +
  facet_wrap(~measure, scales = "free") +
  theme(legend.position = "none")

Developmental plots

Age vs. Vocab

ggplot(mss_c, aes(x = age, y = vocab_acc)) +
  geom_point() +
  geom_smooth(method = "lm")

Age vs. ME

ggplot(mss_c, aes(x = age, y = ME_acc)) +
  geom_point() +
  geom_smooth(method = "lm")

Vocab vs. ME

ggplot(mss_c, aes(x = vocab_acc, y = ME_acc)) +
  geom_point() +
  geom_smooth(method = "lm")

Stats

Correlations

mss_c %>%
  select(age, vocab_acc, ME_acc)  %>%
  correlate() %>%
  shave() %>%
  kable()

rowname	age	vocab_acc	ME_acc
age	NA	NA	NA
vocab_acc	0.3803345	NA	NA
ME_acc	0.2487739	0.5387383	NA

Partial Correlations

ME and vocab, controlling for age

ppcor::pcor.test(mss_c$ME_acc, mss_c$vocab_acc, mss_c$age) %>%
  kable()

estimate	p.value	statistic	n	gp	Method
0.4957962	0	6.633318	138	1	pearson

Age and vocab, controlling for ME

ppcor::pcor.test(mss_c$age, mss_c$vocab_acc, mss_c$ME_acc) %>%
  kable()

estimate	p.value	statistic	n	gp	Method
0.3018559	0.0003374	3.678854	138	1	pearson

MA data

Read in ME meta-analysis data in Metalab

ma_raw <- read_feather("../data/mutual_exclusivity_metalab") %>%
  select(1, 3, 6, 9, 11, 12, 18, 20, 49, 52, 53, 95, 96) %>%
  mutate_if(is.character, as.factor)

Summarize data

skim(ma_raw)

## Numeric Variables
## # A tibble: 7 x 13
##                        var    type missing complete     n        mean
##                      <chr>   <chr>   <dbl>    <dbl> <dbl>       <dbl>
## 1                   d_calc numeric       0       60    60   1.0837422
## 2               d_var_calc numeric       0       60    60   0.1583856
## 3                 expt_num numeric       0       60    60   1.3333333
## 4               mean_age_1 numeric       0       60    60 842.4536217
## 5 mean_comprehension_vocab numeric      48       12    60 324.8058333
## 6    mean_production_vocab numeric      41       19    60 173.4921053
## 7                      n_1 numeric       0       60    60  18.8333333
## # ... with 7 more variables: sd <dbl>, min <dbl>, `25% quantile` <dbl>,
## #   median <dbl>, `75% quantile` <dbl>, max <dbl>, hist <chr>
## 
## Factor Variables
## # A tibble: 6 x 7
##                 var   type complete missing     n n_unique
##               <chr>  <chr>    <dbl>   <dbl> <dbl>    <dbl>
## 1 dependent_measure factor       60       0    60        2
## 2            method factor       60       0    60        1
## 3   object_stimulus factor       60       0    60        3
## 4     response_mode factor       60       0    60        2
## 5        short_cite factor       60       0    60       19
## 6          study_ID factor       60       0    60       20
## # ... with 1 more variables: stat <chr>

Developmental plots

mean_month <- 365.2425 / 12.0

ma_c <- ma_raw %>%
  mutate(mean_age = mean_age_1/mean_month)

Age vs. Vocab

ma_c %>%
  filter(!is.na(mean_production_vocab),
           mean_age < 36) %>%
  ggplot(aes(x = mean_age, y = mean_production_vocab)) +
  geom_point(aes(size = n_1)) +
  geom_smooth(method = "lm", formula = y ~ log(x)) +
  xlab("Mean age (months)") +
  ylab("Mean CDI productive vocabulary") +
  theme(legend.position = "none")

Age vs. ME

ma_c %>%
ggplot(aes(x = mean_age, y = d_calc)) +
  geom_point(aes(size = n_1)) +
  geom_smooth(method = "lm", formula = y ~ log(x)) +
  xlab("mean age (months)") +
  ylab("Estimated effect size (d)") +
  ggtitle("all MA data - age") +
  theme(legend.position = "none")

Age vs. ME subsetted

ma_c %>%
  filter(!is.na(mean_production_vocab),
           mean_age < 36) %>%
ggplot(aes(x = mean_age, y = d_calc)) +
  geom_point(aes(size = n_1)) +
  geom_smooth(method = "lm", formula = y ~ log(x)) +
  xlab("mean age (months)") +
  ylab("Estimated effect size (d)") +
  ggtitle("production MA data - age") +
  theme(legend.position = "none")

Vocab vs. ME

ma_c %>%
  filter(!is.na(mean_production_vocab),
           mean_age < 36) %>%
  ggplot(aes(x = mean_production_vocab, y = d_calc)) +
  geom_point(aes(size = n_1)) +
  geom_smooth(method = "lm", formula = y ~ log(x)) +
  xlab("Mean CDI productive vocabulary") +
  ylab("Estimated effect size (d)") +
  ggtitle("production MA data - vocab") +
  theme(legend.position = "none")

Stats

Correlations

ma_c %>%
  filter(!is.na(mean_production_vocab),
           mean_age < 36)  %>%
  select(mean_age, mean_production_vocab, d_calc)  %>%
  correlate() %>%
  shave() %>%
  kable()

rowname	mean_age	mean_production_vocab	d_calc
mean_age	NA	NA	NA
mean_production_vocab	0.9578729	NA	NA
d_calc	0.7013107	0.71578	NA

Partial Correlations

ME and vocab, controlling for age

ma_sub <- ma_c %>%
  filter(!is.na(mean_production_vocab),
           mean_age < 36)

ppcor::pcor.test(ma_sub$d_calc, ma_sub$mean_production_vocab, ma_sub$mean_age) %>%
  kable()

estimate	p.value	statistic	n	gp	Method
0.2149863	0.4073031	0.8525742	18	1	pearson

Age and vocab, controlling for ME

ppcor::pcor.test(ma_sub$mean_age, ma_sub$mean_production_vocab, ma_sub$d_calc)%>%
  kable()

estimate	p.value	statistic	n	gp	Method
0.9157966	2e-07	8.830905	18	1	pearson

TO DO: Get meta-analytics estimates

ME vocab analyses with experimental and MA data

Mike Frank, Veronica Cristiano, Brenden Lake, Tammy Kwan, Molly Lewis

2017-08-11

Experimental data

Developmental plots

Age vs. Vocab

Age vs. ME

Vocab vs. ME

Stats

Correlations

Partial Correlations

MA data

Developmental plots

Age vs. Vocab

Age vs. ME

Age vs. ME subsetted

Vocab vs. ME

Stats

Correlations

Partial Correlations