Load packages
library(tidyverse)
library(lubridate)
library(langcog)
library(skimr) #devtools::install_github("hadley/colformat") #devtools::install_github("ropenscilabs/skimr")
library(corrr)
library(knitr)
library(feather)
opts_chunk$set(echo = T, message = F, warning = F,
error = F, cache = F, tidy = F)
theme_set(theme_minimal())
Read in participant-wise data.
file_1 <- "../../data-flurry/summary-07-12-17.csv"
file_2 <- "../../data-flurry/summary-12-9-16.csv"
# file_3 <- "data-flurry/summary-2-7-17.csv" # this one is missing, but we may want it? (N doesn't match email - but maybe that's including excluded participants?)
mss <- read_csv(file_1) %>%
rbind(read_csv(file_2)) %>%
rename(sub_id = SubjectID) %>%
mutate_at(vars(sub_id), funs(as.factor))
Summarize data
skim(mss)
## Numeric Variables
## # A tibble: 5 x 13
## var type missing complete n mean sd
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age numeric 0 139 139 3.123309 0.55097067
## 2 ME_acc numeric 1 138 139 81.652682 16.79279517
## 3 ME_count integer 1 138 139 18.971014 0.34050261
## 4 vocab_acc numeric 0 139 139 65.835903 15.40588604
## 5 vocab_count integer 1 138 139 20.007246 0.08512565
## # ... with 6 more variables: min <dbl>, `25% quantile` <dbl>,
## # median <dbl>, `75% quantile` <dbl>, max <dbl>, hist <chr>
##
## Factor Variables
## # A tibble: 1 x 7
## var type complete missing n n_unique
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 sub_id factor 139 0 139 139
## # ... with 1 more variables: stat <chr>
NUM_PARTICIPANTS <- nrow(mss)
There are 139 unique participants.
One participant has no data. Remove them.
mss_c <- filter(mss, !is.na(ME_acc))
mss_c %>%
gather("measure", "value", 2:4) %>%
ggplot(aes(x = value, fill = measure)) +
geom_histogram() +
facet_wrap(~measure, scales = "free") +
theme(legend.position = "none")
ggplot(mss_c, aes(x = age, y = vocab_acc)) +
geom_point() +
geom_smooth(method = "lm")
ggplot(mss_c, aes(x = age, y = ME_acc)) +
geom_point() +
geom_smooth(method = "lm")
ggplot(mss_c, aes(x = vocab_acc, y = ME_acc)) +
geom_point() +
geom_smooth(method = "lm")
mss_c %>%
select(age, vocab_acc, ME_acc) %>%
correlate() %>%
shave() %>%
kable()
| rowname | age | vocab_acc | ME_acc |
|---|---|---|---|
| age | NA | NA | NA |
| vocab_acc | 0.3803345 | NA | NA |
| ME_acc | 0.2487739 | 0.5387383 | NA |
ME and vocab, controlling for age
ppcor::pcor.test(mss_c$ME_acc, mss_c$vocab_acc, mss_c$age) %>%
kable()
| estimate | p.value | statistic | n | gp | Method |
|---|---|---|---|---|---|
| 0.4957962 | 0 | 6.633318 | 138 | 1 | pearson |
Age and vocab, controlling for ME
ppcor::pcor.test(mss_c$age, mss_c$vocab_acc, mss_c$ME_acc) %>%
kable()
| estimate | p.value | statistic | n | gp | Method |
|---|---|---|---|---|---|
| 0.3018559 | 0.0003374 | 3.678854 | 138 | 1 | pearson |
Read in ME meta-analysis data in Metalab
ma_raw <- read_feather("../data/mutual_exclusivity_metalab") %>%
select(1, 3, 6, 9, 11, 12, 18, 20, 49, 52, 53, 95, 96) %>%
mutate_if(is.character, as.factor)
Summarize data
skim(ma_raw)
## Numeric Variables
## # A tibble: 7 x 13
## var type missing complete n mean
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 d_calc numeric 0 60 60 1.0837422
## 2 d_var_calc numeric 0 60 60 0.1583856
## 3 expt_num numeric 0 60 60 1.3333333
## 4 mean_age_1 numeric 0 60 60 842.4536217
## 5 mean_comprehension_vocab numeric 48 12 60 324.8058333
## 6 mean_production_vocab numeric 41 19 60 173.4921053
## 7 n_1 numeric 0 60 60 18.8333333
## # ... with 7 more variables: sd <dbl>, min <dbl>, `25% quantile` <dbl>,
## # median <dbl>, `75% quantile` <dbl>, max <dbl>, hist <chr>
##
## Factor Variables
## # A tibble: 6 x 7
## var type complete missing n n_unique
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 dependent_measure factor 60 0 60 2
## 2 method factor 60 0 60 1
## 3 object_stimulus factor 60 0 60 3
## 4 response_mode factor 60 0 60 2
## 5 short_cite factor 60 0 60 19
## 6 study_ID factor 60 0 60 20
## # ... with 1 more variables: stat <chr>
mean_month <- 365.2425 / 12.0
ma_c <- ma_raw %>%
mutate(mean_age = mean_age_1/mean_month)
ma_c %>%
filter(!is.na(mean_production_vocab),
mean_age < 36) %>%
ggplot(aes(x = mean_age, y = mean_production_vocab)) +
geom_point(aes(size = n_1)) +
geom_smooth(method = "lm", formula = y ~ log(x)) +
xlab("Mean age (months)") +
ylab("Mean CDI productive vocabulary") +
theme(legend.position = "none")
ma_c %>%
ggplot(aes(x = mean_age, y = d_calc)) +
geom_point(aes(size = n_1)) +
geom_smooth(method = "lm", formula = y ~ log(x)) +
xlab("mean age (months)") +
ylab("Estimated effect size (d)") +
ggtitle("all MA data - age") +
theme(legend.position = "none")
ma_c %>%
filter(!is.na(mean_production_vocab),
mean_age < 36) %>%
ggplot(aes(x = mean_age, y = d_calc)) +
geom_point(aes(size = n_1)) +
geom_smooth(method = "lm", formula = y ~ log(x)) +
xlab("mean age (months)") +
ylab("Estimated effect size (d)") +
ggtitle("production MA data - age") +
theme(legend.position = "none")
ma_c %>%
filter(!is.na(mean_production_vocab),
mean_age < 36) %>%
ggplot(aes(x = mean_production_vocab, y = d_calc)) +
geom_point(aes(size = n_1)) +
geom_smooth(method = "lm", formula = y ~ log(x)) +
xlab("Mean CDI productive vocabulary") +
ylab("Estimated effect size (d)") +
ggtitle("production MA data - vocab") +
theme(legend.position = "none")
ma_c %>%
filter(!is.na(mean_production_vocab),
mean_age < 36) %>%
select(mean_age, mean_production_vocab, d_calc) %>%
correlate() %>%
shave() %>%
kable()
| rowname | mean_age | mean_production_vocab | d_calc |
|---|---|---|---|
| mean_age | NA | NA | NA |
| mean_production_vocab | 0.9578729 | NA | NA |
| d_calc | 0.7013107 | 0.71578 | NA |
ME and vocab, controlling for age
ma_sub <- ma_c %>%
filter(!is.na(mean_production_vocab),
mean_age < 36)
ppcor::pcor.test(ma_sub$d_calc, ma_sub$mean_production_vocab, ma_sub$mean_age) %>%
kable()
| estimate | p.value | statistic | n | gp | Method |
|---|---|---|---|---|---|
| 0.2149863 | 0.4073031 | 0.8525742 | 18 | 1 | pearson |
Age and vocab, controlling for ME
ppcor::pcor.test(ma_sub$mean_age, ma_sub$mean_production_vocab, ma_sub$d_calc)%>%
kable()
| estimate | p.value | statistic | n | gp | Method |
|---|---|---|---|---|---|
| 0.9157966 | 2e-07 | 8.830905 | 18 | 1 | pearson |
TO DO: Get meta-analytics estimates