Load packages
library(tidyverse)
library(lubridate)
library(langcog)
library(skimr) #devtools::install_github("hadley/colformat") #devtools::install_github("ropenscilabs/skimr")
library(corrr)
library(knitr)
# Chunk defaults for the knitted report: echo the code, hide messages and
# warnings, and turn off caching and code tidying. Spelled with TRUE/FALSE
# because T and F are ordinary variables that can be reassigned.
opts_chunk$set(
  echo = TRUE, message = FALSE, warning = FALSE,
  error = FALSE, cache = FALSE, tidy = FALSE
)
# Apply the minimal ggplot2 theme to every plot drawn after this point.
theme_set(theme_minimal())
Read in participant-wise data.
# Participant-wise summary files to combine.
file_1 <- "data-flurry/summary-07-12-17.csv"
file_2 <- "data-flurry/summary-12-9-16.csv"
# file_3 <- "data-flurry/summary-2-7-17.csv" # this one is missing, but we may want it? (N doesn't match email - but maybe that's including excluded participants?)
# Stack the two files into one table. bind_rows() matches columns by name, so
# a differing column order between the files cannot misalign the data.
# Then rename the ID column and store it as a factor; a plain mutate()
# replaces the deprecated mutate_at(vars(), funs()) form.
mss <- read_csv(file_1) %>%
  bind_rows(read_csv(file_2)) %>%
  rename(sub_id = SubjectID) %>%
  mutate(sub_id = as.factor(sub_id))
Summarize data
# Print a per-column summary of the combined data (type, missing and
# complete counts, and summary statistics), as shown in the output below.
skim(mss)
## Numeric Variables
## # A tibble: 5 x 13
## var type missing complete n mean sd
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age numeric 0 139 139 3.123309 0.55097067
## 2 ME_acc numeric 1 138 139 81.652682 16.79279517
## 3 ME_count integer 1 138 139 18.971014 0.34050261
## 4 vocab_acc numeric 0 139 139 65.835903 15.40588604
## 5 vocab_count integer 1 138 139 20.007246 0.08512565
## # ... with 6 more variables: min <dbl>, `25% quantile` <dbl>,
## # median <dbl>, `75% quantile` <dbl>, max <dbl>, hist <chr>
##
## Factor Variables
## # A tibble: 1 x 7
## var type complete missing n n_unique
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 sub_id factor 139 0 139 139
## # ... with 1 more variables: stat <chr>
# Number of unique participants. Counting distinct IDs rather than rows means
# a participant who appears in both input files is only counted once.
NUM_PARTICIPANTS <- length(unique(mss$sub_id))
There are 139 unique participants.
One participant has no data. Remove them.
# Keep only participants that have an ME_acc value.
mss_c <- filter(mss, !is.na(ME_acc))

# Histogram of each measure stored in columns 2-4, one facet per measure with
# its own axis scales. pivot_longer() replaces the superseded gather().
# NOTE(review): the columns are picked by position; confirm that 2:4 are the
# intended measures if the input column order ever changes.
mss_c %>%
  pivot_longer(cols = 2:4, names_to = "measure", values_to = "value") %>%
  ggplot(aes(x = value, fill = measure)) +
  geom_histogram() +
  facet_wrap(~measure, scales = "free") +
  theme(legend.position = "none")

# Vocabulary accuracy against age, with a linear fit.
ggplot(mss_c, aes(x = age, y = vocab_acc)) +
  geom_point() +
  geom_smooth(method = "lm")

# ME accuracy against age, with a linear fit.
ggplot(mss_c, aes(x = age, y = ME_acc)) +
  geom_point() +
  geom_smooth(method = "lm")

# ME accuracy against vocabulary accuracy, with a linear fit.
ggplot(mss_c, aes(x = vocab_acc, y = ME_acc)) +
  geom_point() +
  geom_smooth(method = "lm")

# Pairwise correlations between age and the two accuracy scores; shave()
# blanks the redundant upper triangle before the table is rendered.
mss_c %>%
  select(age, vocab_acc, ME_acc) %>%
  correlate() %>%
  shave() %>%
  kable()

| rowname | age | vocab_acc | ME_acc |
|---|---|---|---|
| age | NA | NA | NA |
| vocab_acc | 0.3803345 | NA | NA |
| ME_acc | 0.2487739 | 0.5387383 | NA |
ME and vocab, controlling for age
# Partial correlation between ME accuracy and vocabulary accuracy, with age
# as the controlled variable.
with(mss_c, ppcor::pcor.test(x = ME_acc, y = vocab_acc, z = age))
## estimate p.value statistic n gp Method
## 1 0.4957962 7.26485e-10 6.633318 138 1 pearson
Age and vocab, controlling for ME
# Partial correlation between age and vocabulary accuracy, with ME accuracy
# as the controlled variable.
with(mss_c, ppcor::pcor.test(x = age, y = vocab_acc, z = ME_acc))
## estimate p.value statistic n gp Method
## 1 0.3018559 0.0003373989 3.678853 138 1 pearson