Load packages

library(tidyverse)
library(lubridate)
library(langcog)
library(skimr) #devtools::install_github("hadley/colformat") #devtools::install_github("ropenscilabs/skimr")
library(corrr)
library(knitr)

# Global knitr chunk options: code is echoed; messages and warnings are hidden;
# error, cache and tidy are all switched off. TRUE/FALSE are spelled out
# because T and F are ordinary variables that can be reassigned.
opts_chunk$set(
  echo = TRUE, message = FALSE, warning = FALSE,
  error = FALSE, cache = FALSE, tidy = FALSE
)

# Apply ggplot2's minimal theme to every plot that follows.
theme_set(theme_minimal())

Read in participant-wise data.

# Participant-level summary files; both are stacked into a single table.
file_1 <- "data-flurry/summary-07-12-17.csv"
file_2 <- "data-flurry/summary-12-9-16.csv"
# file_3 <- "data-flurry/summary-2-7-17.csv" # this one is missing, but we may want it? (N doesn't match email - but maybe that's including excluded participants?)

# Stack the two files, rename SubjectID to sub_id, and store the id as a
# factor. A plain mutate() replaces mutate_at(vars(...), funs(...)) because
# funs() has been deprecated since dplyr 0.8.
mss <- rbind(read_csv(file_1), read_csv(file_2)) %>%
  rename(sub_id = SubjectID) %>%
  mutate(sub_id = as.factor(sub_id))

Summarize data

# Per-column summary of the combined data (missing/complete counts and
# distribution statistics), used to spot incomplete participants.
skim(mss)
## Numeric Variables
## # A tibble: 5 x 13
##           var    type missing complete     n      mean          sd
##         <chr>   <chr>   <dbl>    <dbl> <dbl>     <dbl>       <dbl>
## 1         age numeric       0      139   139  3.123309  0.55097067
## 2      ME_acc numeric       1      138   139 81.652682 16.79279517
## 3    ME_count integer       1      138   139 18.971014  0.34050261
## 4   vocab_acc numeric       0      139   139 65.835903 15.40588604
## 5 vocab_count integer       1      138   139 20.007246  0.08512565
## # ... with 6 more variables: min <dbl>, `25% quantile` <dbl>,
## #   median <dbl>, `75% quantile` <dbl>, max <dbl>, hist <chr>
## 
## Factor Variables
## # A tibble: 1 x 7
##      var   type complete missing     n n_unique
##    <chr>  <chr>    <dbl>   <dbl> <dbl>    <dbl>
## 1 sub_id factor      139       0   139      139
## # ... with 1 more variables: stat <chr>
# Count distinct participant ids rather than rows, so the total stays correct
# even if a participant appears in more than one summary file.
NUM_PARTICIPANTS <- n_distinct(mss$sub_id)

There are 139 unique participants.

One participant is missing ME accuracy data (ME_acc is NA). Remove them.

# Keep only participants with a non-missing ME accuracy score.
mss_c <- filter(mss, !is.na(ME_acc))

# Histogram of each measure held in columns 2-4, one free-scaled panel per
# measure. pivot_longer() replaces the superseded gather().
# NOTE(review): the columns are still selected by position, as before; confirm
# which measures these are and consider naming them explicitly.
mss_c %>%
  pivot_longer(cols = 2:4, names_to = "measure", values_to = "value") %>%
  ggplot(aes(x = value, fill = measure)) +
  geom_histogram() +
  facet_wrap(~measure, scales = "free") +
  theme(legend.position = "none")

Pairwise correlations

Age vs. Vocab

# Vocabulary accuracy against age: raw points plus a fitted "lm" smooth.
mss_c %>%
  ggplot(mapping = aes(x = age, y = vocab_acc)) +
  geom_point() +
  geom_smooth(method = "lm")

Age vs. ME

# ME accuracy against age: raw points plus a fitted "lm" smooth.
mss_c %>%
  ggplot(mapping = aes(x = age, y = ME_acc)) +
  geom_point() +
  geom_smooth(method = "lm")

Vocab vs. ME

# ME accuracy against vocabulary accuracy: raw points plus a fitted "lm" smooth.
mss_c %>%
  ggplot(mapping = aes(x = vocab_acc, y = ME_acc)) +
  geom_point() +
  geom_smooth(method = "lm")

Stats

Correlations

# Pairwise correlations among age, vocabulary accuracy and ME accuracy,
# shaved with shave() and rendered as a table.
kable(
  shave(
    correlate(
      select(mss_c, age, vocab_acc, ME_acc)
    )
  )
)
rowname age vocab_acc ME_acc
age NA NA NA
vocab_acc 0.3803345 NA NA
ME_acc 0.2487739 0.5387383 NA

Partial Correlations

ME and vocab, controlling for age

# Partial correlation of ME accuracy with vocabulary accuracy, given age.
with(mss_c, ppcor::pcor.test(ME_acc, vocab_acc, age))
##    estimate     p.value statistic   n gp  Method
## 1 0.4957962 7.26485e-10  6.633318 138  1 pearson

Age and vocab, controlling for ME

# Partial correlation of age with vocabulary accuracy, given ME accuracy.
with(mss_c, ppcor::pcor.test(age, vocab_acc, ME_acc))
##    estimate      p.value statistic   n gp  Method
## 1 0.3018559 0.0003373989  3.678853 138  1 pearson