library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(langcog)
## 
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
## 
##     scale
library(magrittr)
library(stringr)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
library(googlesheets)


rm(list=ls())
theme_set(theme_mikabr() +
            theme(panel.grid = element_blank(),
                  strip.background = element_blank()))
font <- "Open Sans"

1 Runsheets and exclusions

runsheet <- read_csv("data/runsheet_6-20-16.csv") %>%
  select(SID, Homeroom, Consent)

classes <- read_csv("data/classrooms_6-20-16.csv") %>%
  select(Exclude, Homeroom, Group)

kids_raw <- left_join(runsheet, classes) %>%
  mutate(Consent = ifelse(is.na(Consent), 0, Consent))
## Joining by: "Homeroom"
kids <- kids_raw %>%
  filter(Consent == 1, Exclude == 0) 

groups <- kids %>% 
  select(SID, Group, Homeroom) %>%
  rename(roster_subid = SID,
         room = Homeroom,
         group = Group)

Raw numbers.

kable(kids_raw %>% group_by(Group, Consent) %>% summarise(n = n()))
Group Consent n
CNTL 0 92
CNTL 1 100
EXCLUDED 0 6
MA 0 93
MA 1 116
NA 0 3
NA 1 1

By consent proportion for included rooms.

kable(kids_raw %>% 
        group_by(Group) %>% 
        filter(!is.na(Group), Group != "EXCLUDED") %>%
        summarise(consent = sum(Consent == 1), 
                  total = n(), 
                  prop = mean(Consent == 1)), 
      digits = 2)
Group consent total prop
CNTL 100 192 0.52
MA 116 209 0.56

By room.

kable(kids_raw %>% 
        group_by(Group, Homeroom) %>% 
        filter(!is.na(Group), Group != "EXCLUDED") %>%
        summarise(consent = sum(Consent == 1), 
                  total = n(), 
                  prop = mean(Consent == 1)), 
      digits = 2)
Group Homeroom consent total prop
CNTL BURKE/HALL 6 19 0.32
CNTL GUTIERREZ 11 24 0.46
CNTL HEREDIA 2 3 0.67
CNTL HUNTER/PIERIES 10 20 0.50
CNTL KANE 11 22 0.50
CNTL MARGULIES 9 21 0.43
CNTL MEJIA/BRADBURY 13 22 0.59
CNTL PAZYMINO 12 16 0.75
CNTL ROMANO 4 9 0.44
CNTL ROSA 9 17 0.53
CNTL ZORRILLA 13 19 0.68
MA CAPILLI 7 16 0.44
MA CHUN 8 19 0.42
MA CSASZAR 11 19 0.58
MA D’IZZIA/JANO 11 21 0.52
MA GIGANTI/CHAMBERS 4 8 0.50
MA MATA 9 17 0.53
MA MEEKS 8 20 0.40
MA MITTMAN 4 10 0.40
MA PERRY/KEENAN 15 20 0.75
MA RENTAS 17 20 0.85
MA TRIANO 8 18 0.44
MA WOODING 14 21 0.67

Summary, we got around 50% consent rate; consent rate didn’t differ across groups.

2 Cognitive tasks

Read data. Note that there is one sitting file for the battery and one for each of the sub-tests, in case the battery crashed and failed to save data for that participant.

sittings.raw <- bind_rows(
  read_csv("data/2016/b301_06.20.16_17.00.40_sittings.csv"),
  read_csv("data/2016/b299_06.20.16_17.03.46_sittings.csv"),
  read_csv("data/2016/b300_06.20.16_17.03.52_sittings.csv"),
  read_csv("data/2016/b302_06.20.16_17.03.58_sittings.csv"))

sittings <- sittings.raw %>% 
  mutate(subid = str_trim(toupper(misc), side = "both"),
         sitting_id = id) %>%
  filter(str_detect(subid, "[Ss][12]-")) %>%
  mutate(grade = ifelse(str_detect(subid, "[Ss][1]"), 
                        "first grade", 
                        "second grade"), 
         year = factor(str_sub(start_time, 1, 4))) %>%
  select(subid, sitting_id, year, grade, battery_id, test_order, num_completed)

Each task has the every-battery and the one-sitting version.

gonogo.raw <- bind_rows(
  read_csv("data/2016/b299_t463_06.20.16_17.06.05.csv"),
  read_csv("data/2016/b301_t463_06.20.16_17.06.16.csv"))

ravens.raw <- bind_rows(
  read_csv("data/2016/b300_t464_06.20.16_17.06.12.csv"),
  read_csv("data/2016/b301_t464_06.20.16_17.06.18.csv"))

swm.raw <- bind_rows(
  read_csv("data/2016/b302_t467_06.20.16_17.06.27.csv"),
  read_csv("data/2016/b301_t467_06.20.16_17.06.19.csv"))

Merge in demographics.

First, find the subjects whose IDs were entered incorrectly and output.

anti_join(sittings, groups %>% rename(subid = roster_subid)) %>%
  select(subid, battery_id) %>%
  write_csv("data/missing_sittings.csv")
## Joining by: "subid"

Now read in the conversion table. (Done by hand by checking the runsheet for typos, notes etc, on the basis of missing_sittings.csv).

missing_sittings <- read_csv("data/missing_sittings_key.csv")

sittings <- left_join(sittings, missing_sittings) %>%
  mutate(exclude = ifelse(is.na(exclude), 0, exclude)) %>%
  filter(exclude != 1) %>%
  mutate(roster_subid = ifelse(is.na(new_subid), subid, new_subid)) %>%
  left_join(groups) 
## Joining by: c("subid", "battery_id")
## Joining by: "roster_subid"

Computer testing numbers. Note that battery 301 is the combined battery.

sittings %>% 
  group_by(grade, year, battery_id) %>%
  summarise(n = n())
## Source: local data frame [14 x 4]
## Groups: grade, year [?]
## 
##           grade   year battery_id     n
##           (chr) (fctr)      (int) (int)
## 1   first grade   2015        299     2
## 2   first grade   2015        300    10
## 3   first grade   2015        301    70
## 4   first grade   2015        302    10
## 5   first grade   2016        300     2
## 6   first grade   2016        301    72
## 7   first grade   2016        302     2
## 8  second grade   2015        299     1
## 9  second grade   2015        300     9
## 10 second grade   2015        301   111
## 11 second grade   2015        302    10
## 12 second grade   2016        300     4
## 13 second grade   2016        301   106
## 14 second grade   2016        302     4

2.1 Go / No-Go Task

gonogo <- gonogo.raw %>%
  filter(sitting_id %in% sittings$sitting_id) %>%
  left_join(sittings, by = "sitting_id") %>%
  select(subid, year, grade, trial, stim, correct, rt, accuracy, group,
         responseAssign, rtCall)

Accuracy.

gonogo.summary <- gonogo %>%
  group_by(subid, grade, group, year) %>%
  summarise(accuracy = mean(accuracy, na.rm=TRUE))
  
ms <- gonogo.summary %>%
  group_by(grade, group, year) %>%
  summarise(accuracy = mean(accuracy))

gonogo.summary %>%
  ggplot(aes(x=accuracy, fill = group)) + 
  geom_histogram(binwidth = .05) +
  facet_grid(grade ~ year) + 
  geom_vline(xintercept = .5, lty = 3) +
  xlim(c(0,1)) + 
  geom_vline(data = ms, aes(xintercept = accuracy, col = group), 
             lty = 2)

RT.

ms <- gonogo %>%
  filter(rt > 0 & accuracy == 1) %>%
  group_by(subid, grade, group, year) %>%
  summarise(rt = mean(rt, na.rm=TRUE)) %>%
  group_by(grade, group) %>%
  summarise(rt = mean(rt))

gonogo %>%
  filter(rt > 0 & accuracy == 1) %>%
  group_by(subid, grade, group, year) %>%
  summarise(rt = mean(rt, na.rm=TRUE)) %>%
  ggplot(aes(x=rt, fill = group)) + 
  geom_histogram(binwidth = 50) +
  facet_grid(grade ~ year) + 
  geom_vline(data = ms, aes(xintercept = rt, col = group), 
             lty = 2)

2.2 Spatial WM

Data processing.

swm <- swm.raw %>%
  filter(sitting_id %in% sittings$sitting_id,
         type == "test") %>%
  left_join(sittings, by = "sitting_id") %>%
  filter(type == "test") %>%
  select(subid, grade, group, year, trial, capacity, correct)

Mean level (same measure as in Zenith study).

swm.summary <- swm %>%
  group_by(subid, grade, group, year) %>%
  summarise(capacity = mean(capacity, na.rm=TRUE))
  
ms <- swm.summary %>%
  group_by(grade, group, year) %>%
  summarise(capacity = mean(capacity))
  
swm.summary %>%
  ggplot(aes(x=capacity, fill = group)) + 
  geom_histogram(binwidth = .5) +
  facet_grid(grade ~ year) +
  xlim(c(1,8)) +
  geom_vline(data = ms, aes(xintercept = capacity, col = group), 
             lty = 2)
## Warning: Removed 2 rows containing non-finite values (stat_bin).

2.3 Raven’s

Actually a matrix reasoning equivalent.

ravens <- ravens.raw %>%
  filter(sitting_id %in% sittings$sitting_id,
         type == "test") %>%
  left_join(sittings, by = "sitting_id") %>%
  filter(type == "test") %>%
  select(subid, grade, group, year, trial, correct)

Mean level (same measure as in Zenith study).

ravens.summary <- ravens %>% 
  group_by(subid, grade, group, year) %>%
    summarise(correct = sum(correct, na.rm=TRUE))

ms <- ravens.summary %>%
  group_by(grade, group, year) %>%
  summarise(correct = mean(correct))
  
ravens.summary %>%
  ggplot(aes(x=correct, fill = group)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(grade ~ year) +
  geom_vline(data = ms, aes(xintercept = correct, col = group), 
             lty = 2)

2.4 Summary plot

subs <- ravens.summary %>% 
  left_join(gonogo.summary) %>%
  left_join(swm.summary) %>%
  rename(ravens = correct, 
         gonogo = accuracy,
         swm = capacity) 
## Joining by: c("subid", "grade", "group", "year")
## Joining by: c("subid", "grade", "group", "year")
ms <- subs %>%
  gather(task, score, ravens, gonogo, swm) %>%
  group_by(year, group, task) %>%
  multi_boot_standard("score", na.rm=TRUE)
## Joining by: c("year", "group", "task")
pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
  geom_line(position = pos) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = pos) + 
  facet_wrap(~task, scales = "free_y") + 
  scale_colour_solarized()

And final numbers on N data points per task.

kable(subs %>%
  gather(task, score, ravens, gonogo, swm) %>%
  group_by(year, group, task) %>%
  summarise(n=n()))
year group task n
2015 CNTL gonogo 82
2015 CNTL ravens 82
2015 CNTL swm 82
2015 MA gonogo 92
2015 MA ravens 92
2015 MA swm 92
2016 CNTL gonogo 77
2016 CNTL ravens 77
2016 CNTL swm 77
2016 MA gonogo 98
2016 MA ravens 98
2016 MA swm 98

2.5 Individual correlations

Year-by-year task plot

subs_nona <- subs %>%
  filter(!is.na(ravens) & !is.na(gonogo) & !is.na(swm))

subs_wide <- subs_nona %>% 
  gather(task, score, ravens, gonogo, swm) %>%
  mutate(year = str_c("y", as.character(year))) %>%
  spread(year, score)

ggplot(aes(x = y2015, y = y2016, col = group), data = subs_wide) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
  facet_wrap(~task, scales = "free")
## Warning: Removed 111 rows containing non-finite values (stat_smooth).
## Warning: Removed 111 rows containing missing values (geom_point).

Correlations within task.

with(filter(subs_wide, task == "gonogo"), cor.test(y2015, y2016))
## 
##  Pearson's product-moment correlation
## 
## data:  y2015 and y2016
## t = 7.447, df = 151, p-value = 6.803e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3918282 0.6255178
## sample estimates:
##       cor 
## 0.5182823
with(filter(subs_wide, task == "swm"), cor.test(y2015, y2016))
## 
##  Pearson's product-moment correlation
## 
## data:  y2015 and y2016
## t = 4.9263, df = 151, p-value = 2.178e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2268234 0.5011935
## sample estimates:
##       cor 
## 0.3721086
with(filter(subs_wide, task == "ravens"), cor.test(y2015, y2016))
## 
##  Pearson's product-moment correlation
## 
## data:  y2015 and y2016
## t = 5.0758, df = 151, p-value = 1.12e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2374858 0.5095848
## sample estimates:
##       cor 
## 0.3817771

Correlations across tasks for 2015.

kable(cor(subs_nona %>% 
            filter(year == "2015") %>%
            ungroup %>% 
            select(ravens, gonogo, swm)), digits = 2)
ravens gonogo swm
ravens 1.00 0.26 0.19
gonogo 0.26 1.00 0.41
swm 0.19 0.41 1.00

Correlations across tasks for 2016.

kable(cor(subs_nona %>% 
            filter(year == "2016") %>%
            ungroup %>% 
            select(ravens, gonogo, swm)), digits = 2)
ravens gonogo swm
ravens 1.00 0.24 0.26
gonogo 0.24 1.00 0.30
swm 0.26 0.30 1.00

3 Math tasks

Read in data.

pv <- read.csv("data/2015/2015_PVNumerals.csv") %>%
  select(subnum, pvAvg) %>%
  rename(subid = subnum)
wg <- read.csv("data/2015/2015_WGArith.csv") %>%
  select(subnum, arithmeticTotal, arithmeticAverage) %>%
  rename(subid = subnum)
wj <- read.csv("data/2015/2015_WOODCOCK.csv") %>%
  select(subnum, woodcockTotal) %>%
  rename(subid = subnum)

Merge with sub data.

d <- left_join(subs, pv) %>% 
  left_join(wg) %>% 
  left_join(wj)
## Joining by: "subid"
## Warning in left_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector
## Joining by: "subid"
## Warning in left_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector
## Joining by: "subid"
## Warning in left_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector

Now plot each task individually.

3.1 Place value

pv.summary <- d %>% 
  group_by(subid, grade) %>%
    summarise(correct = sum(pvAvg, na.rm=TRUE))

ms <- pv.summary %>%
  group_by(grade) %>%
  summarise(correct = mean(correct))
  
pv.summary %>%
  ggplot(aes(x=correct)) + 
  geom_histogram(binwidth = .1) +
  facet_grid(.~grade) +
  geom_vline(data = ms, aes(xintercept = correct), 
             col = "red", lty = 2) + 
  ggtitle("Place Value Accuracy") + 
  xlab("Proportion correct")

3.2 Arithmetic

wg.summary <- d %>% 
  group_by(subid, grade) %>%
    summarise(correct = sum(arithmeticTotal, na.rm=TRUE))

ms <- wg.summary %>%
  group_by(grade) %>%
  summarise(correct = mean(correct))
  
wg.summary %>%
  ggplot(aes(x=correct)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(.~grade) +
  geom_vline(data = ms, aes(xintercept = correct), 
             col = "red", lty = 2) + 
  ggtitle("Arithmetic Accuracy") + 
  xlab("Total Problems Correct")

3.3 Woodcock-Johnson III

wj.summary <- d %>% 
  group_by(subid, grade) %>%
    summarise(correct = sum(woodcockTotal, na.rm=TRUE))

ms <- wj.summary %>%
  group_by(grade) %>%
  summarise(correct = mean(correct))
  
wj.summary %>%
  ggplot(aes(x=correct)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(.~grade) +
  geom_vline(data = ms, aes(xintercept = correct), 
             col = "red", lty = 2) + 
  ggtitle("Woodcock-Johnson III Accuracy") + 
  xlab("Total Problems Correct")

3.4 Relations between math measures

Correlations.

kable(cor(d %>% ungroup %>% 
            select(woodcockTotal, arithmeticTotal, pvAvg)), digits = 2)
woodcockTotal arithmeticTotal pvAvg
woodcockTotal 1 NA NA
arithmeticTotal NA 1 NA
pvAvg NA NA 1
cor.test(d$woodcockTotal, d$arithmeticTotal)
## 
##  Pearson's product-moment correlation
## 
## data:  d$woodcockTotal and d$arithmeticTotal
## t = 19.898, df = 345, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6779046 0.7765320
## sample estimates:
##       cor 
## 0.7310136
cor.test(d$woodcockTotal, d$pvAvg)
## 
##  Pearson's product-moment correlation
## 
## data:  d$woodcockTotal and d$pvAvg
## t = 14.694, df = 345, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5511568 0.6812215
## sample estimates:
##       cor 
## 0.6204371
cor.test(d$arithmeticTotal, d$pvAvg)
## 
##  Pearson's product-moment correlation
## 
## data:  d$arithmeticTotal and d$pvAvg
## t = 16.118, df = 345, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5908825 0.7115761
## sample estimates:
##       cor 
## 0.6553933

4 Relations between all measures

kable(cor(d %>% ungroup %>% 
            select(ravens, gonogo, swm, 
                   woodcockTotal, arithmeticTotal, pvAvg)), digits = 2)
ravens gonogo swm woodcockTotal arithmeticTotal pvAvg
ravens 1 NA NA NA NA NA
gonogo NA 1 NA NA NA NA
swm NA NA 1 NA NA NA
woodcockTotal NA NA NA 1 NA NA
arithmeticTotal NA NA NA NA 1 NA
pvAvg NA NA NA NA NA 1
cor.test(d$ravens, d$woodcockTotal)
## 
##  Pearson's product-moment correlation
## 
## data:  d$ravens and d$woodcockTotal
## t = 5.4527, df = 345, p-value = 9.473e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1817842 0.3758135
## sample estimates:
##       cor 
## 0.2816759
cor.test(d$ravens, d$arithmeticTotal)
## 
##  Pearson's product-moment correlation
## 
## data:  d$ravens and d$arithmeticTotal
## t = 5.1172, df = 345, p-value = 5.159e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1649351 0.3607988
## sample estimates:
##       cor 
## 0.2656055
cor.test(d$ravens, d$pvAvg)
## 
##  Pearson's product-moment correlation
## 
## data:  d$ravens and d$pvAvg
## t = 6.3208, df = 345, p-value = 8.042e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2244916 0.4134202
## sample estimates:
##       cor 
## 0.3221599
cor.test(d$swm, d$woodcockTotal)
## 
##  Pearson's product-moment correlation
## 
## data:  d$swm and d$woodcockTotal
## t = 4.8337, df = 341, p-value = 2.031e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1513904 0.3497431
## sample estimates:
##       cor 
## 0.2532262
cor.test(d$swm, d$arithmeticTotal)
## 
##  Pearson's product-moment correlation
## 
## data:  d$swm and d$arithmeticTotal
## t = 6.6645, df = 341, p-value = 1.069e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2422835 0.4299102
## sample estimates:
##       cor 
## 0.3394693
cor.test(d$swm, d$pvAvg)
## 
##  Pearson's product-moment correlation
## 
## data:  d$swm and d$pvAvg
## t = 5.6865, df = 341, p-value = 2.789e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1944707 0.3881052
## sample estimates:
##       cor 
## 0.2943053
cor.test(d$gonogo, d$woodcockTotal)
## 
##  Pearson's product-moment correlation
## 
## data:  d$gonogo and d$woodcockTotal
## t = 5.8864, df = 343, p-value = 9.398e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2038381 0.3958340
## sample estimates:
##       cor 
## 0.3029064
cor.test(d$gonogo, d$arithmeticTotal)
## 
##  Pearson's product-moment correlation
## 
## data:  d$gonogo and d$arithmeticTotal
## t = 6.309, df = 343, p-value = 8.668e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2245123 0.4139497
## sample estimates:
##      cor 
## 0.322456
cor.test(d$gonogo, d$pvAvg)
## 
##  Pearson's product-moment correlation
## 
## data:  d$gonogo and d$pvAvg
## t = 4.813, df = 343, p-value = 2.232e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1499180 0.3478731
## sample estimates:
##       cor 
## 0.2515241