1 Runsheets and exclusions
2 Cognitive tasks
3 Math tasks
4 Relations between all measures

library(readr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(langcog)

## 
## Attaching package: 'langcog'

## The following object is masked from 'package:base':
## 
##     scale

library(magrittr)
library(stringr)
library(tidyr)

## 
## Attaching package: 'tidyr'

## The following object is masked from 'package:magrittr':
## 
##     extract

library(googlesheets)


rm(list=ls())
theme_set(theme_mikabr() +
            theme(panel.grid = element_blank(),
                  strip.background = element_blank()))
font <- "Open Sans"

1 Runsheets and exclusions

runsheet <- read_csv("data/runsheet_6-20-16.csv") %>%
  select(SID, Homeroom, Consent)

classes <- read_csv("data/classrooms_6-20-16.csv") %>%
  select(Exclude, Homeroom, Group)

kids_raw <- left_join(runsheet, classes) %>%
  mutate(Consent = ifelse(is.na(Consent), 0, Consent))

## Joining by: "Homeroom"

kids <- kids_raw %>%
  filter(Consent == 1, Exclude == 0) 

groups <- kids %>% 
  select(SID, Group, Homeroom) %>%
  rename(roster_subid = SID,
         room = Homeroom,
         group = Group)

Raw numbers.

kable(kids_raw %>% group_by(Group, Consent) %>% summarise(n = n()))

Group	Consent	n
CNTL	0	92
CNTL	1	100
EXCLUDED	0	6
MA	0	93
MA	1	116
NA	0	3
NA	1	1

By consent proportion for included rooms.

kable(kids_raw %>% 
        group_by(Group) %>% 
        filter(!is.na(Group), Group != "EXCLUDED") %>%
        summarise(consent = sum(Consent == 1), 
                  total = n(), 
                  prop = mean(Consent == 1)), 
      digits = 2)

Group	consent	total	prop
CNTL	100	192	0.52
MA	116	209	0.56

By room.

kable(kids_raw %>% 
        group_by(Group, Homeroom) %>% 
        filter(!is.na(Group), Group != "EXCLUDED") %>%
        summarise(consent = sum(Consent == 1), 
                  total = n(), 
                  prop = mean(Consent == 1)), 
      digits = 2)

Group	Homeroom	consent	total	prop
CNTL	BURKE/HALL	6	19	0.32
CNTL	GUTIERREZ	11	24	0.46
CNTL	HEREDIA	2	3	0.67
CNTL	HUNTER/PIERIES	10	20	0.50
CNTL	KANE	11	22	0.50
CNTL	MARGULIES	9	21	0.43
CNTL	MEJIA/BRADBURY	13	22	0.59
CNTL	PAZYMINO	12	16	0.75
CNTL	ROMANO	4	9	0.44
CNTL	ROSA	9	17	0.53
CNTL	ZORRILLA	13	19	0.68
MA	CAPILLI	7	16	0.44
MA	CHUN	8	19	0.42
MA	CSASZAR	11	19	0.58
MA	D’IZZIA/JANO	11	21	0.52
MA	GIGANTI/CHAMBERS	4	8	0.50
MA	MATA	9	17	0.53
MA	MEEKS	8	20	0.40
MA	MITTMAN	4	10	0.40
MA	PERRY/KEENAN	15	20	0.75
MA	RENTAS	17	20	0.85
MA	TRIANO	8	18	0.44
MA	WOODING	14	21	0.67

Summary, we got around 50% consent rate; consent rate didn’t differ across groups.

2 Cognitive tasks

Read data. Note that there is one sitting file for the battery and one for each of the sub-tests, in case the battery crashed and failed to save data for that participant.

sittings.raw <- bind_rows(
  read_csv("data/2016/b301_06.20.16_17.00.40_sittings.csv"),
  read_csv("data/2016/b299_06.20.16_17.03.46_sittings.csv"),
  read_csv("data/2016/b300_06.20.16_17.03.52_sittings.csv"),
  read_csv("data/2016/b302_06.20.16_17.03.58_sittings.csv"))

sittings <- sittings.raw %>% 
  mutate(subid = str_trim(toupper(misc), side = "both"),
         sitting_id = id) %>%
  filter(str_detect(subid, "[Ss][12]-")) %>%
  mutate(grade = ifelse(str_detect(subid, "[Ss][1]"), 
                        "first grade", 
                        "second grade"), 
         year = factor(str_sub(start_time, 1, 4))) %>%
  select(subid, sitting_id, year, grade, battery_id, test_order, num_completed)

Each task has the every-battery and the one-sitting version.

gonogo.raw <- bind_rows(
  read_csv("data/2016/b299_t463_06.20.16_17.06.05.csv"),
  read_csv("data/2016/b301_t463_06.20.16_17.06.16.csv"))

ravens.raw <- bind_rows(
  read_csv("data/2016/b300_t464_06.20.16_17.06.12.csv"),
  read_csv("data/2016/b301_t464_06.20.16_17.06.18.csv"))

swm.raw <- bind_rows(
  read_csv("data/2016/b302_t467_06.20.16_17.06.27.csv"),
  read_csv("data/2016/b301_t467_06.20.16_17.06.19.csv"))

Merge in demographics.

First, find the subjects whose IDs were entered incorrectly and output.

anti_join(sittings, groups %>% rename(subid = roster_subid)) %>%
  select(subid, battery_id) %>%
  write_csv("data/missing_sittings.csv")

## Joining by: "subid"

Now read in the conversion table. (Done by hand by checking the runsheet for typos, notes etc, on the basis of missing_sittings.csv).

missing_sittings <- read_csv("data/missing_sittings_key.csv")

sittings <- left_join(sittings, missing_sittings) %>%
  mutate(exclude = ifelse(is.na(exclude), 0, exclude)) %>%
  filter(exclude != 1) %>%
  mutate(roster_subid = ifelse(is.na(new_subid), subid, new_subid)) %>%
  left_join(groups)

## Joining by: c("subid", "battery_id")

## Joining by: "roster_subid"

Computer testing numbers. Note that battery 301 is the combined battery.

sittings %>% 
  group_by(grade, year, battery_id) %>%
  summarise(n = n())

## Source: local data frame [14 x 4]
## Groups: grade, year [?]
## 
##           grade   year battery_id     n
##           (chr) (fctr)      (int) (int)
## 1   first grade   2015        299     2
## 2   first grade   2015        300    10
## 3   first grade   2015        301    70
## 4   first grade   2015        302    10
## 5   first grade   2016        300     2
## 6   first grade   2016        301    72
## 7   first grade   2016        302     2
## 8  second grade   2015        299     1
## 9  second grade   2015        300     9
## 10 second grade   2015        301   111
## 11 second grade   2015        302    10
## 12 second grade   2016        300     4
## 13 second grade   2016        301   106
## 14 second grade   2016        302     4

2.1 Go / No-Go Task

gonogo <- gonogo.raw %>%
  filter(sitting_id %in% sittings$sitting_id) %>%
  left_join(sittings, by = "sitting_id") %>%
  select(subid, year, grade, trial, stim, correct, rt, accuracy, group,
         responseAssign, rtCall)

Accuracy.

gonogo.summary <- gonogo %>%
  group_by(subid, grade, group, year) %>%
  summarise(accuracy = mean(accuracy, na.rm=TRUE))
  
ms <- gonogo.summary %>%
  group_by(grade, group, year) %>%
  summarise(accuracy = mean(accuracy))

gonogo.summary %>%
  ggplot(aes(x=accuracy, fill = group)) + 
  geom_histogram(binwidth = .05) +
  facet_grid(grade ~ year) + 
  geom_vline(xintercept = .5, lty = 3) +
  xlim(c(0,1)) + 
  geom_vline(data = ms, aes(xintercept = accuracy, col = group), 
             lty = 2)

RT.

ms <- gonogo %>%
  filter(rt > 0 & accuracy == 1) %>%
  group_by(subid, grade, group, year) %>%
  summarise(rt = mean(rt, na.rm=TRUE)) %>%
  group_by(grade, group) %>%
  summarise(rt = mean(rt))

gonogo %>%
  filter(rt > 0 & accuracy == 1) %>%
  group_by(subid, grade, group, year) %>%
  summarise(rt = mean(rt, na.rm=TRUE)) %>%
  ggplot(aes(x=rt, fill = group)) + 
  geom_histogram(binwidth = 50) +
  facet_grid(grade ~ year) + 
  geom_vline(data = ms, aes(xintercept = rt, col = group), 
             lty = 2)

2.2 Spatial WM

Data processing.

swm <- swm.raw %>%
  filter(sitting_id %in% sittings$sitting_id,
         type == "test") %>%
  left_join(sittings, by = "sitting_id") %>%
  filter(type == "test") %>%
  select(subid, grade, group, year, trial, capacity, correct)

Mean level (same measure as in Zenith study).

swm.summary <- swm %>%
  group_by(subid, grade, group, year) %>%
  summarise(capacity = mean(capacity, na.rm=TRUE))
  
ms <- swm.summary %>%
  group_by(grade, group, year) %>%
  summarise(capacity = mean(capacity))
  
swm.summary %>%
  ggplot(aes(x=capacity, fill = group)) + 
  geom_histogram(binwidth = .5) +
  facet_grid(grade ~ year) +
  xlim(c(1,8)) +
  geom_vline(data = ms, aes(xintercept = capacity, col = group), 
             lty = 2)

## Warning: Removed 2 rows containing non-finite values (stat_bin).

2.3 Raven’s

Actually a matrix reasoning equivalent.

ravens <- ravens.raw %>%
  filter(sitting_id %in% sittings$sitting_id,
         type == "test") %>%
  left_join(sittings, by = "sitting_id") %>%
  filter(type == "test") %>%
  select(subid, grade, group, year, trial, correct)

Mean level (same measure as in Zenith study).

ravens.summary <- ravens %>% 
  group_by(subid, grade, group, year) %>%
    summarise(correct = sum(correct, na.rm=TRUE))

ms <- ravens.summary %>%
  group_by(grade, group, year) %>%
  summarise(correct = mean(correct))
  
ravens.summary %>%
  ggplot(aes(x=correct, fill = group)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(grade ~ year) +
  geom_vline(data = ms, aes(xintercept = correct, col = group), 
             lty = 2)

2.4 Summary plot

subs <- ravens.summary %>% 
  left_join(gonogo.summary) %>%
  left_join(swm.summary) %>%
  rename(ravens = correct, 
         gonogo = accuracy,
         swm = capacity)

## Joining by: c("subid", "grade", "group", "year")
## Joining by: c("subid", "grade", "group", "year")

ms <- subs %>%
  gather(task, score, ravens, gonogo, swm) %>%
  group_by(year, group, task) %>%
  multi_boot_standard("score", na.rm=TRUE)

## Joining by: c("year", "group", "task")

pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
  geom_line(position = pos) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = pos) + 
  facet_wrap(~task, scales = "free_y") + 
  scale_colour_solarized()

And final numbers on N data points per task.

kable(subs %>%
  gather(task, score, ravens, gonogo, swm) %>%
  group_by(year, group, task) %>%
  summarise(n=n()))

year	group	task	n
2015	CNTL	gonogo	82
2015	CNTL	ravens	82
2015	CNTL	swm	82
2015	MA	gonogo	92
2015	MA	ravens	92
2015	MA	swm	92
2016	CNTL	gonogo	77
2016	CNTL	ravens	77
2016	CNTL	swm	77
2016	MA	gonogo	98
2016	MA	ravens	98
2016	MA	swm	98

2.5 Individual correlations

Year-by-year task plot

subs_nona <- subs %>%
  filter(!is.na(ravens) & !is.na(gonogo) & !is.na(swm))

subs_wide <- subs_nona %>% 
  gather(task, score, ravens, gonogo, swm) %>%
  mutate(year = str_c("y", as.character(year))) %>%
  spread(year, score)

ggplot(aes(x = y2015, y = y2016, col = group), data = subs_wide) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
  facet_wrap(~task, scales = "free")

## Warning: Removed 111 rows containing non-finite values (stat_smooth).

## Warning: Removed 111 rows containing missing values (geom_point).

Correlations within task.

with(filter(subs_wide, task == "gonogo"), cor.test(y2015, y2016))

## 
##  Pearson's product-moment correlation
## 
## data:  y2015 and y2016
## t = 7.447, df = 151, p-value = 6.803e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3918282 0.6255178
## sample estimates:
##       cor 
## 0.5182823

with(filter(subs_wide, task == "swm"), cor.test(y2015, y2016))

## 
##  Pearson's product-moment correlation
## 
## data:  y2015 and y2016
## t = 4.9263, df = 151, p-value = 2.178e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2268234 0.5011935
## sample estimates:
##       cor 
## 0.3721086

with(filter(subs_wide, task == "ravens"), cor.test(y2015, y2016))

## 
##  Pearson's product-moment correlation
## 
## data:  y2015 and y2016
## t = 5.0758, df = 151, p-value = 1.12e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2374858 0.5095848
## sample estimates:
##       cor 
## 0.3817771

Correlations across tasks for 2015.

kable(cor(subs_nona %>% 
            filter(year == "2015") %>%
            ungroup %>% 
            select(ravens, gonogo, swm)), digits = 2)

	ravens	gonogo	swm
ravens	1.00	0.26	0.19
gonogo	0.26	1.00	0.41
swm	0.19	0.41	1.00

Correlations across tasks for 2016.

kable(cor(subs_nona %>% 
            filter(year == "2016") %>%
            ungroup %>% 
            select(ravens, gonogo, swm)), digits = 2)

	ravens	gonogo	swm
ravens	1.00	0.24	0.26
gonogo	0.24	1.00	0.30
swm	0.26	0.30	1.00

3 Math tasks

Read in data.

pv <- read.csv("data/2015/2015_PVNumerals.csv") %>%
  select(subnum, pvAvg) %>%
  rename(subid = subnum)
wg <- read.csv("data/2015/2015_WGArith.csv") %>%
  select(subnum, arithmeticTotal, arithmeticAverage) %>%
  rename(subid = subnum)
wj <- read.csv("data/2015/2015_WOODCOCK.csv") %>%
  select(subnum, woodcockTotal) %>%
  rename(subid = subnum)

Merge with sub data.

d <- left_join(subs, pv) %>% 
  left_join(wg) %>% 
  left_join(wj)

## Joining by: "subid"

## Warning in left_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector

## Joining by: "subid"

## Warning in left_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector

## Joining by: "subid"

## Warning in left_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector

Now plot each task individually.

3.1 Place value

pv.summary <- d %>% 
  group_by(subid, grade) %>%
    summarise(correct = sum(pvAvg, na.rm=TRUE))

ms <- pv.summary %>%
  group_by(grade) %>%
  summarise(correct = mean(correct))
  
pv.summary %>%
  ggplot(aes(x=correct)) + 
  geom_histogram(binwidth = .1) +
  facet_grid(.~grade) +
  geom_vline(data = ms, aes(xintercept = correct), 
             col = "red", lty = 2) + 
  ggtitle("Place Value Accuracy") + 
  xlab("Proportion correct")

3.2 Arithmetic

wg.summary <- d %>% 
  group_by(subid, grade) %>%
    summarise(correct = sum(arithmeticTotal, na.rm=TRUE))

ms <- wg.summary %>%
  group_by(grade) %>%
  summarise(correct = mean(correct))
  
wg.summary %>%
  ggplot(aes(x=correct)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(.~grade) +
  geom_vline(data = ms, aes(xintercept = correct), 
             col = "red", lty = 2) + 
  ggtitle("Arithmetic Accuracy") + 
  xlab("Total Problems Correct")

3.3 Woodcock-Johnson III

wj.summary <- d %>% 
  group_by(subid, grade) %>%
    summarise(correct = sum(woodcockTotal, na.rm=TRUE))

ms <- wj.summary %>%
  group_by(grade) %>%
  summarise(correct = mean(correct))
  
wj.summary %>%
  ggplot(aes(x=correct)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(.~grade) +
  geom_vline(data = ms, aes(xintercept = correct), 
             col = "red", lty = 2) + 
  ggtitle("Woodcock-Johnson III Accuracy") + 
  xlab("Total Problems Correct")

3.4 Relations between math measures

Correlations.

kable(cor(d %>% ungroup %>% 
            select(woodcockTotal, arithmeticTotal, pvAvg)), digits = 2)

	woodcockTotal	arithmeticTotal	pvAvg
woodcockTotal	1	NA	NA
arithmeticTotal	NA	1	NA
pvAvg	NA	NA	1

cor.test(d$woodcockTotal, d$arithmeticTotal)

## 
##  Pearson's product-moment correlation
## 
## data:  d$woodcockTotal and d$arithmeticTotal
## t = 19.898, df = 345, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6779046 0.7765320
## sample estimates:
##       cor 
## 0.7310136

cor.test(d$woodcockTotal, d$pvAvg)

## 
##  Pearson's product-moment correlation
## 
## data:  d$woodcockTotal and d$pvAvg
## t = 14.694, df = 345, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5511568 0.6812215
## sample estimates:
##       cor 
## 0.6204371

cor.test(d$arithmeticTotal, d$pvAvg)

## 
##  Pearson's product-moment correlation
## 
## data:  d$arithmeticTotal and d$pvAvg
## t = 16.118, df = 345, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5908825 0.7115761
## sample estimates:
##       cor 
## 0.6553933

4 Relations between all measures

kable(cor(d %>% ungroup %>% 
            select(ravens, gonogo, swm, 
                   woodcockTotal, arithmeticTotal, pvAvg)), digits = 2)

	ravens	gonogo	swm	woodcockTotal	arithmeticTotal	pvAvg
ravens	1	NA	NA	NA	NA	NA
gonogo	NA	1	NA	NA	NA	NA
swm	NA	NA	1	NA	NA	NA
woodcockTotal	NA	NA	NA	1	NA	NA
arithmeticTotal	NA	NA	NA	NA	1	NA
pvAvg	NA	NA	NA	NA	NA	1

cor.test(d$ravens, d$woodcockTotal)

## 
##  Pearson's product-moment correlation
## 
## data:  d$ravens and d$woodcockTotal
## t = 5.4527, df = 345, p-value = 9.473e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1817842 0.3758135
## sample estimates:
##       cor 
## 0.2816759

cor.test(d$ravens, d$arithmeticTotal)

## 
##  Pearson's product-moment correlation
## 
## data:  d$ravens and d$arithmeticTotal
## t = 5.1172, df = 345, p-value = 5.159e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1649351 0.3607988
## sample estimates:
##       cor 
## 0.2656055

cor.test(d$ravens, d$pvAvg)

## 
##  Pearson's product-moment correlation
## 
## data:  d$ravens and d$pvAvg
## t = 6.3208, df = 345, p-value = 8.042e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2244916 0.4134202
## sample estimates:
##       cor 
## 0.3221599

cor.test(d$swm, d$woodcockTotal)

## 
##  Pearson's product-moment correlation
## 
## data:  d$swm and d$woodcockTotal
## t = 4.8337, df = 341, p-value = 2.031e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1513904 0.3497431
## sample estimates:
##       cor 
## 0.2532262

cor.test(d$swm, d$arithmeticTotal)

## 
##  Pearson's product-moment correlation
## 
## data:  d$swm and d$arithmeticTotal
## t = 6.6645, df = 341, p-value = 1.069e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2422835 0.4299102
## sample estimates:
##       cor 
## 0.3394693

cor.test(d$swm, d$pvAvg)

## 
##  Pearson's product-moment correlation
## 
## data:  d$swm and d$pvAvg
## t = 5.6865, df = 341, p-value = 2.789e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1944707 0.3881052
## sample estimates:
##       cor 
## 0.2943053

cor.test(d$gonogo, d$woodcockTotal)

## 
##  Pearson's product-moment correlation
## 
## data:  d$gonogo and d$woodcockTotal
## t = 5.8864, df = 343, p-value = 9.398e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2038381 0.3958340
## sample estimates:
##       cor 
## 0.3029064

cor.test(d$gonogo, d$arithmeticTotal)

## 
##  Pearson's product-moment correlation
## 
## data:  d$gonogo and d$arithmeticTotal
## t = 6.309, df = 343, p-value = 8.668e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2245123 0.4139497
## sample estimates:
##      cor 
## 0.322456

cor.test(d$gonogo, d$pvAvg)

## 
##  Pearson's product-moment correlation
## 
## data:  d$gonogo and d$pvAvg
## t = 4.813, df = 343, p-value = 2.232e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1499180 0.3478731
## sample estimates:
##       cor 
## 0.2515241

MAJIC Study Analyses

Mike Frank

June 20, 2016