library(readr)
library(dplyr)
library(ggplot2)
library(langcog)
library(magrittr)
library(stringr)
library(tidyr)
library(lme4)


rm(list=ls())
theme_set(theme_mikabr() +
            theme(panel.grid = element_blank(),
                  strip.background = element_blank()))
font <- "Open Sans"

1 Runsheets and exclusions

runsheet <- read_csv("data/metadata/runsheet_6-20-16.csv") %>%
  select(SID, Homeroom, Consent)

classes <- read_csv("data/metadata/classrooms_6-20-16.csv") %>%
  select(Exclude, Homeroom, Group)

kids_raw <- left_join(runsheet, classes) %>%
  mutate(Consent = ifelse(is.na(Consent), 0, Consent))

## Joining, by = "Homeroom"

kids <- kids_raw %>%
  filter(Consent == 1, Exclude == 0) 

groups <- kids %>% 
  select(SID, Group, Homeroom) %>%
  rename(roster_subid = SID,
         room = Homeroom,
         group = Group)

Raw numbers.

kable(kids_raw %>% group_by(Group, Consent) %>% summarise(n = n()))

Group	Consent	n
CNTL	0	92
CNTL	1	100
EXCLUDED	0	6
MA	0	93
MA	1	116
NA	0	3
NA	1	1

By consent proportion for included rooms.

consent <- kids_raw %>% 
        group_by(Group) %>% 
        filter(!is.na(Group), Group != "EXCLUDED") %>%
        summarise(consent = sum(Consent == 1), 
                  total = n(), 
                  prop = mean(Consent == 1))

kable(consent, 
      digits = 2)

Group	consent	total	prop
CNTL	100	192	0.52
MA	116	209	0.56

By grade and room.

consent <- kids_raw %>% 
  mutate(grade = ifelse(str_detect(SID, "S2"), "2", "1")) %>%
  group_by(Group, grade) %>% 
  filter(!is.na(Group), Group != "EXCLUDED") %>%
  summarise(consent = sum(Consent == 1), 
            total = n(), 
            prop = mean(Consent == 1))

kable(consent, 
      digits = 2)

Group	grade	consent	total	prop
CNTL	1	49	103	0.48
CNTL	2	51	89	0.57
MA	1	46	95	0.48
MA	2	70	114	0.61

By room.

kable(kids_raw %>% 
        group_by(Group, Homeroom) %>% 
        filter(!is.na(Group), Group != "EXCLUDED") %>%
        summarise(consent = sum(Consent == 1), 
                  total = n(), 
                  prop = mean(Consent == 1)), 
      digits = 2)

Group	Homeroom	consent	total	prop
CNTL	BURKE/HALL	6	19	0.32
CNTL	GUTIERREZ	11	24	0.46
CNTL	HEREDIA	2	3	0.67
CNTL	HUNTER/PIERIES	10	20	0.50
CNTL	KANE	11	22	0.50
CNTL	MARGULIES	9	21	0.43
CNTL	MEJIA/BRADBURY	13	22	0.59
CNTL	PAZYMINO	12	16	0.75
CNTL	ROMANO	4	9	0.44
CNTL	ROSA	9	17	0.53
CNTL	ZORRILLA	13	19	0.68
MA	CAPILLI	7	16	0.44
MA	CHUN	8	19	0.42
MA	CSASZAR	11	19	0.58
MA	D’IZZIA/JANO	11	21	0.52
MA	GIGANTI/CHAMBERS	4	8	0.50
MA	MATA	9	17	0.53
MA	MEEKS	8	20	0.40
MA	MITTMAN	4	10	0.40
MA	PERRY/KEENAN	15	20	0.75
MA	RENTAS	17	20	0.85
MA	TRIANO	8	18	0.44
MA	WOODING	14	21	0.67

Summary, we got around 50% consent rate; consent rate didn’t differ across groups.

2 Cognitive tasks

Read data. Note that there is one sitting file for the battery and one for each of the sub-tests, in case the battery crashed and failed to save data for that participant.

sittings.raw <- bind_rows(
  read_csv("data/computer/b301_06.20.16_17.00.40_sittings.csv"),
  read_csv("data/computer/b299_06.20.16_17.03.46_sittings.csv"),
  read_csv("data/computer/b300_06.20.16_17.03.52_sittings.csv"),
  read_csv("data/computer/b302_06.20.16_17.03.58_sittings.csv"))

sittings <- sittings.raw %>% 
  mutate(subid = str_trim(toupper(misc), side = "both"),
         sitting_id = id) %>%
  filter(str_detect(subid, "[Ss][12]-")) %>%
  mutate(grade = ifelse(str_detect(subid, "[Ss][1]"), 
                        "first grade", 
                        "second grade"), 
         year = factor(str_sub(start_time, 1, 4))) %>%
  select(subid, sitting_id, year, grade, battery_id, test_order, num_completed)

Each task has the every-battery and the one-sitting version.

gonogo.raw <- bind_rows(
  read_csv("data/computer/b299_t463_06.20.16_17.06.05.csv"),
  read_csv("data/computer/b301_t463_06.20.16_17.06.16.csv"))

ravens.raw <- bind_rows(
  read_csv("data/computer/b300_t464_06.20.16_17.06.12.csv"),
  read_csv("data/computer/b301_t464_06.20.16_17.06.18.csv"))

swm.raw <- bind_rows(
  read_csv("data/computer/b302_t467_06.20.16_17.06.27.csv"),
  read_csv("data/computer/b301_t467_06.20.16_17.06.19.csv"))

Merge in demographics.

First, find the subjects whose IDs were entered incorrectly and output.

anti_join(sittings, groups %>% rename(subid = roster_subid)) %>%
  select(subid, battery_id) %>%
  write_csv("data/metadata/missing_sittings.csv")

## Joining, by = "subid"

Now read in the conversion table. (Done by hand by checking the runsheet for typos, notes etc, on the basis of missing_sittings.csv).

missing_sittings <- read_csv("data/metadata/missing_sittings_key.csv")

sittings <- left_join(sittings, missing_sittings) %>%
  mutate(exclude = ifelse(is.na(exclude), 0, exclude)) %>%
  filter(exclude != 1) %>%
  mutate(roster_subid = ifelse(is.na(new_subid), subid, new_subid)) %>%
  left_join(groups)

## Joining, by = c("subid", "battery_id")

## Joining, by = "roster_subid"

Computer testing numbers. Note that battery 301 is the combined battery.

sittings %>% 
  group_by(grade, year, battery_id) %>%
  summarise(n = n())

## Source: local data frame [14 x 4]
## Groups: grade, year [?]
## 
##           grade   year battery_id     n
##           <chr> <fctr>      <int> <int>
## 1   first grade   2015        299     2
## 2   first grade   2015        300    10
## 3   first grade   2015        301    70
## 4   first grade   2015        302    10
## 5   first grade   2016        300     2
## 6   first grade   2016        301    72
## 7   first grade   2016        302     2
## 8  second grade   2015        299     1
## 9  second grade   2015        300     9
## 10 second grade   2015        301   111
## 11 second grade   2015        302    10
## 12 second grade   2016        300     4
## 13 second grade   2016        301   106
## 14 second grade   2016        302     4

2.1 Go / No-Go Task

gonogo <- gonogo.raw %>%
  filter(sitting_id %in% sittings$sitting_id) %>%
  left_join(sittings, by = "sitting_id") %>%
  mutate(subid = roster_subid) %>%
  select(subid, year, grade, trial, stim, correct, rt, accuracy, group,
         responseAssign, rtCall)

Accuracy.

gonogo.summary <- gonogo %>%
  group_by(subid, grade, group, year) %>%
  summarise(accuracy = mean(accuracy, na.rm=TRUE))
  
ms <- gonogo.summary %>%
  group_by(grade, group, year) %>%
  summarise(accuracy = mean(accuracy))

gonogo.summary %>%
  ggplot(aes(x=accuracy, fill = group)) + 
  geom_histogram(binwidth = .05) +
  facet_grid(grade ~ year) + 
  geom_vline(xintercept = .5, lty = 3) +
  xlim(c(0,1)) + 
  geom_vline(data = ms, aes(xintercept = accuracy, col = group), 
             lty = 2)

RT.

ms <- gonogo %>%
  filter(rt > 0 & accuracy == 1) %>%
  group_by(subid, grade, group, year) %>%
  summarise(rt = mean(rt, na.rm=TRUE)) %>%
  group_by(grade, group) %>%
  summarise(rt = mean(rt))

gonogo %>%
  filter(rt > 0 & accuracy == 1) %>%
  group_by(subid, grade, group, year) %>%
  summarise(rt = mean(rt, na.rm=TRUE)) %>%
  ggplot(aes(x=rt, fill = group)) + 
  geom_histogram(binwidth = 50) +
  facet_grid(grade ~ year) + 
  geom_vline(data = ms, aes(xintercept = rt, col = group), 
             lty = 2)

2.2 Spatial WM

Data processing.

swm <- swm.raw %>%
  filter(sitting_id %in% sittings$sitting_id,
         type == "test") %>%
  left_join(sittings, by = "sitting_id") %>%
  mutate(subid = roster_subid) %>%
  filter(type == "test") %>%
  select(subid, grade, group, year, trial, capacity, correct)

Mean level (same measure as in Zenith study).

swm.summary <- swm %>%
  group_by(subid, grade, group, year) %>%
  summarise(capacity = mean(capacity, na.rm=TRUE))
  
ms <- swm.summary %>%
  group_by(grade, group, year) %>%
  summarise(capacity = mean(capacity))
  
swm.summary %>%
  ggplot(aes(x=capacity, fill = group)) + 
  geom_histogram(binwidth = .5) +
  facet_grid(grade ~ year) +
  xlim(c(1,8)) +
  geom_vline(data = ms, aes(xintercept = capacity, col = group), 
             lty = 2)

## Warning: Removed 2 rows containing non-finite values (stat_bin).

2.3 Raven’s

Actually a matrix reasoning equivalent.

ravens <- ravens.raw %>%
  filter(sitting_id %in% sittings$sitting_id,
         type == "test") %>%
  left_join(sittings, by = "sitting_id") %>%
  mutate(subid = roster_subid) %>%
  filter(type == "test") %>%
  select(subid, grade, group, year, trial, correct)

Mean level (same measure as in Zenith study).

ravens.summary <- ravens %>% 
  group_by(subid, grade, group, year) %>%
    summarise(correct = sum(correct, na.rm=TRUE))

ms <- ravens.summary %>%
  group_by(grade, group, year) %>%
  summarise(correct = mean(correct))
  
ravens.summary %>%
  ggplot(aes(x=correct, fill = group)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(grade ~ year) +
  geom_vline(data = ms, aes(xintercept = correct, col = group), 
             lty = 2)

2.4 Change over time

subs <- ravens.summary %>% 
  left_join(gonogo.summary) %>%
  left_join(swm.summary) %>%
  rename(ravens = correct, 
         gonogo = accuracy,
         swm = capacity)

## Joining, by = c("subid", "grade", "group", "year")
## Joining, by = c("subid", "grade", "group", "year")

ms <- subs %>%
  gather(task, score, ravens, gonogo, swm) %>%
  group_by(year, group, task) %>%
  multi_boot_standard("score", na.rm=TRUE)

## Joining, by = c("year", "group", "task")

pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
  geom_line(position = pos) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = pos) + 
  facet_wrap(~task, scales = "free_y") + 
  scale_colour_solarized()

And by grade level.

ms <- subs %>%
  gather(task, score, ravens, gonogo, swm) %>%
  group_by(year, grade, group, task) %>%
  multi_boot_standard("score", na.rm=TRUE)

## Joining, by = c("year", "grade", "group", "task")

pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
  geom_line(position = pos) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = pos) + 
  facet_wrap(grade~task, scales = "free_y") + 
  scale_colour_solarized()

And final numbers on N data points per task.

kable(subs %>%
  gather(task, score, ravens, gonogo, swm) %>%
  group_by(year, group, task) %>%
  summarise(n=n()))

year	group	task	n
2015	CNTL	gonogo	82
2015	CNTL	ravens	82
2015	CNTL	swm	82
2015	MA	gonogo	92
2015	MA	ravens	92
2015	MA	swm	92
2016	CNTL	gonogo	77
2016	CNTL	ravens	77
2016	CNTL	swm	77
2016	MA	gonogo	98
2016	MA	ravens	98
2016	MA	swm	98

2.5 Individual correlations

Year-by-year task plot

subs_nona <- subs %>%
  filter(!is.na(ravens) & !is.na(gonogo) & !is.na(swm))

subs_wide <- subs_nona %>% 
  gather(task, score, ravens, gonogo, swm) %>%
  mutate(year = str_c("y", as.character(year))) %>%
  spread(year, score)

ggplot(aes(x = y2015, y = y2016, col = group), data = subs_wide) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
  facet_wrap(~task, scales = "free")

## Warning: Removed 96 rows containing non-finite values (stat_smooth).

## Warning: Removed 96 rows containing missing values (geom_point).

Correlations within task.

with(filter(subs_wide, task == "gonogo"), cor.test(y2015, y2016))

## 
##  Pearson's product-moment correlation
## 
## data:  y2015 and y2016
## t = 7.4019, df = 154, p-value = 8.168e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3862104 0.6195323
## sample estimates:
##       cor 
## 0.5122622

with(filter(subs_wide, task == "swm"), cor.test(y2015, y2016))

## 
##  Pearson's product-moment correlation
## 
## data:  y2015 and y2016
## t = 4.9743, df = 154, p-value = 1.731e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2282667 0.4999710
## sample estimates:
##       cor 
## 0.3720616

with(filter(subs_wide, task == "ravens"), cor.test(y2015, y2016))

## 
##  Pearson's product-moment correlation
## 
## data:  y2015 and y2016
## t = 4.994, df = 154, p-value = 1.586e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2296626 0.5010751
## sample estimates:
##       cor 
## 0.3733301

Correlations across tasks for 2015.

kable(cor(subs_nona %>% 
            filter(year == "2015") %>%
            ungroup %>% 
            select(ravens, gonogo, swm)), digits = 2)

	ravens	gonogo	swm
ravens	1.00	0.26	0.19
gonogo	0.26	1.00	0.41
swm	0.19	0.41	1.00

Correlations across tasks for 2016.

kable(cor(subs_nona %>% 
            filter(year == "2016") %>%
            ungroup %>% 
            select(ravens, gonogo, swm)), digits = 2)

	ravens	gonogo	swm
ravens	1.00	0.24	0.26
gonogo	0.24	1.00	0.30
swm	0.26	0.30	1.00

3 Math tasks

Read in data.

pv0 <- read.csv("data/paper/2015_PVNumerals.csv") %>%
  select(subnum, pvAvg) %>%
  mutate(year = "2015") %>%
  rename(subid = subnum)
wg0 <- read.csv("data/paper/2015_WGArith.csv") %>%
  select(subnum, arithmeticTotal, arithmeticAverage) %>%
  mutate(year = "2015") %>%
  rename(subid = subnum)
wj0 <- read.csv("data/paper/2015_WOODCOCK.csv") %>%
  select(subnum, woodcockTotal) %>%
  mutate(year = "2015") %>%
  rename(subid = subnum)
pv1 <- read.csv("data/paper/2016_PVNumerals.csv") %>%
  select(SubjectID, pvAvg) %>%
  mutate(year = "2016") %>%
  rename(subid = SubjectID)
wg1 <- read.csv("data/paper/2016_WGArith.csv") %>%
  select(SubjectID, arithmeticTotal, arithmeticAverage) %>%
  mutate(year = "2016") %>%
  rename(subid = SubjectID)
wj1 <- read.csv("data/paper/2016_WOODCOCK.csv") %>%
  select(SubjectID, woodcockTotal) %>%
  mutate(year = "2016") %>%
  rename(subid = SubjectID)

Merge with sub data.

d <- left_join(subs, bind_rows(pv0,pv1)) %>% 
  left_join(bind_rows(wg0,wg1)) %>% 
  left_join(bind_rows(wj0,wj1))

## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character

## Joining, by = c("subid", "year")

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): Unequal
## factor levels: coercing to character

## Joining, by = c("subid", "year")

## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character

## Joining, by = c("subid", "year")

Extract classes from subids.

d <- d %>%
  separate(subid,"-", into=c("grade_level","class","num"), remove=FALSE) %>%
  unite(class_num, grade_level, class) %>%
  mutate(class_num = factor(class_num)) %>%
  select(-num)

3.1 Individual tasks

Now plot each task individually.

3.1.1 Place value

pv.summary <- d %>% 
  group_by(subid, year, grade) %>%
    summarise(correct = sum(pvAvg, na.rm=TRUE))

ms <- pv.summary %>%
  group_by(year, grade) %>%
  summarise(correct = mean(correct))
  
pv.summary %>%
  ggplot(aes(x=correct)) + 
  geom_histogram(binwidth = .1) +
  facet_grid(year~grade) +
  geom_vline(data = ms, aes(xintercept = correct), 
             col = "red", lty = 2) + 
  ggtitle("Place Value Accuracy") + 
  xlab("Proportion correct")

3.1.2 Arithmetic

wg.summary <- d %>% 
  group_by(subid, year, grade) %>%
    summarise(correct = sum(arithmeticTotal, na.rm=TRUE))

ms <- wg.summary %>%
  group_by(year, grade) %>%
  summarise(correct = mean(correct))
  
wg.summary %>%
  ggplot(aes(x=correct)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(year~grade) +
  geom_vline(data = ms, aes(xintercept = correct), 
             col = "red", lty = 2) + 
  ggtitle("Arithmetic Accuracy") + 
  xlab("Total Problems Correct")

3.1.3 Woodcock-Johnson III

wj.summary <- d %>% 
  group_by(subid, year, grade) %>%
    summarise(correct = sum(woodcockTotal, na.rm=TRUE))

ms <- wj.summary %>%
  group_by(year, grade) %>%
  summarise(correct = mean(correct))
  
wj.summary %>%
  ggplot(aes(x=correct)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(year~grade) +
  geom_vline(data = ms, aes(xintercept = correct), 
             col = "red", lty = 2) + 
  ggtitle("Woodcock-Johnson III Accuracy") + 
  xlab("Total Problems Correct")

3.2 Change over time

3.2.1 Main plot

Without grade-level breakdown.

ms <- d %>%
  gather(task, score, pvAvg, arithmeticAverage, woodcockTotal) %>%
  group_by(year, group, task) %>%
  multi_boot_standard("score", na.rm=TRUE)

## Joining, by = c("year", "group", "task")

pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
  geom_line(position = pos) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = pos) + 
  facet_wrap(~task, scales = "free_y") + 
  scale_colour_solarized()

Broken down by grade.

ms <- d %>%
  gather(task, score, pvAvg, arithmeticAverage, woodcockTotal) %>%
  group_by(year, grade, group, task) %>%
  multi_boot_standard("score", na.rm=TRUE)

## Joining, by = c("year", "grade", "group", "task")

pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
  geom_line(position = pos) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = pos) + 
  facet_wrap(grade~task, scales = "free_y") + 
  scale_colour_solarized()

3.2.2 Pre-registered models

wj.mod <- lmer(woodcockTotal ~ grade + year * group + 
                 (1 | subid) + 
                 (year | class_num), 
     data=d)

pv.mod <- lmer(pvAvg ~ grade + year * group + 
                 (1 | subid) + 
                 (year | class_num), 
     data=d)

wg.mod <- lmer(arithmeticTotal ~ grade + year * group + 
                 (1 | subid) + 
                 (year | class_num), 
     data=d)

And summarize. First, WJ.

kable(summary(wj.mod)$coef)

	Estimate	Std. Error	t value
(Intercept)	5.6609036	0.3863944	14.6505830
gradesecond grade	3.3985659	0.3565510	9.5317799
year2016	4.8353558	0.3144660	15.3764007
groupMA	0.1114861	0.4743396	0.2350343
year2016:groupMA	-0.0257998	0.4298529	-0.0600201

Place value.

kable(summary(pv.mod)$coef)

	Estimate	Std. Error	t value
(Intercept)	0.0241715	0.0333462	0.7248646
gradesecond grade	0.2679955	0.0347614	7.7095744
year2016	0.2668794	0.0369524	7.2222570
groupMA	0.0340511	0.0377221	0.9026825
year2016:groupMA	0.0462637	0.0505900	0.9144819

and arithmetic.

kable(summary(wg.mod)$coef)

	Estimate	Std. Error	t value
(Intercept)	1.8776615	0.5856658	3.2060288
gradesecond grade	4.3392875	0.6510306	6.6652592
year2016	6.8049119	0.6142575	11.0782723
groupMA	0.4720199	0.6457014	0.7310189
year2016:groupMA	-0.5871245	0.8354591	-0.7027568

3.2.3 Exploratory place value model.