library(readr)
library(dplyr)
library(ggplot2)
library(langcog)
library(magrittr)
library(stringr)
library(tidyr)
library(lme4)


rm(list=ls())
theme_set(theme_mikabr() +
            theme(panel.grid = element_blank(),
                  strip.background = element_blank()))
font <- "Open Sans"

1 Runsheets and exclusions

runsheet <- read_csv("data/metadata/runsheet_6-20-16.csv") %>%
  select(SID, Homeroom, Consent)

classes <- read_csv("data/metadata/classrooms_6-20-16.csv") %>%
  select(Exclude, Homeroom, Group)

kids_raw <- left_join(runsheet, classes) %>%
  mutate(Consent = ifelse(is.na(Consent), 0, Consent))
## Joining, by = "Homeroom"
kids <- kids_raw %>%
  filter(Consent == 1, Exclude == 0) 

groups <- kids %>% 
  select(SID, Group, Homeroom) %>%
  rename(roster_subid = SID,
         room = Homeroom,
         group = Group)

Raw numbers.

kable(kids_raw %>% group_by(Group, Consent) %>% summarise(n = n()))
Group Consent n
CNTL 0 92
CNTL 1 100
EXCLUDED 0 6
MA 0 93
MA 1 116
NA 0 3
NA 1 1

By consent proportion for included rooms.

consent <- kids_raw %>% 
        group_by(Group) %>% 
        filter(!is.na(Group), Group != "EXCLUDED") %>%
        summarise(consent = sum(Consent == 1), 
                  total = n(), 
                  prop = mean(Consent == 1))

kable(consent, 
      digits = 2)
Group consent total prop
CNTL 100 192 0.52
MA 116 209 0.56

By grade and room.

consent <- kids_raw %>% 
  mutate(grade = ifelse(str_detect(SID, "S2"), "2", "1")) %>%
  group_by(Group, grade) %>% 
  filter(!is.na(Group), Group != "EXCLUDED") %>%
  summarise(consent = sum(Consent == 1), 
            total = n(), 
            prop = mean(Consent == 1))

kable(consent, 
      digits = 2)
Group grade consent total prop
CNTL 1 49 103 0.48
CNTL 2 51 89 0.57
MA 1 46 95 0.48
MA 2 70 114 0.61

By room.

kable(kids_raw %>% 
        group_by(Group, Homeroom) %>% 
        filter(!is.na(Group), Group != "EXCLUDED") %>%
        summarise(consent = sum(Consent == 1), 
                  total = n(), 
                  prop = mean(Consent == 1)), 
      digits = 2)
Group Homeroom consent total prop
CNTL BURKE/HALL 6 19 0.32
CNTL GUTIERREZ 11 24 0.46
CNTL HEREDIA 2 3 0.67
CNTL HUNTER/PIERIES 10 20 0.50
CNTL KANE 11 22 0.50
CNTL MARGULIES 9 21 0.43
CNTL MEJIA/BRADBURY 13 22 0.59
CNTL PAZYMINO 12 16 0.75
CNTL ROMANO 4 9 0.44
CNTL ROSA 9 17 0.53
CNTL ZORRILLA 13 19 0.68
MA CAPILLI 7 16 0.44
MA CHUN 8 19 0.42
MA CSASZAR 11 19 0.58
MA D’IZZIA/JANO 11 21 0.52
MA GIGANTI/CHAMBERS 4 8 0.50
MA MATA 9 17 0.53
MA MEEKS 8 20 0.40
MA MITTMAN 4 10 0.40
MA PERRY/KEENAN 15 20 0.75
MA RENTAS 17 20 0.85
MA TRIANO 8 18 0.44
MA WOODING 14 21 0.67

Summary, we got around 50% consent rate; consent rate didn’t differ across groups.

2 Cognitive tasks

Read data. Note that there is one sitting file for the battery and one for each of the sub-tests, in case the battery crashed and failed to save data for that participant.

sittings.raw <- bind_rows(
  read_csv("data/computer/b301_06.20.16_17.00.40_sittings.csv"),
  read_csv("data/computer/b299_06.20.16_17.03.46_sittings.csv"),
  read_csv("data/computer/b300_06.20.16_17.03.52_sittings.csv"),
  read_csv("data/computer/b302_06.20.16_17.03.58_sittings.csv"))

sittings <- sittings.raw %>% 
  mutate(subid = str_trim(toupper(misc), side = "both"),
         sitting_id = id) %>%
  filter(str_detect(subid, "[Ss][12]-")) %>%
  mutate(grade = ifelse(str_detect(subid, "[Ss][1]"), 
                        "first grade", 
                        "second grade"), 
         year = factor(str_sub(start_time, 1, 4))) %>%
  select(subid, sitting_id, year, grade, battery_id, test_order, num_completed)

Each task has the every-battery and the one-sitting version.

gonogo.raw <- bind_rows(
  read_csv("data/computer/b299_t463_06.20.16_17.06.05.csv"),
  read_csv("data/computer/b301_t463_06.20.16_17.06.16.csv"))

ravens.raw <- bind_rows(
  read_csv("data/computer/b300_t464_06.20.16_17.06.12.csv"),
  read_csv("data/computer/b301_t464_06.20.16_17.06.18.csv"))

swm.raw <- bind_rows(
  read_csv("data/computer/b302_t467_06.20.16_17.06.27.csv"),
  read_csv("data/computer/b301_t467_06.20.16_17.06.19.csv"))

Merge in demographics.

First, find the subjects whose IDs were entered incorrectly and output.

anti_join(sittings, groups %>% rename(subid = roster_subid)) %>%
  select(subid, battery_id) %>%
  write_csv("data/metadata/missing_sittings.csv")
## Joining, by = "subid"

Now read in the conversion table. (Done by hand by checking the runsheet for typos, notes etc, on the basis of missing_sittings.csv).

missing_sittings <- read_csv("data/metadata/missing_sittings_key.csv")

sittings <- left_join(sittings, missing_sittings) %>%
  mutate(exclude = ifelse(is.na(exclude), 0, exclude)) %>%
  filter(exclude != 1) %>%
  mutate(roster_subid = ifelse(is.na(new_subid), subid, new_subid)) %>%
  left_join(groups) 
## Joining, by = c("subid", "battery_id")
## Joining, by = "roster_subid"

Computer testing numbers. Note that battery 301 is the combined battery.

sittings %>% 
  group_by(grade, year, battery_id) %>%
  summarise(n = n())
## Source: local data frame [14 x 4]
## Groups: grade, year [?]
## 
##           grade   year battery_id     n
##           <chr> <fctr>      <int> <int>
## 1   first grade   2015        299     2
## 2   first grade   2015        300    10
## 3   first grade   2015        301    70
## 4   first grade   2015        302    10
## 5   first grade   2016        300     2
## 6   first grade   2016        301    72
## 7   first grade   2016        302     2
## 8  second grade   2015        299     1
## 9  second grade   2015        300     9
## 10 second grade   2015        301   111
## 11 second grade   2015        302    10
## 12 second grade   2016        300     4
## 13 second grade   2016        301   106
## 14 second grade   2016        302     4

2.1 Go / No-Go Task

gonogo <- gonogo.raw %>%
  filter(sitting_id %in% sittings$sitting_id) %>%
  left_join(sittings, by = "sitting_id") %>%
  mutate(subid = roster_subid) %>%
  select(subid, year, grade, trial, stim, correct, rt, accuracy, group,
         responseAssign, rtCall)

Accuracy.

gonogo.summary <- gonogo %>%
  group_by(subid, grade, group, year) %>%
  summarise(accuracy = mean(accuracy, na.rm=TRUE))
  
ms <- gonogo.summary %>%
  group_by(grade, group, year) %>%
  summarise(accuracy = mean(accuracy))

gonogo.summary %>%
  ggplot(aes(x=accuracy, fill = group)) + 
  geom_histogram(binwidth = .05) +
  facet_grid(grade ~ year) + 
  geom_vline(xintercept = .5, lty = 3) +
  xlim(c(0,1)) + 
  geom_vline(data = ms, aes(xintercept = accuracy, col = group), 
             lty = 2)

RT.

ms <- gonogo %>%
  filter(rt > 0 & accuracy == 1) %>%
  group_by(subid, grade, group, year) %>%
  summarise(rt = mean(rt, na.rm=TRUE)) %>%
  group_by(grade, group) %>%
  summarise(rt = mean(rt))

gonogo %>%
  filter(rt > 0 & accuracy == 1) %>%
  group_by(subid, grade, group, year) %>%
  summarise(rt = mean(rt, na.rm=TRUE)) %>%
  ggplot(aes(x=rt, fill = group)) + 
  geom_histogram(binwidth = 50) +
  facet_grid(grade ~ year) + 
  geom_vline(data = ms, aes(xintercept = rt, col = group), 
             lty = 2)

2.2 Spatial WM

Data processing.

swm <- swm.raw %>%
  filter(sitting_id %in% sittings$sitting_id,
         type == "test") %>%
  left_join(sittings, by = "sitting_id") %>%
  mutate(subid = roster_subid) %>%
  filter(type == "test") %>%
  select(subid, grade, group, year, trial, capacity, correct)

Mean level (same measure as in Zenith study).

swm.summary <- swm %>%
  group_by(subid, grade, group, year) %>%
  summarise(capacity = mean(capacity, na.rm=TRUE))
  
ms <- swm.summary %>%
  group_by(grade, group, year) %>%
  summarise(capacity = mean(capacity))
  
swm.summary %>%
  ggplot(aes(x=capacity, fill = group)) + 
  geom_histogram(binwidth = .5) +
  facet_grid(grade ~ year) +
  xlim(c(1,8)) +
  geom_vline(data = ms, aes(xintercept = capacity, col = group), 
             lty = 2)
## Warning: Removed 2 rows containing non-finite values (stat_bin).

2.3 Raven’s

Actually a matrix reasoning equivalent.

ravens <- ravens.raw %>%
  filter(sitting_id %in% sittings$sitting_id,
         type == "test") %>%
  left_join(sittings, by = "sitting_id") %>%
  mutate(subid = roster_subid) %>%
  filter(type == "test") %>%
  select(subid, grade, group, year, trial, correct)

Mean level (same measure as in Zenith study).

ravens.summary <- ravens %>% 
  group_by(subid, grade, group, year) %>%
    summarise(correct = sum(correct, na.rm=TRUE))

ms <- ravens.summary %>%
  group_by(grade, group, year) %>%
  summarise(correct = mean(correct))
  
ravens.summary %>%
  ggplot(aes(x=correct, fill = group)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(grade ~ year) +
  geom_vline(data = ms, aes(xintercept = correct, col = group), 
             lty = 2)

2.4 Change over time

subs <- ravens.summary %>% 
  left_join(gonogo.summary) %>%
  left_join(swm.summary) %>%
  rename(ravens = correct, 
         gonogo = accuracy,
         swm = capacity) 
## Joining, by = c("subid", "grade", "group", "year")
## Joining, by = c("subid", "grade", "group", "year")
ms <- subs %>%
  gather(task, score, ravens, gonogo, swm) %>%
  group_by(year, group, task) %>%
  multi_boot_standard("score", na.rm=TRUE)
## Joining, by = c("year", "group", "task")
pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
  geom_line(position = pos) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = pos) + 
  facet_wrap(~task, scales = "free_y") + 
  scale_colour_solarized()

And by grade level.

ms <- subs %>%
  gather(task, score, ravens, gonogo, swm) %>%
  group_by(year, grade, group, task) %>%
  multi_boot_standard("score", na.rm=TRUE)
## Joining, by = c("year", "grade", "group", "task")
pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
  geom_line(position = pos) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = pos) + 
  facet_wrap(grade~task, scales = "free_y") + 
  scale_colour_solarized()

And final numbers on N data points per task.

kable(subs %>%
  gather(task, score, ravens, gonogo, swm) %>%
  group_by(year, group, task) %>%
  summarise(n=n()))
year group task n
2015 CNTL gonogo 82
2015 CNTL ravens 82
2015 CNTL swm 82
2015 MA gonogo 92
2015 MA ravens 92
2015 MA swm 92
2016 CNTL gonogo 77
2016 CNTL ravens 77
2016 CNTL swm 77
2016 MA gonogo 98
2016 MA ravens 98
2016 MA swm 98

2.5 Individual correlations

Year-by-year task plot

subs_nona <- subs %>%
  filter(!is.na(ravens) & !is.na(gonogo) & !is.na(swm))

subs_wide <- subs_nona %>% 
  gather(task, score, ravens, gonogo, swm) %>%
  mutate(year = str_c("y", as.character(year))) %>%
  spread(year, score)

ggplot(aes(x = y2015, y = y2016, col = group), data = subs_wide) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
  facet_wrap(~task, scales = "free")
## Warning: Removed 96 rows containing non-finite values (stat_smooth).
## Warning: Removed 96 rows containing missing values (geom_point).

Correlations within task.

with(filter(subs_wide, task == "gonogo"), cor.test(y2015, y2016))
## 
##  Pearson's product-moment correlation
## 
## data:  y2015 and y2016
## t = 7.4019, df = 154, p-value = 8.168e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3862104 0.6195323
## sample estimates:
##       cor 
## 0.5122622
with(filter(subs_wide, task == "swm"), cor.test(y2015, y2016))
## 
##  Pearson's product-moment correlation
## 
## data:  y2015 and y2016
## t = 4.9743, df = 154, p-value = 1.731e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2282667 0.4999710
## sample estimates:
##       cor 
## 0.3720616
with(filter(subs_wide, task == "ravens"), cor.test(y2015, y2016))
## 
##  Pearson's product-moment correlation
## 
## data:  y2015 and y2016
## t = 4.994, df = 154, p-value = 1.586e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2296626 0.5010751
## sample estimates:
##       cor 
## 0.3733301

Correlations across tasks for 2015.

kable(cor(subs_nona %>% 
            filter(year == "2015") %>%
            ungroup %>% 
            select(ravens, gonogo, swm)), digits = 2)
ravens gonogo swm
ravens 1.00 0.26 0.19
gonogo 0.26 1.00 0.41
swm 0.19 0.41 1.00

Correlations across tasks for 2016.

kable(cor(subs_nona %>% 
            filter(year == "2016") %>%
            ungroup %>% 
            select(ravens, gonogo, swm)), digits = 2)
ravens gonogo swm
ravens 1.00 0.24 0.26
gonogo 0.24 1.00 0.30
swm 0.26 0.30 1.00

3 Math tasks

Read in data.

pv0 <- read.csv("data/paper/2015_PVNumerals.csv") %>%
  select(subnum, pvAvg) %>%
  mutate(year = "2015") %>%
  rename(subid = subnum)
wg0 <- read.csv("data/paper/2015_WGArith.csv") %>%
  select(subnum, arithmeticTotal, arithmeticAverage) %>%
  mutate(year = "2015") %>%
  rename(subid = subnum)
wj0 <- read.csv("data/paper/2015_WOODCOCK.csv") %>%
  select(subnum, woodcockTotal) %>%
  mutate(year = "2015") %>%
  rename(subid = subnum)
pv1 <- read.csv("data/paper/2016_PVNumerals.csv") %>%
  select(SubjectID, pvAvg) %>%
  mutate(year = "2016") %>%
  rename(subid = SubjectID)
wg1 <- read.csv("data/paper/2016_WGArith.csv") %>%
  select(SubjectID, arithmeticTotal, arithmeticAverage) %>%
  mutate(year = "2016") %>%
  rename(subid = SubjectID)
wj1 <- read.csv("data/paper/2016_WOODCOCK.csv") %>%
  select(SubjectID, woodcockTotal) %>%
  mutate(year = "2016") %>%
  rename(subid = SubjectID)

Merge with sub data.

d <- left_join(subs, bind_rows(pv0,pv1)) %>% 
  left_join(bind_rows(wg0,wg1)) %>% 
  left_join(bind_rows(wj0,wj1))
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Joining, by = c("subid", "year")
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): Unequal
## factor levels: coercing to character
## Joining, by = c("subid", "year")
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Joining, by = c("subid", "year")

Extract classes from subids.

d <- d %>%
  separate(subid,"-", into=c("grade_level","class","num"), remove=FALSE) %>%
  unite(class_num, grade_level, class) %>%
  mutate(class_num = factor(class_num)) %>%
  select(-num)

3.1 Individual tasks

Now plot each task individually.

3.1.1 Place value

pv.summary <- d %>% 
  group_by(subid, year, grade) %>%
    summarise(correct = sum(pvAvg, na.rm=TRUE))

ms <- pv.summary %>%
  group_by(year, grade) %>%
  summarise(correct = mean(correct))
  
pv.summary %>%
  ggplot(aes(x=correct)) + 
  geom_histogram(binwidth = .1) +
  facet_grid(year~grade) +
  geom_vline(data = ms, aes(xintercept = correct), 
             col = "red", lty = 2) + 
  ggtitle("Place Value Accuracy") + 
  xlab("Proportion correct")

3.1.2 Arithmetic

wg.summary <- d %>% 
  group_by(subid, year, grade) %>%
    summarise(correct = sum(arithmeticTotal, na.rm=TRUE))

ms <- wg.summary %>%
  group_by(year, grade) %>%
  summarise(correct = mean(correct))
  
wg.summary %>%
  ggplot(aes(x=correct)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(year~grade) +
  geom_vline(data = ms, aes(xintercept = correct), 
             col = "red", lty = 2) + 
  ggtitle("Arithmetic Accuracy") + 
  xlab("Total Problems Correct")

3.1.3 Woodcock-Johnson III

wj.summary <- d %>% 
  group_by(subid, year, grade) %>%
    summarise(correct = sum(woodcockTotal, na.rm=TRUE))

ms <- wj.summary %>%
  group_by(year, grade) %>%
  summarise(correct = mean(correct))
  
wj.summary %>%
  ggplot(aes(x=correct)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(year~grade) +
  geom_vline(data = ms, aes(xintercept = correct), 
             col = "red", lty = 2) + 
  ggtitle("Woodcock-Johnson III Accuracy") + 
  xlab("Total Problems Correct")

3.2 Change over time

3.2.1 Main plot

Without grade-level breakdown.

ms <- d %>%
  gather(task, score, pvAvg, arithmeticAverage, woodcockTotal) %>%
  group_by(year, group, task) %>%
  multi_boot_standard("score", na.rm=TRUE)
## Joining, by = c("year", "group", "task")
pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
  geom_line(position = pos) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = pos) + 
  facet_wrap(~task, scales = "free_y") + 
  scale_colour_solarized()

Broken down by grade.

ms <- d %>%
  gather(task, score, pvAvg, arithmeticAverage, woodcockTotal) %>%
  group_by(year, grade, group, task) %>%
  multi_boot_standard("score", na.rm=TRUE)
## Joining, by = c("year", "grade", "group", "task")
pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
  geom_line(position = pos) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = pos) + 
  facet_wrap(grade~task, scales = "free_y") + 
  scale_colour_solarized()

3.2.2 Pre-registered models

wj.mod <- lmer(woodcockTotal ~ grade + year * group + 
                 (1 | subid) + 
                 (year | class_num), 
     data=d)

pv.mod <- lmer(pvAvg ~ grade + year * group + 
                 (1 | subid) + 
                 (year | class_num), 
     data=d)

wg.mod <- lmer(arithmeticTotal ~ grade + year * group + 
                 (1 | subid) + 
                 (year | class_num), 
     data=d)

And summarize. First, WJ.

kable(summary(wj.mod)$coef)
Estimate Std. Error t value
(Intercept) 5.6609036 0.3863944 14.6505830
gradesecond grade 3.3985659 0.3565510 9.5317799
year2016 4.8353558 0.3144660 15.3764007
groupMA 0.1114861 0.4743396 0.2350343
year2016:groupMA -0.0257998 0.4298529 -0.0600201

Place value.

kable(summary(pv.mod)$coef)
Estimate Std. Error t value
(Intercept) 0.0241715 0.0333462 0.7248646
gradesecond grade 0.2679955 0.0347614 7.7095744
year2016 0.2668794 0.0369524 7.2222570
groupMA 0.0340511 0.0377221 0.9026825
year2016:groupMA 0.0462637 0.0505900 0.9144819

and arithmetic.

kable(summary(wg.mod)$coef)
Estimate Std. Error t value
(Intercept) 1.8776615 0.5856658 3.2060288
gradesecond grade 4.3392875 0.6510306 6.6652592
year2016 6.8049119 0.6142575 11.0782723
groupMA 0.4720199 0.6457014 0.7310189
year2016:groupMA -0.5871245 0.8354591 -0.7027568

3.2.3 Exploratory place value model.

Place value.

pv.exp.mod <- lmer(pvAvg ~ grade * year * group + 
                 (1 | subid) + 
                 (year | class_num), 
     data=d)

kable(summary(pv.exp.mod)$coef)
Estimate Std. Error t value
(Intercept) 0.0391175 0.0412554 0.9481805
gradesecond grade 0.2391025 0.0555904 4.3011490
year2016 0.2901527 0.0567255 5.1150311
groupMA -0.0143919 0.0598512 -0.2404607
gradesecond grade:year2016 -0.0403699 0.0769322 -0.5247462
gradesecond grade:groupMA 0.0820839 0.0778316 1.0546339
year2016:groupMA 0.0514488 0.0790735 0.6506459
gradesecond grade:year2016:groupMA -0.0066832 0.1059016 -0.0631079

3.3 Mediation analysis

high_swm <- d %>%
  filter(year == "2015") %>%
  group_by(grade) %>%
  mutate(high_swm = swm > median(swm, na.rm=TRUE)) %>%
  filter(high_swm)

ms <- d %>%
  mutate(high_swm = subid %in% high_swm$subid) %>%
  gather(task, score, pvAvg, arithmeticAverage, woodcockTotal) %>%
  group_by(year, group, task, high_swm) %>%
  multi_boot_standard("score", na.rm=TRUE)
## Joining, by = c("year", "group", "task", "high_swm")
pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
  geom_line(position = pos) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = pos) + 
  facet_wrap(high_swm~task, scales = "free_y") + 
  scale_colour_solarized()

And by grade too.

high_swm <- d %>%
  filter(year == "2015") %>%
  group_by(grade) %>%
  mutate(high_swm = swm > median(swm, na.rm=TRUE)) %>%
  filter(high_swm)

ms <- d %>%
  mutate(high_swm = subid %in% high_swm$subid) %>%
  gather(task, score, pvAvg, arithmeticAverage, woodcockTotal) %>%
  group_by(year, group, grade, task, high_swm) %>%
  multi_boot_standard("score", na.rm=TRUE)
## Joining, by = c("year", "group", "grade", "task", "high_swm")
pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, pch = high_swm, lty = high_swm)) +
  geom_line(position = pos, aes(group = interaction(high_swm, group))) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = pos) + 
  facet_wrap(grade~task, scales = "free_y") + 
  scale_colour_solarized()

4 Final demographics

d %>%
  group_by(group, grade, year) %>%
  summarise(n=n())
## Source: local data frame [8 x 4]
## Groups: group, grade [?]
## 
##   group        grade  year     n
##   <chr>        <chr> <chr> <int>
## 1  CNTL  first grade  2015    36
## 2  CNTL  first grade  2016    32
## 3  CNTL second grade  2015    46
## 4  CNTL second grade  2016    45
## 5    MA  first grade  2015    33
## 6    MA  first grade  2016    38
## 7    MA second grade  2015    59
## 8    MA second grade  2016    60