Runsheets and exclusions
runsheet <- read_csv("data/metadata/runsheet_6-20-16.csv") %>%
select(SID, Homeroom, Consent)
classes <- read_csv("data/metadata/classrooms_6-20-16.csv") %>%
select(Exclude, Homeroom, Group)
kids_raw <- left_join(runsheet, classes) %>%
mutate(Consent = ifelse(is.na(Consent), 0, Consent))
## Joining, by = "Homeroom"
kids <- kids_raw %>%
filter(Consent == 1, Exclude == 0)
groups <- kids %>%
select(SID, Group, Homeroom) %>%
rename(roster_subid = SID,
room = Homeroom,
group = Group)
Raw numbers.
kable(kids_raw %>% group_by(Group, Consent) %>% summarise(n = n()))
CNTL |
0 |
92 |
CNTL |
1 |
100 |
EXCLUDED |
0 |
6 |
MA |
0 |
93 |
MA |
1 |
116 |
NA |
0 |
3 |
NA |
1 |
1 |
By consent proportion for included rooms.
consent <- kids_raw %>%
group_by(Group) %>%
filter(!is.na(Group), Group != "EXCLUDED") %>%
summarise(consent = sum(Consent == 1),
total = n(),
prop = mean(Consent == 1))
kable(consent,
digits = 2)
CNTL |
100 |
192 |
0.52 |
MA |
116 |
209 |
0.56 |
By grade and room.
consent <- kids_raw %>%
mutate(grade = ifelse(str_detect(SID, "S2"), "2", "1")) %>%
group_by(Group, grade) %>%
filter(!is.na(Group), Group != "EXCLUDED") %>%
summarise(consent = sum(Consent == 1),
total = n(),
prop = mean(Consent == 1))
kable(consent,
digits = 2)
CNTL |
1 |
49 |
103 |
0.48 |
CNTL |
2 |
51 |
89 |
0.57 |
MA |
1 |
46 |
95 |
0.48 |
MA |
2 |
70 |
114 |
0.61 |
By room.
kable(kids_raw %>%
group_by(Group, Homeroom) %>%
filter(!is.na(Group), Group != "EXCLUDED") %>%
summarise(consent = sum(Consent == 1),
total = n(),
prop = mean(Consent == 1)),
digits = 2)
CNTL |
BURKE/HALL |
6 |
19 |
0.32 |
CNTL |
GUTIERREZ |
11 |
24 |
0.46 |
CNTL |
HEREDIA |
2 |
3 |
0.67 |
CNTL |
HUNTER/PIERIES |
10 |
20 |
0.50 |
CNTL |
KANE |
11 |
22 |
0.50 |
CNTL |
MARGULIES |
9 |
21 |
0.43 |
CNTL |
MEJIA/BRADBURY |
13 |
22 |
0.59 |
CNTL |
PAZYMINO |
12 |
16 |
0.75 |
CNTL |
ROMANO |
4 |
9 |
0.44 |
CNTL |
ROSA |
9 |
17 |
0.53 |
CNTL |
ZORRILLA |
13 |
19 |
0.68 |
MA |
CAPILLI |
7 |
16 |
0.44 |
MA |
CHUN |
8 |
19 |
0.42 |
MA |
CSASZAR |
11 |
19 |
0.58 |
MA |
D’IZZIA/JANO |
11 |
21 |
0.52 |
MA |
GIGANTI/CHAMBERS |
4 |
8 |
0.50 |
MA |
MATA |
9 |
17 |
0.53 |
MA |
MEEKS |
8 |
20 |
0.40 |
MA |
MITTMAN |
4 |
10 |
0.40 |
MA |
PERRY/KEENAN |
15 |
20 |
0.75 |
MA |
RENTAS |
17 |
20 |
0.85 |
MA |
TRIANO |
8 |
18 |
0.44 |
MA |
WOODING |
14 |
21 |
0.67 |
Summary, we got around 50% consent rate; consent rate didn’t differ across groups.
Cognitive tasks
Read data. Note that there is one sitting file for the battery and one for each of the sub-tests, in case the battery crashed and failed to save data for that participant.
sittings.raw <- bind_rows(
read_csv("data/computer/b301_06.20.16_17.00.40_sittings.csv"),
read_csv("data/computer/b299_06.20.16_17.03.46_sittings.csv"),
read_csv("data/computer/b300_06.20.16_17.03.52_sittings.csv"),
read_csv("data/computer/b302_06.20.16_17.03.58_sittings.csv"))
sittings <- sittings.raw %>%
mutate(subid = str_trim(toupper(misc), side = "both"),
sitting_id = id) %>%
filter(str_detect(subid, "[Ss][12]-")) %>%
mutate(grade = ifelse(str_detect(subid, "[Ss][1]"),
"first grade",
"second grade"),
year = factor(str_sub(start_time, 1, 4))) %>%
select(subid, sitting_id, year, grade, battery_id, test_order, num_completed)
Each task has the every-battery and the one-sitting version.
gonogo.raw <- bind_rows(
read_csv("data/computer/b299_t463_06.20.16_17.06.05.csv"),
read_csv("data/computer/b301_t463_06.20.16_17.06.16.csv"))
ravens.raw <- bind_rows(
read_csv("data/computer/b300_t464_06.20.16_17.06.12.csv"),
read_csv("data/computer/b301_t464_06.20.16_17.06.18.csv"))
swm.raw <- bind_rows(
read_csv("data/computer/b302_t467_06.20.16_17.06.27.csv"),
read_csv("data/computer/b301_t467_06.20.16_17.06.19.csv"))
Merge in demographics.
First, find the subjects whose IDs were entered incorrectly and output.
anti_join(sittings, groups %>% rename(subid = roster_subid)) %>%
select(subid, battery_id) %>%
write_csv("data/metadata/missing_sittings.csv")
## Joining, by = "subid"
Now read in the conversion table. (Done by hand by checking the runsheet for typos, notes etc, on the basis of missing_sittings.csv
).
missing_sittings <- read_csv("data/metadata/missing_sittings_key.csv")
sittings <- left_join(sittings, missing_sittings) %>%
mutate(exclude = ifelse(is.na(exclude), 0, exclude)) %>%
filter(exclude != 1) %>%
mutate(roster_subid = ifelse(is.na(new_subid), subid, new_subid)) %>%
left_join(groups)
## Joining, by = c("subid", "battery_id")
## Joining, by = "roster_subid"
Computer testing numbers. Note that battery 301 is the combined battery.
sittings %>%
group_by(grade, year, battery_id) %>%
summarise(n = n())
## Source: local data frame [14 x 4]
## Groups: grade, year [?]
##
## grade year battery_id n
## <chr> <fctr> <int> <int>
## 1 first grade 2015 299 2
## 2 first grade 2015 300 10
## 3 first grade 2015 301 70
## 4 first grade 2015 302 10
## 5 first grade 2016 300 2
## 6 first grade 2016 301 72
## 7 first grade 2016 302 2
## 8 second grade 2015 299 1
## 9 second grade 2015 300 9
## 10 second grade 2015 301 111
## 11 second grade 2015 302 10
## 12 second grade 2016 300 4
## 13 second grade 2016 301 106
## 14 second grade 2016 302 4
Go / No-Go Task
gonogo <- gonogo.raw %>%
filter(sitting_id %in% sittings$sitting_id) %>%
left_join(sittings, by = "sitting_id") %>%
mutate(subid = roster_subid) %>%
select(subid, year, grade, trial, stim, correct, rt, accuracy, group,
responseAssign, rtCall)
Accuracy.
gonogo.summary <- gonogo %>%
group_by(subid, grade, group, year) %>%
summarise(accuracy = mean(accuracy, na.rm=TRUE))
ms <- gonogo.summary %>%
group_by(grade, group, year) %>%
summarise(accuracy = mean(accuracy))
gonogo.summary %>%
ggplot(aes(x=accuracy, fill = group)) +
geom_histogram(binwidth = .05) +
facet_grid(grade ~ year) +
geom_vline(xintercept = .5, lty = 3) +
xlim(c(0,1)) +
geom_vline(data = ms, aes(xintercept = accuracy, col = group),
lty = 2)

RT.
ms <- gonogo %>%
filter(rt > 0 & accuracy == 1) %>%
group_by(subid, grade, group, year) %>%
summarise(rt = mean(rt, na.rm=TRUE)) %>%
group_by(grade, group) %>%
summarise(rt = mean(rt))
gonogo %>%
filter(rt > 0 & accuracy == 1) %>%
group_by(subid, grade, group, year) %>%
summarise(rt = mean(rt, na.rm=TRUE)) %>%
ggplot(aes(x=rt, fill = group)) +
geom_histogram(binwidth = 50) +
facet_grid(grade ~ year) +
geom_vline(data = ms, aes(xintercept = rt, col = group),
lty = 2)

Spatial WM
Data processing.
swm <- swm.raw %>%
filter(sitting_id %in% sittings$sitting_id,
type == "test") %>%
left_join(sittings, by = "sitting_id") %>%
mutate(subid = roster_subid) %>%
filter(type == "test") %>%
select(subid, grade, group, year, trial, capacity, correct)
Mean level (same measure as in Zenith study).
swm.summary <- swm %>%
group_by(subid, grade, group, year) %>%
summarise(capacity = mean(capacity, na.rm=TRUE))
ms <- swm.summary %>%
group_by(grade, group, year) %>%
summarise(capacity = mean(capacity))
swm.summary %>%
ggplot(aes(x=capacity, fill = group)) +
geom_histogram(binwidth = .5) +
facet_grid(grade ~ year) +
xlim(c(1,8)) +
geom_vline(data = ms, aes(xintercept = capacity, col = group),
lty = 2)
## Warning: Removed 2 rows containing non-finite values (stat_bin).

Raven’s
Actually a matrix reasoning equivalent.
ravens <- ravens.raw %>%
filter(sitting_id %in% sittings$sitting_id,
type == "test") %>%
left_join(sittings, by = "sitting_id") %>%
mutate(subid = roster_subid) %>%
filter(type == "test") %>%
select(subid, grade, group, year, trial, correct)
Mean level (same measure as in Zenith study).
ravens.summary <- ravens %>%
group_by(subid, grade, group, year) %>%
summarise(correct = sum(correct, na.rm=TRUE))
ms <- ravens.summary %>%
group_by(grade, group, year) %>%
summarise(correct = mean(correct))
ravens.summary %>%
ggplot(aes(x=correct, fill = group)) +
geom_histogram(binwidth = 1) +
facet_grid(grade ~ year) +
geom_vline(data = ms, aes(xintercept = correct, col = group),
lty = 2)

Change over time
subs <- ravens.summary %>%
left_join(gonogo.summary) %>%
left_join(swm.summary) %>%
rename(ravens = correct,
gonogo = accuracy,
swm = capacity)
## Joining, by = c("subid", "grade", "group", "year")
## Joining, by = c("subid", "grade", "group", "year")
ms <- subs %>%
gather(task, score, ravens, gonogo, swm) %>%
group_by(year, group, task) %>%
multi_boot_standard("score", na.rm=TRUE)
## Joining, by = c("year", "group", "task")
pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
geom_line(position = pos) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper),
position = pos) +
facet_wrap(~task, scales = "free_y") +
scale_colour_solarized()

And by grade level.
ms <- subs %>%
gather(task, score, ravens, gonogo, swm) %>%
group_by(year, grade, group, task) %>%
multi_boot_standard("score", na.rm=TRUE)
## Joining, by = c("year", "grade", "group", "task")
pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
geom_line(position = pos) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper),
position = pos) +
facet_wrap(grade~task, scales = "free_y") +
scale_colour_solarized()

And final numbers on N data points per task.
kable(subs %>%
gather(task, score, ravens, gonogo, swm) %>%
group_by(year, group, task) %>%
summarise(n=n()))
2015 |
CNTL |
gonogo |
82 |
2015 |
CNTL |
ravens |
82 |
2015 |
CNTL |
swm |
82 |
2015 |
MA |
gonogo |
92 |
2015 |
MA |
ravens |
92 |
2015 |
MA |
swm |
92 |
2016 |
CNTL |
gonogo |
77 |
2016 |
CNTL |
ravens |
77 |
2016 |
CNTL |
swm |
77 |
2016 |
MA |
gonogo |
98 |
2016 |
MA |
ravens |
98 |
2016 |
MA |
swm |
98 |
Individual correlations
Year-by-year task plot
subs_nona <- subs %>%
filter(!is.na(ravens) & !is.na(gonogo) & !is.na(swm))
subs_wide <- subs_nona %>%
gather(task, score, ravens, gonogo, swm) %>%
mutate(year = str_c("y", as.character(year))) %>%
spread(year, score)
ggplot(aes(x = y2015, y = y2016, col = group), data = subs_wide) +
geom_point() +
geom_smooth(method = "lm") +
facet_wrap(~task, scales = "free")
## Warning: Removed 96 rows containing non-finite values (stat_smooth).
## Warning: Removed 96 rows containing missing values (geom_point).

Correlations within task.
with(filter(subs_wide, task == "gonogo"), cor.test(y2015, y2016))
##
## Pearson's product-moment correlation
##
## data: y2015 and y2016
## t = 7.4019, df = 154, p-value = 8.168e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3862104 0.6195323
## sample estimates:
## cor
## 0.5122622
with(filter(subs_wide, task == "swm"), cor.test(y2015, y2016))
##
## Pearson's product-moment correlation
##
## data: y2015 and y2016
## t = 4.9743, df = 154, p-value = 1.731e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2282667 0.4999710
## sample estimates:
## cor
## 0.3720616
with(filter(subs_wide, task == "ravens"), cor.test(y2015, y2016))
##
## Pearson's product-moment correlation
##
## data: y2015 and y2016
## t = 4.994, df = 154, p-value = 1.586e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2296626 0.5010751
## sample estimates:
## cor
## 0.3733301
Correlations across tasks for 2015.
kable(cor(subs_nona %>%
filter(year == "2015") %>%
ungroup %>%
select(ravens, gonogo, swm)), digits = 2)
ravens |
1.00 |
0.26 |
0.19 |
gonogo |
0.26 |
1.00 |
0.41 |
swm |
0.19 |
0.41 |
1.00 |
Correlations across tasks for 2016.
kable(cor(subs_nona %>%
filter(year == "2016") %>%
ungroup %>%
select(ravens, gonogo, swm)), digits = 2)
ravens |
1.00 |
0.24 |
0.26 |
gonogo |
0.24 |
1.00 |
0.30 |
swm |
0.26 |
0.30 |
1.00 |
Math tasks
Read in data.
pv0 <- read.csv("data/paper/2015_PVNumerals.csv") %>%
select(subnum, pvAvg) %>%
mutate(year = "2015") %>%
rename(subid = subnum)
wg0 <- read.csv("data/paper/2015_WGArith.csv") %>%
select(subnum, arithmeticTotal, arithmeticAverage) %>%
mutate(year = "2015") %>%
rename(subid = subnum)
wj0 <- read.csv("data/paper/2015_WOODCOCK.csv") %>%
select(subnum, woodcockTotal) %>%
mutate(year = "2015") %>%
rename(subid = subnum)
pv1 <- read.csv("data/paper/2016_PVNumerals.csv") %>%
select(SubjectID, pvAvg) %>%
mutate(year = "2016") %>%
rename(subid = SubjectID)
wg1 <- read.csv("data/paper/2016_WGArith.csv") %>%
select(SubjectID, arithmeticTotal, arithmeticAverage) %>%
mutate(year = "2016") %>%
rename(subid = SubjectID)
wj1 <- read.csv("data/paper/2016_WOODCOCK.csv") %>%
select(SubjectID, woodcockTotal) %>%
mutate(year = "2016") %>%
rename(subid = SubjectID)
Merge with sub data.
d <- left_join(subs, bind_rows(pv0,pv1)) %>%
left_join(bind_rows(wg0,wg1)) %>%
left_join(bind_rows(wj0,wj1))
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Joining, by = c("subid", "year")
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): Unequal
## factor levels: coercing to character
## Joining, by = c("subid", "year")
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Joining, by = c("subid", "year")
Extract classes from subids.
d <- d %>%
separate(subid,"-", into=c("grade_level","class","num"), remove=FALSE) %>%
unite(class_num, grade_level, class) %>%
mutate(class_num = factor(class_num)) %>%
select(-num)
Individual tasks
Now plot each task individually.
Place value
pv.summary <- d %>%
group_by(subid, year, grade) %>%
summarise(correct = sum(pvAvg, na.rm=TRUE))
ms <- pv.summary %>%
group_by(year, grade) %>%
summarise(correct = mean(correct))
pv.summary %>%
ggplot(aes(x=correct)) +
geom_histogram(binwidth = .1) +
facet_grid(year~grade) +
geom_vline(data = ms, aes(xintercept = correct),
col = "red", lty = 2) +
ggtitle("Place Value Accuracy") +
xlab("Proportion correct")

Arithmetic
wg.summary <- d %>%
group_by(subid, year, grade) %>%
summarise(correct = sum(arithmeticTotal, na.rm=TRUE))
ms <- wg.summary %>%
group_by(year, grade) %>%
summarise(correct = mean(correct))
wg.summary %>%
ggplot(aes(x=correct)) +
geom_histogram(binwidth = 1) +
facet_grid(year~grade) +
geom_vline(data = ms, aes(xintercept = correct),
col = "red", lty = 2) +
ggtitle("Arithmetic Accuracy") +
xlab("Total Problems Correct")

Woodcock-Johnson III
wj.summary <- d %>%
group_by(subid, year, grade) %>%
summarise(correct = sum(woodcockTotal, na.rm=TRUE))
ms <- wj.summary %>%
group_by(year, grade) %>%
summarise(correct = mean(correct))
wj.summary %>%
ggplot(aes(x=correct)) +
geom_histogram(binwidth = 1) +
facet_grid(year~grade) +
geom_vline(data = ms, aes(xintercept = correct),
col = "red", lty = 2) +
ggtitle("Woodcock-Johnson III Accuracy") +
xlab("Total Problems Correct")

Change over time
Main plot
Without grade-level breakdown.
ms <- d %>%
gather(task, score, pvAvg, arithmeticAverage, woodcockTotal) %>%
group_by(year, group, task) %>%
multi_boot_standard("score", na.rm=TRUE)
## Joining, by = c("year", "group", "task")
pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
geom_line(position = pos) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper),
position = pos) +
facet_wrap(~task, scales = "free_y") +
scale_colour_solarized()

Broken down by grade.
ms <- d %>%
gather(task, score, pvAvg, arithmeticAverage, woodcockTotal) %>%
group_by(year, grade, group, task) %>%
multi_boot_standard("score", na.rm=TRUE)
## Joining, by = c("year", "grade", "group", "task")
pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
geom_line(position = pos) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper),
position = pos) +
facet_wrap(grade~task, scales = "free_y") +
scale_colour_solarized()

Pre-registered models
wj.mod <- lmer(woodcockTotal ~ grade + year * group +
(1 | subid) +
(year | class_num),
data=d)
pv.mod <- lmer(pvAvg ~ grade + year * group +
(1 | subid) +
(year | class_num),
data=d)
wg.mod <- lmer(arithmeticTotal ~ grade + year * group +
(1 | subid) +
(year | class_num),
data=d)
And summarize. First, WJ.
kable(summary(wj.mod)$coef)
(Intercept) |
5.6609036 |
0.3863944 |
14.6505830 |
gradesecond grade |
3.3985659 |
0.3565510 |
9.5317799 |
year2016 |
4.8353558 |
0.3144660 |
15.3764007 |
groupMA |
0.1114861 |
0.4743396 |
0.2350343 |
year2016:groupMA |
-0.0257998 |
0.4298529 |
-0.0600201 |
Place value.
kable(summary(pv.mod)$coef)
(Intercept) |
0.0241715 |
0.0333462 |
0.7248646 |
gradesecond grade |
0.2679955 |
0.0347614 |
7.7095744 |
year2016 |
0.2668794 |
0.0369524 |
7.2222570 |
groupMA |
0.0340511 |
0.0377221 |
0.9026825 |
year2016:groupMA |
0.0462637 |
0.0505900 |
0.9144819 |
and arithmetic.
kable(summary(wg.mod)$coef)
(Intercept) |
1.8776615 |
0.5856658 |
3.2060288 |
gradesecond grade |
4.3392875 |
0.6510306 |
6.6652592 |
year2016 |
6.8049119 |
0.6142575 |
11.0782723 |
groupMA |
0.4720199 |
0.6457014 |
0.7310189 |
year2016:groupMA |
-0.5871245 |
0.8354591 |
-0.7027568 |
Exploratory place value model.
Place value.
pv.exp.mod <- lmer(pvAvg ~ grade * year * group +
(1 | subid) +
(year | class_num),
data=d)
kable(summary(pv.exp.mod)$coef)
(Intercept) |
0.0391175 |
0.0412554 |
0.9481805 |
gradesecond grade |
0.2391025 |
0.0555904 |
4.3011490 |
year2016 |
0.2901527 |
0.0567255 |
5.1150311 |
groupMA |
-0.0143919 |
0.0598512 |
-0.2404607 |
gradesecond grade:year2016 |
-0.0403699 |
0.0769322 |
-0.5247462 |
gradesecond grade:groupMA |
0.0820839 |
0.0778316 |
1.0546339 |
year2016:groupMA |
0.0514488 |
0.0790735 |
0.6506459 |
gradesecond grade:year2016:groupMA |
-0.0066832 |
0.1059016 |
-0.0631079 |