library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(langcog)
##
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
##
## scale
library(magrittr)
library(stringr)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
##
## extract
library(googlesheets)
rm(list=ls())
theme_set(theme_mikabr() +
theme(panel.grid = element_blank(),
strip.background = element_blank()))
font <- "Open Sans"
runsheet <- read_csv("data/runsheet_6-20-16.csv") %>%
select(SID, Homeroom, Consent)
classes <- read_csv("data/classrooms_6-20-16.csv") %>%
select(Exclude, Homeroom, Group)
kids_raw <- left_join(runsheet, classes) %>%
mutate(Consent = ifelse(is.na(Consent), 0, Consent))
## Joining by: "Homeroom"
kids <- kids_raw %>%
filter(Consent == 1, Exclude == 0)
groups <- kids %>%
select(SID, Group, Homeroom) %>%
rename(roster_subid = SID,
room = Homeroom,
group = Group)
Raw numbers.
kable(kids_raw %>% group_by(Group, Consent) %>% summarise(n = n()))
Group | Consent | n |
---|---|---|
CNTL | 0 | 92 |
CNTL | 1 | 100 |
EXCLUDED | 0 | 6 |
MA | 0 | 93 |
MA | 1 | 116 |
NA | 0 | 3 |
NA | 1 | 1 |
By consent proportion for included rooms.
kable(kids_raw %>%
group_by(Group) %>%
filter(!is.na(Group), Group != "EXCLUDED") %>%
summarise(consent = sum(Consent == 1),
total = n(),
prop = mean(Consent == 1)),
digits = 2)
Group | consent | total | prop |
---|---|---|---|
CNTL | 100 | 192 | 0.52 |
MA | 116 | 209 | 0.56 |
By room.
kable(kids_raw %>%
group_by(Group, Homeroom) %>%
filter(!is.na(Group), Group != "EXCLUDED") %>%
summarise(consent = sum(Consent == 1),
total = n(),
prop = mean(Consent == 1)),
digits = 2)
Group | Homeroom | consent | total | prop |
---|---|---|---|---|
CNTL | BURKE/HALL | 6 | 19 | 0.32 |
CNTL | GUTIERREZ | 11 | 24 | 0.46 |
CNTL | HEREDIA | 2 | 3 | 0.67 |
CNTL | HUNTER/PIERIES | 10 | 20 | 0.50 |
CNTL | KANE | 11 | 22 | 0.50 |
CNTL | MARGULIES | 9 | 21 | 0.43 |
CNTL | MEJIA/BRADBURY | 13 | 22 | 0.59 |
CNTL | PAZYMINO | 12 | 16 | 0.75 |
CNTL | ROMANO | 4 | 9 | 0.44 |
CNTL | ROSA | 9 | 17 | 0.53 |
CNTL | ZORRILLA | 13 | 19 | 0.68 |
MA | CAPILLI | 7 | 16 | 0.44 |
MA | CHUN | 8 | 19 | 0.42 |
MA | CSASZAR | 11 | 19 | 0.58 |
MA | D’IZZIA/JANO | 11 | 21 | 0.52 |
MA | GIGANTI/CHAMBERS | 4 | 8 | 0.50 |
MA | MATA | 9 | 17 | 0.53 |
MA | MEEKS | 8 | 20 | 0.40 |
MA | MITTMAN | 4 | 10 | 0.40 |
MA | PERRY/KEENAN | 15 | 20 | 0.75 |
MA | RENTAS | 17 | 20 | 0.85 |
MA | TRIANO | 8 | 18 | 0.44 |
MA | WOODING | 14 | 21 | 0.67 |
Summary, we got around 50% consent rate; consent rate didn’t differ across groups.
Read data. Note that there is one sitting file for the battery and one for each of the sub-tests, in case the battery crashed and failed to save data for that participant.
sittings.raw <- bind_rows(
read_csv("data/2016/b301_06.20.16_17.00.40_sittings.csv"),
read_csv("data/2016/b299_06.20.16_17.03.46_sittings.csv"),
read_csv("data/2016/b300_06.20.16_17.03.52_sittings.csv"),
read_csv("data/2016/b302_06.20.16_17.03.58_sittings.csv"))
sittings <- sittings.raw %>%
mutate(subid = str_trim(toupper(misc), side = "both"),
sitting_id = id) %>%
filter(str_detect(subid, "[Ss][12]-")) %>%
mutate(grade = ifelse(str_detect(subid, "[Ss][1]"),
"first grade",
"second grade"),
year = factor(str_sub(start_time, 1, 4))) %>%
select(subid, sitting_id, year, grade, battery_id, test_order, num_completed)
Each task has the every-battery and the one-sitting version.
gonogo.raw <- bind_rows(
read_csv("data/2016/b299_t463_06.20.16_17.06.05.csv"),
read_csv("data/2016/b301_t463_06.20.16_17.06.16.csv"))
ravens.raw <- bind_rows(
read_csv("data/2016/b300_t464_06.20.16_17.06.12.csv"),
read_csv("data/2016/b301_t464_06.20.16_17.06.18.csv"))
swm.raw <- bind_rows(
read_csv("data/2016/b302_t467_06.20.16_17.06.27.csv"),
read_csv("data/2016/b301_t467_06.20.16_17.06.19.csv"))
Merge in demographics.
First, find the subjects whose IDs were entered incorrectly and output.
anti_join(sittings, groups %>% rename(subid = roster_subid)) %>%
select(subid, battery_id) %>%
write_csv("data/missing_sittings.csv")
## Joining by: "subid"
Now read in the conversion table. (Done by hand by checking the runsheet for typos, notes etc, on the basis of missing_sittings.csv
).
missing_sittings <- read_csv("data/missing_sittings_key.csv")
sittings <- left_join(sittings, missing_sittings) %>%
mutate(exclude = ifelse(is.na(exclude), 0, exclude)) %>%
filter(exclude != 1) %>%
mutate(roster_subid = ifelse(is.na(new_subid), subid, new_subid)) %>%
left_join(groups)
## Joining by: c("subid", "battery_id")
## Joining by: "roster_subid"
Computer testing numbers. Note that battery 301 is the combined battery.
sittings %>%
group_by(grade, year, battery_id) %>%
summarise(n = n())
## Source: local data frame [14 x 4]
## Groups: grade, year [?]
##
## grade year battery_id n
## (chr) (fctr) (int) (int)
## 1 first grade 2015 299 2
## 2 first grade 2015 300 10
## 3 first grade 2015 301 70
## 4 first grade 2015 302 10
## 5 first grade 2016 300 2
## 6 first grade 2016 301 72
## 7 first grade 2016 302 2
## 8 second grade 2015 299 1
## 9 second grade 2015 300 9
## 10 second grade 2015 301 111
## 11 second grade 2015 302 10
## 12 second grade 2016 300 4
## 13 second grade 2016 301 106
## 14 second grade 2016 302 4
gonogo <- gonogo.raw %>%
filter(sitting_id %in% sittings$sitting_id) %>%
left_join(sittings, by = "sitting_id") %>%
select(subid, year, grade, trial, stim, correct, rt, accuracy, group,
responseAssign, rtCall)
Accuracy.
gonogo.summary <- gonogo %>%
group_by(subid, grade, group, year) %>%
summarise(accuracy = mean(accuracy, na.rm=TRUE))
ms <- gonogo.summary %>%
group_by(grade, group, year) %>%
summarise(accuracy = mean(accuracy))
gonogo.summary %>%
ggplot(aes(x=accuracy, fill = group)) +
geom_histogram(binwidth = .05) +
facet_grid(grade ~ year) +
geom_vline(xintercept = .5, lty = 3) +
xlim(c(0,1)) +
geom_vline(data = ms, aes(xintercept = accuracy, col = group),
lty = 2)
RT.
ms <- gonogo %>%
filter(rt > 0 & accuracy == 1) %>%
group_by(subid, grade, group, year) %>%
summarise(rt = mean(rt, na.rm=TRUE)) %>%
group_by(grade, group) %>%
summarise(rt = mean(rt))
gonogo %>%
filter(rt > 0 & accuracy == 1) %>%
group_by(subid, grade, group, year) %>%
summarise(rt = mean(rt, na.rm=TRUE)) %>%
ggplot(aes(x=rt, fill = group)) +
geom_histogram(binwidth = 50) +
facet_grid(grade ~ year) +
geom_vline(data = ms, aes(xintercept = rt, col = group),
lty = 2)
Data processing.
swm <- swm.raw %>%
filter(sitting_id %in% sittings$sitting_id,
type == "test") %>%
left_join(sittings, by = "sitting_id") %>%
filter(type == "test") %>%
select(subid, grade, group, year, trial, capacity, correct)
Mean level (same measure as in Zenith study).
swm.summary <- swm %>%
group_by(subid, grade, group, year) %>%
summarise(capacity = mean(capacity, na.rm=TRUE))
ms <- swm.summary %>%
group_by(grade, group, year) %>%
summarise(capacity = mean(capacity))
swm.summary %>%
ggplot(aes(x=capacity, fill = group)) +
geom_histogram(binwidth = .5) +
facet_grid(grade ~ year) +
xlim(c(1,8)) +
geom_vline(data = ms, aes(xintercept = capacity, col = group),
lty = 2)
## Warning: Removed 2 rows containing non-finite values (stat_bin).
Actually a matrix reasoning equivalent.
ravens <- ravens.raw %>%
filter(sitting_id %in% sittings$sitting_id,
type == "test") %>%
left_join(sittings, by = "sitting_id") %>%
filter(type == "test") %>%
select(subid, grade, group, year, trial, correct)
Mean level (same measure as in Zenith study).
ravens.summary <- ravens %>%
group_by(subid, grade, group, year) %>%
summarise(correct = sum(correct, na.rm=TRUE))
ms <- ravens.summary %>%
group_by(grade, group, year) %>%
summarise(correct = mean(correct))
ravens.summary %>%
ggplot(aes(x=correct, fill = group)) +
geom_histogram(binwidth = 1) +
facet_grid(grade ~ year) +
geom_vline(data = ms, aes(xintercept = correct, col = group),
lty = 2)
subs <- ravens.summary %>%
left_join(gonogo.summary) %>%
left_join(swm.summary) %>%
rename(ravens = correct,
gonogo = accuracy,
swm = capacity)
## Joining by: c("subid", "grade", "group", "year")
## Joining by: c("subid", "grade", "group", "year")
ms <- subs %>%
gather(task, score, ravens, gonogo, swm) %>%
group_by(year, group, task) %>%
multi_boot_standard("score", na.rm=TRUE)
## Joining by: c("year", "group", "task")
pos <- position_dodge(width = .05)
ggplot(ms, aes(x = year, y = mean, col = group, group = group)) +
geom_line(position = pos) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper),
position = pos) +
facet_wrap(~task, scales = "free_y") +
scale_colour_solarized()
And final numbers on N data points per task.
kable(subs %>%
gather(task, score, ravens, gonogo, swm) %>%
group_by(year, group, task) %>%
summarise(n=n()))
year | group | task | n |
---|---|---|---|
2015 | CNTL | gonogo | 82 |
2015 | CNTL | ravens | 82 |
2015 | CNTL | swm | 82 |
2015 | MA | gonogo | 92 |
2015 | MA | ravens | 92 |
2015 | MA | swm | 92 |
2016 | CNTL | gonogo | 77 |
2016 | CNTL | ravens | 77 |
2016 | CNTL | swm | 77 |
2016 | MA | gonogo | 98 |
2016 | MA | ravens | 98 |
2016 | MA | swm | 98 |
Year-by-year task plot
subs_nona <- subs %>%
filter(!is.na(ravens) & !is.na(gonogo) & !is.na(swm))
subs_wide <- subs_nona %>%
gather(task, score, ravens, gonogo, swm) %>%
mutate(year = str_c("y", as.character(year))) %>%
spread(year, score)
ggplot(aes(x = y2015, y = y2016, col = group), data = subs_wide) +
geom_point() +
geom_smooth(method = "lm") +
facet_wrap(~task, scales = "free")
## Warning: Removed 111 rows containing non-finite values (stat_smooth).
## Warning: Removed 111 rows containing missing values (geom_point).
Correlations within task.
with(filter(subs_wide, task == "gonogo"), cor.test(y2015, y2016))
##
## Pearson's product-moment correlation
##
## data: y2015 and y2016
## t = 7.447, df = 151, p-value = 6.803e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3918282 0.6255178
## sample estimates:
## cor
## 0.5182823
with(filter(subs_wide, task == "swm"), cor.test(y2015, y2016))
##
## Pearson's product-moment correlation
##
## data: y2015 and y2016
## t = 4.9263, df = 151, p-value = 2.178e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2268234 0.5011935
## sample estimates:
## cor
## 0.3721086
with(filter(subs_wide, task == "ravens"), cor.test(y2015, y2016))
##
## Pearson's product-moment correlation
##
## data: y2015 and y2016
## t = 5.0758, df = 151, p-value = 1.12e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2374858 0.5095848
## sample estimates:
## cor
## 0.3817771
Correlations across tasks for 2015.
kable(cor(subs_nona %>%
filter(year == "2015") %>%
ungroup %>%
select(ravens, gonogo, swm)), digits = 2)
ravens | gonogo | swm | |
---|---|---|---|
ravens | 1.00 | 0.26 | 0.19 |
gonogo | 0.26 | 1.00 | 0.41 |
swm | 0.19 | 0.41 | 1.00 |
Correlations across tasks for 2016.
kable(cor(subs_nona %>%
filter(year == "2016") %>%
ungroup %>%
select(ravens, gonogo, swm)), digits = 2)
ravens | gonogo | swm | |
---|---|---|---|
ravens | 1.00 | 0.24 | 0.26 |
gonogo | 0.24 | 1.00 | 0.30 |
swm | 0.26 | 0.30 | 1.00 |
Read in data.
pv <- read.csv("data/2015/2015_PVNumerals.csv") %>%
select(subnum, pvAvg) %>%
rename(subid = subnum)
wg <- read.csv("data/2015/2015_WGArith.csv") %>%
select(subnum, arithmeticTotal, arithmeticAverage) %>%
rename(subid = subnum)
wj <- read.csv("data/2015/2015_WOODCOCK.csv") %>%
select(subnum, woodcockTotal) %>%
rename(subid = subnum)
Merge with sub data.
d <- left_join(subs, pv) %>%
left_join(wg) %>%
left_join(wj)
## Joining by: "subid"
## Warning in left_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector
## Joining by: "subid"
## Warning in left_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector
## Joining by: "subid"
## Warning in left_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector
Now plot each task individually.
pv.summary <- d %>%
group_by(subid, grade) %>%
summarise(correct = sum(pvAvg, na.rm=TRUE))
ms <- pv.summary %>%
group_by(grade) %>%
summarise(correct = mean(correct))
pv.summary %>%
ggplot(aes(x=correct)) +
geom_histogram(binwidth = .1) +
facet_grid(.~grade) +
geom_vline(data = ms, aes(xintercept = correct),
col = "red", lty = 2) +
ggtitle("Place Value Accuracy") +
xlab("Proportion correct")
wg.summary <- d %>%
group_by(subid, grade) %>%
summarise(correct = sum(arithmeticTotal, na.rm=TRUE))
ms <- wg.summary %>%
group_by(grade) %>%
summarise(correct = mean(correct))
wg.summary %>%
ggplot(aes(x=correct)) +
geom_histogram(binwidth = 1) +
facet_grid(.~grade) +
geom_vline(data = ms, aes(xintercept = correct),
col = "red", lty = 2) +
ggtitle("Arithmetic Accuracy") +
xlab("Total Problems Correct")
wj.summary <- d %>%
group_by(subid, grade) %>%
summarise(correct = sum(woodcockTotal, na.rm=TRUE))
ms <- wj.summary %>%
group_by(grade) %>%
summarise(correct = mean(correct))
wj.summary %>%
ggplot(aes(x=correct)) +
geom_histogram(binwidth = 1) +
facet_grid(.~grade) +
geom_vline(data = ms, aes(xintercept = correct),
col = "red", lty = 2) +
ggtitle("Woodcock-Johnson III Accuracy") +
xlab("Total Problems Correct")
Correlations.
kable(cor(d %>% ungroup %>%
select(woodcockTotal, arithmeticTotal, pvAvg)), digits = 2)
woodcockTotal | arithmeticTotal | pvAvg | |
---|---|---|---|
woodcockTotal | 1 | NA | NA |
arithmeticTotal | NA | 1 | NA |
pvAvg | NA | NA | 1 |
cor.test(d$woodcockTotal, d$arithmeticTotal)
##
## Pearson's product-moment correlation
##
## data: d$woodcockTotal and d$arithmeticTotal
## t = 19.898, df = 345, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6779046 0.7765320
## sample estimates:
## cor
## 0.7310136
cor.test(d$woodcockTotal, d$pvAvg)
##
## Pearson's product-moment correlation
##
## data: d$woodcockTotal and d$pvAvg
## t = 14.694, df = 345, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5511568 0.6812215
## sample estimates:
## cor
## 0.6204371
cor.test(d$arithmeticTotal, d$pvAvg)
##
## Pearson's product-moment correlation
##
## data: d$arithmeticTotal and d$pvAvg
## t = 16.118, df = 345, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5908825 0.7115761
## sample estimates:
## cor
## 0.6553933
kable(cor(d %>% ungroup %>%
select(ravens, gonogo, swm,
woodcockTotal, arithmeticTotal, pvAvg)), digits = 2)
ravens | gonogo | swm | woodcockTotal | arithmeticTotal | pvAvg | |
---|---|---|---|---|---|---|
ravens | 1 | NA | NA | NA | NA | NA |
gonogo | NA | 1 | NA | NA | NA | NA |
swm | NA | NA | 1 | NA | NA | NA |
woodcockTotal | NA | NA | NA | 1 | NA | NA |
arithmeticTotal | NA | NA | NA | NA | 1 | NA |
pvAvg | NA | NA | NA | NA | NA | 1 |
cor.test(d$ravens, d$woodcockTotal)
##
## Pearson's product-moment correlation
##
## data: d$ravens and d$woodcockTotal
## t = 5.4527, df = 345, p-value = 9.473e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1817842 0.3758135
## sample estimates:
## cor
## 0.2816759
cor.test(d$ravens, d$arithmeticTotal)
##
## Pearson's product-moment correlation
##
## data: d$ravens and d$arithmeticTotal
## t = 5.1172, df = 345, p-value = 5.159e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1649351 0.3607988
## sample estimates:
## cor
## 0.2656055
cor.test(d$ravens, d$pvAvg)
##
## Pearson's product-moment correlation
##
## data: d$ravens and d$pvAvg
## t = 6.3208, df = 345, p-value = 8.042e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2244916 0.4134202
## sample estimates:
## cor
## 0.3221599
cor.test(d$swm, d$woodcockTotal)
##
## Pearson's product-moment correlation
##
## data: d$swm and d$woodcockTotal
## t = 4.8337, df = 341, p-value = 2.031e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1513904 0.3497431
## sample estimates:
## cor
## 0.2532262
cor.test(d$swm, d$arithmeticTotal)
##
## Pearson's product-moment correlation
##
## data: d$swm and d$arithmeticTotal
## t = 6.6645, df = 341, p-value = 1.069e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2422835 0.4299102
## sample estimates:
## cor
## 0.3394693
cor.test(d$swm, d$pvAvg)
##
## Pearson's product-moment correlation
##
## data: d$swm and d$pvAvg
## t = 5.6865, df = 341, p-value = 2.789e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1944707 0.3881052
## sample estimates:
## cor
## 0.2943053
cor.test(d$gonogo, d$woodcockTotal)
##
## Pearson's product-moment correlation
##
## data: d$gonogo and d$woodcockTotal
## t = 5.8864, df = 343, p-value = 9.398e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2038381 0.3958340
## sample estimates:
## cor
## 0.3029064
cor.test(d$gonogo, d$arithmeticTotal)
##
## Pearson's product-moment correlation
##
## data: d$gonogo and d$arithmeticTotal
## t = 6.309, df = 343, p-value = 8.668e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2245123 0.4139497
## sample estimates:
## cor
## 0.322456
cor.test(d$gonogo, d$pvAvg)
##
## Pearson's product-moment correlation
##
## data: d$gonogo and d$pvAvg
## t = 4.813, df = 343, p-value = 2.232e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1499180 0.3478731
## sample estimates:
## cor
## 0.2515241