knitr::opts_chunk$set(warning = FALSE, message = FALSE, cache = TRUE)
rm(list=ls())
library(readr)
library(dplyr)
library(tidyr)
library(langcog)
library(ggplot2)
# library(lubridate)
Note that the dataset here is those participants (31) who pass the length sanity checks.
d <- read_csv("../data/consolidated_data_passing.csv") %>%
mutate(posture = factor(posture),
orientation = factor(orientation))
demo <- read_csv("../data/demographics/demographics.csv")
cdis <- read_csv("../data/cdis/cdi_summary.csv")
ps <- d %>%
filter(!is.na(posture)) %>%
group_by(subid, age.grp, posture) %>%
summarise(time = sum(dt, na.rm=TRUE)) %>%
mutate(prop.time = time/sum(time)) %>%
group_by(age.grp, posture) %>%
multi_boot_standard(col = "prop.time", na.rm=TRUE)
ggplot(ps, aes(x = factor(age.grp), y = mean,
fill = posture, pch = posture)) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymin=ci_lower, ymax=ci_upper),
position = position_dodge(width = .9)) +
ylab("Proportion Time") +
xlab("Age (months)") +
scale_fill_solarized() +
theme_bw()
os <- d %>%
filter(!is.na(orientation)) %>%
group_by(age.grp, subid, orientation) %>%
summarise(time = sum(dt, na.rm=TRUE)) %>%
mutate(prop.time = time/sum(time)) %>%
group_by(age.grp, orientation) %>%
multi_boot_standard(col = "prop.time")
ggplot(os, aes(x = factor(age.grp), y = mean,
fill = orientation, pch = orientation)) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymin=ci_lower, ymax=ci_upper),
position = position_dodge(width = .9)) +
ylab("Proportion Time") +
xlab("Age (months)") +
scale_fill_solarized() +
theme_bw()
Data don’t look interpretable typically when sliced this fine.
pos <- d %>%
filter(!is.na(orientation) & !is.na(posture)) %>%
group_by(age.grp, subid, orientation, posture) %>%
summarise(time = sum(dt, na.rm=TRUE)) %>%
group_by(age.grp, subid) %>%
mutate(prop.time = time/sum(time)) %>%
group_by(age.grp, orientation, posture) %>%
multi_boot_standard(col = "prop.time")
ggplot(pos, aes(x = factor(age.grp), y = mean,
fill = posture)) +
geom_bar(stat = "identity", position = "dodge") +
geom_linerange(aes(ymin=ci_lower, ymax=ci_upper),
position = position_dodge(width = .9)) +
facet_grid(.~ orientation) +
ylab("Proportion Time") +
xlab("Age (months)") +
scale_fill_solarized() +
theme_bw()
Using automated face detection results.
mss <- d %>%
group_by(age.grp, subid) %>%
summarise(face = mean(face)) %>%
group_by(age.grp)
ms <- mss %>%
multi_boot_standard(col = "face")
ggplot(mss, aes(x = age.grp, y = face)) +
geom_jitter(width = .25) +
geom_pointrange(data = ms,
aes(x = age.grp,
y = mean,
ymin = ci_lower,
ymax = ci_upper), col = "red") +
geom_line(data = ms,
aes(x = age.grp,
y = mean), col = "red") +
scale_x_continuous(breaks=c(8,12,16)) +
ylab("Proportion Face Detections") +
xlab("Age (months)") +
theme_bw()
And with continuous age. This is probably the simplest option.
mss <- d %>%
filter(!is.na(age.at.test)) %>%
group_by(subid, age.at.test) %>%
summarise(face = mean(face)) %>%
left_join(demo) %>%
mutate(len = as.numeric(lubridate::as.duration(lubridate::ms(len))))
ggplot(mss, aes(x = age.at.test, y = face)) +
geom_jitter(width = .25, aes(size = len)) +
geom_smooth() +
scale_x_continuous(breaks=c(8,12,16)) +
scale_size_continuous(name = "Length (s)") +
ylab("Proportion Face Detections") +
xlab("Age (months)") +
theme_bw()
Boxplot, though it’s not as good.
ggplot(mss, aes(x = factor(age.grp), y = face)) +
geom_boxplot() +
ylab("Proportion Face Detections") +
xlab("Age (months)") +
theme_bw()
Stats. Standard ANOVA (violates distributional assumptions).
mod <- lm(face ~ factor(age.grp), data = mss)
summary(mod)
##
## Call:
## lm(formula = face ~ factor(age.grp), data = mss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.06557 -0.03640 -0.01747 0.01736 0.24241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.065573 0.019520 3.359 0.00227 **
## factor(age.grp)12 -0.037457 0.029817 -1.256 0.21940
## factor(age.grp)16 -0.002401 0.028952 -0.083 0.93449
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06762 on 28 degrees of freedom
## Multiple R-squared: 0.06211, Adjusted R-squared: -0.00488
## F-statistic: 0.9272 on 2 and 28 DF, p-value: 0.4075
anova(mod)
## Analysis of Variance Table
##
## Response: face
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(age.grp) 2 0.008478 0.0042391 0.9272 0.4075
## Residuals 28 0.128020 0.0045721
Now a non-parametric test.
kruskal.test(face ~ factor(age.grp), data = mss)
##
## Kruskal-Wallis rank sum test
##
## data: face by factor(age.grp)
## Kruskal-Wallis chi-squared = 1.3202, df = 2, p-value = 0.5168
No real signal that the (predicted) U-shaped pattern is reliable, unfortunately.
Do a 10s moving window average.
d <- d %>%
group_by(subid) %>%
mutate(face_avg = zoo::rollapply(data = face,
width = 30*10, # 10s window
FUN = mean,
align = "right",
fill = NA,
na.rm = TRUE))
ggplot(d, aes(x = frame, y = face_avg)) +
geom_line() +
facet_wrap(~subid) +
theme_bw()
mss <- d %>%
filter(!is.na(posture)) %>%
group_by(age.grp, posture, subid) %>%
summarise(face = mean(face)) %>%
group_by(age.grp)
# ms <- mss %>%
# multi_boot_standard(col = "face")
ggplot(mss, aes(x = factor(age.grp), y = face, col = posture)) +
geom_boxplot() +
ylab("Proportion Face Detections") +
xlab("Age (months)") +
theme_bw()
mss <- d %>%
filter(!is.na(orientation)) %>%
group_by(age.grp, orientation, subid) %>%
summarise(face = mean(face)) %>%
group_by(age.grp)
# ms <- mss %>%
# multi_boot_standard(col = "face")
ggplot(mss, aes(x = factor(age.grp), y = face, col = orientation)) +
geom_boxplot() +
ylab("Proportion Face Detections") +
xlab("Age (months)") +
theme_bw()
Not really enough data to say much when binned this finely.
mss <- d %>%
filter(!is.na(posture) & !is.na(orientation)) %>%
group_by(age.grp, posture, orientation, subid) %>%
summarise(face = mean(face)) %>%
group_by(age.grp)
# ms <- mss %>%
# multi_boot_standard(col = "face")
ggplot(mss, aes(x = factor(age.grp), y = face, col = posture)) +
geom_boxplot() +
ylab("Proportion Face Detections") +
xlab("Age (months)") +
facet_grid(~orientation) +
ylim(c(0,.2)) +
theme_bw()
Need some helper functions to merge in namings.
source("helper.R")
namings <- d %>%
group_by(subid) %>%
do(summarize.naming(.)) %>%
left_join(demo %>% select(subid, gender, age.at.test, len))
First examine: how many namings do we have, generally?
naming_rate <- namings %>%
group_by(subid, age.grp, age.at.test) %>%
summarise(num = n(),
len = len[1]) %>%
mutate(rate = num / (as.numeric(lubridate::as.duration(lubridate::ms(len)))/60))
ggplot(naming_rate,
aes(x = age.at.test, y = rate)) +
geom_point() +
geom_smooth(span = 1) +
ylab("Namings per minute") +
xlab("Age (months)") +
theme_bw()
Break down by novel and familiar.
naming_rate_familiarity <- namings %>%
group_by(subid, familiarity, age.at.test) %>%
summarise(num = n(),
len = len[1]) %>%
mutate(rate = num / (as.numeric(lubridate::as.duration(lubridate::ms(len)))/60))
ggplot(naming_rate_familiarity,
aes(x = age.at.test, y = rate, col = familiarity)) +
geom_point() +
geom_smooth(span = 1) +
ylab("Namings per minute") +
xlab("Age (months)") +
theme_bw()
Item effects just for fun.
namings %>%
group_by(subid, name, familiarity, age.at.test) %>%
summarise(num = n(),
len = len[1]) %>%
mutate(rate = num / (as.numeric(lubridate::as.duration(lubridate::ms(len)))/60)) %>%
ggplot(aes(x = age.at.test, y = rate, col = name)) +
geom_point() +
facet_grid(.~familiarity) +
geom_smooth(span = 1) +
ylab("Namings per minute") +
xlab("Age (months)") +
theme_bw()
# sb <- ddply(d,~subid, summarize.naming, window=c(-3,0))
# sa <- ddply(d,~subid, summarize.naming, window=c(0,3))
# sb$window <- "before"
# sa$window <- "after"
# s <- rbind(sb,sa)
# s$window <- factor(s$window)
#
# # d <- subset(d,!is.na(time))
# # s <- ddply(d,~subid, summarize.naming, window=c(-3,3))
#
# s$first.instance <- s$naming.instance == 1
# s$log.instance <- round(log10(s$naming.instance)*4)/4
# s$binned.instance <- 10^(round(log10(s$naming.instance)*4)/4)
# s$face.logical <- s$face != 0
#
# ## try plotting individuals
# mss <- ddply(s,~subid+age.grp+window,summarise,
# face = na.mean(face))
# qplot(age.grp,face,colour=window,
# xlab="Age (Months)",ylab="Proportion faces detected in window",
# data=mss,geom=c("point")) +
# scale_x_continuous(breaks=c(8,12,16)) +
# geom_smooth()
ms <- namings %>%
group_by(age.grp, subid, naming.instance, familiarity, name) %>%
summarise(face = mean(face)) %>%
group_by(age.grp, naming.instance, familiarity) %>%
multi_boot_standard(col = "face", na.rm = TRUE)
ggplot(ms,
aes(x = naming.instance, y = mean, col = familiarity)) +
geom_point() +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper)) +
geom_smooth(se = FALSE) +
facet_grid(~age.grp) +
xlim(c(0,20)) +
scale_color_solarized() +
theme_bw()
# ## now calculate differences from base rate
# n <- ddply(s,~subid+age.grp,summarise,
# face = na.mean(face))
#
# n$br <- ddply(d,~subid+age.grp,summarise,
# face=na.mean(face))$face
#
# n$br.diff <- n$face - n$br
#
# # pdf("~/Projects/xsface/writeup/figures/naming_faces_diff.pdf",width=4,height=3)
# qplot(age.grp,br.diff,position=position_jitter(.2),
# data=subset(n,age.grp>4),xlab="Age (Months)",ylab="Face detections relative to base rate") +
# scale_x_continuous(breaks=c(8,12,16,20)) +
# geom_abline(slope=0,intercept=0,lty=2) +
# stat_summary(fun.data="mean_cl_boot",color="red")
# # dev.off()
cdi_namings <- left_join(naming_rate, cdis)
qplot(comprehension, facets= .~ age.grp, data = cdi_namings) +
theme_bw()
qplot(production, facets= .~ age.grp, data = cdi_namings) +
theme_bw()
Just out of curiousity, are there any major differences in terms of CDIs by naming rate? (Would help to establish the validity of the naming rate analyses).
ggplot(cdi_namings,
aes(x = rate, y = comprehension)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_grid(.~age.grp) +
xlab("Namings per minute") +
ylab("CDI Summed Comprehension Vocabulary") +
theme_bw()
cdis <- read_csv("../data/cdis/cdi_summary.csv")
left_join(naming_rate, cdis) %>%
ggplot(aes(x = rate, y = production)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_grid(.~age.grp) +
xlab("Namings per minute") +
ylab("CDI Summed Production Vocabulary") +
theme_bw()