knitr::opts_chunk$set(warning = FALSE, message = FALSE, cache = TRUE)
rm(list=ls())
library(readr)
library(dplyr)
library(tidyr)
library(langcog)
library(ggplot2)
# library(lubridate)

Note that the dataset here is those participants (31) who pass the length sanity checks.

d <- read_csv("../data/consolidated_data_passing.csv") %>%
  mutate(posture = factor(posture), 
         orientation = factor(orientation))
demo <- read_csv("../data/demographics/demographics.csv")
cdis <- read_csv("../data/cdis/cdi_summary.csv")

Postures and Orientations

Postures

ps <- d %>%
  filter(!is.na(posture)) %>%
  group_by(subid, age.grp, posture) %>%
  summarise(time = sum(dt, na.rm=TRUE)) %>%
  mutate(prop.time = time/sum(time)) %>%
  group_by(age.grp, posture) %>%
  multi_boot_standard(col = "prop.time", na.rm=TRUE)
ggplot(ps, aes(x = factor(age.grp), y = mean, 
               fill = posture, pch = posture)) + 
  geom_bar(stat = "identity", position = "dodge") + 
  geom_linerange(aes(ymin=ci_lower, ymax=ci_upper), 
                 position = position_dodge(width = .9)) +
  ylab("Proportion Time") + 
  xlab("Age (months)") + 
  scale_fill_solarized() + 
  theme_bw()

Orientations

os <- d %>%
  filter(!is.na(orientation)) %>%
  group_by(age.grp, subid, orientation) %>%
  summarise(time = sum(dt, na.rm=TRUE)) %>%
  mutate(prop.time = time/sum(time)) %>%
  group_by(age.grp, orientation) %>%
  multi_boot_standard(col = "prop.time")
ggplot(os, aes(x = factor(age.grp), y = mean, 
               fill = orientation, pch = orientation)) + 
  geom_bar(stat = "identity", position = "dodge") + 
  geom_linerange(aes(ymin=ci_lower, ymax=ci_upper), 
                 position = position_dodge(width = .9)) +
  ylab("Proportion Time") + 
  xlab("Age (months)") + 
  scale_fill_solarized() + 
  theme_bw()

Postures and orientations together

Data don’t look interpretable typically when sliced this fine.

pos <- d %>%
  filter(!is.na(orientation) & !is.na(posture)) %>%
  group_by(age.grp, subid, orientation, posture) %>%
  summarise(time = sum(dt, na.rm=TRUE)) %>%
  group_by(age.grp, subid) %>%
  mutate(prop.time = time/sum(time)) %>%
  group_by(age.grp, orientation, posture) %>%
  multi_boot_standard(col = "prop.time")
ggplot(pos, aes(x = factor(age.grp), y = mean, 
               fill = posture)) + 
  geom_bar(stat = "identity", position = "dodge") + 
  geom_linerange(aes(ymin=ci_lower, ymax=ci_upper), 
                 position = position_dodge(width = .9)) +
  facet_grid(.~ orientation) + 
  ylab("Proportion Time") + 
  xlab("Age (months)") + 
  scale_fill_solarized() + 
  theme_bw()

Face detections

Using automated face detection results.

Basic detections descriptives

mss <- d %>%
  group_by(age.grp, subid) %>%
  summarise(face = mean(face)) %>%
  group_by(age.grp)

ms <- mss %>%
  multi_boot_standard(col = "face")
  
ggplot(mss, aes(x = age.grp, y = face)) + 
  geom_jitter(width = .25) + 
  geom_pointrange(data = ms, 
                 aes(x = age.grp, 
                     y = mean, 
                     ymin = ci_lower, 
                     ymax = ci_upper), col = "red") +
  geom_line(data = ms, 
             aes(x = age.grp, 
                     y = mean), col = "red") + 
  scale_x_continuous(breaks=c(8,12,16)) +
  ylab("Proportion Face Detections") + 
  xlab("Age (months)") + 
  theme_bw() 

And with continuous age. This is probably the simplest option.

mss <- d %>%
  filter(!is.na(age.at.test)) %>%
  group_by(subid, age.at.test) %>%
  summarise(face = mean(face)) %>%
  left_join(demo) %>%
  mutate(len = as.numeric(lubridate::as.duration(lubridate::ms(len))))

ggplot(mss, aes(x = age.at.test, y = face)) + 
  geom_jitter(width = .25, aes(size = len)) + 
  geom_smooth() + 
  scale_x_continuous(breaks=c(8,12,16)) +
  scale_size_continuous(name = "Length (s)") + 
  ylab("Proportion Face Detections") + 
  xlab("Age (months)") + 
  theme_bw() 

Boxplot, though it’s not as good.

ggplot(mss, aes(x = factor(age.grp), y = face)) + 
  geom_boxplot() + 
  ylab("Proportion Face Detections") + 
  xlab("Age (months)") + 
  theme_bw() 

Stats. Standard ANOVA (violates distributional assumptions).

mod <- lm(face ~ factor(age.grp), data = mss)
summary(mod)
## 
## Call:
## lm(formula = face ~ factor(age.grp), data = mss)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.06557 -0.03640 -0.01747  0.01736  0.24241 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)        0.065573   0.019520   3.359  0.00227 **
## factor(age.grp)12 -0.037457   0.029817  -1.256  0.21940   
## factor(age.grp)16 -0.002401   0.028952  -0.083  0.93449   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06762 on 28 degrees of freedom
## Multiple R-squared:  0.06211,    Adjusted R-squared:  -0.00488 
## F-statistic: 0.9272 on 2 and 28 DF,  p-value: 0.4075
anova(mod)
## Analysis of Variance Table
## 
## Response: face
##                 Df   Sum Sq   Mean Sq F value Pr(>F)
## factor(age.grp)  2 0.008478 0.0042391  0.9272 0.4075
## Residuals       28 0.128020 0.0045721

Now a non-parametric test.

kruskal.test(face ~ factor(age.grp), data = mss)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  face by factor(age.grp)
## Kruskal-Wallis chi-squared = 1.3202, df = 2, p-value = 0.5168

No real signal that the (predicted) U-shaped pattern is reliable, unfortunately.

Detections timecourse

Do a 10s moving window average.

d <- d %>%
  group_by(subid) %>%
  mutate(face_avg = zoo::rollapply(data = face, 
                                   width = 30*10, # 10s window
                                   FUN = mean, 
                                   align = "right", 
                                   fill = NA, 
                                   na.rm = TRUE))

ggplot(d, aes(x = frame, y = face_avg)) + 
  geom_line() + 
  facet_wrap(~subid) + 
  theme_bw()

Detections by posture and orientation

Posture

mss <- d %>%
  filter(!is.na(posture)) %>%
  group_by(age.grp, posture, subid) %>%
  summarise(face = mean(face)) %>%
  group_by(age.grp)

# ms <- mss %>%
#   multi_boot_standard(col = "face")
  
ggplot(mss, aes(x = factor(age.grp), y = face, col = posture)) + 
  geom_boxplot() + 
  ylab("Proportion Face Detections") + 
  xlab("Age (months)") + 
  theme_bw() 

Orientation

mss <- d %>%
  filter(!is.na(orientation)) %>%
  group_by(age.grp, orientation, subid) %>%
  summarise(face = mean(face)) %>%
  group_by(age.grp)

# ms <- mss %>%
#   multi_boot_standard(col = "face")
  
ggplot(mss, aes(x = factor(age.grp), y = face, col = orientation)) + 
  geom_boxplot() + 
  ylab("Proportion Face Detections") + 
  xlab("Age (months)") + 
  theme_bw() 

Both

Not really enough data to say much when binned this finely.

mss <- d %>%
  filter(!is.na(posture) & !is.na(orientation)) %>%
  group_by(age.grp, posture, orientation, subid) %>%
  summarise(face = mean(face)) %>%
  group_by(age.grp)

# ms <- mss %>%
#   multi_boot_standard(col = "face")
  
ggplot(mss, aes(x = factor(age.grp), y = face, col = posture)) + 
  geom_boxplot() + 
  ylab("Proportion Face Detections") + 
  xlab("Age (months)") + 
  facet_grid(~orientation) + 
  ylim(c(0,.2)) + 
  theme_bw() 

Naming data

Need some helper functions to merge in namings.

source("helper.R")
namings <- d %>%
  group_by(subid) %>%
  do(summarize.naming(.)) %>%
  left_join(demo %>% select(subid, gender, age.at.test, len))

First examine: how many namings do we have, generally?

naming_rate <- namings %>%
  group_by(subid, age.grp, age.at.test) %>%
  summarise(num = n(), 
            len = len[1]) %>%
  mutate(rate = num / (as.numeric(lubridate::as.duration(lubridate::ms(len)))/60)) 

ggplot(naming_rate, 
       aes(x = age.at.test, y = rate)) +
  geom_point() + 
  geom_smooth(span = 1) + 
  ylab("Namings per minute") + 
  xlab("Age (months)") + 
  theme_bw() 

Break down by novel and familiar.

naming_rate_familiarity <- namings %>%
  group_by(subid, familiarity, age.at.test) %>%
  summarise(num = n(), 
            len = len[1]) %>%
  mutate(rate = num / (as.numeric(lubridate::as.duration(lubridate::ms(len)))/60))

ggplot(naming_rate_familiarity, 
       aes(x = age.at.test, y = rate, col = familiarity)) +
  geom_point() + 
  geom_smooth(span = 1) + 
  ylab("Namings per minute") + 
  xlab("Age (months)") + 
  theme_bw() 

Item effects just for fun.

namings %>%
  group_by(subid, name, familiarity, age.at.test) %>%
  summarise(num = n(), 
            len = len[1]) %>%
  mutate(rate = num / (as.numeric(lubridate::as.duration(lubridate::ms(len)))/60)) %>%
  ggplot(aes(x = age.at.test, y = rate, col = name)) +
  geom_point() + 
  facet_grid(.~familiarity) + 
  geom_smooth(span = 1) + 
  ylab("Namings per minute") + 
  xlab("Age (months)") + 
  theme_bw() 

Namings and face detections

Are there more faces during naming instances?

Are there more faces before or after naming instances?

#   sb <- ddply(d,~subid, summarize.naming, window=c(-3,0))
#   sa <- ddply(d,~subid, summarize.naming, window=c(0,3))
#   sb$window <- "before"
#   sa$window <- "after"
#   s <- rbind(sb,sa)
#   s$window <- factor(s$window)
#   
# #   d <- subset(d,!is.na(time))
# #   s <- ddply(d,~subid, summarize.naming, window=c(-3,3))
#   
#   s$first.instance <- s$naming.instance == 1
#   s$log.instance <- round(log10(s$naming.instance)*4)/4
#   s$binned.instance <- 10^(round(log10(s$naming.instance)*4)/4)
#   s$face.logical <- s$face != 0
#   
#   ## try plotting individuals
#   mss <- ddply(s,~subid+age.grp+window,summarise,
#                face = na.mean(face))
  # qplot(age.grp,face,colour=window,
  #       xlab="Age (Months)",ylab="Proportion faces detected in window",
  #       data=mss,geom=c("point")) + 
  #   scale_x_continuous(breaks=c(8,12,16)) +
  #   geom_smooth()

How do face detections change with sequence of naming instances?

ms <- namings %>%
  group_by(age.grp, subid, naming.instance, familiarity, name) %>%
  summarise(face = mean(face)) %>%
  group_by(age.grp, naming.instance, familiarity) %>%
  multi_boot_standard(col = "face", na.rm = TRUE)
ggplot(ms, 
       aes(x = naming.instance, y = mean, col = familiarity)) +
  geom_point() + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper)) + 
  geom_smooth(se = FALSE) + 
  facet_grid(~age.grp) + 
  xlim(c(0,20)) + 
  scale_color_solarized() + 
  theme_bw()

#   ## now calculate differences from base rate
#   n <- ddply(s,~subid+age.grp,summarise,
#              face = na.mean(face))
#   
#   n$br <- ddply(d,~subid+age.grp,summarise,
#               face=na.mean(face))$face
#   
#   n$br.diff <- n$face - n$br
#   
# #   pdf("~/Projects/xsface/writeup/figures/naming_faces_diff.pdf",width=4,height=3)
#   qplot(age.grp,br.diff,position=position_jitter(.2),
#         data=subset(n,age.grp>4),xlab="Age (Months)",ylab="Face detections relative to base rate") + 
#     scale_x_continuous(breaks=c(8,12,16,20)) +
#     geom_abline(slope=0,intercept=0,lty=2) + 
#     stat_summary(fun.data="mean_cl_boot",color="red")
# #   dev.off()

CDIs, namings, and face detections

cdi_namings <- left_join(naming_rate, cdis)

qplot(comprehension, facets= .~ age.grp, data = cdi_namings) + 
  theme_bw()

qplot(production, facets= .~ age.grp, data = cdi_namings) + 
  theme_bw()

CDIs and namings

Just out of curiousity, are there any major differences in terms of CDIs by naming rate? (Would help to establish the validity of the naming rate analyses).

ggplot(cdi_namings, 
       aes(x = rate, y = comprehension)) + 
  geom_point() + 
  geom_smooth(method = "lm", se = FALSE) + 
  facet_grid(.~age.grp) + 
  xlab("Namings per minute") + 
  ylab("CDI Summed Comprehension Vocabulary") + 
  theme_bw()

cdis <- read_csv("../data/cdis/cdi_summary.csv")
left_join(naming_rate, cdis) %>%
  ggplot(aes(x = rate, y = production)) + 
  geom_point() + 
  geom_smooth(method = "lm", se = FALSE) + 
  facet_grid(.~age.grp) + 
  xlab("Namings per minute") + 
  ylab("CDI Summed Production Vocabulary") + 
  theme_bw()