Negpad felicity

Prelims and libraries.

rm(list=ls())
#Load libraries
library(reshape2)
library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(bootstrap)
library(lme4)

## Loading required package: Matrix
## Loading required package: Rcpp

Functions and add some style elements for ggplot2

## number of unique subs
n.unique <- function (x) {
  length(unique(x))
}

## for bootstrapping 95% confidence intervals
theta <- function(x,xdata) {mean(xdata[x])}
ci.low <- function(x) {
  quantile(bootstrap(1:length(x),1000,theta,x)$thetastar,.025)}
ci.high <- function(x) {
  quantile(bootstrap(1:length(x),1000,theta,x)$thetastar,.975)}

plot.style <- theme_bw() + 
  theme(panel.grid.minor=element_blank(), 
        panel.grid.major=element_blank(), 
        legend.position="right", 
        axis.line = element_line(colour="black",size=.5), 
        axis.ticks = element_line(size=.5), 
        axis.title.x = element_text(vjust=-.5), 
        axis.title.y = element_text(angle=90,vjust=0.25))

Load in data.

all.data <- read.csv("negpad_long_data.csv")

Condense scale (1=bad, 2=neutral, 3=good)

all.data$resp2 <- 2
all.data[all.data$resp > 3,]$resp2 <- 3
all.data[all.data$resp < 3,]$resp2 <- 1
all.data$resp2 <- factor(all.data$resp2)

Participant exclusions

Reject subjects who don’t understand scale (based on positive sentences):

reject <- all.data %>%
  filter(sent.type=="positive") %>% #Only look at positive sentences
  group_by(subid) %>%
  mutate(total = n()) %>% #get total # of positive sentences child saw
  group_by(subid, condition, truth, total, resp2) %>%
  filter((truth=="True" & resp2==3) | (truth=="False" & resp2==1)) %>% #Get # "good" for true pos and "bad" for false pos
  summarize(counts = n()) %>% 
  group_by(subid, condition, total) %>%
  summarize(counts = sum(counts)) %>% #total # "correct" responses
  mutate(prop = counts/total) %>% #proportion correct
  filter(prop < .6) #reject kids who got < .6 "correct" (this allows for 2/6 "mistakes")

for (i in reject$subid) {
  all.data <- filter(all.data, subid !=i)
}

Make sure there aren’t any kids who just used one side of scale. Reject kids who only chose a single data point

scaleUse <- aggregate(resp2 ~ subid, all.data, n.unique)
table(scaleUse$resp2) #Are any resp2=1

## 
##  2  3 
## 49 20

Categorize kids based on response type

tn_responses <- all.data %>%
  filter(sent.type=="negative" & truth=="True") %>%
  group_by(subid) %>%
  mutate(total = n()) %>%
  group_by(subid, condition, total, resp2) %>%
  summarize(counts = n()) %>%
  mutate(prop = counts/total)

category <- dcast(tn_responses, subid + condition ~ resp2)

## Using prop as value column: use value.var to override.

names(category) <- c("subid","condition","bad","neutral","good")
category[is.na(category)] <- 0

category$type <- "other"
#category[category$neutral > .6,]$type <- "tn_neutral"
category[category$bad > .6,]$type <- "tn_bad"
category[category$good > .6,]$type <- "tn_good"

cat_counts <- category %>%
  group_by(condition, type) %>%
  summarise(counts = n())
cat_counts$condition <- factor(cat_counts$condition, levels=c("none","target"), labels=c("None","Target"))
cat_counts$type <- factor(cat_counts$type, levels=c("tn_bad","tn_good","other"), labels=c("True Negatives = Bad", "True Negatives = Good", "Inconsistent/Other"))

qplot(data=cat_counts, x=condition, y=counts, fill=type, 
      stat="identity", position="dodge", geom="bar") + 
  scale_fill_hue("Response Type") +
  ylab("Count") + xlab("Context Condition") +
  plot.style

plot of chunk unnamed-chunk-7

Main Analysis

First aross ages

ms <- all.data %>%
  group_by(subid, condition, sent.type, truth) %>%
  summarise(subm = mean(resp)) %>%
  group_by(condition, sent.type, truth) %>%
  summarise(m = mean(subm),
            cih = ci.high(subm),
            cil = ci.low(subm))
ms$condition <- factor(ms$condition, labels=c("None","Target"))
ms$truth <- factor(ms$truth, levels=c("True","False"))

qplot(data=subset(ms, sent.type=="negative"), 
      x=condition, y=m, facets=~truth,
      stat="identity", position="dodge", geom="bar") +
  geom_errorbar(aes(ymin=cil, ymax=cih), 
                position=position_dodge(.9), width=0) + 
  scale_fill_grey("") +
  xlab("Context") + ylab("Response") +
  scale_y_continuous(limits=c(0, 5), breaks=seq(1,5,1)) +
  #coord_equal(1/1.5) +
  plot.style

plot of chunk unnamed-chunk-8

Histogram of responses

truenegs <- filter(ms, truth=="True" & sent.type == "negative")

#make df for histogram (for formatting reasons)
hist_data <- all.data %>%
  filter(truth=="True" & sent.type=="negative") %>%
  group_by(condition, resp) %>%
  summarise(count = n())
hist_data$condition <- factor(hist_data$condition, labels=c("None","Target"))


qplot(data=hist_data, y=count, x=resp, 
      fill = condition, width=.5, 
      geom="bar", position = position_dodge(.6), stat="identity") +
  geom_point(data=truenegs, aes(x=m, y=c(41, 42), color=condition)) +
  geom_segment(data=truenegs, aes(x=cil, xend=cih, y=c(41, 42), yend=c(41, 42), color=condition)) + 
  scale_fill_grey("Condition") + scale_color_grey("Condition") +
  xlab("Response") + ylab("Count") +
  #ggtitle("True Negatives, 3-5-year-olds (N=43)") +
  plot.style

plot of chunk unnamed-chunk-9

Break down by age.

ms <- all.data %>%
  group_by(subid, agegroup, condition, sent.type, truth) %>%
  summarise(subm = mean(resp)) %>%
  group_by(condition, agegroup, sent.type, truth) %>%
  summarise(m = mean(subm),
            cih = ci.high(subm),
            cil = ci.low(subm))
ms$condition <- factor(ms$condition, labels=c("None","Target"))
ms$truth <- factor(ms$truth, levels=c("True","False"))

qplot(data=subset(ms, sent.type=="negative"), 
      x=condition, y=m, facets=agegroup~truth,
      stat="identity", position="dodge", geom="bar") +
  geom_errorbar(aes(ymin=cil, ymax=cih), 
                position=position_dodge(.9), width=0) + 
  scale_fill_grey("") +
  xlab("Context") + ylab("Response") +
  scale_y_continuous(limits=c(0, 5), breaks=seq(1,5,1)) +
  #coord_equal(1/1.5) +
  plot.style

plot of chunk unnamed-chunk-10

Histogram of responses

trueneg_3s <- filter(ms, truth=="True" & sent.type == "negative" & agegroup == "3")
trueneg_4s <- filter(ms, truth=="True" & sent.type == "negative" & agegroup == "4")

hist_data_3s <- all.data %>%
  filter(truth=="True" & sent.type=="negative" & agegroup=="3") %>%
  group_by(condition, resp) %>%
  summarise(count = n())
hist_data_3s$condition <- factor(hist_data_3s$condition, labels=c("None","Target"))

#quartz()
qplot(data=hist_data_3s, y=count, x=resp, 
      fill = condition, width=.5, 
      geom="bar", position = position_dodge(.6), stat="identity") +
  geom_point(data=trueneg_3s, aes(x=m, y=c(40, 41), color=condition)) +
  geom_segment(data=trueneg_3s, aes(x=cil, xend=cih, y=c(40, 41), yend=c(40, 41), color=condition)) + 
  scale_fill_grey("Condition") + scale_color_grey("Condition") +
  xlab("Response") + ylab("Count") +
  ylim(c(0, 80)) + 
  #ggtitle("True Negatives, 3-year-olds (N=35)") +
  plot.style

plot of chunk unnamed-chunk-11

hist_data_4s <- all.data %>%
  filter(truth=="True" & sent.type=="negative" & agegroup=="4") %>%
  group_by(condition, resp) %>%
  summarise(count = n())
hist_data_4s$condition <- factor(hist_data_4s$condition, labels=c("None","Target"))

#quartz()
qplot(data=hist_data_4s, y=count, x=resp, 
      fill = condition, width=.5, 
      geom="bar", position = position_dodge(.6), stat="identity") +
  geom_point(data=trueneg_4s, aes(x=m, y=c(40, 41), color=condition)) +
  geom_segment(data=trueneg_4s, aes(x=cil, xend=cih, y=c(40, 41), yend=c(40, 41), color=condition)) + 
  scale_fill_grey("Condition") + scale_color_grey("Condition") +
  xlab("Response") + ylab("Count") +
  ylim(c(0, 80)) + 
  #ggtitle("True Negatives, 4-year-olds (N=34)") +
  plot.style

plot of chunk unnamed-chunk-11

Playing around

ms <- all.data %>%
  group_by(sent.type, truth, condition, agegroup, subid) %>%
  summarise(resp = mean(resp)) %>%
  group_by(sent.type, truth, condition, agegroup) %>%
  summarise(cih = ci.high(resp),
            cil = ci.low(resp),
            m = mean(resp)) 

ggplot(ms, aes(x = sent.type:truth, y = m, fill = condition)) + 
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = cil, ymax = cih), 
                 position = position_dodge(width = .9)) +
  facet_grid(.~agegroup)

plot of chunk unnamed-chunk-12

Statistics

Basic continuous models.

summary(lmer(resp ~ condition * agegroup * truth  + (1|subid) 
     + (1|item), 
     data = filter(all.data, sent.type == "negative")))

## Linear mixed model fit by REML ['lmerMod']
## Formula: resp ~ condition * agegroup * truth + (1 | subid) + (1 | item)
##    Data: filter(all.data, sent.type == "negative")
## 
## REML criterion at convergence: 2275
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.456 -0.526 -0.060  0.639  3.405 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subid    (Intercept) 1.16     1.08    
##  item     (Intercept) 0.00     0.00    
##  Residual             1.52     1.23    
## Number of obs: 655, groups:  subid, 69; item, 16
## 
## Fixed effects:
##                                    Estimate Std. Error t value
## (Intercept)                           3.563      1.657    2.15
## conditiontarget                      -0.256      2.402   -0.11
## agegroup                             -0.457      0.474   -0.96
## truthTrue                             1.216      1.183    1.03
## conditiontarget:agegroup              0.150      0.679    0.22
## conditiontarget:truthTrue            -1.004      1.722   -0.58
## agegroup:truthTrue                   -0.125      0.338   -0.37
## conditiontarget:agegroup:truthTrue    0.434      0.486    0.90
## 
## Correlation of Fixed Effects:
##             (Intr) cndtnt agegrp trthTr cndtn: cndt:T aggr:T
## conditntrgt -0.690                                          
## agegroup    -0.990  0.683                                   
## truthTrue   -0.570  0.393  0.563                            
## cndtntrgt:g  0.691 -0.990 -0.698 -0.393                     
## cndtntrgt:T  0.391 -0.577 -0.387 -0.687  0.569              
## aggrp:trthT  0.564 -0.389 -0.569 -0.990  0.398  0.680       
## cndtntrg::T -0.393  0.571  0.396  0.689 -0.575 -0.990 -0.696

summary(lmer(resp ~ condition * agegroup + 
               (1|subid) + (1|item), 
             data = filter(all.data, sent.type == "negative" & truth == "True")))

## Linear mixed model fit by REML ['lmerMod']
## Formula: resp ~ condition * agegroup + (1 | subid) + (1 | item)
##    Data: filter(all.data, sent.type == "negative" & truth == "True")
## 
## REML criterion at convergence: 1642
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.572 -0.152  0.087  0.453  3.049 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subid    (Intercept) 1.90     1.379   
##  item     (Intercept) 0.00     0.000   
##  Residual             0.93     0.965   
## Number of obs: 525, groups:  subid, 69; item, 16
## 
## Fixed effects:
##                          Estimate Std. Error t value
## (Intercept)                 4.750      1.688    2.81
## conditiontarget            -1.260      2.429   -0.52
## agegroup                   -0.575      0.483   -1.19
## conditiontarget:agegroup    0.588      0.688    0.86
## 
## Correlation of Fixed Effects:
##             (Intr) cndtnt agegrp
## conditntrgt -0.695              
## agegroup    -0.990  0.688       
## cndtntrgt:g  0.695 -0.990 -0.702

Some simpler models, including within-agegroup.

model <- lmer(resp ~ condition*agegroup +
                (1 | subid) + 
                (1 | item), 
              data=subset(all.data, sent.type == "negative" & truth == "True"))
summary(model)

## Linear mixed model fit by REML ['lmerMod']
## Formula: resp ~ condition * agegroup + (1 | subid) + (1 | item)
##    Data: subset(all.data, sent.type == "negative" & truth == "True")
## 
## REML criterion at convergence: 1642
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.572 -0.152  0.087  0.453  3.049 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subid    (Intercept) 1.90     1.379   
##  item     (Intercept) 0.00     0.000   
##  Residual             0.93     0.965   
## Number of obs: 525, groups:  subid, 69; item, 16
## 
## Fixed effects:
##                          Estimate Std. Error t value
## (Intercept)                 4.750      1.688    2.81
## conditiontarget            -1.260      2.429   -0.52
## agegroup                   -0.575      0.483   -1.19
## conditiontarget:agegroup    0.588      0.688    0.86
## 
## Correlation of Fixed Effects:
##             (Intr) cndtnt agegrp
## conditntrgt -0.695              
## agegroup    -0.990  0.688       
## cndtntrgt:g  0.695 -0.990 -0.702

threes <- filter(all.data, agegroup == "3")
threes_subs <- aggregate(resp ~ subid + condition, threes, mean)
t.test(resp ~ condition, threes_subs)

## 
##  Welch Two Sample t-test
## 
## data:  resp by condition
## t = -0.9181, df = 32.36, p-value = 0.3654
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7716  0.2920
## sample estimates:
##   mean in group none mean in group target 
##                3.186                3.426

fours <- filter(all.data, agegroup == "4")
fours_subs <- aggregate(resp ~ subid + condition, fours, mean)
t.test(resp ~ condition, fours_subs)

## 
##  Welch Two Sample t-test
## 
## data:  resp by condition
## t = -2.069, df = 30.75, p-value = 0.04701
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.12949 -0.00801
## sample estimates:
##   mean in group none mean in group target 
##                2.806                3.375

subs <- aggregate(resp ~ subid + condition, all.data, mean)
t.test(resp ~ condition, subs)

## 
##  Welch Two Sample t-test
## 
## data:  resp by condition
## t = -2.048, df = 66.8, p-value = 0.04445
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.763101 -0.009868
## sample estimates:
##   mean in group none mean in group target 
##                3.013                3.399

Discrete models

all.data$bin.resp <- all.data$resp > 3

ms <- all.data %>%
  group_by(sent.type, truth, condition, agegroup, subid) %>%
  summarise(resp = mean(bin.resp)) %>%
  group_by(sent.type, truth, condition, agegroup) %>%
  summarise(cih = ci.high(resp),
            cil = ci.low(resp),
            m = mean(resp)) 

ggplot(ms, aes(x = sent.type:truth, y = m, fill = condition)) + 
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = cil, ymax = cih), 
                 position = position_dodge(width = .9)) +
  facet_grid(.~agegroup)

summary(glmer(bin.resp ~ (condition + agegroup + truth)^3  + (1|subid) 
             + (1|item), family = "binomial",
             data = filter(all.data, sent.type == "negative")))

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: bin.resp ~ (condition + agegroup + truth)^3 + (1 | subid) + (1 |  
##     item)
##    Data: filter(all.data, sent.type == "negative")
## 
##      AIC      BIC   logLik deviance df.resid 
##    667.9    712.7   -323.9    647.9      645 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.250 -0.452 -0.197  0.462  6.489 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  subid  (Intercept) 4.17e+00 2.04090 
##  item   (Intercept) 6.31e-06 0.00251 
## Number of obs: 655, groups:  subid, 69; item, 16
## 
## Fixed effects:
##                                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                           5.060      4.195    1.21    0.228  
## conditiontarget                      -4.148      5.487   -0.76    0.450  
## agegroup                             -2.381      1.256   -1.90    0.058 .
## truthTrue                            -0.505      3.497   -0.14    0.885  
## conditiontarget:agegroup              1.642      1.600    1.03    0.305  
## conditiontarget:truthTrue             0.776      4.459    0.17    0.862  
## agegroup:truthTrue                    0.748      1.055    0.71    0.478  
## conditiontarget:agegroup:truthTrue   -0.226      1.313   -0.17    0.863  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtnt agegrp trthTr cndtn: cndt:T aggr:T
## conditntrgt -0.764                                          
## agegroup    -0.990  0.756                                   
## truthTrue   -0.737  0.563  0.740                            
## cndtntrgt:g  0.775 -0.990 -0.782 -0.581                     
## cndtntrgt:T  0.578 -0.692 -0.581 -0.784  0.696              
## aggrp:trthT  0.736 -0.563 -0.754 -0.991  0.591  0.777       
## cndtntrg::T -0.591  0.690  0.604  0.796 -0.707 -0.990 -0.803

Subjectwise mean distribution

ms <- all.data %>%
  group_by(sent.type, truth, condition, agegroup, subid) %>%
  summarise(m = mean(resp))
  
qplot(round(m),
      fill = condition, 
      facets = ~ agegroup,
      position = "dodge", 
      binwidth = .5,
      data = filter(ms, truth == "True" & 
                    sent.type == "negative"))

plot of chunk unnamed-chunk-16