Disambiguation Meta-Analysis

load libraries and functions

library(ggplot2)
library(metafor)
source("/Documents/GRADUATE_SCHOOL/Ranalysis/useful.R")

# calculate variance on d, assumes variance in (imaginery) control group same as experimental
d_var<- function(n, d){
  vd = ((2*n)/(n^2)) + ((d^2)/(4*n))
  return(vd)
}

load data

metad = read.csv("/Documents/GRADUATE_SCHOOL/Projects/ME_meta/SRCD_abstract/data/Disambiguation Meta-Analysis Data.csv")

calculate d for conditions not reported

metad$d_by_t <- metad$t/sqrt(metad$N)
metad$d_by_M <- (metad$M_experimental-metad$M_baseline) / metad$SD

get a d for each condition

metad$d_calculate <- ifelse(!is.na(metad$d), metad$d, 
                            ifelse(!is.na(metad$d_by_t),metad$d_by_t, 
                                   ifelse(!is.na(metad$d_by_M), metad$d_by_M, NA)))

remove conditions for which we have no d or we’re excluding for methodological reasons

#metad[!is.na(metad$d_calculate),c("paper_key","t","N","M_experimental", "M_baseline" ,"SD", "d", "d_by_t", "d_by_M",'d_calculate', 'd_error')]
md <- metad[!is.na(metad$d_calculate),]
md <- md[md$include.in.basic.analysis.==1,]

# sanity check for indexing: which condition has the max
md[which(md$d_calculate==(max(md$d_calculate, na.rm=T))), 
   c("paper_key", "d_calculate")]

##     paper_key d_calculate
## 175 frank2015       7.833

md.order = md[order(md$d_calculate),]

get var on d

md$d_var = apply(md[,c('N','d_calculate')], 1, function(x) d_var(x[1],x[2]))

summary of final sample of studies

# total number of unique studies 
length(unique(metad$paper_key))

## [1] 39

# number of unique studies usable
length(unique(md$paper_key))#21

## [1] 21

# total number of conditions
dim(md)[1] #63

## [1] 63

Analysis #1: Effect size across studies

distribution of raw cohen’s d’s.

qplot(d_calculate, data=md, geom="histogram") + 
  geom_vline(xintercept = 0, color = "red", cex = 2)

get random effect model

result.all = rma(md$d_calculate, vi = md$d_var, method = "REML")
result.all

## 
## Random-Effects Model (k = 63; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 1.4036 (SE = 0.2804)
## tau (square root of estimated tau^2 value):      1.1847
## I^2 (total heterogeneity / total variability):   92.19%
## H^2 (total variability / sampling variability):  12.80
## 
## Test for Heterogeneity: 
## Q(df = 62) = 433.7926, p-val < .0001
## 
## Model Results:
## 
## estimate       se     zval     pval    ci.lb    ci.ub          
##   0.9618   0.1576   6.1045   <.0001   0.6530   1.2707      *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

There is a disambiguation effect. Next, look at the effect by study in a forest plot.

md$DV.type = factor(md$DV.type, levels = c("" ,"search" , "ET", "FC"  ,  "Y/N"  ))

# this is just to get the plot sorted correctly with the right labels
md$first = gsub("\\d", "", md$paper_key) 
md$i = order(md$DV.type, md$age_mean..months.)
md.sort = md[md$i,]
md.sort$num_lab = 1:dim(md)[1]
md.sort$lab = paste(md.sort$num_lab, ". ", 
                    md.sort$first, sep="")
md[md$i,"lab"] = md.sort$lab
md[md$i,"num_lab"] = md.sort$num_lab

#pdf("forest1.pdf", 7, 10)
par(mar=c(4,4,1,2))
forest(result.all,
      slab = md$lab,
      ilab = cbind(md$year, md$age_mean..months., 
                   md$N, as.character(md$DV.type)),
      ilab.xpos = c(-6.25,-5,-4,-3),
      order = order(md$DV.type, md$age_mean..months.),
      mlab = "Grand effect size",
      xlab ="Effect size estimate", 
      xlim = c(-9, 2),
      annotate = F) 

text(-8, 66, "First author", pos=1,font = 2, )
text(-6.25, 66, "Year", pos = 1,font = 2, cex = 1)
text(-5, 66, "Age (m.)", pos = 1,font = 2, cex = 1)
text(-4, 66, "N", pos = 1,font = 2, cex = 1)
text(-3, 66, "Measure", pos = 1,font = 2, cex = 1)

plot of chunk unnamed-chunk-12

#dev.off()

Are these d’s distributed as would be expected, based on study precision? Look at funnel plot.

#pdf("funnel.pdf", 5, 5)
md$age.f = cut(md$age_mean..months., 
               breaks = seq(1,70,5), labels = F)
md$color = rev(heat.colors(14))[md$age.f]
m = funnel(result.all, pch = 26, digits = 2, back = "gray82")
m[md$i,"num_lab"] = md.sort$num_lab
text(m$x,m$y,m$num_lab,cex=1, font = 2, col = md$color)
# add scale
n=1:14
col <- rev(heat.colors(14))
x <- rep(par('usr')[2]*1.1,length(n)+1)
y <- seq(f=par('usr')[3],t=par('usr')[4],length.out=length(n)+1)
x = x-2
y = (y/4) +.05
nul <- sapply(n,FUN=function(n,col,x,y,...){lines(x=x[c(n,n+1)],y=y[c(n,n+1)],col=col[n],...)},
              lwd=30,lend="butt",col=col,x=x,y=y)
#dev.off()
text(x[1],y[15]-.02 ,"Age",cex=1,)

plot of chunk unnamed-chunk-13

For the most part, yes. But, the older FC Frank studes are outliers.

Analysis #2: Age as predictor

plot linear model of age and effect size

#pdf("vocab2.pdf", width = 6, height = 6)
par(mfrow=c(2,1))

ggplot(md, aes(x=age_mean..months., y=d_calculate, label = num_lab))+
    ylab("Cohen's d")+
    xlab("Age (months)")+  
  #geom_smooth(method="lm", formula=y~log(x)) +
  geom_smooth(method="lm", color = "steelblue", alpha = .3, size = 1.25) +
  theme_bw() + 
  theme(text = element_text(size=30),
        plot.title=element_text(size=20, face = "bold"),
        plot.background = element_blank(),
        panel.grid.major = element_blank(),
        axis.line = element_line(size = 1.5),
        axis.ticks = element_line(size = 1.5),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(color = 'black')) +
  geom_text(fontface = 2) + 
  #scale_y_continuous(limits = c(-1, 2.4)) +
  annotate("text", x=20, y=7, size = 10,
           label=paste("r =",round(cor(md$d_calculate,
                                      md$age_mean..months., use = "complete"), 2)), color = "red")

plot of chunk unnamed-chunk-14

#dev.off()

cor.test(md$d_calculate, log(md$age_mean..months.), use = "complete.obs")

## 
##  Pearson's product-moment correlation
## 
## data:  md$d_calculate and log(md$age_mean..months.)
## t = 6.501, df = 61, p-value = 1.66e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4658 0.7661
## sample estimates:
##    cor 
## 0.6397

get randome effec model

result.age = rma(md$d_calculate ~ log(md$age_mean..months.), 
                 vi = md$d_var, method = "REML")
result.age

## 
## Mixed-Effects Model (k = 63; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0.6612 (SE = 0.1469)
## tau (square root of estimated tau^2 value):             0.8131
## I^2 (residual heterogeneity / unaccounted variability): 84.75%
## H^2 (unaccounted variability / sampling variability):   6.56
## R^2 (amount of heterogeneity accounted for):            52.89%
## 
## Test for Residual Heterogeneity: 
## QE(df = 61) = 286.5665, p-val < .0001
## 
## Test of Moderators (coefficient(s) 2): 
## QM(df = 1) = 48.2146, p-val < .0001
## 
## Model Results:
## 
##                            estimate      se     zval    pval    ci.lb
## intrcpt                     -6.2672  1.0402  -6.0251  <.0001  -8.3060
## log(md$age_mean..months.)    2.2272  0.3208   6.9437  <.0001   1.5986
##                              ci.ub     
## intrcpt                    -4.2285  ***
## log(md$age_mean..months.)   2.8559  ***
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

forest plot

par(mar=c(4,4,1,2))

forest(result.age,
      slab = md$lab,
      ilab = cbind(md$year, md$age_mean..months., 
                   md$N, as.character(md$DV.type)),
      ilab.xpos = c(-6.25,-5,-4,-3),
      order = order(md$DV.type, md$age_mean..months.),
      mlab = "Grand effect size",
      xlab ="Effect size estimate", 
      xlim = c(-9, 2),
      annotate = F) 


text(-31, 54.5, "First author", pos=1,font = 2, cex = .8)
text(-25, 54.5, "Year", pos = 1,font = 2, cex = .8)
text(-20, 54.5, "Age (m.)", pos = 1,font = 2, cex = .8)
text(-15, 54.5, "N", pos = 1,font = 2, cex = .8)
text(-10, 54.5, "Measure", pos = 1,font = 2, cex = .8)

plot of chunk unnamed-chunk-16

funnel

#pdf("funnel.pdf", 5, 5)
m = funnel(result.age, pch = 26, digits = 2, back = "gray82")
m[md$i,"num_lab"] = md.sort$num_lab
text(m$x,m$y,m$num_lab,cex=1, font = 2, col = md$col)
# add scale
n=1:14
col <- rev(heat.colors(14))
x <- rep(par('usr')[2]*1.1,length(n)+1)
y <- seq(f=par('usr')[3],t=par('usr')[4],length.out=length(n)+1)
x = x-2
y = (y/4) +.05
nul <- sapply(n,FUN=function(n,col,x,y,...){lines(x=x[c(n,n+1)],y=y[c(n,n+1)],col=col[n],...)},lwd=30,lend="butt",col=col,x=x,y=y)
#dev.off()
text(x[1],y[15]-.02 ,"Age",cex=1,)

plot of chunk unnamed-chunk-17

#dev.off()

Analysis #3: Vocab size as predictor

plot vocab as predictor of effect size with basic linear model

#pdf("dbyvocab.pdf", width = 20, height = 20)
ggplot(md, aes(x=log(CDI_prod_mean), y=d_calculate, label = num_lab))+
    ylab("Cohen's d")+
    xlab("Log mean CDI productive vocabulary")+ 
  #geom_smooth(method="lm", formula=y~log(x)) +
  geom_smooth(method="lm", color = "steelblue",alpha = .3, size = 1.25) +
  theme_bw() + 
  scale_y_continuous(limits = c(-1, 2.5)) +
  theme(text = element_text(size=30),
        plot.title=element_text(size=20, face = "bold"),
        plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(color = 'black',size = 1.5), 
        axis.ticks = element_line(size = 1.5)) +
   geom_text(fontface = 2) +
   annotate("text", x=4.25, y=2, size = 10,
           label=paste("r=",round(cor(md$d_calculate, log(md$CDI_prod_mean), use = "complete"), 2)),
           color="red")

plot of chunk unnamed-chunk-18

cor.test(md$d_calculate, log(md$CDI_prod_mean), use = "complete.obs")

## 
##  Pearson's product-moment correlation
## 
## data:  md$d_calculate and log(md$CDI_prod_mean)
## t = 5.712, df = 21, p-value = 1.139e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5421 0.9022
## sample estimates:
##  cor 
## 0.78

#dev.off()

compare vocab and age as predictors of effect size in a basic linear model

m1 = lm(scale(d_calculate) ~ scale(log(CDI_prod_mean)) + 
          scale(log(age_mean..months.)), d=md)
summary(m1)

## 
## Call:
## lm(formula = scale(d_calculate) ~ scale(log(CDI_prod_mean)) + 
##     scale(log(age_mean..months.)), data = md)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.696 -0.247  0.028  0.342  0.486 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -0.119      0.101   -1.18  0.25309    
## scale(log(CDI_prod_mean))        0.409      0.104    3.94  0.00082 ***
## scale(log(age_mean..months.))    0.173      0.136    1.27  0.21741    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.394 on 20 degrees of freedom
##   (40 observations deleted due to missingness)
## Multiple R-squared:  0.638,  Adjusted R-squared:  0.602 
## F-statistic: 17.6 on 2 and 20 DF,  p-value: 3.89e-05

get random effect model

result.vocab = rma(md$d_calculate ~ log(md$CDI_prod_mean), vi = md$d_var, method = "REML")
result.vocab

## 
## Mixed-Effects Model (k = 23; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0.2376 (SE = 0.1165)
## tau (square root of estimated tau^2 value):             0.4874
## I^2 (residual heterogeneity / unaccounted variability): 65.28%
## H^2 (unaccounted variability / sampling variability):   2.88
## R^2 (amount of heterogeneity accounted for):            62.92%
## 
## Test for Residual Heterogeneity: 
## QE(df = 21) = 61.4494, p-val < .0001
## 
## Test of Moderators (coefficient(s) 2): 
## QM(df = 1) = 25.2653, p-val < .0001
## 
## Model Results:
## 
##                        estimate      se     zval    pval    ci.lb    ci.ub
## intrcpt                 -3.9534  0.9228  -4.2842  <.0001  -5.7620  -2.1447
## log(md$CDI_prod_mean)    0.9117  0.1814   5.0265  <.0001   0.5562   1.2672
##                           
## intrcpt                ***
## log(md$CDI_prod_mean)  ***
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

forest plot

par(mar=c(4,4,1,2))
forest(result.vocab,
      slab = md$lab,
      ilab = cbind(md$year, md$age_mean..months., 
                   md$N, as.character(md$DV.type)),
      ilab.xpos = c(-6.25,-5,-4,-3),
      order = order(md$DV.type, md$age_mean..months.),
      mlab = "Grand effect size",
      xlab ="Effect size estimate", 
      xlim = c(-9, 2),
      annotate = F) 

text(-8, 26, "First author", pos=1,font = 2, cex = .8)
text(-25,  25, "Year", pos = 1,font = 2, cex = .8)

plot of chunk unnamed-chunk-21

Now, compare BOTH age and vocab in random effect model

md.good = md[!is.na(md$CDI_prod_mean),]
result.all.mods= rma(md.good$d_calculate ~ log(md.good$CDI_prod_mean) +
                       log(md.good$age_mean..months.), vi = md.good$d_var, method = "REML")
result.all.mods

## 
## Mixed-Effects Model (k = 23; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0.2250 (SE = 0.1154)
## tau (square root of estimated tau^2 value):             0.4744
## I^2 (residual heterogeneity / unaccounted variability): 64.07%
## H^2 (unaccounted variability / sampling variability):   2.78
## R^2 (amount of heterogeneity accounted for):            64.89%
## 
## Test for Residual Heterogeneity: 
## QE(df = 20) = 56.2138, p-val < .0001
## 
## Test of Moderators (coefficient(s) 2,3): 
## QM(df = 2) = 27.6512, p-val < .0001
## 
## Model Results:
## 
##                                 estimate      se     zval    pval    ci.lb
## intrcpt                          -5.3342  1.4127  -3.7758  0.0002  -8.1032
## log(md.good$CDI_prod_mean)        0.7466  0.2192   3.4065  0.0007   0.3170
## log(md.good$age_mean..months.)    0.7198  0.5613   1.2824  0.1997  -0.3803
##                                   ci.ub     
## intrcpt                         -2.5653  ***
## log(md.good$CDI_prod_mean)       1.1761  ***
## log(md.good$age_mean..months.)   1.8200     
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

forest plot

par(mar=c(4,4,1,2))
forest(result.all.mods,
      slab = md.good$first,
      ilab=cbind(md.good$year),
      ilab.xpos=c(-3),
      mlab = "Grand effect size",
      order=order(md.good$age_mean..months., as.numeric(md.good$DV.type)), 
      xlab="Effect size estimate", 
      addfit = T
      )

plot of chunk unnamed-chunk-23

funnel

funnel(result.all.mods)

plot of chunk unnamed-chunk-24

Analysis #4: measure as predictor

get random effect models, by measure but controlling for age

result.ET = rma(md$d_calculate[md$DV.type == "ET"] ~ log(md$age_mean..months.)[md$DV.type == "ET"], 
                vi = md$d_var[md$DV.type == "ET"], method = "REML")
result.search = rma(md$d_calculate[md$DV.type == "search"] ~ log(md$age_mean..months.)[md$DV.type == "search"], 
                    vi = md$d_var[md$DV.type == "search"], method = "REML")
result.FC = rma(md$d_calculate[md$DV.type == "FC"] ~  log(md$age_mean..months.)[md$DV.type == "FC"], 
                vi = md$d_var[md$DV.type == "FC"], method = "REML")

df.measure = data.frame("measure" = c("eye-tracking", "search", "forced-choice"),
                        "d" = c(result.ET$b[2], result.search$b[2], result.FC$b[2]),
                        "lb" = c(result.ET$ci.lb[2], result.search$ci.lb[2], result.FC$ci.lb[2]), 
                        "ub" = c(result.ET$ci.ub[2], result.search$ci.ub[2], result.FC$ci.ub[2]))
df.measure$measure = factor(df.measure$measure, levels = c( "search", "eye-tracking","forced-choice"))

bar plot of effect by measure

#pdf("measure.pdf", width = 6, height = 6)

ggplot(df.measure, aes(y=d, x=measure)) + 
  geom_bar(fill = "steelblue", stat = "identity") + 
  geom_errorbar(aes(ymax = ub, ymin=lb),
                position=position_dodge(width=0.9), width=0.25) +
  geom_abline(intercept = 0, slope = 0, color = "black") +
  ylab("Cohen's d") +
  xlab("Measure") +
  #ggtitle("Mean length") +
    theme(text = element_text(size=30),
        plot.title=element_text(size=20, face = "bold"),
        plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(color = 'black'))

plot of chunk unnamed-chunk-26

#dev.off()