Disambiguation Meta-Analysis

load libraries and functions

library(ggplot2)
library(metafor)
source("/Documents/GRADUATE_SCHOOL/Ranalysis/useful.R")

# calculate pooled and weighted estimate of population d
d_pop <- function(N, d) {
  d = sum(N * d)/sum(N)
  return(d)
}

#calculate variance on d
d_var<- function(n, d){
  vd = ((2*n)/(n^2)) + ((d^2)/(4*n))
  return(vd)
}

load data

metad = read.csv("/Documents/GRADUATE_SCHOOL/Projects/ME_meta/SRCD_abstract/data/Disambiguation Meta-Analysis Data.csv")

calculate d for conditions not reported

metad$d_by_t <- metad$t/sqrt(metad$N)
metad$d_by_M <- (metad$M_experimental-metad$M_baseline) / metad$SD

get a d for each condition

metad$d_calculate <- ifelse(!is.na(metad$d), metad$d, 
                            ifelse(!is.na(metad$d_by_t),metad$d_by_t, 
                                   ifelse(!is.na(metad$d_by_M), metad$d_by_M, NA)))

look at rows with discrepant d’s

metad$d_error = ifelse((abs(metad$d_by_t-metad$d_by_M )> .1) | (abs(metad$d-metad$d_by_M ) > .1) | (abs(metad$d-metad$d_by_t)> .1), 1, 0)
metad[which(metad$d_error == 1),c("paper_key","t","N","M_experimental", "M_baseline" ,"SD", "d", "d_by_t", "d_by_M")] # Diesendruck and demarchena

##        paper_key     t  N M_experimental M_baseline SD    d d_by_t d_by_M
## 5 demarchena2011 23.22 40           0.87        0.5 NA 1.57  3.671     NA
## 6 demarchena2011 31.70 42           0.86        0.5 NA 2.03  4.891     NA
## 7 demarchena2011 17.86 40           0.67        0.5 NA 0.92  2.824     NA
## 8 demarchena2011 14.62 42           0.74        0.5 NA 0.58  2.256     NA

remove conditions for which we don’t have cohen’s d and that we’re excluding for methodological differences

#metad[!is.na(metad$d_calculate),c("paper_key","t","N","M_experimental", "M_baseline" ,"SD", "d", "d_by_t", "d_by_M",'d_calculate', 'd_error')]
md <- metad[!is.na(metad$d_calculate),]
md <- md[md$include.in.basic.analysis.==1,]

look at conditions we have d’s for

#md$notes_short = substr(md$Notes,1,40)
#md[, c("paper_key", 'notes_short', "expt_num",'d_calculate', "N")]
#md <- md[order(md$paper_key),] 
#md[order(md$d_calculate),c("paper_key", 'notes_short', "expt_num",'d_calculate')]  #sorted by effectsize

get var on d

md$d_var = apply(md[,c('N','d_calculate')], 1, function(x) d_var(x[1],x[2])) # Is this right??

summary of final sample of studies

# total number of unique studies 
length(unique(metad$paper_key))

## [1] 38

# number of unique studies usable
length(unique(md$paper_key))#20

## [1] 20

# total number of conditions
dim(md)[1] #51

## [1] 51

Analysis #1: Effect size accross studies

get model

result.all = rma(md$d_calculate, vi = md$d_var, method = "FE")
result.all

## 
## Fixed-Effects Model (k = 51)
## 
## Test for Heterogeneity: 
## Q(df = 50) = 181.1574, p-val < .0001
## 
## Model Results:
## 
## estimate       se     zval     pval    ci.lb    ci.ub          
##   0.5157   0.0465  11.0954   <.0001   0.4246   0.6068      *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

forest plot

md$first = gsub("\\d", "", md$paper_key) 
par(mar=c(4,4,1,2))
forest(result.all,
      slab = md$first,
      ilab=cbind(md$year, md$age_mean..months., md$N, as.character(md$DV.type)),
      ilab.xpos=c(-20, -15, -10, -5),
      mlab = "Grand effect size",
      #order=order(md$age_mean..months., as.numeric(md$DV.type)), 
      order="obs",
      xlab="Effect size estimate", 
      xlim = c(-30, 15))

text(-26, 54.5, "First author", pos=1,font = 2, cex = .8)
text(-20, 54.5, "Year", pos = 1,font = 2, cex = .8)
text(-15, 54.5, "Age (m.)", pos = 1,font = 2, cex = .8)
text(-10, 54.5, "N", pos = 1,font = 2, cex = .8)
text(-5, 54.5, "Measure", pos = 1,font = 2, cex = .8)
text(8, 54.5, "Effect Size [95% CI]", pos = 1,font = 2, cex = .8)

plot of chunk unnamed-chunk-11

histogram

qplot(d_calculate, data=md, geom="histogram", xlim=c(-2,2)) + 
  geom_vline(xintercept = 0, color = "red", cex = 2)

plot of chunk unnamed-chunk-12

funnel

funnel(result.all)

plot of chunk unnamed-chunk-13

Analysis #2: Age as predictor

get model

result.age = rma(md$d_calculate ~ md$age_mean..months., vi = md$d_var, method = "FE")
result.age

## 
## Fixed-Effects with Moderators Model (k = 51)
## 
## Test for Residual Heterogeneity: 
## QE(df = 49) = 143.9907, p-val < .0001
## 
## Test of Moderators (coefficient(s) 2): 
## QM(df = 1) = 37.1667, p-val < .0001
## 
## Model Results:
## 
##                       estimate      se     zval    pval    ci.lb    ci.ub
## intrcpt                -0.2958  0.1410  -2.0979  0.0359  -0.5721  -0.0195
## md$age_mean..months.    0.0336  0.0055   6.0965  <.0001   0.0228   0.0444
##                          
## intrcpt                 *
## md$age_mean..months.  ***
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

forest plot

par(mar=c(4,4,1,2))
forest(result.age,
      slab = md$first,
      ilab=cbind(md$year),
      ilab.xpos=c(-5),
      mlab = "Grand effect size",
      order=order(md$age_mean..months., as.numeric(md$DV.type)), 
      #xlab="Effect size estimate", 
      #border = "red", 
      xlim = c(-10, 0),
      annotate=F
      )

text(-31, 54.5, "First author", pos=1,font = 2, cex = .8)
text(-25, 54.5, "Year", pos = 1,font = 2, cex = .8)
text(-20, 54.5, "Age (m.)", pos = 1,font = 2, cex = .8)
text(-15, 54.5, "N", pos = 1,font = 2, cex = .8)
text(-10, 54.5, "Measure", pos = 1,font = 2, cex = .8)
text(18, 54.5, "Effect Size [95% CI]", pos = 1,font = 2, cex = .8)

plot of chunk unnamed-chunk-15

#pdf("d_age_vocab2.pdf", width = 12, height = 6)
par(mfrow=c(2,1))
qplot(age_mean..months., d_calculate, 
      data=md, ylab = "Cohen's d", xlab= "Age (months)") + 
  geom_smooth(method="lm", formula=y~log(x)) +
  theme_bw() + 
  theme(axis.title = element_text(face="bold", size=20),
        axis.text  = element_text(vjust=0.5, size=16),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) + 
  scale_y_continuous(limits = c(-1, 2.4)) +
  annotate("text", x=50, y=-.5, size = 10,
           label=paste("r=",round(cor(md$d_calculate, md$age_mean..months., use = "complete"), 2)))

plot of chunk unnamed-chunk-16

#dev.off()

#cor.test(md$d_calculate, md$age_mean..months., use = "complete.obs")

Analysis #3: Vocab size as predictor

get model

result.vocab = rma(md$d_calculate ~ md$CDI_prod_mean, vi = md$d_var, method = "FE")
result.vocab

## 
## Fixed-Effects with Moderators Model (k = 23)
## 
## Test for Residual Heterogeneity: 
## QE(df = 21) = 51.5224, p-val = 0.0002
## 
## Test of Moderators (coefficient(s) 2): 
## QM(df = 1) = 63.0978, p-val < .0001
## 
## Model Results:
## 
##                   estimate      se     zval    pval    ci.lb    ci.ub     
## intrcpt            -0.4717  0.1387  -3.4009  0.0007  -0.7436  -0.1999  ***
## md$CDI_prod_mean    0.0054  0.0007   7.9434  <.0001   0.0041   0.0067  ***
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

forest plot

par(mar=c(4,4,1,2))
forest(result.vocab,
      slab = md$first,
      ilab=cbind(md$year),
      ilab.xpos=c(-5),
      mlab = "Grand effect size",
      order=order(md$age_mean..months., as.numeric(md$DV.type)), 
      xlab="Effect size estimate", 
      xlim = c(-10, 0),
      annotate=F
      )

text(-8, 25, "First author", pos=1,font = 2, cex = .8)
text(-25,  25, "Year", pos = 1,font = 2, cex = .8)

plot of chunk unnamed-chunk-18

#pdf("dbyvocab.pdf", width = 6, height = 6)
qplot(CDI_prod_mean, d_calculate, data=md, ylab = "Cohen's d", xlab= "Mean CDI productive vocabulary") + 
  geom_smooth(method="lm", formula=y~log(x)) +
  theme_bw() + 
  theme(axis.title = element_text(face="bold", size=20),
        axis.text  = element_text(vjust=0.5, size=16),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) + 
  scale_y_continuous(limits = c(-1, 2.4)) +
  annotate("text", x=300, y=-.5, size = 10,
           label=paste("r=",round(cor(md$d_calculate, md$CDI_prod_mean, use = "complete"), 2)))

plot of chunk unnamed-chunk-19

cor.test(md$d_calculate, md$CDI_prod_mean, use = "complete.obs")

## 
##  Pearson's product-moment correlation
## 
## data:  md$d_calculate and md$CDI_prod_mean
## t = 6.424, df = 21, p-value = 2.287e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6049 0.9182
## sample estimates:
##    cor 
## 0.8141

#dev.off()

compare vocab and age as predictors of effect size

m1 = lm(scale(d_calculate) ~ scale(CDI_prod_mean)  + scale(age_mean..months.), d=md)
summary(m1)

## 
## Call:
## lm(formula = scale(d_calculate) ~ scale(CDI_prod_mean) + scale(age_mean..months.), 
##     data = md)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2194 -0.4333  0.0976  0.4441  0.9854 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 0.161      0.148    1.09     0.29    
## scale(CDI_prod_mean)        0.888      0.152    5.83  1.1e-05 ***
## scale(age_mean..months.)    0.285      0.167    1.71     0.10    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.678 on 20 degrees of freedom
##   (28 observations deleted due to missingness)
## Multiple R-squared:  0.706,  Adjusted R-squared:  0.676 
## F-statistic:   24 on 2 and 20 DF,  p-value: 4.88e-06

m2 = lm(d_calculate ~ age_mean..months., d=md)
summary(m2)

## 
## Call:
## lm(formula = d_calculate ~ age_mean..months., data = md)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2904 -0.5004 -0.0476  0.3030  1.4316 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -0.24986    0.26631   -0.94  0.35273    
## age_mean..months.  0.03687    0.00978    3.77  0.00044 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.682 on 49 degrees of freedom
## Multiple R-squared:  0.225,  Adjusted R-squared:  0.209 
## F-statistic: 14.2 on 1 and 49 DF,  p-value: 0.000437

get model

md.good = md[!is.na(md$CDI_prod_mean),]
result.all.mods= rma(md.good$d_calculate ~ md.good$CDI_prod_mean +
                       md.good$age_mean..months., vi = md.good$d_var, method = "FE")
result.all.mods

## 
## Fixed-Effects with Moderators Model (k = 23)
## 
## Test for Residual Heterogeneity: 
## QE(df = 20) = 45.0466, p-val = 0.0011
## 
## Test of Moderators (coefficient(s) 2,3): 
## QM(df = 2) = 69.5736, p-val < .0001
## 
## Model Results:
## 
##                            estimate      se     zval    pval    ci.lb
## intrcpt                     -0.8846  0.2135  -4.1442  <.0001  -1.3030
## md.good$CDI_prod_mean        0.0048  0.0007   6.6639  <.0001   0.0034
## md.good$age_mean..months.    0.0233  0.0091   2.5448  0.0109   0.0053
##                              ci.ub     
## intrcpt                    -0.4662  ***
## md.good$CDI_prod_mean       0.0062  ***
## md.good$age_mean..months.   0.0412    *
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

forest plot

par(mar=c(4,4,1,2))
forest(result.all.mods,
      slab = md.good$first,
      ilab=cbind(md.good$year),
      ilab.xpos=c(-3),
      mlab = "Grand effect size",
      order=order(md.good$age_mean..months., as.numeric(md.good$DV.type)), 
      xlab="Effect size estimate", 
      addfit = T
      )

plot of chunk unnamed-chunk-22

funnel

funnel(result.all.mods)

plot of chunk unnamed-chunk-23