load libraries and functions
library(ggplot2)
library(metafor)
source("/Documents/GRADUATE_SCHOOL/Ranalysis/useful.R")
# calculate variance on d, assumes variance in (imaginery) control group same as experimental
d_var<- function(n, d){
vd = ((2*n)/(n^2)) + ((d^2)/(4*n))
return(vd)
}
load data
metad = read.csv("/Documents/GRADUATE_SCHOOL/Projects/ME_meta/SRCD_abstract/data/Disambiguation Meta-Analysis Data.csv")
calculate d for conditions not reported
metad$d_by_t <- metad$t/sqrt(metad$N)
metad$d_by_M <- (metad$M_experimental-metad$M_baseline) / metad$SD
get a d for each condition
metad$d_calculate <- ifelse(!is.na(metad$d), metad$d,
ifelse(!is.na(metad$d_by_t),metad$d_by_t,
ifelse(!is.na(metad$d_by_M), metad$d_by_M, NA)))
remove conditions for which we have no d or we’re excluding for methodological reasons
#metad[!is.na(metad$d_calculate),c("paper_key","t","N","M_experimental", "M_baseline" ,"SD", "d", "d_by_t", "d_by_M",'d_calculate', 'd_error')]
md <- metad[!is.na(metad$d_calculate),]
md <- md[md$include.in.basic.analysis.==1,]
# sanity check for indexing: which condition has the max
md[which(md$d_calculate==(max(md$d_calculate, na.rm=T))),
c("paper_key", "d_calculate")]
## paper_key d_calculate
## 175 frank2015 7.833
md.order = md[order(md$d_calculate),]
get var on d
md$d_var = apply(md[,c('N','d_calculate')], 1, function(x) d_var(x[1],x[2]))
summary of final sample of studies
# total number of unique studies
length(unique(metad$paper_key))
## [1] 39
# number of unique studies usable
length(unique(md$paper_key))#21
## [1] 21
# total number of conditions
dim(md)[1] #63
## [1] 63
distribution of raw cohen’s d’s.
qplot(d_calculate, data=md, geom="histogram") +
geom_vline(xintercept = 0, color = "red", cex = 2)
get random effect model
result.all = rma(md$d_calculate, vi = md$d_var, method = "REML")
result.all
##
## Random-Effects Model (k = 63; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 1.4036 (SE = 0.2804)
## tau (square root of estimated tau^2 value): 1.1847
## I^2 (total heterogeneity / total variability): 92.19%
## H^2 (total variability / sampling variability): 12.80
##
## Test for Heterogeneity:
## Q(df = 62) = 433.7926, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.9618 0.1576 6.1045 <.0001 0.6530 1.2707 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
There is a disambiguation effect. Next, look at the effect by study in a forest plot.
md$DV.type = factor(md$DV.type, levels = c("" ,"search" , "ET", "FC" , "Y/N" ))
# this is just to get the plot sorted correctly with the right labels
md$first = gsub("\\d", "", md$paper_key)
md$i = order(md$DV.type, md$age_mean..months.)
md.sort = md[md$i,]
md.sort$num_lab = 1:dim(md)[1]
md.sort$lab = paste(md.sort$num_lab, ". ",
md.sort$first, sep="")
md[md$i,"lab"] = md.sort$lab
md[md$i,"num_lab"] = md.sort$num_lab
#pdf("forest1.pdf", 7, 10)
par(mar=c(4,4,1,2))
forest(result.all,
slab = md$lab,
ilab = cbind(md$year, md$age_mean..months.,
md$N, as.character(md$DV.type)),
ilab.xpos = c(-6.25,-5,-4,-3),
order = order(md$DV.type, md$age_mean..months.),
mlab = "Grand effect size",
xlab ="Effect size estimate",
xlim = c(-9, 2),
annotate = F)
text(-8, 66, "First author", pos=1,font = 2, )
text(-6.25, 66, "Year", pos = 1,font = 2, cex = 1)
text(-5, 66, "Age (m.)", pos = 1,font = 2, cex = 1)
text(-4, 66, "N", pos = 1,font = 2, cex = 1)
text(-3, 66, "Measure", pos = 1,font = 2, cex = 1)
#dev.off()
Are these d’s distributed as would be expected, based on study precision? Look at funnel plot.
#pdf("funnel.pdf", 5, 5)
md$age.f = cut(md$age_mean..months.,
breaks = seq(1,70,5), labels = F)
md$color = rev(heat.colors(14))[md$age.f]
m = funnel(result.all, pch = 26, digits = 2, back = "gray82")
m[md$i,"num_lab"] = md.sort$num_lab
text(m$x,m$y,m$num_lab,cex=1, font = 2, col = md$color)
# add scale
n=1:14
col <- rev(heat.colors(14))
x <- rep(par('usr')[2]*1.1,length(n)+1)
y <- seq(f=par('usr')[3],t=par('usr')[4],length.out=length(n)+1)
x = x-2
y = (y/4) +.05
nul <- sapply(n,FUN=function(n,col,x,y,...){lines(x=x[c(n,n+1)],y=y[c(n,n+1)],col=col[n],...)},
lwd=30,lend="butt",col=col,x=x,y=y)
#dev.off()
text(x[1],y[15]-.02 ,"Age",cex=1,)
For the most part, yes. But, the older FC Frank studes are outliers.
plot linear model of age and effect size
#pdf("vocab2.pdf", width = 6, height = 6)
par(mfrow=c(2,1))
ggplot(md, aes(x=age_mean..months., y=d_calculate, label = num_lab))+
ylab("Cohen's d")+
xlab("Age (months)")+
#geom_smooth(method="lm", formula=y~log(x)) +
geom_smooth(method="lm", color = "steelblue", alpha = .3, size = 1.25) +
theme_bw() +
theme(text = element_text(size=30),
plot.title=element_text(size=20, face = "bold"),
plot.background = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(size = 1.5),
axis.ticks = element_line(size = 1.5),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(color = 'black')) +
geom_text(fontface = 2) +
#scale_y_continuous(limits = c(-1, 2.4)) +
annotate("text", x=20, y=7, size = 10,
label=paste("r =",round(cor(md$d_calculate,
md$age_mean..months., use = "complete"), 2)), color = "red")
#dev.off()
cor.test(md$d_calculate, log(md$age_mean..months.), use = "complete.obs")
##
## Pearson's product-moment correlation
##
## data: md$d_calculate and log(md$age_mean..months.)
## t = 6.501, df = 61, p-value = 1.66e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4658 0.7661
## sample estimates:
## cor
## 0.6397
get randome effec model
result.age = rma(md$d_calculate ~ log(md$age_mean..months.),
vi = md$d_var, method = "REML")
result.age
##
## Mixed-Effects Model (k = 63; tau^2 estimator: REML)
##
## tau^2 (estimated amount of residual heterogeneity): 0.6612 (SE = 0.1469)
## tau (square root of estimated tau^2 value): 0.8131
## I^2 (residual heterogeneity / unaccounted variability): 84.75%
## H^2 (unaccounted variability / sampling variability): 6.56
## R^2 (amount of heterogeneity accounted for): 52.89%
##
## Test for Residual Heterogeneity:
## QE(df = 61) = 286.5665, p-val < .0001
##
## Test of Moderators (coefficient(s) 2):
## QM(df = 1) = 48.2146, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb
## intrcpt -6.2672 1.0402 -6.0251 <.0001 -8.3060
## log(md$age_mean..months.) 2.2272 0.3208 6.9437 <.0001 1.5986
## ci.ub
## intrcpt -4.2285 ***
## log(md$age_mean..months.) 2.8559 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest plot
par(mar=c(4,4,1,2))
forest(result.age,
slab = md$lab,
ilab = cbind(md$year, md$age_mean..months.,
md$N, as.character(md$DV.type)),
ilab.xpos = c(-6.25,-5,-4,-3),
order = order(md$DV.type, md$age_mean..months.),
mlab = "Grand effect size",
xlab ="Effect size estimate",
xlim = c(-9, 2),
annotate = F)
text(-31, 54.5, "First author", pos=1,font = 2, cex = .8)
text(-25, 54.5, "Year", pos = 1,font = 2, cex = .8)
text(-20, 54.5, "Age (m.)", pos = 1,font = 2, cex = .8)
text(-15, 54.5, "N", pos = 1,font = 2, cex = .8)
text(-10, 54.5, "Measure", pos = 1,font = 2, cex = .8)
funnel
#pdf("funnel.pdf", 5, 5)
m = funnel(result.age, pch = 26, digits = 2, back = "gray82")
m[md$i,"num_lab"] = md.sort$num_lab
text(m$x,m$y,m$num_lab,cex=1, font = 2, col = md$col)
# add scale
n=1:14
col <- rev(heat.colors(14))
x <- rep(par('usr')[2]*1.1,length(n)+1)
y <- seq(f=par('usr')[3],t=par('usr')[4],length.out=length(n)+1)
x = x-2
y = (y/4) +.05
nul <- sapply(n,FUN=function(n,col,x,y,...){lines(x=x[c(n,n+1)],y=y[c(n,n+1)],col=col[n],...)},lwd=30,lend="butt",col=col,x=x,y=y)
#dev.off()
text(x[1],y[15]-.02 ,"Age",cex=1,)
#dev.off()
plot vocab as predictor of effect size with basic linear model
#pdf("dbyvocab.pdf", width = 20, height = 20)
ggplot(md, aes(x=log(CDI_prod_mean), y=d_calculate, label = num_lab))+
ylab("Cohen's d")+
xlab("Log mean CDI productive vocabulary")+
#geom_smooth(method="lm", formula=y~log(x)) +
geom_smooth(method="lm", color = "steelblue",alpha = .3, size = 1.25) +
theme_bw() +
scale_y_continuous(limits = c(-1, 2.5)) +
theme(text = element_text(size=30),
plot.title=element_text(size=20, face = "bold"),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(color = 'black',size = 1.5),
axis.ticks = element_line(size = 1.5)) +
geom_text(fontface = 2) +
annotate("text", x=4.25, y=2, size = 10,
label=paste("r=",round(cor(md$d_calculate, log(md$CDI_prod_mean), use = "complete"), 2)),
color="red")
cor.test(md$d_calculate, log(md$CDI_prod_mean), use = "complete.obs")
##
## Pearson's product-moment correlation
##
## data: md$d_calculate and log(md$CDI_prod_mean)
## t = 5.712, df = 21, p-value = 1.139e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5421 0.9022
## sample estimates:
## cor
## 0.78
#dev.off()
compare vocab and age as predictors of effect size in a basic linear model
m1 = lm(scale(d_calculate) ~ scale(log(CDI_prod_mean)) +
scale(log(age_mean..months.)), d=md)
summary(m1)
##
## Call:
## lm(formula = scale(d_calculate) ~ scale(log(CDI_prod_mean)) +
## scale(log(age_mean..months.)), data = md)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.696 -0.247 0.028 0.342 0.486
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.119 0.101 -1.18 0.25309
## scale(log(CDI_prod_mean)) 0.409 0.104 3.94 0.00082 ***
## scale(log(age_mean..months.)) 0.173 0.136 1.27 0.21741
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.394 on 20 degrees of freedom
## (40 observations deleted due to missingness)
## Multiple R-squared: 0.638, Adjusted R-squared: 0.602
## F-statistic: 17.6 on 2 and 20 DF, p-value: 3.89e-05
get random effect model
result.vocab = rma(md$d_calculate ~ log(md$CDI_prod_mean), vi = md$d_var, method = "REML")
result.vocab
##
## Mixed-Effects Model (k = 23; tau^2 estimator: REML)
##
## tau^2 (estimated amount of residual heterogeneity): 0.2376 (SE = 0.1165)
## tau (square root of estimated tau^2 value): 0.4874
## I^2 (residual heterogeneity / unaccounted variability): 65.28%
## H^2 (unaccounted variability / sampling variability): 2.88
## R^2 (amount of heterogeneity accounted for): 62.92%
##
## Test for Residual Heterogeneity:
## QE(df = 21) = 61.4494, p-val < .0001
##
## Test of Moderators (coefficient(s) 2):
## QM(df = 1) = 25.2653, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt -3.9534 0.9228 -4.2842 <.0001 -5.7620 -2.1447
## log(md$CDI_prod_mean) 0.9117 0.1814 5.0265 <.0001 0.5562 1.2672
##
## intrcpt ***
## log(md$CDI_prod_mean) ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest plot
par(mar=c(4,4,1,2))
forest(result.vocab,
slab = md$lab,
ilab = cbind(md$year, md$age_mean..months.,
md$N, as.character(md$DV.type)),
ilab.xpos = c(-6.25,-5,-4,-3),
order = order(md$DV.type, md$age_mean..months.),
mlab = "Grand effect size",
xlab ="Effect size estimate",
xlim = c(-9, 2),
annotate = F)
text(-8, 26, "First author", pos=1,font = 2, cex = .8)
text(-25, 25, "Year", pos = 1,font = 2, cex = .8)
Now, compare BOTH age and vocab in random effect model
md.good = md[!is.na(md$CDI_prod_mean),]
result.all.mods= rma(md.good$d_calculate ~ log(md.good$CDI_prod_mean) +
log(md.good$age_mean..months.), vi = md.good$d_var, method = "REML")
result.all.mods
##
## Mixed-Effects Model (k = 23; tau^2 estimator: REML)
##
## tau^2 (estimated amount of residual heterogeneity): 0.2250 (SE = 0.1154)
## tau (square root of estimated tau^2 value): 0.4744
## I^2 (residual heterogeneity / unaccounted variability): 64.07%
## H^2 (unaccounted variability / sampling variability): 2.78
## R^2 (amount of heterogeneity accounted for): 64.89%
##
## Test for Residual Heterogeneity:
## QE(df = 20) = 56.2138, p-val < .0001
##
## Test of Moderators (coefficient(s) 2,3):
## QM(df = 2) = 27.6512, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb
## intrcpt -5.3342 1.4127 -3.7758 0.0002 -8.1032
## log(md.good$CDI_prod_mean) 0.7466 0.2192 3.4065 0.0007 0.3170
## log(md.good$age_mean..months.) 0.7198 0.5613 1.2824 0.1997 -0.3803
## ci.ub
## intrcpt -2.5653 ***
## log(md.good$CDI_prod_mean) 1.1761 ***
## log(md.good$age_mean..months.) 1.8200
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest plot
par(mar=c(4,4,1,2))
forest(result.all.mods,
slab = md.good$first,
ilab=cbind(md.good$year),
ilab.xpos=c(-3),
mlab = "Grand effect size",
order=order(md.good$age_mean..months., as.numeric(md.good$DV.type)),
xlab="Effect size estimate",
addfit = T
)
funnel
funnel(result.all.mods)
get random effect models, by measure but controlling for age
result.ET = rma(md$d_calculate[md$DV.type == "ET"] ~ log(md$age_mean..months.)[md$DV.type == "ET"],
vi = md$d_var[md$DV.type == "ET"], method = "REML")
result.search = rma(md$d_calculate[md$DV.type == "search"] ~ log(md$age_mean..months.)[md$DV.type == "search"],
vi = md$d_var[md$DV.type == "search"], method = "REML")
result.FC = rma(md$d_calculate[md$DV.type == "FC"] ~ log(md$age_mean..months.)[md$DV.type == "FC"],
vi = md$d_var[md$DV.type == "FC"], method = "REML")
df.measure = data.frame("measure" = c("eye-tracking", "search", "forced-choice"),
"d" = c(result.ET$b[2], result.search$b[2], result.FC$b[2]),
"lb" = c(result.ET$ci.lb[2], result.search$ci.lb[2], result.FC$ci.lb[2]),
"ub" = c(result.ET$ci.ub[2], result.search$ci.ub[2], result.FC$ci.ub[2]))
df.measure$measure = factor(df.measure$measure, levels = c( "search", "eye-tracking","forced-choice"))
bar plot of effect by measure
#pdf("measure.pdf", width = 6, height = 6)
ggplot(df.measure, aes(y=d, x=measure)) +
geom_bar(fill = "steelblue", stat = "identity") +
geom_errorbar(aes(ymax = ub, ymin=lb),
position=position_dodge(width=0.9), width=0.25) +
geom_abline(intercept = 0, slope = 0, color = "black") +
ylab("Cohen's d") +
xlab("Measure") +
#ggtitle("Mean length") +
theme(text = element_text(size=30),
plot.title=element_text(size=20, face = "bold"),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(color = 'black'))
#dev.off()