load libraries and functions
library(ggplot2)
library(metafor)
source("/Documents/GRADUATE_SCHOOL/Ranalysis/useful.R")
# calculate pooled and weighted estimate of population d
d_pop <- function(N, d) {
d = sum(N * d)/sum(N)
return(d)
}
#calculate variance on d
d_var<- function(n, d){
vd = ((2*n)/(n^2)) + ((d^2)/(4*n))
return(vd)
}
load data
metad = read.csv("/Documents/GRADUATE_SCHOOL/Projects/ME_meta/SRCD_abstract/data/Disambiguation Meta-Analysis Data.csv")
calculate d for conditions not reported
metad$d_by_t <- metad$t/sqrt(metad$N)
metad$d_by_M <- (metad$M_experimental-metad$M_baseline) / metad$SD
get a d for each condition
metad$d_calculate <- ifelse(!is.na(metad$d), metad$d,
ifelse(!is.na(metad$d_by_t),metad$d_by_t,
ifelse(!is.na(metad$d_by_M), metad$d_by_M, NA)))
look at rows with discrepant d’s
metad$d_error = ifelse((abs(metad$d_by_t-metad$d_by_M )> .1) | (abs(metad$d-metad$d_by_M ) > .1) | (abs(metad$d-metad$d_by_t)> .1), 1, 0)
metad[which(metad$d_error == 1),c("paper_key","t","N","M_experimental", "M_baseline" ,"SD", "d", "d_by_t", "d_by_M")] # Diesendruck and demarchena
## paper_key t N M_experimental M_baseline SD d d_by_t d_by_M
## 5 demarchena2011 23.22 40 0.87 0.5 NA 1.57 3.671 NA
## 6 demarchena2011 31.70 42 0.86 0.5 NA 2.03 4.891 NA
## 7 demarchena2011 17.86 40 0.67 0.5 NA 0.92 2.824 NA
## 8 demarchena2011 14.62 42 0.74 0.5 NA 0.58 2.256 NA
remove conditions for which we don’t have cohen’s d and that we’re excluding for methodological differences
#metad[!is.na(metad$d_calculate),c("paper_key","t","N","M_experimental", "M_baseline" ,"SD", "d", "d_by_t", "d_by_M",'d_calculate', 'd_error')]
md <- metad[!is.na(metad$d_calculate),]
md <- md[md$include.in.basic.analysis.==1,]
look at conditions we have d’s for
#md$notes_short = substr(md$Notes,1,40)
#md[, c("paper_key", 'notes_short', "expt_num",'d_calculate', "N")]
#md <- md[order(md$paper_key),]
#md[order(md$d_calculate),c("paper_key", 'notes_short', "expt_num",'d_calculate')] #sorted by effectsize
get var on d
md$d_var = apply(md[,c('N','d_calculate')], 1, function(x) d_var(x[1],x[2])) # Is this right??
summary of final sample of studies
# total number of unique studies
length(unique(metad$paper_key))
## [1] 38
# number of unique studies usable
length(unique(md$paper_key))#20
## [1] 20
# total number of conditions
dim(md)[1] #51
## [1] 51
get model
result.all = rma(md$d_calculate, vi = md$d_var, method = "FE")
result.all
##
## Fixed-Effects Model (k = 51)
##
## Test for Heterogeneity:
## Q(df = 50) = 181.1574, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.5157 0.0465 11.0954 <.0001 0.4246 0.6068 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest plot
md$first = gsub("\\d", "", md$paper_key)
par(mar=c(4,4,1,2))
forest(result.all,
slab = md$first,
ilab=cbind(md$year, md$age_mean..months., md$N, as.character(md$DV.type)),
ilab.xpos=c(-20, -15, -10, -5),
mlab = "Grand effect size",
#order=order(md$age_mean..months., as.numeric(md$DV.type)),
order="obs",
xlab="Effect size estimate",
xlim = c(-30, 15))
text(-26, 54.5, "First author", pos=1,font = 2, cex = .8)
text(-20, 54.5, "Year", pos = 1,font = 2, cex = .8)
text(-15, 54.5, "Age (m.)", pos = 1,font = 2, cex = .8)
text(-10, 54.5, "N", pos = 1,font = 2, cex = .8)
text(-5, 54.5, "Measure", pos = 1,font = 2, cex = .8)
text(8, 54.5, "Effect Size [95% CI]", pos = 1,font = 2, cex = .8)
histogram
qplot(d_calculate, data=md, geom="histogram", xlim=c(-2,2)) +
geom_vline(xintercept = 0, color = "red", cex = 2)
funnel
funnel(result.all)
get model
result.age = rma(md$d_calculate ~ md$age_mean..months., vi = md$d_var, method = "FE")
result.age
##
## Fixed-Effects with Moderators Model (k = 51)
##
## Test for Residual Heterogeneity:
## QE(df = 49) = 143.9907, p-val < .0001
##
## Test of Moderators (coefficient(s) 2):
## QM(df = 1) = 37.1667, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt -0.2958 0.1410 -2.0979 0.0359 -0.5721 -0.0195
## md$age_mean..months. 0.0336 0.0055 6.0965 <.0001 0.0228 0.0444
##
## intrcpt *
## md$age_mean..months. ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest plot
par(mar=c(4,4,1,2))
forest(result.age,
slab = md$first,
ilab=cbind(md$year),
ilab.xpos=c(-5),
mlab = "Grand effect size",
order=order(md$age_mean..months., as.numeric(md$DV.type)),
#xlab="Effect size estimate",
#border = "red",
xlim = c(-10, 0),
annotate=F
)
text(-31, 54.5, "First author", pos=1,font = 2, cex = .8)
text(-25, 54.5, "Year", pos = 1,font = 2, cex = .8)
text(-20, 54.5, "Age (m.)", pos = 1,font = 2, cex = .8)
text(-15, 54.5, "N", pos = 1,font = 2, cex = .8)
text(-10, 54.5, "Measure", pos = 1,font = 2, cex = .8)
text(18, 54.5, "Effect Size [95% CI]", pos = 1,font = 2, cex = .8)
#pdf("d_age_vocab2.pdf", width = 12, height = 6)
par(mfrow=c(2,1))
qplot(age_mean..months., d_calculate,
data=md, ylab = "Cohen's d", xlab= "Age (months)") +
geom_smooth(method="lm", formula=y~log(x)) +
theme_bw() +
theme(axis.title = element_text(face="bold", size=20),
axis.text = element_text(vjust=0.5, size=16),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
scale_y_continuous(limits = c(-1, 2.4)) +
annotate("text", x=50, y=-.5, size = 10,
label=paste("r=",round(cor(md$d_calculate, md$age_mean..months., use = "complete"), 2)))
#dev.off()
#cor.test(md$d_calculate, md$age_mean..months., use = "complete.obs")
get model
result.vocab = rma(md$d_calculate ~ md$CDI_prod_mean, vi = md$d_var, method = "FE")
result.vocab
##
## Fixed-Effects with Moderators Model (k = 23)
##
## Test for Residual Heterogeneity:
## QE(df = 21) = 51.5224, p-val = 0.0002
##
## Test of Moderators (coefficient(s) 2):
## QM(df = 1) = 63.0978, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt -0.4717 0.1387 -3.4009 0.0007 -0.7436 -0.1999 ***
## md$CDI_prod_mean 0.0054 0.0007 7.9434 <.0001 0.0041 0.0067 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest plot
par(mar=c(4,4,1,2))
forest(result.vocab,
slab = md$first,
ilab=cbind(md$year),
ilab.xpos=c(-5),
mlab = "Grand effect size",
order=order(md$age_mean..months., as.numeric(md$DV.type)),
xlab="Effect size estimate",
xlim = c(-10, 0),
annotate=F
)
text(-8, 25, "First author", pos=1,font = 2, cex = .8)
text(-25, 25, "Year", pos = 1,font = 2, cex = .8)
#pdf("dbyvocab.pdf", width = 6, height = 6)
qplot(CDI_prod_mean, d_calculate, data=md, ylab = "Cohen's d", xlab= "Mean CDI productive vocabulary") +
geom_smooth(method="lm", formula=y~log(x)) +
theme_bw() +
theme(axis.title = element_text(face="bold", size=20),
axis.text = element_text(vjust=0.5, size=16),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
scale_y_continuous(limits = c(-1, 2.4)) +
annotate("text", x=300, y=-.5, size = 10,
label=paste("r=",round(cor(md$d_calculate, md$CDI_prod_mean, use = "complete"), 2)))
cor.test(md$d_calculate, md$CDI_prod_mean, use = "complete.obs")
##
## Pearson's product-moment correlation
##
## data: md$d_calculate and md$CDI_prod_mean
## t = 6.424, df = 21, p-value = 2.287e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6049 0.9182
## sample estimates:
## cor
## 0.8141
#dev.off()
compare vocab and age as predictors of effect size
m1 = lm(scale(d_calculate) ~ scale(CDI_prod_mean) + scale(age_mean..months.), d=md)
summary(m1)
##
## Call:
## lm(formula = scale(d_calculate) ~ scale(CDI_prod_mean) + scale(age_mean..months.),
## data = md)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2194 -0.4333 0.0976 0.4441 0.9854
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.161 0.148 1.09 0.29
## scale(CDI_prod_mean) 0.888 0.152 5.83 1.1e-05 ***
## scale(age_mean..months.) 0.285 0.167 1.71 0.10
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.678 on 20 degrees of freedom
## (28 observations deleted due to missingness)
## Multiple R-squared: 0.706, Adjusted R-squared: 0.676
## F-statistic: 24 on 2 and 20 DF, p-value: 4.88e-06
m2 = lm(d_calculate ~ age_mean..months., d=md)
summary(m2)
##
## Call:
## lm(formula = d_calculate ~ age_mean..months., data = md)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2904 -0.5004 -0.0476 0.3030 1.4316
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.24986 0.26631 -0.94 0.35273
## age_mean..months. 0.03687 0.00978 3.77 0.00044 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.682 on 49 degrees of freedom
## Multiple R-squared: 0.225, Adjusted R-squared: 0.209
## F-statistic: 14.2 on 1 and 49 DF, p-value: 0.000437
get model
md.good = md[!is.na(md$CDI_prod_mean),]
result.all.mods= rma(md.good$d_calculate ~ md.good$CDI_prod_mean +
md.good$age_mean..months., vi = md.good$d_var, method = "FE")
result.all.mods
##
## Fixed-Effects with Moderators Model (k = 23)
##
## Test for Residual Heterogeneity:
## QE(df = 20) = 45.0466, p-val = 0.0011
##
## Test of Moderators (coefficient(s) 2,3):
## QM(df = 2) = 69.5736, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb
## intrcpt -0.8846 0.2135 -4.1442 <.0001 -1.3030
## md.good$CDI_prod_mean 0.0048 0.0007 6.6639 <.0001 0.0034
## md.good$age_mean..months. 0.0233 0.0091 2.5448 0.0109 0.0053
## ci.ub
## intrcpt -0.4662 ***
## md.good$CDI_prod_mean 0.0062 ***
## md.good$age_mean..months. 0.0412 *
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest plot
par(mar=c(4,4,1,2))
forest(result.all.mods,
slab = md.good$first,
ilab=cbind(md.good$year),
ilab.xpos=c(-3),
mlab = "Grand effect size",
order=order(md.good$age_mean..months., as.numeric(md.good$DV.type)),
xlab="Effect size estimate",
addfit = T
)
funnel
funnel(result.all.mods)