library(metafor)
library(tidyr)
library(dplyr)
df <- get(data(dat.molloy2014))
df <- mutate(df, id = 1:16) # add id column
df <- df %>% select(id, authors:quality) %>%
select(-ri, ri) %>% select(-ni, ni)
df <- df %>% mutate(Zi=transf.rtoz(df$ri))
head(df)
df <- data.frame(escalc(measure="ZCOR", ri=ri, ni=ni, data=df))
head(df)
paste.slab <- paste(df$authors, df$year, sep=", ")
re_model <- rma(yi, vi, data=df, slab=paste.slab)
re_model
##
## Random-Effects Model (k = 16; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 0.0081 (SE = 0.0055)
## tau (square root of estimated tau^2 value): 0.0901
## I^2 (total heterogeneity / total variability): 61.73%
## H^2 (total variability / sampling variability): 2.61
##
## Test for Heterogeneity:
## Q(df = 15) = 38.1595, p-val = 0.0009
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.1499 0.0316 4.7501 <.0001 0.0881 0.2118 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The Q-statistic with \(15\) degrees of freedom is \(38.1595\), and the p-value is \(0.0009\), which suggests that included studies do not share a common effect size.
The following is the breakdown of the model results:
- The “estimate” provides the estimated model coefficient or summary effect size, which is \(0.1499\)
- The “se” is the standard error of the “estimate”, which is \(0.0316\)
- The “zval” is the corresponding test-statistic, which is \(4.7501\)
- The “pval” is the corresponding p-value, wihch is \(<.0001\)
- The “ci.lb” is the corresponding lower bound of the confidence interval, which is \(0.0881\)
- The “ci.ub” is the corresponding upper bound of the confidence interval, which is \(0.2118\)
The q-statistic and \(I^2\) provide evidence for heterogeneity, but it does not provide any information on which studies are affecting the heterogeneity. Let’s use study IDs to identify the studies that are affecting heterogeneity.
fe_model <- rma(yi, vi, data=df, slab=paste.slab, method="FE")
fe_model
##
## Fixed-Effects Model (k = 16)
##
## Test for Heterogeneity:
## Q(df = 15) = 38.1595, p-val = 0.0009
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.1252 0.0170 7.3642 <.0001 0.0919 0.1585 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The Q-statistic with \(16\) degrees of freedom is \(38.1595\), and the p-value is \(0.0009\), which suggests that included studies do not share a common effect size.
The following is the breakdown of the model results:
- The “estimate” provides the estimated model coefficient or summary effect size, which is \(0.1252\)
- The “se” is the standard error of the “estimate”, which is \(0.0170\)
- The “zval” is the corresponding test-statistic, which is \(7.3642\)
- The “pval” is the corresponding p-value, wihch is \(<.0001\)
- The “ci.lb” is the corresponding lower bound of the confidence interval, which is \(0.0919\)
- The “ci.ub” is the corresponding upper bound of the confidence interval, which is \(0.1585\)
The q-statistic and \(I^2\) provide evidence for heterogeneity, but it does not provide any information on which studies are affecting the heterogeneity. Let’s use study IDs to identify the studies that are affecting heterogeneity.
new_re_model <- rma(yi, vi, data=df, slab=paste.slab)
summary(new_re_model)
##
## Random-Effects Model (k = 16; tau^2 estimator: REML)
##
## logLik deviance AIC BIC AICc
## 8.6096 -17.2191 -13.2191 -11.8030 -12.2191
##
## tau^2 (estimated amount of total heterogeneity): 0.0081 (SE = 0.0055)
## tau (square root of estimated tau^2 value): 0.0901
## I^2 (total heterogeneity / total variability): 61.73%
## H^2 (total variability / sampling variability): 2.61
##
## Test for Heterogeneity:
## Q(df = 15) = 38.1595, p-val = 0.0009
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.1499 0.0316 4.7501 <.0001 0.0881 0.2118 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Use Baujat plot to illustrate the studies that contribute to overall heterogeneity or overall results of the study.
baujat(new_re_model)
The studies closer to the top-right corner are those that most affect heterogeneity. In this case, the oneS affecting it the most are studies 10 and 12, but there are some other studies that also seem to affect it (e.g., 4, 11 and 3).
To identify outliers, and potential influential cases, we can run some diagnostics
plot(influence(re_model))
predict(re_model, transf=transf.ztor)
##
## pred ci.lb ci.ub cr.lb cr.ub
## 0.1488 0.0878 0.2087 -0.0371 0.3248
confint(re_model)
##
## estimate ci.lb ci.ub
## tau^2 0.0081 0.0017 0.0378
## tau 0.0901 0.0412 0.1944
## I^2(%) 61.7324 25.2799 88.2545
## H^2 2.6132 1.3383 8.5139
hist(df$yi, breaks = 10,
xlab="Fisher's Z Transformed Correlation Coefficient",
main="", col="gray")
curve(dnorm(x, mean=mean(df$yi), sd=sd(df$yi)),
add=TRUE, col="black") #line
funnel(re_model)
forest(re_model, atransf=transf.ztor, cex=.7)
Note that on the right side, the numbers are in the format Correlation[95% Confidence Interval].
library(tidyverse)
summary.df <- data.frame(matrix(ncol = 8))
colnames(summary.df) <- c("Moderator", "Value", "Number", "K", "Estimate",
"CI", "Z", "Pval")
summary.df <- summary.df[-1,]
o_numbers <- c(2186, 1290, 2666, 811, 1255, 2221, 626, 2524, 1130, 2346) # from table 2 in paper
summary.function <- function(variable, value, text1, text2, df){
res_x <- rma(yi, vi, data=df, subset=(variable==value))
res_x
summary.df <- summary.df %>% add_row(Moderator = text1,
Value = text2,
Estimate = round(res_x$beta, digits=3),
CI = paste(round(res_x$ci.lb, digits=3),
round(res_x$ci.ub, digits=3),
sep = ", ", collapse = NULL),
Z = round(res_x$zval, digits=3),
Pval = round(res_x$pval, digits=3),
K=res_x$k)
summary.df
}
summary.df <- summary.function(df$a_measure, "self-report",
"Medication adherence measurement",
"Self-report only (yes)", df)
summary.df <- summary.function(df$a_measure, "other",
"Medication adherence measurement",
"Self-report only (no)", df)
summary.df <- summary.function(df$c_measure, "NEO",
"Conscientiousness measurement", "NEO", df)
# NEO = conscientiousness was measured with some version of the NEO personality inventory
summary.df <- summary.function(df$c_measure, "other",
"Conscientiousness measurement",
"Other measure", df)
summary.df <- summary.function(df$design, "cross-sectional",
"Study design", "Cross-sectional", df)
summary.df <- summary.function(df$design, "prospective",
"Study design", "Prospective", df)
res_age1 <- rma(yi, vi, data=df, subset=(df$meanage<50))
summary.df <- summary.df %>% add_row(Moderator = "Mean age",
Value = "Less than 50 years old",
Estimate = round(res_age1$beta, digits=3),
CI = paste(round(res_age1$ci.lb, digits=3),
round(res_age1$ci.ub, digits=3),
sep = ", ", collapse = NULL),
Z = round(res_age1$zval, digits=3),
Pval = round(res_age1$pval, digits=3),
K=res_age1$k)
res_age2 <- rma(yi, vi, data=df, subset=(df$meanage>50))
summary.df <- summary.df %>% add_row(Moderator = "Mean age",
Value = "Greater than 50 years old",
Estimate = round(res_age2$beta, digits=3),
CI = paste(round(res_age2$ci.lb, digits=3),
round(res_age2$ci.ub, digits=3),
sep = ", ", collapse = NULL),
Z = round(res_age2$zval, digits=3),
Pval = round(res_age2$pval, digits=3),
K=res_age2$k)
summary.df <- summary.function(df$controls,
"multiple", "Controls used in analysis", "Yes", df)
summary.df <- summary.function(df$controls,
"none", "Controls used in analysis", "No", df)
for(i in 1:10){
if(summary.df$Pval[i]==0){
summary.df$Pval[i] <- "<0.001"
}
summary.df$Number[i] <- as.numeric(o_numbers[i])
}
summary.df