Read in and prepare the data

library(metafor)
library(tidyr)
library(dplyr)
df <- get(data(dat.molloy2014)) 
df <- mutate(df, id = 1:16) # add id column 
df <- df %>% select(id, authors:quality) %>% 
  select(-ri, ri) %>% select(-ni, ni)

Transform r values to Z values

df <- df %>% mutate(Zi=transf.rtoz(df$ri))
head(df)

Calculate the corresponding sample variances

df <- data.frame(escalc(measure="ZCOR", ri=ri, ni=ni, data=df))
head(df)

Re-run the multi-analysis

Regular random-effects model

paste.slab <- paste(df$authors, df$year, sep=", ")
re_model <- rma(yi, vi, data=df, slab=paste.slab) 
re_model 
## 
## Random-Effects Model (k = 16; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 0.0081 (SE = 0.0055)
## tau (square root of estimated tau^2 value):      0.0901
## I^2 (total heterogeneity / total variability):   61.73%
## H^2 (total variability / sampling variability):  2.61
## 
## Test for Heterogeneity:
## Q(df = 15) = 38.1595, p-val = 0.0009
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub 
##   0.1499  0.0316  4.7501  <.0001  0.0881  0.2118  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The Q-statistic with \(15\) degrees of freedom is \(38.1595\), and the p-value is \(0.0009\), which suggests that included studies do not share a common effect size.

The following is the breakdown of the model results:
- The “estimate” provides the estimated model coefficient or summary effect size, which is \(0.1499\)
- The “se” is the standard error of the “estimate”, which is \(0.0316\)
- The “zval” is the corresponding test-statistic, which is \(4.7501\)
- The “pval” is the corresponding p-value, wihch is \(<.0001\)
- The “ci.lb” is the corresponding lower bound of the confidence interval, which is \(0.0881\)
- The “ci.ub” is the corresponding upper bound of the confidence interval, which is \(0.2118\)


The q-statistic and \(I^2\) provide evidence for heterogeneity, but it does not provide any information on which studies are affecting the heterogeneity. Let’s use study IDs to identify the studies that are affecting heterogeneity.

Fixed effects model

fe_model <- rma(yi, vi, data=df, slab=paste.slab, method="FE") 
fe_model
## 
## Fixed-Effects Model (k = 16)
## 
## Test for Heterogeneity:
## Q(df = 15) = 38.1595, p-val = 0.0009
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub 
##   0.1252  0.0170  7.3642  <.0001  0.0919  0.1585  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The Q-statistic with \(16\) degrees of freedom is \(38.1595\), and the p-value is \(0.0009\), which suggests that included studies do not share a common effect size.

The following is the breakdown of the model results:
- The “estimate” provides the estimated model coefficient or summary effect size, which is \(0.1252\)
- The “se” is the standard error of the “estimate”, which is \(0.0170\)
- The “zval” is the corresponding test-statistic, which is \(7.3642\)
- The “pval” is the corresponding p-value, wihch is \(<.0001\)
- The “ci.lb” is the corresponding lower bound of the confidence interval, which is \(0.0919\)
- The “ci.ub” is the corresponding upper bound of the confidence interval, which is \(0.1585\)


The q-statistic and \(I^2\) provide evidence for heterogeneity, but it does not provide any information on which studies are affecting the heterogeneity. Let’s use study IDs to identify the studies that are affecting heterogeneity.

Studies affecting heterogeneity

new_re_model <- rma(yi, vi, data=df, slab=paste.slab)
summary(new_re_model)
## 
## Random-Effects Model (k = 16; tau^2 estimator: REML)
## 
##   logLik  deviance       AIC       BIC      AICc 
##   8.6096  -17.2191  -13.2191  -11.8030  -12.2191   
## 
## tau^2 (estimated amount of total heterogeneity): 0.0081 (SE = 0.0055)
## tau (square root of estimated tau^2 value):      0.0901
## I^2 (total heterogeneity / total variability):   61.73%
## H^2 (total variability / sampling variability):  2.61
## 
## Test for Heterogeneity:
## Q(df = 15) = 38.1595, p-val = 0.0009
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub 
##   0.1499  0.0316  4.7501  <.0001  0.0881  0.2118  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Use Baujat plot to illustrate the studies that contribute to overall heterogeneity or overall results of the study.

baujat(new_re_model)

The studies closer to the top-right corner are those that most affect heterogeneity. In this case, the oneS affecting it the most are studies 10 and 12, but there are some other studies that also seem to affect it (e.g., 4, 11 and 3).

To identify outliers, and potential influential cases, we can run some diagnostics

plot(influence(re_model))

  • The first plot (rstudent) shows externally standardized residuals. Two studies have very large residuals (10, 12) which is in accord with what we mentioned above.

\(I^2\) Confidence Interval

predict(re_model, transf=transf.ztor)
## 
##    pred  ci.lb  ci.ub   cr.lb  cr.ub 
##  0.1488 0.0878 0.2087 -0.0371 0.3248
confint(re_model)  
## 
##        estimate   ci.lb   ci.ub 
## tau^2    0.0081  0.0017  0.0378 
## tau      0.0901  0.0412  0.1944 
## I^2(%)  61.7324 25.2799 88.2545 
## H^2      2.6132  1.3383  8.5139

Frequency distribution of effect sizes and normal distribution

hist(df$yi, breaks = 10, 
     xlab="Fisher's Z Transformed Correlation Coefficient", 
     main="", col="gray")
curve(dnorm(x, mean=mean(df$yi), sd=sd(df$yi)), 
      add=TRUE, col="black") #line

Scatterplot of the distribution of effect sizes by standard error

funnel(re_model)

Forest plot

forest(re_model, atransf=transf.ztor, cex=.7)

Note that on the right side, the numbers are in the format Correlation[95% Confidence Interval].

Meta-analysis summary table

library(tidyverse) 
summary.df <- data.frame(matrix(ncol = 8))
colnames(summary.df) <- c("Moderator", "Value", "Number", "K", "Estimate", 
                          "CI", "Z", "Pval")
summary.df <- summary.df[-1,]
o_numbers <- c(2186, 1290, 2666, 811, 1255, 2221, 626, 2524, 1130, 2346) # from table 2 in paper

summary.function <- function(variable, value, text1, text2, df){
  res_x <- rma(yi, vi, data=df, subset=(variable==value))
  res_x
  summary.df <- summary.df %>% add_row(Moderator = text1, 
                       Value = text2,
                       Estimate = round(res_x$beta, digits=3),   
                       CI = paste(round(res_x$ci.lb, digits=3), 
                                  round(res_x$ci.ub, digits=3), 
                                  sep = ", ", collapse = NULL),   
                       Z = round(res_x$zval, digits=3),    
                       Pval = round(res_x$pval, digits=3), 
                       K=res_x$k)
  summary.df 
}

summary.df <- summary.function(df$a_measure, "self-report", 
                               "Medication adherence measurement", 
                               "Self-report only (yes)", df)
summary.df <- summary.function(df$a_measure, "other", 
                               "Medication adherence measurement", 
                               "Self-report only (no)", df)
summary.df <- summary.function(df$c_measure, "NEO", 
                               "Conscientiousness measurement", "NEO", df) 
# NEO = conscientiousness was measured with some version of the NEO personality inventory
summary.df <- summary.function(df$c_measure, "other", 
                               "Conscientiousness measurement", 
                               "Other measure", df) 
summary.df <- summary.function(df$design, "cross-sectional", 
                               "Study design", "Cross-sectional", df)
summary.df <- summary.function(df$design, "prospective", 
                               "Study design", "Prospective", df) 

res_age1 <- rma(yi, vi, data=df, subset=(df$meanage<50))
summary.df <- summary.df %>% add_row(Moderator = "Mean age", 
                       Value = "Less than 50 years old",
                       Estimate = round(res_age1$beta, digits=3),   
                       CI = paste(round(res_age1$ci.lb, digits=3), 
                                  round(res_age1$ci.ub, digits=3), 
                                  sep = ", ", collapse = NULL),   
                       Z = round(res_age1$zval, digits=3),    
                       Pval = round(res_age1$pval, digits=3),
                       K=res_age1$k)

res_age2 <- rma(yi, vi, data=df, subset=(df$meanage>50))
summary.df <- summary.df %>% add_row(Moderator = "Mean age", 
                       Value = "Greater than 50 years old",
                       Estimate = round(res_age2$beta, digits=3),   
                       CI = paste(round(res_age2$ci.lb, digits=3), 
                                  round(res_age2$ci.ub, digits=3), 
                                  sep = ", ", collapse = NULL),   
                       Z = round(res_age2$zval, digits=3),    
                       Pval = round(res_age2$pval, digits=3),
                       K=res_age2$k)

summary.df <- summary.function(df$controls, 
                "multiple", "Controls used in analysis", "Yes", df) 
summary.df <- summary.function(df$controls, 
                "none", "Controls used in analysis", "No", df)


for(i in 1:10){
  if(summary.df$Pval[i]==0){
    summary.df$Pval[i] <- "<0.001"
  }
  summary.df$Number[i] <- as.numeric(o_numbers[i])
}

summary.df