This document is used to highlight considerations for meta-analysis of CRD

Warning: For the contrast-based analysis, it only works for 2 treatments. However, it should be valid for any number of treatments for the arm-based analysis.

For further reference on contrast-based analysis for 3 or more treatments, refer to the Latin square code.

library(tidyverse,ggplot2)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
rm(list=ls())
set.seed(50)  # set seed for reproducibility.
heterorun = TRUE  # you could set this to FALSE for smaller studies.
nStudy = 25  # Number of studies
ntrt = 2     # Number of treatments 
(nreps =sample(seq(40,60,2),nStudy,replace=TRUE)) 
##  [1] 60 46 42 52 44 54 42 60 48 60 46 58 54 48 42 52 40 58 58 56 42 56 42 58 60

Simulation

Simulate a \(n_{trt}\) = 2 treatment completely randomized design data from 25 different studies with sample sizes ranging from 40 to 60 cows

getwd()  # Let's make sure you're in the right directory
## [1] "C:/Users/Robert J. Tempelman/OneDrive - Michigan State University/Tempelman/Meta_analysis/Simulation"
##### THE FOLLOWING JUST SIMULATES SOME DATA BASED ON THE FOLLOWING SPECIFICATIONS   #####
##########################################################################################################
######################## BEGINNING OF SIMULATION #########################################################
##########################################################################################################

# we should all simulate exactly the same data if we use the same seed!!!

# Let's data from a number (nStudy) of studies, each being a completely randomized design involving two treatments.

#We'll set the variance components 

# In a 
sigmae = 5  # residual (within cow) variance
sigmacow = 5  # between-cow variance
sigmastudy = 5  # between-study variance
  # i.e., some studies are characterized by above/below average responses!
sigmastudytrt = 3  # study by trt interaction variance
  # i.e., non-zero sigmastudytrt: treatment differences may randomly differ because of differences in preparations.

cat("number of cows per treatment per study",sep="\n")
## number of cows per treatment per study
 # simulated different number of reps per Study varying anywhere from 40 60 in increments of 2.
# ok typically studies are not really this big but wanting to demonstrate how to estimate heterogeneous residual variances per treatment in a joint analysis
# (a Bayesian analysis would be far more suitable for this.).  Otherwise, we can delete that code.
overallmean = 40
trteffects = seq(-1,+1,length.out=ntrt)  # treatment effects relative to overall mean
(trtlabels = toupper(letters[1:ntrt]))
## [1] "A" "B"
Studyeffects = rnorm(sd=sqrt(sigmastudy),nStudy)  # simulated study effects

studytrtint = vector(mode="list",length= nStudy)  # simulated study x treatment effects
for (Study in 1:nStudy){
  studytrtint[[Study]]=rnorm(sd = sqrt(sigmastudytrt),ntrt)
}

# save the true study by treatment effects for later.
study_trt = unlist(studytrtint)
Study_Trtlabels = expand.grid(trtlabels,1:nStudy)
Study_Trtlabels = paste0(Study_Trtlabels$Var1,':',Study_Trtlabels$Var2)
study_trt = data.frame(Study_Trtlabels,study_trt)

Study_data <- vector(mode="list",length=nStudy)  # we'll first save the data in this list, separate study for each element

for (Study in 1:nStudy) {
  Trt = rep(trtlabels,each=nreps[Study]) # get trt label assignments for each record
  Cow = seq(1:(ntrt*nreps[Study]))  # and cow labels for each record
  Trt_effects = rep(trteffects,each=nreps[Study])  #along with their effects
  Trt_Study_effects = rep(studytrtint[[Study]],each=nreps[Study]) # and the trt*study effects
  residuals = rnorm(sd = sqrt(sigmae+sigmacow),ntrt*nreps[Study]) # and finally the residuals
  FCM = round(overallmean + Trt_effects + Studyeffects[Study] + Trt_Study_effects + residuals,2)  # generate the record and round to 2nd decimal
  Study_data[[Study]] = data.frame(Study,Trt,Cow,FCM)  #store it.
}  

overallCRDdata = bind_rows(Study_data) %>% # the entire data set
    mutate(across(!starts_with('FCM'), as.factor))
cat("variable summary on meta-dataset",sep="\n")
## variable summary on meta-dataset
str(overallCRDdata)
## 'data.frame':    2556 obs. of  4 variables:
##  $ Study: Factor w/ 25 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Trt  : Factor w/ 2 levels "A","B": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Cow  : Factor w/ 120 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ FCM  : num  43.7 38 39.7 35.3 44.8 ...
head(overallCRDdata)
##   Study Trt Cow   FCM
## 1     1   A   1 43.72
## 2     1   A   2 37.99
## 3     1   A   3 39.66
## 4     1   A   4 35.26
## 5     1   A   5 44.77
## 6     1   A   6 34.26
write_csv(overallCRDdata,"overallCRDdata.csv")

##########################################################################################################
######################## END OF SIMULATION ###############################################################
##########################################################################################################

Analysis of raw data

Naive common effects analysis

# Analysis of raw data

#IPD (Individual Performance Data)
# using raw data for data analyses
# assuming equal residual variances for each study

library(emmeans)  # library useful for computing treatment means and mean differences
emm_options(pbkrtest.limit = nrow(overallCRDdata))  # allows KR degrees of freedom for specified size of dataset (default = 3000)
  # naive analysis that ignores study effects

# Common effects analysis (Ignoring Study Heterogeneity!)
fixed_analysis = lm(FCM~Trt,data=overallCRDdata)  # Fit Trt as only factor
summary(fixed_analysis)
## 
## Call:
## lm(formula = FCM ~ Trt, data = overallCRDdata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.3170  -2.8095  -0.0632   2.7512  17.0930 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  38.8670     0.1201   323.5   <2e-16 ***
## TrtB          1.8525     0.1699    10.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.295 on 2554 degrees of freedom
## Multiple R-squared:  0.04448,    Adjusted R-squared:  0.04411 
## F-statistic: 118.9 on 1 and 2554 DF,  p-value: < 2.2e-16
fixed_Trt= emmeans(fixed_analysis,"Trt")
(fixed_effects_means = summary(fixed_Trt))
##  Trt emmean   SE   df lower.CL upper.CL
##  A     38.9 0.12 2554     38.6     39.1
##  B     40.7 0.12 2554     40.5     41.0
## 
## Confidence level used: 0.95
(fixed_effects_diff = pairs(fixed_Trt))
##  contrast estimate   SE   df t.ratio p.value
##  A - B       -1.85 0.17 2554 -10.904  <.0001

Mixed model analysis

NOTE THIS MODEL PERFECTLY MATCHES THE MODEL GENERATION PROCESS

accounts for study heterogeneity

library(lme4)  # mixed model procedure in R.
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
overall_analysis = lmer(FCM~Trt+(1|Study/Trt),data=overallCRDdata)  # Fit Trt as Fixed, Study and Study*Trt as random
summary(overall_analysis)
## Linear mixed model fit by REML ['lmerMod']
## Formula: FCM ~ Trt + (1 | Study/Trt)
##    Data: overallCRDdata
## 
## REML criterion at convergence: 13268.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6967 -0.6534  0.0111  0.6612  3.2764 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Trt:Study (Intercept) 2.625    1.620   
##  Study     (Intercept) 6.150    2.480   
##  Residual              9.832    3.136   
## Number of obs: 2556, groups:  Trt:Study, 50; Study, 25
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   38.807      0.599  64.783
## TrtB           1.923      0.475   4.048
## 
## Correlation of Fixed Effects:
##      (Intr)
## TrtB -0.396
lsmeansraw_Trt= emmeans(overall_analysis,"Trt")
(overall_meansraw = summary(lsmeansraw_Trt))
##  Trt emmean    SE   df lower.CL upper.CL
##  A     38.8 0.599 32.6     37.6     40.0
##  B     40.7 0.599 32.6     39.5     41.9
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
(effectsize_raw = pairs(lsmeansraw_Trt))
##  contrast estimate    SE df t.ratio p.value
##  A - B       -1.92 0.475 24  -4.048  0.0005
## 
## Degrees-of-freedom method: kenward-roger

Quality Control Checks on Simulation

Study effects (Estimated versus True)

# This is really not important but it might be need to see how the estimated (Predicted) study effects line up with the truth
# Estimated ("Predicted") Study Effects
BLUP_Studyeffects = coef(overall_analysis)$Study[,1]-mean(coef(overall_analysis)$Study[,1])  # need to subtract the mean to express relative to zero
trt = data.frame(Studyeffects,BLUP_Studyeffects)
# Plot Estimated ("Predicted") Study Effects vs True Study Effects
ggplot(data=trt,aes(x=Studyeffects ,y=BLUP_Studyeffects)) + geom_point() + geom_abline(intercept=0,slope=1) +
  ggtitle("Predicted vs True Study Effects") +
  xlab("True effects") + 
  ylab("BLUP")

## Study by Treatment effects (Estimated versus True)

# Let's do the same thing for the study by treatment effects
#Estimated study by treatment effects 
BLUP_study_trt = coef(overall_analysis)$`Trt:Study`[,1]-mean(coef(overall_analysis)$`Trt:Study`[,1]) # need to subtract the mean to express relative to zero
BLUP_study_trt = data.frame(rownames(coef(overall_analysis)$`Trt:Study`),BLUP_study_trt)
colnames(BLUP_study_trt) = c("Study_Trtlabels","BLUP_study_trt")
study_trt = study_trt %>%
   left_join(BLUP_study_trt,by=c("Study_Trtlabels"))
# Plot Estimated ("Predicted") Study by Treatment Effects vs True Study by Treatment Effects
ggplot(study_trt,aes(x=study_trt ,y=BLUP_study_trt)) + geom_point() + geom_abline(intercept=0,slope=1) +
  ggtitle("Predicted vs True Study by Treatment Effects") +
  xlab("True effects") + 
  ylab("BLUP") +
  geom_text(x=-3, y=2, label="Notice shrinkage to zero by BLUP")

Quite honestly, one might really compare a meta-analysis on raw data where heterogeneous residual and other variance components are modeled across studies but this would best require a Bayesian analysis which is beyond the scope of this workshop.
A frequentist approach to this often doesn’t converge. The following frequentist analysis only considers heterogeneous residual variances

if(heterorun) {
library(nlme)
# modeling heterogeneous residual variances across Studies and including random effects of Study and Treatment by Study
##  THIS MIGHT BE CONSIDERED THE REFERENCE ANALYSIS AGAINST A META-ANALYSIS ONLY USING SUMMARY STATISTICS IF INDIVIDUAL COW DATA WAS READILY AVAILABLE AND RESIDUAL VARIABILITY WAS TRULY HETEROGENEOUS ACROSS STUDIES
##  HOWEVER THIS MIGHT NOT TYPICALLY CONVERGE IF INDIVIDUAL STUDIES ARE "SMALL"
### FURTHERMORE, IF OTHER VARIANCE COMPONENTS ARE TRULY HETEROGENEOUS ACROSS STUDIES, THEN THIS IS REALLY NOT THE RIGHT REFERENCE ANALYSIS
### WOULD NEED A BAYESIAN APPROACH TO MODEL HETEROGENEOUS VARIANCE COMPONENTS ACROSS STUDIES BUT BEYOND SCOPE OF THIS WORKSHOP.
hetero.lme <- lme(FCM ~ Trt , data=overallCRDdata, random = ~ 1|Study/Trt, weights=varIdent(form = ~ 1 | Study),method="REML")  # this will not always converge
summary(hetero.lme)  
lsmeansraw_Trt_hetero= emmeans(hetero.lme,"Trt")
(overall_meansraw_hetero = summary(lsmeansraw_Trt_hetero))


# doesn't seem to be a big difference between the two (hetero versus homogeneous residual variances)
(effectsize_raw_hetero  = pairs(lsmeansraw_Trt_hetero))
effectsize_raw
}
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:lme4':
## 
##     lmList
## The following object is masked from 'package:dplyr':
## 
##     collapse
##  contrast estimate    SE df t.ratio p.value
##  A - B       -1.92 0.475 24  -4.048  0.0005
## 
## Degrees-of-freedom method: kenward-roger

In reality the real gold standard would allow all variance components to be heterogeneous across studies or herds but that would require a solid Bayesian analysis (e.g. https://pubmed.ncbi.nlm.nih.gov/27865511/ or https://gsejournal.biomedcentral.com/articles/10.1186/1297-9686-37-1-31 ) which is beyond the scope of this course.

#Meta-analysis using summary statistics

Let’s create the meta-analysis data points (effect size, stderr) for each experiment

First need to create some functions

 #need to create some functions first.
# need to create a separate function for model
separate_model = function(df) {
  lm(FCM~Trt,data=df)
}

# function to save the mean trt difference for each Study
contrast = function(model) {
  lsmeans_Trt= emmeans(model,"Trt")
  effectsize = pairs(lsmeans_Trt)
  effectsize = as.data.frame(summary(effectsize))
  return(effectsize)
}

# function to save the trt means for each Study
lsmeans = function(model) {
  lsmeans_Trt= emmeans(model,"Trt")
  means = summary(lsmeans_Trt)
  return(means)
}

This is what the analysis would look like for one study (Study #1)

Study1_analysis = lm(FCM~Trt,data=filter(overallCRDdata,Study==1))
joint_tests(Study1_analysis)
##  model term df1 df2 F.ratio p.value
##  Trt          1 118  50.637  <.0001
summary(Study1_analysis)
## 
## Call:
## lm(formula = FCM ~ Trt, data = filter(overallCRDdata, Study == 
##     1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.2992 -2.3273 -0.3512  2.1658  7.6608 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  38.3392     0.4019  95.403  < 2e-16 ***
## TrtB          4.0442     0.5683   7.116 9.31e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.113 on 118 degrees of freedom
## Multiple R-squared:  0.3003, Adjusted R-squared:  0.2943 
## F-statistic: 50.64 on 1 and 118 DF,  p-value: 9.312e-11
lsmeans(Study1_analysis)
##  Trt emmean    SE  df lower.CL upper.CL
##  A     38.3 0.402 118     37.5     39.1
##  B     42.4 0.402 118     41.6     43.2
## 
## Confidence level used: 0.95
contrast(Study1_analysis)
##   contrast  estimate        SE  df   t.ratio      p.value
## 1    A - B -4.044167 0.5683209 118 -7.115991 9.311615e-11

Create the sample statistics for each trial

Study_specific_contrasts = overallCRDdata %>%
  group_by(Study) %>%
  nest() %>%
  mutate(model = map(data,separate_model)) %>%
  mutate(effectsize = map(model,contrast)) %>%
  mutate(nrec = as.numeric(map(data,nrow))) %>% 
  unnest(effectsize)    %>%
  dplyr::select(-c(data,model)) %>%
  mutate(weight = (1/SE)^2) %>%  # What i use in the mixed model
  mutate(sampvar = SE^2)         # What the r package metafor needs

head(Study_specific_contrasts)
## # A tibble: 6 × 10
## # Groups:   Study [6]
##   Study contrast estimate    SE    df t.ratio  p.value  nrec weight sampvar
##   <fct> <chr>       <dbl> <dbl> <dbl>   <dbl>    <dbl> <dbl>  <dbl>   <dbl>
## 1 1     A - B      -4.04  0.568   118   -7.12 9.31e-11   120   3.10   0.323
## 2 2     A - B      -2.17  0.692    90   -3.14 2.32e- 3    92   2.09   0.479
## 3 3     A - B      -3.77  0.715    82   -5.27 1.10e- 6    84   1.95   0.512
## 4 4     A - B       2.32  0.643   102    3.61 4.80e- 4   104   2.42   0.413
## 5 5     A - B      -8.21  0.678    86  -12.1  2.75e-20    88   2.17   0.460
## 6 6     A - B       0.631 0.618   106    1.02 3.10e- 1   108   2.62   0.382
# this might not be too helpful to look at if there is a lot of differences in standard errors between studies.
boxplot(estimate~contrast,main="Box plot of study-specific treatment differences",data=Study_specific_contrasts)

# let's just consider the A-B contrast for now
Study_specific_contrastsAB = filter(Study_specific_contrasts,contrast=='A - B')

Common effects model

  1. Basic calculation
weighted.mean(Study_specific_contrastsAB$estimate,Study_specific_contrastsAB$weight)
## [1] -1.908415
(stderr = (sum(Study_specific_contrastsAB$weight))^(-1/2))
## [1] 0.1230142
  1. Linear model
# glmmTMB is used because it allows one to fix the residual variance to 1 (just a "trick" to allow proper weightings by 1/SE^2 which already reflect residual var!)
library(glmmTMB)
## Warning: package 'glmmTMB' was built under R version 4.2.2
# overall mean model
FM_1meta <- glmmTMB(estimate ~ 1 ,
                weights = weight,
                family = gaussian,
                REML=TRUE,
                data = Study_specific_contrastsAB,
                start = list(betad = log(1)),  # fix residual variance = 1
                map = list(betad = factor(NA))
)
summary(FM_1meta)
##  Family: gaussian  ( identity )
## Formula:          estimate ~ 1
## Data: Study_specific_contrastsAB
## Weights: weight
## 
##      AIC      BIC   logLik deviance df.resid 
##    477.0    478.3   -237.5    475.0       25 
## 
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.908      0.123  -15.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  1. Using metafor. Also provides a forest plot
library(metafor)
## Warning: package 'metafor' was built under R version 4.2.3
## Loading required package: metadat
## Warning: package 'metadat' was built under R version 4.2.3
## Loading required package: numDeriv
## 
## Loading the 'metafor' package (version 4.2-0). For an
## introduction to the package please type: help(metafor)
(res_EE = rma.uni(yi=estimate,vi=sampvar,data=Study_specific_contrastsAB,method="EE"))  # common effects
## 
## Equal-Effects Model (k = 25)
## 
## I^2 (total heterogeneity / total variability):   93.17%
## H^2 (total variability / sampling variability):  14.63
## 
## Test for Heterogeneity:
## Q(df = 24) = 351.2288, p-val < .0001
## 
## Model Results:
## 
## estimate      se      zval    pval    ci.lb    ci.ub      
##  -1.9084  0.1230  -15.5138  <.0001  -2.1495  -1.6673  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest(res_EE)

#Mixed Effects Models

Contrast based analysis

Let’s look at properly modeling heterogeneity between studies.

  1. Mixed effects model
library(glmmTMB)
# random effects model
RM_1meta <- glmmTMB(estimate ~ 1 + (1|Study),  
              weights = weight,
              family = gaussian,
              REML=TRUE,
              data = Study_specific_contrastsAB,
              start = list(betad = log(1)),  # fix residual variance = 1
              map = list(betad = factor(NA))
)
summary(RM_1meta)
##  Family: gaussian  ( identity )
## Formula:          estimate ~ 1 + (1 | Study)
## Data: Study_specific_contrastsAB
## Weights: weight
## 
##      AIC      BIC   logLik deviance df.resid 
##    216.1    218.5   -106.1    212.1       24 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  Study    (Intercept) 5.25     2.291   
##  Residual             1.00     1.000   
## Number of obs: 25, groups:  Study, 25
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.9266     0.4752  -4.054 5.03e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vc = VarCorr(RM_1meta)  # variance components
print(vc,comp=("Variance"))
## 
## Conditional model:
##  Groups   Name        Variance
##  Study    (Intercept) 5.2504  
##  Residual             1.0000
  1. metafor
(res_REML = rma.uni(yi=estimate,vi=sampvar,data=Study_specific_contrastsAB,method="REML"))  # default
## 
## Random-Effects Model (k = 25; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 5.2504 (SE = 1.6295)
## tau (square root of estimated tau^2 value):      2.2914
## I^2 (total heterogeneity / total variability):   93.27%
## H^2 (total variability / sampling variability):  14.85
## 
## Test for Heterogeneity:
## Q(df = 24) = 351.2288, p-val < .0001
## 
## Model Results:
## 
## estimate      se     zval    pval    ci.lb    ci.ub      
##  -1.9266  0.4752  -4.0542  <.0001  -2.8579  -0.9952  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest(res_REML)

Compare the width of this interval to the one provided by the common effects model

Likelihood ratio test

How to choose between the mixed effects and common effects model?? Two ways to do this.

# mixed model
anova(res_EE,res_REML)  # likelihood ratio test to test for differences in heterogeneity.
## 
##         df      AIC      BIC     AICc    logLik      LRT   pval       QE  tau^2 
## Full     2 113.6630 116.0191 114.2345  -54.8315                 351.2288 5.2504 
## Reduced  1 374.5877 375.7657 374.7695 -186.2938 262.9246 <.0001 351.2288 0.0000 
##             R^2 
## Full            
## Reduced 0.0000%
# metafor has it built in too
anova(FM_1meta,RM_1meta)  # this works too.
## Data: Study_specific_contrastsAB
## Models:
## FM_1meta: estimate ~ 1, zi=~0, disp=~1
## RM_1meta: estimate ~ 1 + (1 | Study), zi=~0, disp=~1
##          Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## FM_1meta  1 477.03 478.25 -237.52   475.03                             
## RM_1meta  2 216.11 218.55 -106.06   212.11 262.92      1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Confidence and Prediction Intervals for Contrasts under the Mixed Model Analysis

where confidence intervals are useful for assessing reliability of the meta-estimate for the corresponding parameter, the prediction interval indicates what is the plausible range of responses in future experiments.

This may be more pertinent for an “extension”-based message in terms of relaying to farmers,consultants, etc. what uncertainty they may anticipate with respect to their own operations.

# confirming the metafor CI on the effect size using the mixed model analysis.
Studyvar = c(((VarCorr(RM_1meta))["cond"]$cond$Study))
overallest = coef(summary(RM_1meta))$cond
cat("Confidence Interval",sep="\n")
## Confidence Interval
(LCL = overallest[1,"Estimate"]- 1.96*overallest[1,"Std. Error"])
## [1] -2.857952
(UCL = overallest[1,"Estimate"]+ 1.96*overallest[1,"Std. Error"])
## [1] -0.9951734
# Prediction intervals
# useful for determining uncertainty for  a future experiment.
cat("Prediction Interval",sep="\n")
## Prediction Interval
(LPL = overallest[1,"Estimate"]- 1.96*sqrt(overallest[1,"Std. Error"]^2 + Studyvar))
## [1] -6.513221
(UPL = overallest[1,"Estimate"]+ 1.96*sqrt(overallest[1,"Std. Error"]^2 + Studyvar))
## [1] 2.660096

Arm Based Analysis

Let’s consider study-specific means rather than study-specific mean differences as the response variable.

# Suppose you have study specific means instead
# often referred to as "arm-based" (rather than contrast-based) analysis.

# We'll also save the Study_specific means in a different file.
# Their differences should correspond to the study-specific contrasts in the above file.
Study_specific_means = overallCRDdata %>%
  group_by(Study) %>%
  nest() %>%
  mutate(model = map(data,separate_model)) %>%
  mutate(lsmeans = map(model,lsmeans)) %>%
  mutate(nrec = as.numeric(map(data,nrow))) %>% 
  unnest(lsmeans)    %>%
  dplyr::select(-c(data,model,lower.CL,upper.CL)) %>%
  mutate(weights = (1/SE^2),sampvar = SE^2)

head(Study_specific_means)
## # A tibble: 6 × 8
## # Groups:   Study [3]
##   Study Trt   emmean    SE    df  nrec weights sampvar
##   <fct> <fct>  <dbl> <dbl> <dbl> <dbl>   <dbl>   <dbl>
## 1 1     A       38.3 0.402   118   120    6.19   0.161
## 2 1     B       42.4 0.402   118   120    6.19   0.161
## 3 2     A       35.2 0.489    90    92    4.18   0.239
## 4 2     B       37.4 0.489    90    92    4.18   0.239
## 5 3     A       33.7 0.506    82    84    3.91   0.256
## 6 3     B       37.5 0.506    82    84    3.91   0.256
boxplot(emmean~Trt,data=Study_specific_means,main="Study-specific treatment means")

Arms-based analysis

Mixed model software based

  1. Typical (incomplete) mixed model meta-analysis observed in dairy/animal sciences
# this is what many animal scientists seem to be doing.
RM_0arm <- glmmTMB(emmean ~ Trt + (1|Study),
                     weights = (1/SE^2),
                     family = gaussian,
                     REML=TRUE,
                     data = Study_specific_means,
                     start = list(betad = log(1)),  # fix residual variance = 1
                     map = list(betad = factor(NA))
)
summary(RM_0arm)
##  Family: gaussian  ( identity )
## Formula:          emmean ~ Trt + (1 | Study)
## Data: Study_specific_means
## Weights: (1/SE^2)
## 
##      AIC      BIC   logLik deviance df.resid 
##    977.7    983.4   -485.8    971.7       49 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  Study    (Intercept) 7.459    2.731   
##  Residual             1.000    1.000   
## Number of obs: 50, groups:  Study, 25
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  38.8148     0.5533   70.16   <2e-16 ***
## TrtB          1.9084     0.1230   15.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_R0_Trt= emmeans(RM_0arm,"Trt")
summary(lsmeans_R0_Trt)
##  Trt emmean    SE df lower.CL upper.CL
##  A     38.8 0.553 49     37.7     39.9
##  B     40.7 0.553 49     39.6     41.8
## 
## Confidence level used: 0.95
(effectsize_R0_Trt = pairs(lsmeans_R0_Trt))
##  contrast estimate    SE df t.ratio p.value
##  A - B       -1.91 0.123 49 -15.514  <.0001
# this standard error is typically underreported...compare it to what we observed from analyzing the raw data!!
cat("Compare this to inferences from analyzing the raw data",sep="\n")
## Compare this to inferences from analyzing the raw data
effectsize_raw
##  contrast estimate    SE df t.ratio p.value
##  A - B       -1.92 0.475 24  -4.048  0.0005
## 
## Degrees-of-freedom method: kenward-roger
  1. Model R1 from Madden et al. (2016)

One needs to model random inconsistency as well!!!!

# you need to model random inconsistency as well!   
# (this is model R1 in Madden et al. (2016))
RM_1arm <- glmmTMB(emmean ~ Trt + (1|Study/Trt),
                weights = (1/SE^2),
                family = gaussian,
                REML=TRUE,
                data = Study_specific_means,
                start = list(betad = log(1)),  # fix residual variance = 1
                map = list(betad = factor(NA))
)
# (this is also model R1 in Madden et al. (2016))
summary(RM_1arm) 
##  Family: gaussian  ( identity )
## Formula:          emmean ~ Trt + (1 | Study/Trt)
## Data: Study_specific_means
## Weights: (1/SE^2)
## 
##      AIC      BIC   logLik deviance df.resid 
##    716.8    724.4   -354.4    708.8       48 
## 
## Random effects:
## 
## Conditional model:
##  Groups    Name        Variance Std.Dev.
##  Trt:Study (Intercept) 2.625    1.620   
##  Study     (Intercept) 6.146    2.479   
##  Residual              1.000    1.000   
## Number of obs: 50, groups:  Trt:Study, 50; Study, 25
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  38.8058     0.5990   64.79  < 2e-16 ***
## TrtB          1.9266     0.4752    4.05 5.03e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_R1_Trt= emmeans(RM_1arm,"Trt")
summary(lsmeans_R1_Trt)
##  Trt emmean    SE df lower.CL upper.CL
##  A     38.8 0.599 48     37.6     40.0
##  B     40.7 0.599 48     39.5     41.9
## 
## Confidence level used: 0.95
(effectsize_R1_Trt = pairs(lsmeans_R1_Trt))
##  contrast estimate    SE df t.ratio p.value
##  A - B       -1.93 0.475 48  -4.054  0.0002
cat("Compare this to inferences from analyzing the raw data",sep="\n")
## Compare this to inferences from analyzing the raw data
effectsize_raw
##  contrast estimate    SE df t.ratio p.value
##  A - B       -1.92 0.475 24  -4.048  0.0005
## 
## Degrees-of-freedom method: kenward-roger

Model R3 in Madden et al. (2016)

# The following is an alternative based on separate variances for each treatment
# Model R3 in Madden et al. 2016...specifies study variances to be different for each treatment (UN= unstructured)
RM_3arm <- glmmTMB(emmean ~ Trt + (Trt-1|Study),
                     weights = (1/SE^2),
                     family = gaussian,
                     REML=TRUE,
                     data = Study_specific_means,
                     start = list(betad = log(1)),  # fix residual variance = 1
                     map = list(betad = factor(NA))
)
summary(RM_3arm)
##  Family: gaussian  ( identity )
## Formula:          emmean ~ Trt + (Trt - 1 | Study)
## Data: Study_specific_means
## Weights: (1/SE^2)
## 
##      AIC      BIC   logLik deviance df.resid 
##    716.7    726.3   -353.4    706.7       47 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name Variance Std.Dev. Corr 
##  Study    TrtA 10.643   3.262         
##           TrtB  6.903   2.627    0.72 
##  Residual       1.000   1.000         
## Number of obs: 50, groups:  Study, 25
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  38.8052     0.6585   58.93   <2e-16 ***
## TrtB          1.9275     0.4753    4.06    5e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_R3_Trt= emmeans(RM_3arm,"Trt")
summary(lsmeans_R3_Trt)
##  Trt emmean    SE df lower.CL upper.CL
##  A     38.8 0.659 47     37.5     40.1
##  B     40.7 0.533 47     39.7     41.8
## 
## Confidence level used: 0.95
(effectsize_R3_Trt = pairs(lsmeans_R3_Trt))
##  contrast estimate    SE df t.ratio p.value
##  A - B       -1.93 0.475 47  -4.056  0.0002

What too many people are doing right now! Estimating the residual variance and just treating study as a random effect

# The following is an alternative based on separate variances for each treatment
# Model R3 in Madden et al. 2016...specifies study variances to be different for each treatment (UN= unstructured)
RM_wrong<- glmmTMB(emmean ~ Trt + (1|Study),
                     weights = (1/SE^2),
                     family = gaussian,
                     REML=TRUE,
                     data = Study_specific_means
)
summary(RM_wrong)
##  Family: gaussian  ( identity )
## Formula:          emmean ~ Trt + (1 | Study)
## Data: Study_specific_means
## Weights: (1/SE^2)
## 
##      AIC      BIC   logLik deviance df.resid 
##    959.2    966.9   -475.6    951.2       48 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  Study    (Intercept) 7.414    2.723   
##  Residual             1.474    1.214   
## Number of obs: 50, groups:  Study, 25
## 
## Dispersion estimate for gaussian family (sigma^2): 1.47 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  38.8152     0.5549   69.94   <2e-16 ***
## TrtB          1.9084     0.1493   12.78   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(RM_wrong,"Trt")
##  Trt emmean    SE df lower.CL upper.CL
##  A     38.8 0.555 48     37.7     39.9
##  B     40.7 0.555 48     39.6     41.8
## 
## Confidence level used: 0.95
summary(emmeans(RM_wrong,"Trt"))
##  Trt emmean    SE df lower.CL upper.CL
##  A     38.8 0.555 48     37.7     39.9
##  B     40.7 0.555 48     39.6     41.8
## 
## Confidence level used: 0.95
(effectsize_R3_Trt = pairs(emmeans(RM_wrong,"Trt")))
##  contrast estimate    SE df t.ratio p.value
##  A - B       -1.91 0.149 48 -12.780  <.0001
cat("Compare this to inferences from analyzing the raw data",sep="\n")
## Compare this to inferences from analyzing the raw data
effectsize_raw
##  contrast estimate    SE df t.ratio p.value
##  A - B       -1.92 0.475 24  -4.048  0.0005
## 
## Degrees-of-freedom method: kenward-roger

Using metafor

Model M0: accounts for study heterogeneity but fails to model random inconsistency in treatment effects. Model M1: equal study variance for each treatment and equal covariance between any pair of treatments. Model M3: unequal study variances for each treatment and unequal covariances between any pair of treatments

#Need to use the multivariate option of metafor!!
library(metafor)
# incomplete accounting for heterogeneity -> does not model random inconsistency!
(res.mv0a <- rma.mv(emmean, sampvar, mods = ~ Trt , random = ~ 1|Study,  data=Study_specific_means)) 
## 
## Multivariate Meta-Analysis Model (k = 50; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed  factor 
## sigma^2    7.4590  2.7311     25     no   Study 
## 
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 240.6771, p-val < .0001
## 
## Model Results:
## 
##          estimate      se     zval    pval    ci.lb    ci.ub      
## intrcpt   38.8148  0.5533  70.1557  <.0001  37.7304  39.8992  *** 
## TrtB       1.9084  0.1230  15.5138  <.0001   1.6673   2.1495  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on contrasts
(res.mv0b <- rma.mv(emmean, sampvar, mods = ~ Trt -1 , random = ~ 1|Study,  data=Study_specific_means)) 
## 
## Multivariate Meta-Analysis Model (k = 50; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed  factor 
## sigma^2    7.4590  2.7311     25     no   Study 
## 
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
## 
## Test of Moderators (coefficients 1:2):
## QM(df = 2) = 5472.1248, p-val < .0001
## 
## Model Results:
## 
##       estimate      se     zval    pval    ci.lb    ci.ub      
## TrtA   38.8148  0.5533  70.1557  <.0001  37.7304  39.8992  *** 
## TrtB   40.7232  0.5533  73.6051  <.0001  39.6389  41.8076  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on means

# this is equivalent to modeling Study and Study*Trt as independent random effects (as in RM_means1) 
#Model R1 in Madden et al. (2016) which is equivalent to compound symmetry
(res.mv1a <- rma.mv(emmean, sampvar, mods = ~ Trt , random = ~ Trt | Study, struct="CS", data=Study_specific_means))  
## 
## Multivariate Meta-Analysis Model (k = 50; method: REML)
## 
## Variance Components:
## 
## outer factor: Study (nlvls = 25)
## inner factor: Trt   (nlvls = 2)
## 
##             estim    sqrt  fixed 
## tau^2      8.7716  2.9617     no 
## rho        0.7007             no 
## 
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 16.4368, p-val < .0001
## 
## Model Results:
## 
##          estimate      se     zval    pval    ci.lb    ci.ub      
## intrcpt   38.8058  0.5990  64.7866  <.0001  37.6318  39.9797  *** 
## TrtB       1.9266  0.4752   4.0542  <.0001   0.9952   2.8579  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on contrasts
(res.mv1b <- rma.mv(emmean, sampvar, mods = ~ Trt -1 , random = ~ Trt | Study, struct="CS", data=Study_specific_means)) 
## 
## Multivariate Meta-Analysis Model (k = 50; method: REML)
## 
## Variance Components:
## 
## outer factor: Study (nlvls = 25)
## inner factor: Trt   (nlvls = 2)
## 
##             estim    sqrt  fixed 
## tau^2      8.7716  2.9617     no 
## rho        0.7007             no 
## 
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
## 
## Test of Moderators (coefficients 1:2):
## QM(df = 2) = 5247.8844, p-val < .0001
## 
## Model Results:
## 
##       estimate      se     zval    pval    ci.lb    ci.ub      
## TrtA   38.8058  0.5990  64.7866  <.0001  37.6318  39.9797  *** 
## TrtB   40.7323  0.5990  68.0031  <.0001  39.5583  41.9063  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on means

#Model R2 in Madden et al. (2016)  # Heterogeneous compound symmetry
(res.mv2a <- rma.mv(emmean, sampvar, mods = ~ Trt  , random = ~ Trt | Study, struct="HCS", data=Study_specific_means)) 
## 
## Multivariate Meta-Analysis Model (k = 50; method: REML)
## 
## Variance Components:
## 
## outer factor: Study (nlvls = 25)
## inner factor: Trt   (nlvls = 2)
## 
##              estim    sqrt  k.lvl  fixed  level 
## tau^2.1    10.6431  3.2624     25     no      A 
## tau^2.2     6.9028  2.6273     25     no      B 
## rho         0.7171                    no        
## 
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 16.4480, p-val < .0001
## 
## Model Results:
## 
##          estimate      se     zval    pval    ci.lb    ci.ub      
## intrcpt   38.8052  0.6585  58.9288  <.0001  37.5146  40.0959  *** 
## TrtB       1.9275  0.4753   4.0556  <.0001   0.9960   2.8591  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on contrasts
(res.mv2b <- rma.mv(emmean, sampvar, mods = ~ Trt -1 , random = ~ Trt | Study, struct="HCS", data=Study_specific_means)) 
## 
## Multivariate Meta-Analysis Model (k = 50; method: REML)
## 
## Variance Components:
## 
## outer factor: Study (nlvls = 25)
## inner factor: Trt   (nlvls = 2)
## 
##              estim    sqrt  k.lvl  fixed  level 
## tau^2.1    10.6431  3.2624     25     no      A 
## tau^2.2     6.9028  2.6273     25     no      B 
## rho         0.7171                    no        
## 
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
## 
## Test of Moderators (coefficients 1:2):
## QM(df = 2) = 5898.5488, p-val < .0001
## 
## Model Results:
## 
##       estimate      se     zval    pval    ci.lb    ci.ub      
## TrtA   38.8052  0.6585  58.9288  <.0001  37.5146  40.0959  *** 
## TrtB   40.7328  0.5329  76.4313  <.0001  39.6882  41.7773  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on means

# Model R3 in Madden et al. (2016)  # Unstructured
(res.mv3a <- rma.mv(emmean, sampvar, mods = ~ Trt , random = ~ Trt | Study, struct="UN", data=Study_specific_means)) 
## 
## Multivariate Meta-Analysis Model (k = 50; method: REML)
## 
## Variance Components:
## 
## outer factor: Study (nlvls = 25)
## inner factor: Trt   (nlvls = 2)
## 
##              estim    sqrt  k.lvl  fixed  level 
## tau^2.1    10.6433  3.2624     25     no      A 
## tau^2.2     6.9029  2.6273     25     no      B 
## 
##     rho.A  rho.B     A   B 
## A       1            -  25 
## B  0.7171      1    no   - 
## 
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 16.4478, p-val < .0001
## 
## Model Results:
## 
##          estimate      se     zval    pval    ci.lb    ci.ub      
## intrcpt   38.8052  0.6585  58.9283  <.0001  37.5146  40.0959  *** 
## TrtB       1.9275  0.4753   4.0556  <.0001   0.9960   2.8591  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on contrasts
(res.mv4b <- rma.mv(emmean, sampvar, mods = ~ Trt -1 , random = ~ Trt | Study, struct="UN", data=Study_specific_means)) 
## 
## Multivariate Meta-Analysis Model (k = 50; method: REML)
## 
## Variance Components:
## 
## outer factor: Study (nlvls = 25)
## inner factor: Trt   (nlvls = 2)
## 
##              estim    sqrt  k.lvl  fixed  level 
## tau^2.1    10.6433  3.2624     25     no      A 
## tau^2.2     6.9029  2.6273     25     no      B 
## 
##     rho.A  rho.B     A   B 
## A       1            -  25 
## B  0.7171      1    no   - 
## 
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
## 
## Test of Moderators (coefficients 1:2):
## QM(df = 2) = 5898.5114, p-val < .0001
## 
## Model Results:
## 
##       estimate      se     zval    pval    ci.lb    ci.ub      
## TrtA   38.8052  0.6585  58.9283  <.0001  37.5146  40.0959  *** 
## TrtB   40.7328  0.5329  76.4311  <.0001  39.6882  41.7773  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on means

Likelihood ratio test

anova(RM_0arm,RM_1arm,RM_3arm) # using mixed model
## Data: Study_specific_means
## Models:
## RM_0arm: emmean ~ Trt + (1 | Study), zi=~0, disp=~1
## RM_1arm: emmean ~ Trt + (1 | Study/Trt), zi=~0, disp=~1
## RM_3arm: emmean ~ Trt + (Trt - 1 | Study), zi=~0, disp=~1
##         Df    AIC    BIC  logLik deviance    Chisq Chi Df Pr(>Chisq)    
## RM_0arm  3 977.69 983.42 -485.84   971.69                               
## RM_1arm  4 716.76 724.41 -354.38   708.76 262.9246      1     <2e-16 ***
## RM_3arm  5 716.71 726.27 -353.35   706.71   2.0556      1     0.1516    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(res.mv1a,res.mv3a)
## 
##         df      AIC      BIC     AICc    logLik    LRT   pval        QE 
## Full     5 234.2533 243.6093 235.6818 -112.1266               2279.6105 
## Reduced  4 234.3089 241.7937 235.2391 -113.1545 2.0556 0.1516 2279.6105

Compare Study specific standard errors of means (SEM) to Study specific standard errors of mean differences (SED)

just look at A vs B

For CRDs, SEM = SED / sqrt(2)

Study_specific_SEM = Study_specific_means %>% 
   filter(Trt =='A' | Trt=='B')%>%
   group_by(Study) %>%
   summarize(SEM=mean(SE))

Study_specific_SED = Study_specific_contrastsAB %>% 
   group_by(Study) %>%
   summarize(SED=mean(SE))

SEM_SED_compare = Study_specific_SEM %>%
    left_join(Study_specific_SED,by="Study") %>%
    mutate(SEM_SED_ratio = SEM/SED)

head(SEM_SED_compare)
## # A tibble: 6 × 4
##   Study   SEM   SED SEM_SED_ratio
##   <fct> <dbl> <dbl>         <dbl>
## 1 1     0.402 0.568         0.707
## 2 2     0.489 0.692         0.707
## 3 3     0.506 0.715         0.707
## 4 4     0.455 0.643         0.707
## 5 5     0.479 0.678         0.707
## 6 6     0.437 0.618         0.707