For further reference on contrast-based analysis for 3 or more treatments, refer to the Latin square code.
library(tidyverse,ggplot2)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
rm(list=ls())
set.seed(50) # set seed for reproducibility.
heterorun = TRUE # you could set this to FALSE for smaller studies.
nStudy = 25 # Number of studies
ntrt = 2 # Number of treatments
(nreps =sample(seq(40,60,2),nStudy,replace=TRUE))
## [1] 60 46 42 52 44 54 42 60 48 60 46 58 54 48 42 52 40 58 58 56 42 56 42 58 60
Simulate a \(n_{trt}\) = 2 treatment completely randomized design data from 25 different studies with sample sizes ranging from 40 to 60 cows
getwd() # Let's make sure you're in the right directory
## [1] "C:/Users/Robert J. Tempelman/OneDrive - Michigan State University/Tempelman/Meta_analysis/Simulation"
##### THE FOLLOWING JUST SIMULATES SOME DATA BASED ON THE FOLLOWING SPECIFICATIONS #####
##########################################################################################################
######################## BEGINNING OF SIMULATION #########################################################
##########################################################################################################
# we should all simulate exactly the same data if we use the same seed!!!
# Let's data from a number (nStudy) of studies, each being a completely randomized design involving two treatments.
#We'll set the variance components
# In a
sigmae = 5 # residual (within cow) variance
sigmacow = 5 # between-cow variance
sigmastudy = 5 # between-study variance
# i.e., some studies are characterized by above/below average responses!
sigmastudytrt = 3 # study by trt interaction variance
# i.e., non-zero sigmastudytrt: treatment differences may randomly differ because of differences in preparations.
cat("number of cows per treatment per study",sep="\n")
## number of cows per treatment per study
# simulated different number of reps per Study varying anywhere from 40 60 in increments of 2.
# ok typically studies are not really this big but wanting to demonstrate how to estimate heterogeneous residual variances per treatment in a joint analysis
# (a Bayesian analysis would be far more suitable for this.). Otherwise, we can delete that code.
overallmean = 40
trteffects = seq(-1,+1,length.out=ntrt) # treatment effects relative to overall mean
(trtlabels = toupper(letters[1:ntrt]))
## [1] "A" "B"
Studyeffects = rnorm(sd=sqrt(sigmastudy),nStudy) # simulated study effects
studytrtint = vector(mode="list",length= nStudy) # simulated study x treatment effects
for (Study in 1:nStudy){
studytrtint[[Study]]=rnorm(sd = sqrt(sigmastudytrt),ntrt)
}
# save the true study by treatment effects for later.
study_trt = unlist(studytrtint)
Study_Trtlabels = expand.grid(trtlabels,1:nStudy)
Study_Trtlabels = paste0(Study_Trtlabels$Var1,':',Study_Trtlabels$Var2)
study_trt = data.frame(Study_Trtlabels,study_trt)
Study_data <- vector(mode="list",length=nStudy) # we'll first save the data in this list, separate study for each element
for (Study in 1:nStudy) {
Trt = rep(trtlabels,each=nreps[Study]) # get trt label assignments for each record
Cow = seq(1:(ntrt*nreps[Study])) # and cow labels for each record
Trt_effects = rep(trteffects,each=nreps[Study]) #along with their effects
Trt_Study_effects = rep(studytrtint[[Study]],each=nreps[Study]) # and the trt*study effects
residuals = rnorm(sd = sqrt(sigmae+sigmacow),ntrt*nreps[Study]) # and finally the residuals
FCM = round(overallmean + Trt_effects + Studyeffects[Study] + Trt_Study_effects + residuals,2) # generate the record and round to 2nd decimal
Study_data[[Study]] = data.frame(Study,Trt,Cow,FCM) #store it.
}
overallCRDdata = bind_rows(Study_data) %>% # the entire data set
mutate(across(!starts_with('FCM'), as.factor))
cat("variable summary on meta-dataset",sep="\n")
## variable summary on meta-dataset
str(overallCRDdata)
## 'data.frame': 2556 obs. of 4 variables:
## $ Study: Factor w/ 25 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Trt : Factor w/ 2 levels "A","B": 1 1 1 1 1 1 1 1 1 1 ...
## $ Cow : Factor w/ 120 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ FCM : num 43.7 38 39.7 35.3 44.8 ...
head(overallCRDdata)
## Study Trt Cow FCM
## 1 1 A 1 43.72
## 2 1 A 2 37.99
## 3 1 A 3 39.66
## 4 1 A 4 35.26
## 5 1 A 5 44.77
## 6 1 A 6 34.26
write_csv(overallCRDdata,"overallCRDdata.csv")
##########################################################################################################
######################## END OF SIMULATION ###############################################################
##########################################################################################################
# Analysis of raw data
#IPD (Individual Performance Data)
# using raw data for data analyses
# assuming equal residual variances for each study
library(emmeans) # library useful for computing treatment means and mean differences
emm_options(pbkrtest.limit = nrow(overallCRDdata)) # allows KR degrees of freedom for specified size of dataset (default = 3000)
# naive analysis that ignores study effects
# Common effects analysis (Ignoring Study Heterogeneity!)
fixed_analysis = lm(FCM~Trt,data=overallCRDdata) # Fit Trt as only factor
summary(fixed_analysis)
##
## Call:
## lm(formula = FCM ~ Trt, data = overallCRDdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.3170 -2.8095 -0.0632 2.7512 17.0930
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.8670 0.1201 323.5 <2e-16 ***
## TrtB 1.8525 0.1699 10.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.295 on 2554 degrees of freedom
## Multiple R-squared: 0.04448, Adjusted R-squared: 0.04411
## F-statistic: 118.9 on 1 and 2554 DF, p-value: < 2.2e-16
fixed_Trt= emmeans(fixed_analysis,"Trt")
(fixed_effects_means = summary(fixed_Trt))
## Trt emmean SE df lower.CL upper.CL
## A 38.9 0.12 2554 38.6 39.1
## B 40.7 0.12 2554 40.5 41.0
##
## Confidence level used: 0.95
(fixed_effects_diff = pairs(fixed_Trt))
## contrast estimate SE df t.ratio p.value
## A - B -1.85 0.17 2554 -10.904 <.0001
library(lme4) # mixed model procedure in R.
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
overall_analysis = lmer(FCM~Trt+(1|Study/Trt),data=overallCRDdata) # Fit Trt as Fixed, Study and Study*Trt as random
summary(overall_analysis)
## Linear mixed model fit by REML ['lmerMod']
## Formula: FCM ~ Trt + (1 | Study/Trt)
## Data: overallCRDdata
##
## REML criterion at convergence: 13268.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6967 -0.6534 0.0111 0.6612 3.2764
##
## Random effects:
## Groups Name Variance Std.Dev.
## Trt:Study (Intercept) 2.625 1.620
## Study (Intercept) 6.150 2.480
## Residual 9.832 3.136
## Number of obs: 2556, groups: Trt:Study, 50; Study, 25
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 38.807 0.599 64.783
## TrtB 1.923 0.475 4.048
##
## Correlation of Fixed Effects:
## (Intr)
## TrtB -0.396
lsmeansraw_Trt= emmeans(overall_analysis,"Trt")
(overall_meansraw = summary(lsmeansraw_Trt))
## Trt emmean SE df lower.CL upper.CL
## A 38.8 0.599 32.6 37.6 40.0
## B 40.7 0.599 32.6 39.5 41.9
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
(effectsize_raw = pairs(lsmeansraw_Trt))
## contrast estimate SE df t.ratio p.value
## A - B -1.92 0.475 24 -4.048 0.0005
##
## Degrees-of-freedom method: kenward-roger
# This is really not important but it might be need to see how the estimated (Predicted) study effects line up with the truth
# Estimated ("Predicted") Study Effects
BLUP_Studyeffects = coef(overall_analysis)$Study[,1]-mean(coef(overall_analysis)$Study[,1]) # need to subtract the mean to express relative to zero
trt = data.frame(Studyeffects,BLUP_Studyeffects)
# Plot Estimated ("Predicted") Study Effects vs True Study Effects
ggplot(data=trt,aes(x=Studyeffects ,y=BLUP_Studyeffects)) + geom_point() + geom_abline(intercept=0,slope=1) +
ggtitle("Predicted vs True Study Effects") +
xlab("True effects") +
ylab("BLUP")
## Study by Treatment effects (Estimated versus True)
# Let's do the same thing for the study by treatment effects
#Estimated study by treatment effects
BLUP_study_trt = coef(overall_analysis)$`Trt:Study`[,1]-mean(coef(overall_analysis)$`Trt:Study`[,1]) # need to subtract the mean to express relative to zero
BLUP_study_trt = data.frame(rownames(coef(overall_analysis)$`Trt:Study`),BLUP_study_trt)
colnames(BLUP_study_trt) = c("Study_Trtlabels","BLUP_study_trt")
study_trt = study_trt %>%
left_join(BLUP_study_trt,by=c("Study_Trtlabels"))
# Plot Estimated ("Predicted") Study by Treatment Effects vs True Study by Treatment Effects
ggplot(study_trt,aes(x=study_trt ,y=BLUP_study_trt)) + geom_point() + geom_abline(intercept=0,slope=1) +
ggtitle("Predicted vs True Study by Treatment Effects") +
xlab("True effects") +
ylab("BLUP") +
geom_text(x=-3, y=2, label="Notice shrinkage to zero by BLUP")
Quite honestly, one might really compare a meta-analysis on raw data
where heterogeneous residual and other variance components are modeled
across studies but this would best require a Bayesian analysis which is
beyond the scope of this workshop.
A frequentist approach to this often doesn’t converge. The following
frequentist analysis only considers heterogeneous residual variances
if(heterorun) {
library(nlme)
# modeling heterogeneous residual variances across Studies and including random effects of Study and Treatment by Study
## THIS MIGHT BE CONSIDERED THE REFERENCE ANALYSIS AGAINST A META-ANALYSIS ONLY USING SUMMARY STATISTICS IF INDIVIDUAL COW DATA WAS READILY AVAILABLE AND RESIDUAL VARIABILITY WAS TRULY HETEROGENEOUS ACROSS STUDIES
## HOWEVER THIS MIGHT NOT TYPICALLY CONVERGE IF INDIVIDUAL STUDIES ARE "SMALL"
### FURTHERMORE, IF OTHER VARIANCE COMPONENTS ARE TRULY HETEROGENEOUS ACROSS STUDIES, THEN THIS IS REALLY NOT THE RIGHT REFERENCE ANALYSIS
### WOULD NEED A BAYESIAN APPROACH TO MODEL HETEROGENEOUS VARIANCE COMPONENTS ACROSS STUDIES BUT BEYOND SCOPE OF THIS WORKSHOP.
hetero.lme <- lme(FCM ~ Trt , data=overallCRDdata, random = ~ 1|Study/Trt, weights=varIdent(form = ~ 1 | Study),method="REML") # this will not always converge
summary(hetero.lme)
lsmeansraw_Trt_hetero= emmeans(hetero.lme,"Trt")
(overall_meansraw_hetero = summary(lsmeansraw_Trt_hetero))
# doesn't seem to be a big difference between the two (hetero versus homogeneous residual variances)
(effectsize_raw_hetero = pairs(lsmeansraw_Trt_hetero))
effectsize_raw
}
##
## Attaching package: 'nlme'
## The following object is masked from 'package:lme4':
##
## lmList
## The following object is masked from 'package:dplyr':
##
## collapse
## contrast estimate SE df t.ratio p.value
## A - B -1.92 0.475 24 -4.048 0.0005
##
## Degrees-of-freedom method: kenward-roger
In reality the real gold standard would allow all variance components to be heterogeneous across studies or herds but that would require a solid Bayesian analysis (e.g. https://pubmed.ncbi.nlm.nih.gov/27865511/ or https://gsejournal.biomedcentral.com/articles/10.1186/1297-9686-37-1-31 ) which is beyond the scope of this course.
#Meta-analysis using summary statistics
Let’s create the meta-analysis data points (effect size, stderr) for each experiment
First need to create some functions
#need to create some functions first.
# need to create a separate function for model
separate_model = function(df) {
lm(FCM~Trt,data=df)
}
# function to save the mean trt difference for each Study
contrast = function(model) {
lsmeans_Trt= emmeans(model,"Trt")
effectsize = pairs(lsmeans_Trt)
effectsize = as.data.frame(summary(effectsize))
return(effectsize)
}
# function to save the trt means for each Study
lsmeans = function(model) {
lsmeans_Trt= emmeans(model,"Trt")
means = summary(lsmeans_Trt)
return(means)
}
This is what the analysis would look like for one study (Study #1)
Study1_analysis = lm(FCM~Trt,data=filter(overallCRDdata,Study==1))
joint_tests(Study1_analysis)
## model term df1 df2 F.ratio p.value
## Trt 1 118 50.637 <.0001
summary(Study1_analysis)
##
## Call:
## lm(formula = FCM ~ Trt, data = filter(overallCRDdata, Study ==
## 1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.2992 -2.3273 -0.3512 2.1658 7.6608
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.3392 0.4019 95.403 < 2e-16 ***
## TrtB 4.0442 0.5683 7.116 9.31e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.113 on 118 degrees of freedom
## Multiple R-squared: 0.3003, Adjusted R-squared: 0.2943
## F-statistic: 50.64 on 1 and 118 DF, p-value: 9.312e-11
lsmeans(Study1_analysis)
## Trt emmean SE df lower.CL upper.CL
## A 38.3 0.402 118 37.5 39.1
## B 42.4 0.402 118 41.6 43.2
##
## Confidence level used: 0.95
contrast(Study1_analysis)
## contrast estimate SE df t.ratio p.value
## 1 A - B -4.044167 0.5683209 118 -7.115991 9.311615e-11
Create the sample statistics for each trial
Study_specific_contrasts = overallCRDdata %>%
group_by(Study) %>%
nest() %>%
mutate(model = map(data,separate_model)) %>%
mutate(effectsize = map(model,contrast)) %>%
mutate(nrec = as.numeric(map(data,nrow))) %>%
unnest(effectsize) %>%
dplyr::select(-c(data,model)) %>%
mutate(weight = (1/SE)^2) %>% # What i use in the mixed model
mutate(sampvar = SE^2) # What the r package metafor needs
head(Study_specific_contrasts)
## # A tibble: 6 × 10
## # Groups: Study [6]
## Study contrast estimate SE df t.ratio p.value nrec weight sampvar
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 A - B -4.04 0.568 118 -7.12 9.31e-11 120 3.10 0.323
## 2 2 A - B -2.17 0.692 90 -3.14 2.32e- 3 92 2.09 0.479
## 3 3 A - B -3.77 0.715 82 -5.27 1.10e- 6 84 1.95 0.512
## 4 4 A - B 2.32 0.643 102 3.61 4.80e- 4 104 2.42 0.413
## 5 5 A - B -8.21 0.678 86 -12.1 2.75e-20 88 2.17 0.460
## 6 6 A - B 0.631 0.618 106 1.02 3.10e- 1 108 2.62 0.382
# this might not be too helpful to look at if there is a lot of differences in standard errors between studies.
boxplot(estimate~contrast,main="Box plot of study-specific treatment differences",data=Study_specific_contrasts)
# let's just consider the A-B contrast for now
Study_specific_contrastsAB = filter(Study_specific_contrasts,contrast=='A - B')
weighted.mean(Study_specific_contrastsAB$estimate,Study_specific_contrastsAB$weight)
## [1] -1.908415
(stderr = (sum(Study_specific_contrastsAB$weight))^(-1/2))
## [1] 0.1230142
# glmmTMB is used because it allows one to fix the residual variance to 1 (just a "trick" to allow proper weightings by 1/SE^2 which already reflect residual var!)
library(glmmTMB)
## Warning: package 'glmmTMB' was built under R version 4.2.2
# overall mean model
FM_1meta <- glmmTMB(estimate ~ 1 ,
weights = weight,
family = gaussian,
REML=TRUE,
data = Study_specific_contrastsAB,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(FM_1meta)
## Family: gaussian ( identity )
## Formula: estimate ~ 1
## Data: Study_specific_contrastsAB
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 477.0 478.3 -237.5 475.0 25
##
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.908 0.123 -15.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(metafor)
## Warning: package 'metafor' was built under R version 4.2.3
## Loading required package: metadat
## Warning: package 'metadat' was built under R version 4.2.3
## Loading required package: numDeriv
##
## Loading the 'metafor' package (version 4.2-0). For an
## introduction to the package please type: help(metafor)
(res_EE = rma.uni(yi=estimate,vi=sampvar,data=Study_specific_contrastsAB,method="EE")) # common effects
##
## Equal-Effects Model (k = 25)
##
## I^2 (total heterogeneity / total variability): 93.17%
## H^2 (total variability / sampling variability): 14.63
##
## Test for Heterogeneity:
## Q(df = 24) = 351.2288, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## -1.9084 0.1230 -15.5138 <.0001 -2.1495 -1.6673 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest(res_EE)
#Mixed Effects Models
Let’s look at properly modeling heterogeneity between studies.
library(glmmTMB)
# random effects model
RM_1meta <- glmmTMB(estimate ~ 1 + (1|Study),
weights = weight,
family = gaussian,
REML=TRUE,
data = Study_specific_contrastsAB,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(RM_1meta)
## Family: gaussian ( identity )
## Formula: estimate ~ 1 + (1 | Study)
## Data: Study_specific_contrastsAB
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 216.1 218.5 -106.1 212.1 24
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Study (Intercept) 5.25 2.291
## Residual 1.00 1.000
## Number of obs: 25, groups: Study, 25
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.9266 0.4752 -4.054 5.03e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vc = VarCorr(RM_1meta) # variance components
print(vc,comp=("Variance"))
##
## Conditional model:
## Groups Name Variance
## Study (Intercept) 5.2504
## Residual 1.0000
(res_REML = rma.uni(yi=estimate,vi=sampvar,data=Study_specific_contrastsAB,method="REML")) # default
##
## Random-Effects Model (k = 25; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 5.2504 (SE = 1.6295)
## tau (square root of estimated tau^2 value): 2.2914
## I^2 (total heterogeneity / total variability): 93.27%
## H^2 (total variability / sampling variability): 14.85
##
## Test for Heterogeneity:
## Q(df = 24) = 351.2288, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## -1.9266 0.4752 -4.0542 <.0001 -2.8579 -0.9952 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest(res_REML)
Compare the width of this interval to the one provided by the common
effects model
How to choose between the mixed effects and common effects model?? Two ways to do this.
# mixed model
anova(res_EE,res_REML) # likelihood ratio test to test for differences in heterogeneity.
##
## df AIC BIC AICc logLik LRT pval QE tau^2
## Full 2 113.6630 116.0191 114.2345 -54.8315 351.2288 5.2504
## Reduced 1 374.5877 375.7657 374.7695 -186.2938 262.9246 <.0001 351.2288 0.0000
## R^2
## Full
## Reduced 0.0000%
# metafor has it built in too
anova(FM_1meta,RM_1meta) # this works too.
## Data: Study_specific_contrastsAB
## Models:
## FM_1meta: estimate ~ 1, zi=~0, disp=~1
## RM_1meta: estimate ~ 1 + (1 | Study), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## FM_1meta 1 477.03 478.25 -237.52 475.03
## RM_1meta 2 216.11 218.55 -106.06 212.11 262.92 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
where confidence intervals are useful for assessing reliability of the meta-estimate for the corresponding parameter, the prediction interval indicates what is the plausible range of responses in future experiments.
This may be more pertinent for an “extension”-based message in terms of relaying to farmers,consultants, etc. what uncertainty they may anticipate with respect to their own operations.
# confirming the metafor CI on the effect size using the mixed model analysis.
Studyvar = c(((VarCorr(RM_1meta))["cond"]$cond$Study))
overallest = coef(summary(RM_1meta))$cond
cat("Confidence Interval",sep="\n")
## Confidence Interval
(LCL = overallest[1,"Estimate"]- 1.96*overallest[1,"Std. Error"])
## [1] -2.857952
(UCL = overallest[1,"Estimate"]+ 1.96*overallest[1,"Std. Error"])
## [1] -0.9951734
# Prediction intervals
# useful for determining uncertainty for a future experiment.
cat("Prediction Interval",sep="\n")
## Prediction Interval
(LPL = overallest[1,"Estimate"]- 1.96*sqrt(overallest[1,"Std. Error"]^2 + Studyvar))
## [1] -6.513221
(UPL = overallest[1,"Estimate"]+ 1.96*sqrt(overallest[1,"Std. Error"]^2 + Studyvar))
## [1] 2.660096
Let’s consider study-specific means rather than study-specific mean differences as the response variable.
# Suppose you have study specific means instead
# often referred to as "arm-based" (rather than contrast-based) analysis.
# We'll also save the Study_specific means in a different file.
# Their differences should correspond to the study-specific contrasts in the above file.
Study_specific_means = overallCRDdata %>%
group_by(Study) %>%
nest() %>%
mutate(model = map(data,separate_model)) %>%
mutate(lsmeans = map(model,lsmeans)) %>%
mutate(nrec = as.numeric(map(data,nrow))) %>%
unnest(lsmeans) %>%
dplyr::select(-c(data,model,lower.CL,upper.CL)) %>%
mutate(weights = (1/SE^2),sampvar = SE^2)
head(Study_specific_means)
## # A tibble: 6 × 8
## # Groups: Study [3]
## Study Trt emmean SE df nrec weights sampvar
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 A 38.3 0.402 118 120 6.19 0.161
## 2 1 B 42.4 0.402 118 120 6.19 0.161
## 3 2 A 35.2 0.489 90 92 4.18 0.239
## 4 2 B 37.4 0.489 90 92 4.18 0.239
## 5 3 A 33.7 0.506 82 84 3.91 0.256
## 6 3 B 37.5 0.506 82 84 3.91 0.256
boxplot(emmean~Trt,data=Study_specific_means,main="Study-specific treatment means")
# this is what many animal scientists seem to be doing.
RM_0arm <- glmmTMB(emmean ~ Trt + (1|Study),
weights = (1/SE^2),
family = gaussian,
REML=TRUE,
data = Study_specific_means,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(RM_0arm)
## Family: gaussian ( identity )
## Formula: emmean ~ Trt + (1 | Study)
## Data: Study_specific_means
## Weights: (1/SE^2)
##
## AIC BIC logLik deviance df.resid
## 977.7 983.4 -485.8 971.7 49
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Study (Intercept) 7.459 2.731
## Residual 1.000 1.000
## Number of obs: 50, groups: Study, 25
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 38.8148 0.5533 70.16 <2e-16 ***
## TrtB 1.9084 0.1230 15.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_R0_Trt= emmeans(RM_0arm,"Trt")
summary(lsmeans_R0_Trt)
## Trt emmean SE df lower.CL upper.CL
## A 38.8 0.553 49 37.7 39.9
## B 40.7 0.553 49 39.6 41.8
##
## Confidence level used: 0.95
(effectsize_R0_Trt = pairs(lsmeans_R0_Trt))
## contrast estimate SE df t.ratio p.value
## A - B -1.91 0.123 49 -15.514 <.0001
# this standard error is typically underreported...compare it to what we observed from analyzing the raw data!!
cat("Compare this to inferences from analyzing the raw data",sep="\n")
## Compare this to inferences from analyzing the raw data
effectsize_raw
## contrast estimate SE df t.ratio p.value
## A - B -1.92 0.475 24 -4.048 0.0005
##
## Degrees-of-freedom method: kenward-roger
One needs to model random inconsistency as well!!!!
# you need to model random inconsistency as well!
# (this is model R1 in Madden et al. (2016))
RM_1arm <- glmmTMB(emmean ~ Trt + (1|Study/Trt),
weights = (1/SE^2),
family = gaussian,
REML=TRUE,
data = Study_specific_means,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
# (this is also model R1 in Madden et al. (2016))
summary(RM_1arm)
## Family: gaussian ( identity )
## Formula: emmean ~ Trt + (1 | Study/Trt)
## Data: Study_specific_means
## Weights: (1/SE^2)
##
## AIC BIC logLik deviance df.resid
## 716.8 724.4 -354.4 708.8 48
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Trt:Study (Intercept) 2.625 1.620
## Study (Intercept) 6.146 2.479
## Residual 1.000 1.000
## Number of obs: 50, groups: Trt:Study, 50; Study, 25
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 38.8058 0.5990 64.79 < 2e-16 ***
## TrtB 1.9266 0.4752 4.05 5.03e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_R1_Trt= emmeans(RM_1arm,"Trt")
summary(lsmeans_R1_Trt)
## Trt emmean SE df lower.CL upper.CL
## A 38.8 0.599 48 37.6 40.0
## B 40.7 0.599 48 39.5 41.9
##
## Confidence level used: 0.95
(effectsize_R1_Trt = pairs(lsmeans_R1_Trt))
## contrast estimate SE df t.ratio p.value
## A - B -1.93 0.475 48 -4.054 0.0002
cat("Compare this to inferences from analyzing the raw data",sep="\n")
## Compare this to inferences from analyzing the raw data
effectsize_raw
## contrast estimate SE df t.ratio p.value
## A - B -1.92 0.475 24 -4.048 0.0005
##
## Degrees-of-freedom method: kenward-roger
Model R3 in Madden et al. (2016)
# The following is an alternative based on separate variances for each treatment
# Model R3 in Madden et al. 2016...specifies study variances to be different for each treatment (UN= unstructured)
RM_3arm <- glmmTMB(emmean ~ Trt + (Trt-1|Study),
weights = (1/SE^2),
family = gaussian,
REML=TRUE,
data = Study_specific_means,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(RM_3arm)
## Family: gaussian ( identity )
## Formula: emmean ~ Trt + (Trt - 1 | Study)
## Data: Study_specific_means
## Weights: (1/SE^2)
##
## AIC BIC logLik deviance df.resid
## 716.7 726.3 -353.4 706.7 47
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev. Corr
## Study TrtA 10.643 3.262
## TrtB 6.903 2.627 0.72
## Residual 1.000 1.000
## Number of obs: 50, groups: Study, 25
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 38.8052 0.6585 58.93 <2e-16 ***
## TrtB 1.9275 0.4753 4.06 5e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_R3_Trt= emmeans(RM_3arm,"Trt")
summary(lsmeans_R3_Trt)
## Trt emmean SE df lower.CL upper.CL
## A 38.8 0.659 47 37.5 40.1
## B 40.7 0.533 47 39.7 41.8
##
## Confidence level used: 0.95
(effectsize_R3_Trt = pairs(lsmeans_R3_Trt))
## contrast estimate SE df t.ratio p.value
## A - B -1.93 0.475 47 -4.056 0.0002
What too many people are doing right now! Estimating the residual variance and just treating study as a random effect
# The following is an alternative based on separate variances for each treatment
# Model R3 in Madden et al. 2016...specifies study variances to be different for each treatment (UN= unstructured)
RM_wrong<- glmmTMB(emmean ~ Trt + (1|Study),
weights = (1/SE^2),
family = gaussian,
REML=TRUE,
data = Study_specific_means
)
summary(RM_wrong)
## Family: gaussian ( identity )
## Formula: emmean ~ Trt + (1 | Study)
## Data: Study_specific_means
## Weights: (1/SE^2)
##
## AIC BIC logLik deviance df.resid
## 959.2 966.9 -475.6 951.2 48
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Study (Intercept) 7.414 2.723
## Residual 1.474 1.214
## Number of obs: 50, groups: Study, 25
##
## Dispersion estimate for gaussian family (sigma^2): 1.47
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 38.8152 0.5549 69.94 <2e-16 ***
## TrtB 1.9084 0.1493 12.78 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(RM_wrong,"Trt")
## Trt emmean SE df lower.CL upper.CL
## A 38.8 0.555 48 37.7 39.9
## B 40.7 0.555 48 39.6 41.8
##
## Confidence level used: 0.95
summary(emmeans(RM_wrong,"Trt"))
## Trt emmean SE df lower.CL upper.CL
## A 38.8 0.555 48 37.7 39.9
## B 40.7 0.555 48 39.6 41.8
##
## Confidence level used: 0.95
(effectsize_R3_Trt = pairs(emmeans(RM_wrong,"Trt")))
## contrast estimate SE df t.ratio p.value
## A - B -1.91 0.149 48 -12.780 <.0001
cat("Compare this to inferences from analyzing the raw data",sep="\n")
## Compare this to inferences from analyzing the raw data
effectsize_raw
## contrast estimate SE df t.ratio p.value
## A - B -1.92 0.475 24 -4.048 0.0005
##
## Degrees-of-freedom method: kenward-roger
Model M0: accounts for study heterogeneity but fails to model random inconsistency in treatment effects. Model M1: equal study variance for each treatment and equal covariance between any pair of treatments. Model M3: unequal study variances for each treatment and unequal covariances between any pair of treatments
#Need to use the multivariate option of metafor!!
library(metafor)
# incomplete accounting for heterogeneity -> does not model random inconsistency!
(res.mv0a <- rma.mv(emmean, sampvar, mods = ~ Trt , random = ~ 1|Study, data=Study_specific_means))
##
## Multivariate Meta-Analysis Model (k = 50; method: REML)
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 7.4590 2.7311 25 no Study
##
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
##
## Test of Moderators (coefficient 2):
## QM(df = 1) = 240.6771, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt 38.8148 0.5533 70.1557 <.0001 37.7304 39.8992 ***
## TrtB 1.9084 0.1230 15.5138 <.0001 1.6673 2.1495 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on contrasts
(res.mv0b <- rma.mv(emmean, sampvar, mods = ~ Trt -1 , random = ~ 1|Study, data=Study_specific_means))
##
## Multivariate Meta-Analysis Model (k = 50; method: REML)
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 7.4590 2.7311 25 no Study
##
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
##
## Test of Moderators (coefficients 1:2):
## QM(df = 2) = 5472.1248, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## TrtA 38.8148 0.5533 70.1557 <.0001 37.7304 39.8992 ***
## TrtB 40.7232 0.5533 73.6051 <.0001 39.6389 41.8076 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on means
# this is equivalent to modeling Study and Study*Trt as independent random effects (as in RM_means1)
#Model R1 in Madden et al. (2016) which is equivalent to compound symmetry
(res.mv1a <- rma.mv(emmean, sampvar, mods = ~ Trt , random = ~ Trt | Study, struct="CS", data=Study_specific_means))
##
## Multivariate Meta-Analysis Model (k = 50; method: REML)
##
## Variance Components:
##
## outer factor: Study (nlvls = 25)
## inner factor: Trt (nlvls = 2)
##
## estim sqrt fixed
## tau^2 8.7716 2.9617 no
## rho 0.7007 no
##
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
##
## Test of Moderators (coefficient 2):
## QM(df = 1) = 16.4368, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt 38.8058 0.5990 64.7866 <.0001 37.6318 39.9797 ***
## TrtB 1.9266 0.4752 4.0542 <.0001 0.9952 2.8579 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on contrasts
(res.mv1b <- rma.mv(emmean, sampvar, mods = ~ Trt -1 , random = ~ Trt | Study, struct="CS", data=Study_specific_means))
##
## Multivariate Meta-Analysis Model (k = 50; method: REML)
##
## Variance Components:
##
## outer factor: Study (nlvls = 25)
## inner factor: Trt (nlvls = 2)
##
## estim sqrt fixed
## tau^2 8.7716 2.9617 no
## rho 0.7007 no
##
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
##
## Test of Moderators (coefficients 1:2):
## QM(df = 2) = 5247.8844, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## TrtA 38.8058 0.5990 64.7866 <.0001 37.6318 39.9797 ***
## TrtB 40.7323 0.5990 68.0031 <.0001 39.5583 41.9063 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on means
#Model R2 in Madden et al. (2016) # Heterogeneous compound symmetry
(res.mv2a <- rma.mv(emmean, sampvar, mods = ~ Trt , random = ~ Trt | Study, struct="HCS", data=Study_specific_means))
##
## Multivariate Meta-Analysis Model (k = 50; method: REML)
##
## Variance Components:
##
## outer factor: Study (nlvls = 25)
## inner factor: Trt (nlvls = 2)
##
## estim sqrt k.lvl fixed level
## tau^2.1 10.6431 3.2624 25 no A
## tau^2.2 6.9028 2.6273 25 no B
## rho 0.7171 no
##
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
##
## Test of Moderators (coefficient 2):
## QM(df = 1) = 16.4480, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt 38.8052 0.6585 58.9288 <.0001 37.5146 40.0959 ***
## TrtB 1.9275 0.4753 4.0556 <.0001 0.9960 2.8591 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on contrasts
(res.mv2b <- rma.mv(emmean, sampvar, mods = ~ Trt -1 , random = ~ Trt | Study, struct="HCS", data=Study_specific_means))
##
## Multivariate Meta-Analysis Model (k = 50; method: REML)
##
## Variance Components:
##
## outer factor: Study (nlvls = 25)
## inner factor: Trt (nlvls = 2)
##
## estim sqrt k.lvl fixed level
## tau^2.1 10.6431 3.2624 25 no A
## tau^2.2 6.9028 2.6273 25 no B
## rho 0.7171 no
##
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
##
## Test of Moderators (coefficients 1:2):
## QM(df = 2) = 5898.5488, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## TrtA 38.8052 0.6585 58.9288 <.0001 37.5146 40.0959 ***
## TrtB 40.7328 0.5329 76.4313 <.0001 39.6882 41.7773 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on means
# Model R3 in Madden et al. (2016) # Unstructured
(res.mv3a <- rma.mv(emmean, sampvar, mods = ~ Trt , random = ~ Trt | Study, struct="UN", data=Study_specific_means))
##
## Multivariate Meta-Analysis Model (k = 50; method: REML)
##
## Variance Components:
##
## outer factor: Study (nlvls = 25)
## inner factor: Trt (nlvls = 2)
##
## estim sqrt k.lvl fixed level
## tau^2.1 10.6433 3.2624 25 no A
## tau^2.2 6.9029 2.6273 25 no B
##
## rho.A rho.B A B
## A 1 - 25
## B 0.7171 1 no -
##
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
##
## Test of Moderators (coefficient 2):
## QM(df = 1) = 16.4478, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## intrcpt 38.8052 0.6585 58.9283 <.0001 37.5146 40.0959 ***
## TrtB 1.9275 0.4753 4.0556 <.0001 0.9960 2.8591 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on contrasts
(res.mv4b <- rma.mv(emmean, sampvar, mods = ~ Trt -1 , random = ~ Trt | Study, struct="UN", data=Study_specific_means))
##
## Multivariate Meta-Analysis Model (k = 50; method: REML)
##
## Variance Components:
##
## outer factor: Study (nlvls = 25)
## inner factor: Trt (nlvls = 2)
##
## estim sqrt k.lvl fixed level
## tau^2.1 10.6433 3.2624 25 no A
## tau^2.2 6.9029 2.6273 25 no B
##
## rho.A rho.B A B
## A 1 - 25
## B 0.7171 1 no -
##
## Test for Residual Heterogeneity:
## QE(df = 48) = 2279.6105, p-val < .0001
##
## Test of Moderators (coefficients 1:2):
## QM(df = 2) = 5898.5114, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## TrtA 38.8052 0.6585 58.9283 <.0001 37.5146 40.0959 ***
## TrtB 40.7328 0.5329 76.4311 <.0001 39.6882 41.7773 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# for inferences on means
anova(RM_0arm,RM_1arm,RM_3arm) # using mixed model
## Data: Study_specific_means
## Models:
## RM_0arm: emmean ~ Trt + (1 | Study), zi=~0, disp=~1
## RM_1arm: emmean ~ Trt + (1 | Study/Trt), zi=~0, disp=~1
## RM_3arm: emmean ~ Trt + (Trt - 1 | Study), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## RM_0arm 3 977.69 983.42 -485.84 971.69
## RM_1arm 4 716.76 724.41 -354.38 708.76 262.9246 1 <2e-16 ***
## RM_3arm 5 716.71 726.27 -353.35 706.71 2.0556 1 0.1516
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(res.mv1a,res.mv3a)
##
## df AIC BIC AICc logLik LRT pval QE
## Full 5 234.2533 243.6093 235.6818 -112.1266 2279.6105
## Reduced 4 234.3089 241.7937 235.2391 -113.1545 2.0556 0.1516 2279.6105
For CRDs, SEM = SED / sqrt(2)
Study_specific_SEM = Study_specific_means %>%
filter(Trt =='A' | Trt=='B')%>%
group_by(Study) %>%
summarize(SEM=mean(SE))
Study_specific_SED = Study_specific_contrastsAB %>%
group_by(Study) %>%
summarize(SED=mean(SE))
SEM_SED_compare = Study_specific_SEM %>%
left_join(Study_specific_SED,by="Study") %>%
mutate(SEM_SED_ratio = SEM/SED)
head(SEM_SED_compare)
## # A tibble: 6 × 4
## Study SEM SED SEM_SED_ratio
## <fct> <dbl> <dbl> <dbl>
## 1 1 0.402 0.568 0.707
## 2 2 0.489 0.692 0.707
## 3 3 0.506 0.715 0.707
## 4 4 0.455 0.643 0.707
## 5 5 0.479 0.678 0.707
## 6 6 0.437 0.618 0.707