This document is used to highlight considerations for meta-analysis of Latin square/crossover designs

rm(list=ls())
library(tidyverse,ggplot2)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

set.seed(100)  # set seed for reproducibility.
nStudy = 50  # Number of studies
ntrt = 3    # Number of treatments 
ncows =sample(seq(ntrt*2,ntrt*10,ntrt),nStudy,replace=TRUE)  # simulated different number of replicated squares per Study

Simulation

Simulate a \(n_{trt}\) = 3 treatment Latin square design data from 50 different studies with sample sizes ranging from 6 to 30 cows

##### THE FOLLOWING JUST SIMULATES SOME DATA BASED ON THE FOLLOWING SPECIFICATIONS   #####
##########################################################################################################
######################## BEGINNING OF SIMULATION #########################################################
##########################################################################################################


sigmae = 2  # residual (within cow) variance
sigmacow = 8  # between-cow variance
sigmastudy = 5  # between-study variance
sigmastudytrt = 3  # study by trt interaction variance

sigmaperiod = 2 # variance between periods although typically considered as fixed.

overallmean = 30
trteffects = c(-1,0,+1)  # effects relative to overall mean
trtlabels = toupper(letters[1:ntrt])
if (length(trteffects) != ntrt) stop ("need to change the number of treatment effects to match ntrt")
periodeffects = rnorm(sd=sqrt(sigmaperiod),nStudy*ntrt)
StudyPeriod = split(periodeffects, ceiling(seq_along(periodeffects)/ntrt))

Studyeffects = rnorm(sd=sqrt(sigmastudy),nStudy)  # simulated study effects
studytrtint = vector(mode="list",length= nStudy)  # simulated study x treatment effects
for (Study in 1:nStudy){
  studytrtint[[Study]]=rnorm(sd = sqrt(sigmastudytrt),ntrt)
}


# for a balanced multiple Latin square design, ncows has to be a multiple of number of treatments

setuplatin =vector(mode="list",length=ntrt)
base = 1:ntrt
for(i in 1:ntrt){
  setuplatin[[i]]=(base+ntrt+1+i)%%ntrt+1
}  # setup for a single Latin square

Study_data <- vector(mode="list",length=nStudy)  # we'll first save the data in this list, separate study for each element
Study = 1

for (Study in 1:nStudy) {
  Trt_number = rep(unlist(setuplatin),ncows[Study]/ntrt)
  Effect_Trt = trteffects[Trt_number]
  Period = rep(1:3,ncows[Study])
  Effect_Period = StudyPeriod[[Study]][Period]
  Trt = toupper(letters[Trt_number]) # get trt label assignments for each record
  Cow = rep(1:ncows[Study],each=ntrt)  # and cow labels for each record
  Cow_effects = rnorm(sd=sqrt(sigmacow),ncows[Study])
  Effect_Cow = Cow_effects[Cow]
  Trt_Study_effects =  studytrtint[[Study]][Trt_number] # and the trt*study effects
  residuals = rnorm(sd = sqrt(sigmae),ntrt*ncows[Study]) # and finally the residuals
  FCM = round(overallmean + Effect_Trt + Effect_Period  + Effect_Cow + Studyeffects[Study] + Trt_Study_effects + residuals,2)  # generate the record and round to 2nd decimal
  Study_data[[Study]] = data.frame(Study,Trt,Cow,Period,FCM)  #store it.
}  

overallcrossoverdata = bind_rows(Study_data)  # the entire data set
cols = c("Study","Trt","Cow","Period")
overallcrossoverdata[cols] <- lapply(overallcrossoverdata [cols], factor)
str(overallcrossoverdata)

## 'data.frame':    2898 obs. of  5 variables:
##  $ Study : Factor w/ 50 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Trt   : Factor w/ 3 levels "A","B","C": 1 2 3 2 3 1 3 1 2 1 ...
##  $ Cow   : Factor w/ 30 levels "1","2","3","4",..: 1 1 1 2 2 2 3 3 3 4 ...
##  $ Period: Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3 1 ...
##  $ FCM   : num  25 26.2 28.1 31.6 32.8 ...

write_csv(overallcrossoverdata,"overallcrossoverdata.csv")

# save the true study by treatment effects for later.
study_trt = unlist(studytrtint)
Study_Trtlabels = expand.grid(trtlabels,1:nStudy)
Study_Trtlabels = paste0(Study_Trtlabels$Var1,':',Study_Trtlabels$Var2)
study_trt = data.frame(Study_Trtlabels,study_trt)

##########################################################################################################
######################## END OF SIMULATION ###############################################################
##########################################################################################################

Analysis based on individual cow records across experiments

library(lme4)  # mixed model procedure in R.

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

# NOTE THIS MODEL PERFECTLY MATCHES THE MODEL GENERATION PROCESS
## of course, we really never know the true model!
overallcrossover_analysis = lmer(FCM~Trt+ (1|Period:Study) + (1|Cow:Study) + (1|Study/Trt),data=overallcrossoverdata)  # Fit Trt as Fixed, Study and Study*Trt as random
# Cows and Periods are nested within Study.  If Period represents a physiological stage (i.e. all cows start at roughly the same DIM), then treat as fixed.
# Might also fit Treatment by Period then as well.
summary(overallcrossover_analysis)

## Linear mixed model fit by REML ['lmerMod']
## Formula: FCM ~ Trt + (1 | Period:Study) + (1 | Cow:Study) + (1 | Study/Trt)
##    Data: overallcrossoverdata
## 
## REML criterion at convergence: 13478.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.11679 -0.53659 -0.01668  0.52731  3.05294 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  Cow:Study    (Intercept) 7.872    2.806   
##  Trt:Study    (Intercept) 3.492    1.869   
##  Period:Study (Intercept) 1.957    1.399   
##  Study        (Intercept) 4.162    2.040   
##  Residual                 2.018    1.420   
## Number of obs: 2898, groups:  
## Cow:Study, 966; Trt:Study, 150; Period:Study, 150; Study, 50
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  29.7264     0.4222  70.401
## TrtB          1.2372     0.3804   3.253
## TrtC          1.9924     0.3804   5.238
## 
## Correlation of Fixed Effects:
##      (Intr) TrtB  
## TrtB -0.450       
## TrtC -0.450  0.500

# fairly precise estimates of variance components....if you were able to analyze all of the data jointly.

library(emmeans)
emm_options(pbkrtest.limit = nrow(overallcrossoverdata)) 
lsmeans_Trt= emmeans(overallcrossover_analysis,"Trt")  
# so if this is taking too long, just set the emm_options(pbkrtest.limit = 500) or some reasonably low number 
(overallcrossover_means = summary(lsmeans_Trt))

##  Trt emmean    SE df lower.CL upper.CL
##  A     29.7 0.422 86     28.9     30.6
##  B     31.0 0.422 86     30.1     31.8
##  C     31.7 0.422 86     30.9     32.6
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

(effectsize_IPD = pairs(lsmeans_Trt))  # overall effect sizes

##  contrast estimate   SE df t.ratio p.value
##  A - B      -1.237 0.38 98  -3.253  0.0044
##  A - C      -1.992 0.38 98  -5.238  <.0001
##  B - C      -0.755 0.38 98  -1.985  0.1212
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

The following is just for quality control checks on my simulation program

## THE FOLLOWING IS NOT REALLY IMPORTANT...JUST A QUALITY CONTROL CHECK
##Plot Estimated(Predicted) Study Effects versus True Study Effects
# Estimated ("Predicted") Study Effects
BLUP_Studyeffects = coef(overallcrossover_analysis)$Study[,1]-mean(coef(overallcrossover_analysis)$Study[,1])  # need to subtract the mean to express relative to zero
trt = data.frame(Studyeffects,BLUP_Studyeffects)
# Plot Estimated ("Predicted") Study Effects vs True Study Effects
ggplot(data=trt,aes(x=Studyeffects ,y=BLUP_Studyeffects)) + geom_point() + geom_abline(intercept=0,slope=1) +
  ggtitle("Predicted vs True Study Effects") +
  xlab("True effects") + 
  ylab("BLUP")

## THE FOLLOWING IS NOT REALLY IMPORTANT...JUST A QUALITY CONTROL CHECK
# Let's do the same thing for the study by treatment effects
#Estimated study by treatment effects 
BLUP_study_trt = coef(overallcrossover_analysis)$`Trt:Study`[,1]-mean(coef(overallcrossover_analysis)$`Trt:Study`[,1]) # need to subtract the mean to express relative to zero
BLUP_study_trt = data.frame(rownames(coef(overallcrossover_analysis)$`Trt:Study`),BLUP_study_trt)
colnames(BLUP_study_trt) = c("Study_Trtlabels","BLUP_study_trt")
study_trt = study_trt %>%
  left_join(BLUP_study_trt,by=c("Study_Trtlabels"))
# Plot Estimated ("Predicted") Study by Treatment Effects vs True Study by Treatment Effects
ggplot(study_trt,aes(x=study_trt ,y=BLUP_study_trt)) + geom_point() + geom_abline(intercept=0,slope=1) +
  ggtitle("Predicted vs True Study by Treatment Effects") +
  xlab("True effects") + 
  ylab("BLUP")

Meta-analysis

Write a few functions that will help compute the study-specific statistics.

# Let's conduct a separate mixed model analysis for each Study
# just like what we would anticipate for a meta-analysis
# Need to write some functions in order to do so

# need to create a separate function for model
# You could throw Trt*Period in there..maybe even consider Period as random
separate_model = function(df) {
  lmer(FCM~Trt+Period+(1|Cow),data=df)
}

# function to save the mean trt difference for each Study
contrast = function(model) {
  lsmeans_Trt= emmeans(model,"Trt")
  effectsize = pairs(lsmeans_Trt)
  effectsize = as.data.frame(summary(effectsize))
  return(effectsize)
}

# function to save the trt means for each Study
lsmeans = function(model) {
  lsmeans_Trt= emmeans(model,"Trt")
  means = summary(lsmeans_Trt)
  return(means)
}

#function to retrieve study-specific variance components
varcomps = function(model){
  varcomp = as.numeric(formatVC(VarCorr(model),comp="Variance")[,3])
  VClabel = (formatVC(VarCorr(model),comp="Variance")[,1])
  varcomp = data.frame(VClabel,varcomp) %>%
    pivot_wider(names_from=VClabel,values_from = varcomp)
  return(varcomp)
}

This is what it looks like for one study (Study 1)

Study1_analysis = lmer(FCM~Trt+Period+(1|Cow),data=filter(overallcrossoverdata,Study==1))
joint_tests(Study1_analysis)

##  model term df1 df2 F.ratio p.value
##  Trt          2  44 180.644  <.0001
##  Period       2  44  57.816  <.0001

summary(Study1_analysis)

## Linear mixed model fit by REML ['lmerMod']
## Formula: FCM ~ Trt + Period + (1 | Cow)
##    Data: filter(overallcrossoverdata, Study == 1)
## 
## REML criterion at convergence: 312.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.57964 -0.45057  0.05648  0.47164  1.50368 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cow      (Intercept) 12.048   3.471   
##  Residual              1.721   1.312   
## Number of obs: 72, groups:  Cow, 24
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  27.7299     0.7884   35.17
## TrtB          3.8296     0.3787   10.11
## TrtC          7.1933     0.3787   18.99
## Period2      -2.1171     0.3787   -5.59
## Period3      -4.0713     0.3787  -10.75
## 
## Correlation of Fixed Effects:
##         (Intr) TrtB   TrtC   Perid2
## TrtB    -0.240                     
## TrtC    -0.240  0.500              
## Period2 -0.240  0.000  0.000       
## Period3 -0.240  0.000  0.000  0.500

varcomps(Study1_analysis)

## # A tibble: 1 × 2
##     Cow Residual
##   <dbl>    <dbl>
## 1  12.0     1.72

lsmeans(Study1_analysis)

##  Trt emmean    SE   df lower.CL upper.CL
##  A     25.7 0.757 27.2     24.1     27.2
##  B     29.5 0.757 27.2     27.9     31.1
##  C     32.9 0.757 27.2     31.3     34.4
## 
## Results are averaged over the levels of: Period 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

contrast(Study1_analysis)

##   contrast  estimate        SE df    t.ratio      p.value
## 1    A - B -3.829583 0.3787095 44 -10.112191 2.265188e-12
## 2    A - C -7.193333 0.3787095 44 -18.994328 8.245626e-13
## 3    B - C -3.363750 0.3787095 44  -8.882137 6.863321e-11

# Let's save the contrasts (i.e. pairwise comparisons) from each study
Study_specific_contrasts = overallcrossoverdata %>%
  group_by(Study) %>%
  nest() %>%
  mutate(model = map(data,separate_model)) %>%
  mutate(effectsize = map(model,contrast)) %>%
  mutate(nrec = as.numeric(map(data,nrow))) %>% 
  unnest(effectsize)    %>%
  dplyr::select(-c(data,model)) %>%
  mutate(weight = (1/SE)^2) %>%  # What i use in the mixed model
  mutate(sampvar = SE^2)         # What the r package metafor needs

head(Study_specific_contrasts)

## # A tibble: 6 × 10
## # Groups:   Study [2]
##   Study contrast estimate    SE    df t.ratio  p.value  nrec weight sampvar
##   <fct> <chr>       <dbl> <dbl> <dbl>   <dbl>    <dbl> <dbl>  <dbl>   <dbl>
## 1 1     A - B       -3.83 0.379  44.0  -10.1  2.27e-12    72   6.97   0.143
## 2 1     A - C       -7.19 0.379  44.0  -19.0  8.25e-13    72   6.97   0.143
## 3 1     B - C       -3.36 0.379  44.0   -8.88 6.86e-11    72   6.97   0.143
## 4 2     A - B       -1.20 0.533  38.0   -2.25 7.59e- 2    63   3.52   0.284
## 5 2     A - C       -2.54 0.533  38.0   -4.77 7.92e- 5    63   3.52   0.284
## 6 2     B - C       -1.34 0.533  38.0   -2.52 4.14e- 2    63   3.52   0.284

Let’s focus on the A vs B contrast for now.

We’ll consider the common effects

(AB_contrast = filter(Study_specific_contrasts,contrast=="A - B"))

## # A tibble: 50 × 10
## # Groups:   Study [50]
##    Study contrast estimate    SE    df t.ratio  p.value  nrec weight sampvar
##    <fct> <chr>       <dbl> <dbl> <dbl>   <dbl>    <dbl> <dbl>  <dbl>   <dbl>
##  1 1     A - B      -3.83  0.379  44.0 -10.1   2.27e-12    72   6.97   0.143
##  2 2     A - B      -1.20  0.533  38.0  -2.25  7.59e- 2    63   3.52   0.284
##  3 3     A - B       3.68  0.497  20     7.41  1.07e- 6    36   4.05   0.247
##  4 4     A - B       0.181 0.361  56.0   0.501 8.71e- 1    90   7.66   0.131
##  5 5     A - B      -2.33  0.405  44.0  -5.77  2.19e- 6    72   6.10   0.164
##  6 6     A - B       5.21  0.334  38.0  15.6   0           63   8.95   0.112
##  7 7     A - B       0.928 0.392  38.0   2.36  5.91e- 2    63   6.49   0.154
##  8 8     A - B      -5.87  0.451  26.0 -13.0   2.01e-12    45   4.92   0.203
##  9 9     A - B      -0.569 0.391  44.0  -1.46  3.22e- 1    72   6.54   0.153
## 10 10    A - B      -2.04  0.342  38.0  -5.96  1.88e- 6    63   8.53   0.117
## # … with 40 more rows

# The following analysis is rather common (and typically wrong!!!)
# COMMON EFFECTS MODEL 

#CE1
weighted.mean(AB_contrast$estimate,AB_contrast$weight)

## [1] -1.218097

(stderr = (sum(AB_contrast$weight))^(-1/2))

## [1] 0.06154195

cat ("compare with analyses on individual data",sep="\n")

## compare with analyses on individual data

effectsize_IPD

##  contrast estimate   SE df t.ratio p.value
##  A - B      -1.237 0.38 98  -3.253  0.0044
##  A - C      -1.992 0.38 98  -5.238  <.0001
##  B - C      -0.755 0.38 98  -1.985  0.1212
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

# could go through the same exercise for the other comparisons as well.
#(AC_contrast = filter(Study_specific_contrasts,contrast=="A - C"))
#(BC_contrast = filter(Study_specific_contrasts,contrast=="B - C"))

#CE2
# # COMMON EFFECTS MODEL can also be conducted using a linear model
# glmmTMB is used because it allows one to fix the residual variance to 1 (just a "trick" to allow proper weightings by 1/SE^2 which already reflect residual var!)
library(glmmTMB)

## Warning: package 'glmmTMB' was built under R version 4.2.2

# overall mean model
FMAvsB_1 <- glmmTMB(estimate ~ 1 ,
                weights = weight,
                family = gaussian,
                REML=TRUE,
                data = AB_contrast,
                start = list(betad = log(1)),  # fix residual variance = 1
                map = list(betad = factor(NA))
)
summary(FMAvsB_1)

##  Family: gaussian  ( identity )
## Formula:          estimate ~ 1
## Data: AB_contrast
## Weights: weight
## 
##      AIC      BIC   logLik deviance df.resid 
##   2292.4   2294.4  -1145.2   2290.4       50 
## 
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.21810    0.06154  -19.79   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# again gives the same wrong answer as above
cat ("compare with analyses on individual data",sep="\n")

## compare with analyses on individual data

effectsize_IPD

##  contrast estimate   SE df t.ratio p.value
##  A - B      -1.237 0.38 98  -3.253  0.0044
##  A - C      -1.992 0.38 98  -5.238  <.0001
##  B - C      -0.755 0.38 98  -1.985  0.1212
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

#CE3
# Finally, onecan use metafor to get the same result:
library(metafor)

## Warning: package 'metafor' was built under R version 4.2.3

## Loading required package: metadat

## Warning: package 'metadat' was built under R version 4.2.3

## Loading required package: numDeriv

## 
## Loading the 'metafor' package (version 4.2-0). For an
## introduction to the package please type: help(metafor)

(res_EE_AvsB = rma.uni(yi=estimate,vi=sampvar,data=AB_contrast,method="EE"))  # fixed study effects

## 
## Equal-Effects Model (k = 50)
## 
## I^2 (total heterogeneity / total variability):   97.28%
## H^2 (total variability / sampling variability):  36.76
## 
## Test for Heterogeneity:
## Q(df = 49) = 1801.4421, p-val < .0001
## 
## Model Results:
## 
## estimate      se      zval    pval    ci.lb    ci.ub      
##  -1.2181  0.0615  -19.7929  <.0001  -1.3387  -1.0975  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

forest(res_EE_AvsB)

# an analysis that properly reflects heterogeneity between studies.
library(glmmTMB)
# random effects model
RM_1AvsB_1 <- glmmTMB(estimate ~ 1 + (1|Study),  # Study really involves both Study and Study*Treatment 
                weights = weight,
                family = gaussian,
                REML=TRUE,
                data = AB_contrast,
                start = list(betad = log(1)),  # fix residual variance = 1
                map = list(betad = factor(NA))
)
summary(RM_1AvsB_1)

##  Family: gaussian  ( identity )
## Formula:          estimate ~ 1 + (1 | Study)
## Data: AB_contrast
## Weights: weight
## 
##      AIC      BIC   logLik deviance df.resid 
##    709.8    713.6   -352.9    705.8       49 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  Study    (Intercept) 6.39     2.528   
##  Residual             1.00     1.000   
## Number of obs: 50, groups:  Study, 50
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.2373     0.3643  -3.396 0.000684 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat ("compare with analyses on individual data",sep="\n")

## compare with analyses on individual data

effectsize_IPD

##  contrast estimate   SE df t.ratio p.value
##  A - B      -1.237 0.38 98  -3.253  0.0044
##  A - C      -1.992 0.38 98  -5.238  <.0001
##  B - C      -0.755 0.38 98  -1.985  0.1212
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

# compare this to the analysis using the raw data...point estimate is ok; standard error is a little off.

#using metafor
(res_REMLAvsB_1 = rma.uni(yi=estimate,vi=sampvar,data=AB_contrast,method="REML"))  # fixed study effects

## 
## Random-Effects Model (k = 50; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 6.3902 (SE = 1.3406)
## tau (square root of estimated tau^2 value):      2.5279
## I^2 (total heterogeneity / total variability):   97.11%
## H^2 (total variability / sampling variability):  34.58
## 
## Test for Heterogeneity:
## Q(df = 49) = 1801.4421, p-val < .0001
## 
## Model Results:
## 
## estimate      se     zval    pval    ci.lb    ci.ub      
##  -1.2373  0.3643  -3.3961  0.0007  -1.9514  -0.5232  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# reminder as to what the corresponding estimate/se was from analyzing the raw data
cat ("compare with analyses on individual data",sep="\n")

## compare with analyses on individual data

effectsize_IPD

##  contrast estimate   SE df t.ratio p.value
##  A - B      -1.237 0.38 98  -3.253  0.0044
##  A - C      -1.992 0.38 98  -5.238  <.0001
##  B - C      -0.755 0.38 98  -1.985  0.1212
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

Multivariate analysis using all contrast information

RM_mvcontrast <- glmmTMB(estimate ~ contrast -1 + (1|Study/contrast),   # models the effect of study and study*contrast
                 weights = weight,
                 family = gaussian,
                 REML=TRUE,
                 data = Study_specific_contrasts,
                 start = list(betad = log(1)),  # fix residual variance = 1
                 map = list(betad = factor(NA))
)
summary(RM_mvcontrast)

##  Family: gaussian  ( identity )
## Formula:          estimate ~ contrast - 1 + (1 | Study/contrast)
## Data: Study_specific_contrasts
## Weights: weight
## 
##      AIC      BIC   logLik deviance df.resid 
##   2137.2   2152.3  -1063.6   2127.2      148 
## 
## Random effects:
## 
## Conditional model:
##  Groups         Name        Variance Std.Dev.
##  contrast:Study (Intercept) 5.950    2.439   
##  Study          (Intercept) 1.045    1.022   
##  Residual                   1.000    1.000   
## Number of obs: 150, groups:  contrast:Study, 150; Study, 50
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##               Estimate Std. Error z value Pr(>|z|)    
## contrastA - B  -1.2384     0.3806  -3.254  0.00114 ** 
## contrastA - C  -1.9896     0.3806  -5.228 1.72e-07 ***
## contrastB - C  -0.7522     0.3806  -1.976  0.04811 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat ("compare with analyses on individual data .  Almost matches perfectly!!",sep="\n")

## compare with analyses on individual data .  Almost matches perfectly!!

effectsize_IPD

##  contrast estimate   SE df t.ratio p.value
##  A - B      -1.237 0.38 98  -3.253  0.0044
##  A - C      -1.992 0.38 98  -5.238  <.0001
##  B - C      -0.755 0.38 98  -1.985  0.1212
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

foundationcontrast <- glmmTMB(estimate ~ contrast -1 + (1|Study/contrast),   # models the effect of study and study*contrast
                 weights = weight,
                 family = gaussian,
                 REML=TRUE,
                 data = filter(Study_specific_contrasts,contrast != 'A - B'),
                 start = list(betad = log(1)),  # fix residual variance = 1
                 map = list(betad = factor(NA))
)
summary(foundationcontrast)

##  Family: gaussian  ( identity )
## Formula:          estimate ~ contrast - 1 + (1 | Study/contrast)
## Data: filter(Study_specific_contrasts, contrast != "A - B")
## Weights: weight
## 
##      AIC      BIC   logLik deviance df.resid 
##   1413.7   1424.1   -702.8   1405.7       98 
## 
## Random effects:
## 
## Conditional model:
##  Groups         Name        Variance Std.Dev.
##  contrast:Study (Intercept) 3.088    1.757   
##  Study          (Intercept) 4.208    2.051   
##  Residual                   1.000    1.000   
## Number of obs: 100, groups:  contrast:Study, 100; Study, 50
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##               Estimate Std. Error z value Pr(>|z|)    
## contrastA - C  -1.9914     0.3884  -5.128 2.94e-07 ***
## contrastB - C  -0.7531     0.3884  -1.939   0.0525 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

lsmeans_foundationcontrast= emmeans(foundationcontrast,"contrast")
(summary(lsmeans_foundationcontrast))

##  contrast emmean    SE df lower.CL upper.CL
##  A - C    -1.991 0.388 98    -2.76  -1.2207
##  B - C    -0.753 0.388 98    -1.52   0.0176
## 
## Confidence level used: 0.95

pairs(lsmeans_foundationcontrast)

##  contrast          estimate    SE df t.ratio p.value
##  (A - C) - (B - C)    -1.24 0.365 98  -3.392  0.0010

cat ("compare with analyses on individual data",sep="\n")

## compare with analyses on individual data

effectsize_IPD

##  contrast estimate   SE df t.ratio p.value
##  A - B      -1.237 0.38 98  -3.253  0.0044
##  A - C      -1.992 0.38 98  -5.238  <.0001
##  B - C      -0.755 0.38 98  -1.985  0.1212
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

Analysis based on Study specific means (Arm based analysis)

(instead of contrasts)

A suboptimal analysis: The use of regular standard errors on means would not be advisable in this case!

#lets save the study specific means

Study_specific_means = overallcrossoverdata %>%
  group_by(Study) %>%
  nest() %>%
  mutate(model = map(data,separate_model)) %>%
  mutate(lsmeans = map(model,lsmeans)) %>%
  mutate(nrec = as.numeric(map(data,nrow))) %>% 
  unnest(lsmeans)    %>%
  dplyr::select(-c(data,model)) %>%
  mutate(weight = (1/SE)^2) %>%     # these are not quite right!!
  mutate(sampvar = SE^2)          # these are not quite right!!
# but let's try them anyways

head(Study_specific_means)

## # A tibble: 6 × 10
## # Groups:   Study [2]
##   Study Trt   emmean    SE    df lower.CL upper.CL  nrec weight sampvar
##   <fct> <fct>  <dbl> <dbl> <dbl>    <dbl>    <dbl> <dbl>  <dbl>   <dbl>
## 1 1     A       25.7 0.757  27.3     24.1     27.2    72   1.74   0.574
## 2 1     B       29.5 0.757  27.3     27.9     31.1    72   1.74   0.574
## 3 1     C       32.9 0.757  27.3     31.3     34.4    72   1.74   0.574
## 4 2     A       29.1 0.911  25.3     27.3     31.0    63   1.21   0.829
## 5 2     B       30.3 0.911  25.3     28.5     32.2    63   1.21   0.829
## 6 2     C       31.7 0.911  25.3     29.8     33.5    63   1.21   0.829

Let’s use the suboptimal weights anyways

RM_mv_subopt <- glmmTMB(emmean ~ Trt   + (1|Study/Trt),  # 
                 weights = weight,
                 family = gaussian,
                 REML=TRUE,
                 data = Study_specific_means,
                 start = list(betad = log(1)),  # fix residual variance = 1
                 map = list(betad = factor(NA))
)
summary(RM_mv_subopt)

##  Family: gaussian  ( identity )
## Formula:          emmean ~ Trt + (1 | Study/Trt)
## Data: Study_specific_means
## Weights: weight
## 
##      AIC      BIC   logLik deviance df.resid 
##   1140.4   1155.4   -565.2   1130.4      148 
## 
## Random effects:
## 
## Conditional model:
##  Groups    Name        Variance Std.Dev.
##  Trt:Study (Intercept) 3.068    1.752   
##  Study     (Intercept) 5.258    2.293   
##  Residual              1.000    1.000   
## Number of obs: 150, groups:  Trt:Study, 150; Study, 50
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  29.7155     0.4211   70.56  < 2e-16 ***
## TrtB          1.2509     0.3798    3.29 0.000989 ***
## TrtC          1.9979     0.3798    5.26 1.43e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

lsmeans_RM_mv_subopt= emmeans(RM_mv_subopt,"Trt")
(summary(lsmeans_RM_mv_subopt))

##  Trt emmean    SE  df lower.CL upper.CL
##  A     29.7 0.421 148     28.9     30.5
##  B     31.0 0.421 148     30.1     31.8
##  C     31.7 0.421 148     30.9     32.5
## 
## Confidence level used: 0.95

pairs(lsmeans_RM_mv_subopt)

##  contrast estimate   SE  df t.ratio p.value
##  A - B      -1.251 0.38 148  -3.294  0.0035
##  A - C      -1.998 0.38 148  -5.261  <.0001
##  B - C      -0.747 0.38 148  -1.967  0.1240
## 
## P value adjustment: tukey method for comparing a family of 3 estimates

# not too far off from what we had with raw data analysis (?)
effectsize_IPD

##  contrast estimate   SE df t.ratio p.value
##  A - B      -1.237 0.38 98  -3.253  0.0044
##  A - C      -1.992 0.38 98  -5.238  <.0001
##  B - C      -0.755 0.38 98  -1.985  0.1212
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

Let’s look at study-specific variance components

Study_specific_var = overallcrossoverdata %>%
  group_by(Study) %>%
  nest() %>%
  mutate(model = map(data,separate_model)) %>%
  mutate(varcomp = map(model,varcomps))   %>%
  unnest(varcomp) %>%
  dplyr::select(-c(data,model)) 

boxplot(Study_specific_var[,-1] )

#Let's look at the relationship between SEM, SED, and the variance components

SE_compare =  Study_specific_means %>%
    group_by(Study) %>%
    summarize(SEM = mean(SE))  %>%
    full_join(distinct(Study_specific_contrasts,SE),by="Study") %>%
    group_by(Study,SEM) %>%
    summarize(SED=mean(SE)) %>%
    full_join(Study_specific_var,by="Study") %>%
    mutate(SEM_altered = SED/sqrt(2)) %>%
    mutate(SEM_ratio = SEM/SEM_altered) %>%
    mutate(sd_ratio = sqrt((Cow+Residual)/Residual)) %>%
    mutate(weight_altered = (1/SEM_altered)^2) %>%   
    mutate(sampvar_altered = SEM_altered^2)    %>%
    mutate(weight_old = (1/SEM)^2) %>%  
    mutate(sampvar_old= SEM^2)

## `summarise()` has grouped output by 'Study'. You can override using the
## `.groups` argument.

head(SE_compare)

## # A tibble: 6 × 12
## # Groups:   Study [6]
##   Study   SEM   SED   Cow Residual SEM_altered SEM_ratio sd_ratio weight_altered
##   <fct> <dbl> <dbl> <dbl>    <dbl>       <dbl>     <dbl>    <dbl>          <dbl>
## 1 1     0.757 0.379 12.0      1.72       0.268      2.83     2.83          13.9 
## 2 2     0.911 0.533 14.4      2.99       0.377      2.41     2.41           7.03
## 3 3     0.985 0.497 10.2      1.48       0.351      2.80     2.80           8.10
## 4 4     0.537 0.361  6.69     1.96       0.255      2.10     2.10          15.3 
## 5 5     0.611 0.405  7.01     1.97       0.286      2.14     2.14          12.2 
## 6 6     0.782 0.334 11.7      1.17       0.236      3.31     3.31          17.9 
## # … with 3 more variables: sampvar_altered <dbl>, weight_old <dbl>,
## #   sampvar_old <dbl>

Lesson: ratio of SEM to SED/sqrt(2) is same as sqrt(cow+residual/residual VC)

ggplot(SE_compare, aes(x=weight_old, y=weight_altered)) + 
  geom_point()

SED_basic  = SE_compare %>%
  dplyr::select(c(Study,SEM,SED))
Study_specific_means2= Study_specific_means%>%
  full_join(SED_basic,by="Study")%>%
  mutate(SEM_altered = SED/sqrt(2)) %>%
  mutate(weight_altered = (1/SEM_altered)^2) %>%  
  mutate(sampvar_altered = SEM_altered^2)  

RM_mv2arm <- glmmTMB(emmean ~ Trt   + (1|Study/Trt),   
                 weights = weight_altered,
                 family = gaussian,
                 REML=TRUE,
                 data = Study_specific_means2,
                 start = list(betad = log(1)),  # fix residual variance = 1
                 map = list(betad = factor(NA))
)
summary(RM_mv2arm)

##  Family: gaussian  ( identity )
## Formula:          emmean ~ Trt + (1 | Study/Trt)
## Data: Study_specific_means2
## Weights: weight_altered
## 
##      AIC      BIC   logLik deviance df.resid 
##   3680.4   3695.5  -1835.2   3670.4      148 
## 
## Random effects:
## 
## Conditional model:
##  Groups    Name        Variance Std.Dev.
##  Trt:Study (Intercept) 3.498    1.870   
##  Study     (Intercept) 5.279    2.298   
##  Residual              1.000    1.000   
## Number of obs: 150, groups:  Trt:Study, 150; Study, 50
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  29.7210     0.4219   70.44  < 2e-16 ***
## TrtB          1.2372     0.3806    3.25  0.00115 ** 
## TrtC          1.9892     0.3806    5.23 1.73e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

lsmeans_Trt_mv2arm= emmeans(RM_mv2arm,"Trt")
( summary(lsmeans_Trt_mv2arm))

##  Trt emmean    SE  df lower.CL upper.CL
##  A     29.7 0.422 148     28.9     30.6
##  B     31.0 0.422 148     30.1     31.8
##  C     31.7 0.422 148     30.9     32.5
## 
## Confidence level used: 0.95

pairs(lsmeans_Trt_mv2arm)

##  contrast estimate    SE  df t.ratio p.value
##  A - B      -1.237 0.381 148  -3.251  0.0041
##  A - C      -1.989 0.381 148  -5.226  <.0001
##  B - C      -0.752 0.381 148  -1.976  0.1218
## 
## P value adjustment: tukey method for comparing a family of 3 estimates

cat ("compare with analyses on individual data",sep="\n")

## compare with analyses on individual data

effectsize_IPD

##  contrast estimate   SE df t.ratio p.value
##  A - B      -1.237 0.38 98  -3.253  0.0044
##  A - C      -1.992 0.38 98  -5.238  <.0001
##  B - C      -0.755 0.38 98  -1.985  0.1212
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

A network meta-analysis

Use same simulated datasets as above but this time just randomly choose two of three treatments from each study

trtchoose = vector(mode="list",length= nStudy)  # 

for (Study in 1:nStudy) {
  #trtchoose[[Study]] = toupper(letters[sample(1:ntrt,(ntrt-1))])
  trtchoose[[Study]] = sample(1:ntrt,(ntrt-1))
  }
Trt=unlist(trtchoose)
Study = rep(1:nStudy,each=(ntrt-1))
IB_design = data.frame(Study,Trt) %>%
  mutate(Trt = toupper(letters[Trt])) %>%
  mutate_all(as.factor)

xtabs(~Study+Trt,data=IB_design)

##      Trt
## Study A B C
##    1  0 1 1
##    2  0 1 1
##    3  1 0 1
##    4  0 1 1
##    5  1 1 0
##    6  1 0 1
##    7  1 0 1
##    8  1 1 0
##    9  1 0 1
##    10 1 1 0
##    11 0 1 1
##    12 1 0 1
##    13 1 0 1
##    14 0 1 1
##    15 0 1 1
##    16 1 0 1
##    17 0 1 1
##    18 0 1 1
##    19 1 0 1
##    20 1 0 1
##    21 0 1 1
##    22 0 1 1
##    23 1 1 0
##    24 1 1 0
##    25 1 1 0
##    26 0 1 1
##    27 0 1 1
##    28 0 1 1
##    29 1 0 1
##    30 1 1 0
##    31 1 0 1
##    32 0 1 1
##    33 0 1 1
##    34 1 1 0
##    35 0 1 1
##    36 0 1 1
##    37 1 0 1
##    38 1 1 0
##    39 0 1 1
##    40 1 0 1
##    41 1 1 0
##    42 1 1 0
##    43 0 1 1
##    44 1 0 1
##    45 1 0 1
##    46 1 0 1
##    47 1 1 0
##    48 1 1 0
##    49 1 1 0
##    50 0 1 1

IB_Data = IB_design %>%
  left_join(overallcrossoverdata,by=c("Study","Trt"))

analysis of IB data

# NOTE THIS MODEL PERFECTLY MATCHES THE MODEL GENERATION PROCESS
IB_analysis = lmer(FCM~Trt+ (1|Period:Study) + (1|Cow:Study) + (1|Study/Trt),data=IB_Data)  # Fit Trt as Fixed, Study and Study*Trt as random
# Cows and Periods are nested within Study.  If Period represents a physiological stage (i.e. all cows start at roughly the same DIM), then treat as fixed.
# Might also fit Treatment by Period then as well.
summary(IB_analysis)

## Linear mixed model fit by REML ['lmerMod']
## Formula: FCM ~ Trt + (1 | Period:Study) + (1 | Cow:Study) + (1 | Study/Trt)
##    Data: IB_Data
## 
## REML criterion at convergence: 9462.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.56280 -0.47020 -0.02223  0.47362  2.48991 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  Cow:Study    (Intercept) 7.928    2.816   
##  Period:Study (Intercept) 1.873    1.369   
##  Trt:Study    (Intercept) 3.399    1.844   
##  Study        (Intercept) 3.050    1.746   
##  Residual                 2.015    1.419   
## Number of obs: 1932, groups:  
## Cow:Study, 966; Period:Study, 150; Trt:Study, 100; Study, 50
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  29.9518     0.4712  63.567
## TrtB          1.4368     0.5257   2.733
## TrtC          1.7271     0.5130   3.367
## 
## Correlation of Fixed Effects:
##      (Intr) TrtB  
## TrtB -0.600       
## TrtC -0.607  0.561

lsmeans_TrtIB= emmeans(IB_analysis,"Trt")  
# so if this is taking too long, just set the emm_options(pbkrtest.limit = 500) or some reasonably low number 
(IB_analysis_means = summary(lsmeans_TrtIB))

##  Trt emmean    SE   df lower.CL upper.CL
##  A     30.0 0.473 95.0     29.0     30.9
##  B     31.4 0.450 91.7     30.5     32.3
##  C     31.7 0.439 88.7     30.8     32.6
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

(effectsize_IB = pairs(lsmeans_TrtIB))  # overall effect sizes

##  contrast estimate    SE   df t.ratio p.value
##  A - B       -1.44 0.530 59.7  -2.712  0.0234
##  A - C       -1.73 0.517 57.9  -3.343  0.0041
##  B - C       -0.29 0.490 55.5  -0.593  0.8245
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

Study specific contrasts

Study_specific_contrasts_IB = IB_Data %>%
  group_by(Study) %>%
  nest() %>%
  mutate(model = map(data,separate_model)) %>%
  mutate(effectsize = map(model,contrast)) %>%
  mutate(nrec = as.numeric(map(data,nrow))) %>% 
  unnest(effectsize)    %>%
  dplyr::select(-c(data,model)) %>%
  mutate(weight = (1/SE)^2) %>%  # What i use in the mixed model
  mutate(sampvar = SE^2) %>%         # What the r package metafor needs 
  mutate(contrast = factor(contrast))

Naively only considering studies with A-B contrasts

Study_specific_contrasts_IB_AB = filter(Study_specific_contrasts_IB,contrast == 'A - B')
 
 # random effects model
RM_1_IB <- glmmTMB(estimate ~ 1 + (1|Study),   
                weights = (1/SE)^2,
                family = gaussian,
                REML=TRUE,
                data = Study_specific_contrasts_IB_AB,
                start = list(betad = log(1)),  # fix residual variance = 1
                map = list(betad = factor(NA))
)
summary(RM_1_IB)

##  Family: gaussian  ( identity )
## Formula:          estimate ~ 1 + (1 | Study)
## Data: Study_specific_contrasts_IB_AB
## Weights: (1/SE)^2
## 
##      AIC      BIC   logLik deviance df.resid 
##    157.0    158.3    -76.5    153.0       13 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  Study    (Intercept) 5.005    2.237   
##  Residual             1.000    1.000   
## Number of obs: 14, groups:  Study, 14
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  -1.7025     0.6175  -2.757  0.00583 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## compare with analysis from using raw data

effectsize_IB

##  contrast estimate    SE   df t.ratio p.value
##  A - B       -1.44 0.530 59.7  -2.712  0.0234
##  A - C       -1.73 0.517 57.9  -3.343  0.0041
##  B - C       -0.29 0.490 55.5  -0.593  0.8245
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

can also use study-specific means to do this

Study_specific_means_IB = IB_Data %>%
  group_by(Study) %>%
  nest() %>%
  mutate(model = map(data,separate_model)) %>%
  mutate(lsmeans = map(model,lsmeans)) %>%
  mutate(nrec = map(data,nrow)) %>% 
  unnest(lsmeans)    %>%
  dplyr::select(-c(data,model))

SEDuse = Study_specific_contrasts_IB %>%
  group_by(Study) %>%
  summarize(SED = mean(SE)) %>%
  mutate(SEM_alter = SED/sqrt(2)) %>%
  mutate(weight_alter = (1/SEM_alter^2)) %>%
  mutate(sampvar_alter = SEM_alter^2)

Study_specific_means_IB = Study_specific_means_IB %>%
  left_join(SEDuse,by="Study")

RM_arm_IB <- glmmTMB(emmean ~ Trt   + (1|Study/Trt),   
                 weights = weight_alter,
                 family = gaussian,
                 REML=TRUE,
                 data = Study_specific_means_IB,
                 start = list(betad = log(1)),  # fix residual variance = 1
                 map = list(betad = factor(NA))
)
summary(RM_arm_IB)

##  Family: gaussian  ( identity )
## Formula:          emmean ~ Trt + (1 | Study/Trt)
## Data: Study_specific_means_IB
## Weights: weight_alter
## 
##      AIC      BIC   logLik deviance df.resid 
##   2608.5   2621.5  -1299.3   2598.5       98 
## 
## Random effects:
## 
## Conditional model:
##  Groups    Name        Variance Std.Dev.
##  Trt:Study (Intercept) 3.355    1.832   
##  Study     (Intercept) 4.164    2.041   
##  Residual              1.000    1.000   
## Number of obs: 100, groups:  Trt:Study, 100; Study, 50
## 
## Dispersion estimate for gaussian family (sigma^2):    1 
## 
## Conditional model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  31.3737     0.4470   70.18  < 2e-16 ***
## TrtC          0.2934     0.4840    0.61  0.54449    
## TrtA         -1.4131     0.5246   -2.69  0.00707 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

lsmeans_Trt_RM_arm_IB= emmeans(RM_arm_IB,"Trt")
( summary(lsmeans_Trt_RM_arm_IB))

##  Trt emmean    SE df lower.CL upper.CL
##  B     31.4 0.447 98     30.5     32.3
##  C     31.7 0.438 98     30.8     32.5
##  A     30.0 0.470 98     29.0     30.9
## 
## Confidence level used: 0.95

pairs(lsmeans_Trt_RM_arm_IB)

##  contrast estimate    SE df t.ratio p.value
##  B - C      -0.293 0.484 98  -0.606  0.8172
##  B - A       1.413 0.525 98   2.694  0.0224
##  C - A       1.706 0.513 98   3.325  0.0035
## 
## P value adjustment: tukey method for comparing a family of 3 estimates

# compare with analyses using raw data
effectsize_IB

##  contrast estimate    SE   df t.ratio p.value
##  A - B       -1.44 0.530 59.7  -2.712  0.0234
##  A - C       -1.73 0.517 57.9  -3.343  0.0041
##  B - C       -0.29 0.490 55.5  -0.593  0.8245
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

Meta analysis for Latin Square Designs

National Animal Nutrition Program Meta-Analysis Workshop

Robert J. Tempelman

2023-06-25