rm(list=ls())
library(tidyverse,ggplot2)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
set.seed(100) # set seed for reproducibility.
nStudy = 50 # Number of studies
ntrt = 3 # Number of treatments
ncows =sample(seq(ntrt*2,ntrt*10,ntrt),nStudy,replace=TRUE) # simulated different number of replicated squares per Study
Simulate a \(n_{trt}\) = 3 treatment Latin square design data from 50 different studies with sample sizes ranging from 6 to 30 cows
##### THE FOLLOWING JUST SIMULATES SOME DATA BASED ON THE FOLLOWING SPECIFICATIONS #####
##########################################################################################################
######################## BEGINNING OF SIMULATION #########################################################
##########################################################################################################
sigmae = 2 # residual (within cow) variance
sigmacow = 8 # between-cow variance
sigmastudy = 5 # between-study variance
sigmastudytrt = 3 # study by trt interaction variance
sigmaperiod = 2 # variance between periods although typically considered as fixed.
overallmean = 30
trteffects = c(-1,0,+1) # effects relative to overall mean
trtlabels = toupper(letters[1:ntrt])
if (length(trteffects) != ntrt) stop ("need to change the number of treatment effects to match ntrt")
periodeffects = rnorm(sd=sqrt(sigmaperiod),nStudy*ntrt)
StudyPeriod = split(periodeffects, ceiling(seq_along(periodeffects)/ntrt))
Studyeffects = rnorm(sd=sqrt(sigmastudy),nStudy) # simulated study effects
studytrtint = vector(mode="list",length= nStudy) # simulated study x treatment effects
for (Study in 1:nStudy){
studytrtint[[Study]]=rnorm(sd = sqrt(sigmastudytrt),ntrt)
}
# for a balanced multiple Latin square design, ncows has to be a multiple of number of treatments
setuplatin =vector(mode="list",length=ntrt)
base = 1:ntrt
for(i in 1:ntrt){
setuplatin[[i]]=(base+ntrt+1+i)%%ntrt+1
} # setup for a single Latin square
Study_data <- vector(mode="list",length=nStudy) # we'll first save the data in this list, separate study for each element
Study = 1
for (Study in 1:nStudy) {
Trt_number = rep(unlist(setuplatin),ncows[Study]/ntrt)
Effect_Trt = trteffects[Trt_number]
Period = rep(1:3,ncows[Study])
Effect_Period = StudyPeriod[[Study]][Period]
Trt = toupper(letters[Trt_number]) # get trt label assignments for each record
Cow = rep(1:ncows[Study],each=ntrt) # and cow labels for each record
Cow_effects = rnorm(sd=sqrt(sigmacow),ncows[Study])
Effect_Cow = Cow_effects[Cow]
Trt_Study_effects = studytrtint[[Study]][Trt_number] # and the trt*study effects
residuals = rnorm(sd = sqrt(sigmae),ntrt*ncows[Study]) # and finally the residuals
FCM = round(overallmean + Effect_Trt + Effect_Period + Effect_Cow + Studyeffects[Study] + Trt_Study_effects + residuals,2) # generate the record and round to 2nd decimal
Study_data[[Study]] = data.frame(Study,Trt,Cow,Period,FCM) #store it.
}
overallcrossoverdata = bind_rows(Study_data) # the entire data set
cols = c("Study","Trt","Cow","Period")
overallcrossoverdata[cols] <- lapply(overallcrossoverdata [cols], factor)
str(overallcrossoverdata)
## 'data.frame': 2898 obs. of 5 variables:
## $ Study : Factor w/ 50 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Trt : Factor w/ 3 levels "A","B","C": 1 2 3 2 3 1 3 1 2 1 ...
## $ Cow : Factor w/ 30 levels "1","2","3","4",..: 1 1 1 2 2 2 3 3 3 4 ...
## $ Period: Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3 1 ...
## $ FCM : num 25 26.2 28.1 31.6 32.8 ...
write_csv(overallcrossoverdata,"overallcrossoverdata.csv")
# save the true study by treatment effects for later.
study_trt = unlist(studytrtint)
Study_Trtlabels = expand.grid(trtlabels,1:nStudy)
Study_Trtlabels = paste0(Study_Trtlabels$Var1,':',Study_Trtlabels$Var2)
study_trt = data.frame(Study_Trtlabels,study_trt)
##########################################################################################################
######################## END OF SIMULATION ###############################################################
##########################################################################################################
library(lme4) # mixed model procedure in R.
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
# NOTE THIS MODEL PERFECTLY MATCHES THE MODEL GENERATION PROCESS
## of course, we really never know the true model!
overallcrossover_analysis = lmer(FCM~Trt+ (1|Period:Study) + (1|Cow:Study) + (1|Study/Trt),data=overallcrossoverdata) # Fit Trt as Fixed, Study and Study*Trt as random
# Cows and Periods are nested within Study. If Period represents a physiological stage (i.e. all cows start at roughly the same DIM), then treat as fixed.
# Might also fit Treatment by Period then as well.
summary(overallcrossover_analysis)
## Linear mixed model fit by REML ['lmerMod']
## Formula: FCM ~ Trt + (1 | Period:Study) + (1 | Cow:Study) + (1 | Study/Trt)
## Data: overallcrossoverdata
##
## REML criterion at convergence: 13478.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.11679 -0.53659 -0.01668 0.52731 3.05294
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cow:Study (Intercept) 7.872 2.806
## Trt:Study (Intercept) 3.492 1.869
## Period:Study (Intercept) 1.957 1.399
## Study (Intercept) 4.162 2.040
## Residual 2.018 1.420
## Number of obs: 2898, groups:
## Cow:Study, 966; Trt:Study, 150; Period:Study, 150; Study, 50
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 29.7264 0.4222 70.401
## TrtB 1.2372 0.3804 3.253
## TrtC 1.9924 0.3804 5.238
##
## Correlation of Fixed Effects:
## (Intr) TrtB
## TrtB -0.450
## TrtC -0.450 0.500
# fairly precise estimates of variance components....if you were able to analyze all of the data jointly.
library(emmeans)
emm_options(pbkrtest.limit = nrow(overallcrossoverdata))
lsmeans_Trt= emmeans(overallcrossover_analysis,"Trt")
# so if this is taking too long, just set the emm_options(pbkrtest.limit = 500) or some reasonably low number
(overallcrossover_means = summary(lsmeans_Trt))
## Trt emmean SE df lower.CL upper.CL
## A 29.7 0.422 86 28.9 30.6
## B 31.0 0.422 86 30.1 31.8
## C 31.7 0.422 86 30.9 32.6
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
(effectsize_IPD = pairs(lsmeans_Trt)) # overall effect sizes
## contrast estimate SE df t.ratio p.value
## A - B -1.237 0.38 98 -3.253 0.0044
## A - C -1.992 0.38 98 -5.238 <.0001
## B - C -0.755 0.38 98 -1.985 0.1212
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
The following is just for quality control checks on my simulation program
## THE FOLLOWING IS NOT REALLY IMPORTANT...JUST A QUALITY CONTROL CHECK
##Plot Estimated(Predicted) Study Effects versus True Study Effects
# Estimated ("Predicted") Study Effects
BLUP_Studyeffects = coef(overallcrossover_analysis)$Study[,1]-mean(coef(overallcrossover_analysis)$Study[,1]) # need to subtract the mean to express relative to zero
trt = data.frame(Studyeffects,BLUP_Studyeffects)
# Plot Estimated ("Predicted") Study Effects vs True Study Effects
ggplot(data=trt,aes(x=Studyeffects ,y=BLUP_Studyeffects)) + geom_point() + geom_abline(intercept=0,slope=1) +
ggtitle("Predicted vs True Study Effects") +
xlab("True effects") +
ylab("BLUP")
## THE FOLLOWING IS NOT REALLY IMPORTANT...JUST A QUALITY CONTROL CHECK
# Let's do the same thing for the study by treatment effects
#Estimated study by treatment effects
BLUP_study_trt = coef(overallcrossover_analysis)$`Trt:Study`[,1]-mean(coef(overallcrossover_analysis)$`Trt:Study`[,1]) # need to subtract the mean to express relative to zero
BLUP_study_trt = data.frame(rownames(coef(overallcrossover_analysis)$`Trt:Study`),BLUP_study_trt)
colnames(BLUP_study_trt) = c("Study_Trtlabels","BLUP_study_trt")
study_trt = study_trt %>%
left_join(BLUP_study_trt,by=c("Study_Trtlabels"))
# Plot Estimated ("Predicted") Study by Treatment Effects vs True Study by Treatment Effects
ggplot(study_trt,aes(x=study_trt ,y=BLUP_study_trt)) + geom_point() + geom_abline(intercept=0,slope=1) +
ggtitle("Predicted vs True Study by Treatment Effects") +
xlab("True effects") +
ylab("BLUP")
Write a few functions that will help compute the study-specific statistics.
# Let's conduct a separate mixed model analysis for each Study
# just like what we would anticipate for a meta-analysis
# Need to write some functions in order to do so
# need to create a separate function for model
# You could throw Trt*Period in there..maybe even consider Period as random
separate_model = function(df) {
lmer(FCM~Trt+Period+(1|Cow),data=df)
}
# function to save the mean trt difference for each Study
contrast = function(model) {
lsmeans_Trt= emmeans(model,"Trt")
effectsize = pairs(lsmeans_Trt)
effectsize = as.data.frame(summary(effectsize))
return(effectsize)
}
# function to save the trt means for each Study
lsmeans = function(model) {
lsmeans_Trt= emmeans(model,"Trt")
means = summary(lsmeans_Trt)
return(means)
}
#function to retrieve study-specific variance components
varcomps = function(model){
varcomp = as.numeric(formatVC(VarCorr(model),comp="Variance")[,3])
VClabel = (formatVC(VarCorr(model),comp="Variance")[,1])
varcomp = data.frame(VClabel,varcomp) %>%
pivot_wider(names_from=VClabel,values_from = varcomp)
return(varcomp)
}
This is what it looks like for one study (Study 1)
Study1_analysis = lmer(FCM~Trt+Period+(1|Cow),data=filter(overallcrossoverdata,Study==1))
joint_tests(Study1_analysis)
## model term df1 df2 F.ratio p.value
## Trt 2 44 180.644 <.0001
## Period 2 44 57.816 <.0001
summary(Study1_analysis)
## Linear mixed model fit by REML ['lmerMod']
## Formula: FCM ~ Trt + Period + (1 | Cow)
## Data: filter(overallcrossoverdata, Study == 1)
##
## REML criterion at convergence: 312.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.57964 -0.45057 0.05648 0.47164 1.50368
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cow (Intercept) 12.048 3.471
## Residual 1.721 1.312
## Number of obs: 72, groups: Cow, 24
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 27.7299 0.7884 35.17
## TrtB 3.8296 0.3787 10.11
## TrtC 7.1933 0.3787 18.99
## Period2 -2.1171 0.3787 -5.59
## Period3 -4.0713 0.3787 -10.75
##
## Correlation of Fixed Effects:
## (Intr) TrtB TrtC Perid2
## TrtB -0.240
## TrtC -0.240 0.500
## Period2 -0.240 0.000 0.000
## Period3 -0.240 0.000 0.000 0.500
varcomps(Study1_analysis)
## # A tibble: 1 × 2
## Cow Residual
## <dbl> <dbl>
## 1 12.0 1.72
lsmeans(Study1_analysis)
## Trt emmean SE df lower.CL upper.CL
## A 25.7 0.757 27.2 24.1 27.2
## B 29.5 0.757 27.2 27.9 31.1
## C 32.9 0.757 27.2 31.3 34.4
##
## Results are averaged over the levels of: Period
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
contrast(Study1_analysis)
## contrast estimate SE df t.ratio p.value
## 1 A - B -3.829583 0.3787095 44 -10.112191 2.265188e-12
## 2 A - C -7.193333 0.3787095 44 -18.994328 8.245626e-13
## 3 B - C -3.363750 0.3787095 44 -8.882137 6.863321e-11
# Let's save the contrasts (i.e. pairwise comparisons) from each study
Study_specific_contrasts = overallcrossoverdata %>%
group_by(Study) %>%
nest() %>%
mutate(model = map(data,separate_model)) %>%
mutate(effectsize = map(model,contrast)) %>%
mutate(nrec = as.numeric(map(data,nrow))) %>%
unnest(effectsize) %>%
dplyr::select(-c(data,model)) %>%
mutate(weight = (1/SE)^2) %>% # What i use in the mixed model
mutate(sampvar = SE^2) # What the r package metafor needs
head(Study_specific_contrasts)
## # A tibble: 6 × 10
## # Groups: Study [2]
## Study contrast estimate SE df t.ratio p.value nrec weight sampvar
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 A - B -3.83 0.379 44.0 -10.1 2.27e-12 72 6.97 0.143
## 2 1 A - C -7.19 0.379 44.0 -19.0 8.25e-13 72 6.97 0.143
## 3 1 B - C -3.36 0.379 44.0 -8.88 6.86e-11 72 6.97 0.143
## 4 2 A - B -1.20 0.533 38.0 -2.25 7.59e- 2 63 3.52 0.284
## 5 2 A - C -2.54 0.533 38.0 -4.77 7.92e- 5 63 3.52 0.284
## 6 2 B - C -1.34 0.533 38.0 -2.52 4.14e- 2 63 3.52 0.284
Let’s focus on the A vs B contrast for now.
We’ll consider the common effects
(AB_contrast = filter(Study_specific_contrasts,contrast=="A - B"))
## # A tibble: 50 × 10
## # Groups: Study [50]
## Study contrast estimate SE df t.ratio p.value nrec weight sampvar
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 A - B -3.83 0.379 44.0 -10.1 2.27e-12 72 6.97 0.143
## 2 2 A - B -1.20 0.533 38.0 -2.25 7.59e- 2 63 3.52 0.284
## 3 3 A - B 3.68 0.497 20 7.41 1.07e- 6 36 4.05 0.247
## 4 4 A - B 0.181 0.361 56.0 0.501 8.71e- 1 90 7.66 0.131
## 5 5 A - B -2.33 0.405 44.0 -5.77 2.19e- 6 72 6.10 0.164
## 6 6 A - B 5.21 0.334 38.0 15.6 0 63 8.95 0.112
## 7 7 A - B 0.928 0.392 38.0 2.36 5.91e- 2 63 6.49 0.154
## 8 8 A - B -5.87 0.451 26.0 -13.0 2.01e-12 45 4.92 0.203
## 9 9 A - B -0.569 0.391 44.0 -1.46 3.22e- 1 72 6.54 0.153
## 10 10 A - B -2.04 0.342 38.0 -5.96 1.88e- 6 63 8.53 0.117
## # … with 40 more rows
# The following analysis is rather common (and typically wrong!!!)
# COMMON EFFECTS MODEL
#CE1
weighted.mean(AB_contrast$estimate,AB_contrast$weight)
## [1] -1.218097
(stderr = (sum(AB_contrast$weight))^(-1/2))
## [1] 0.06154195
cat ("compare with analyses on individual data",sep="\n")
## compare with analyses on individual data
effectsize_IPD
## contrast estimate SE df t.ratio p.value
## A - B -1.237 0.38 98 -3.253 0.0044
## A - C -1.992 0.38 98 -5.238 <.0001
## B - C -0.755 0.38 98 -1.985 0.1212
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
# could go through the same exercise for the other comparisons as well.
#(AC_contrast = filter(Study_specific_contrasts,contrast=="A - C"))
#(BC_contrast = filter(Study_specific_contrasts,contrast=="B - C"))
#CE2
# # COMMON EFFECTS MODEL can also be conducted using a linear model
# glmmTMB is used because it allows one to fix the residual variance to 1 (just a "trick" to allow proper weightings by 1/SE^2 which already reflect residual var!)
library(glmmTMB)
## Warning: package 'glmmTMB' was built under R version 4.2.2
# overall mean model
FMAvsB_1 <- glmmTMB(estimate ~ 1 ,
weights = weight,
family = gaussian,
REML=TRUE,
data = AB_contrast,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(FMAvsB_1)
## Family: gaussian ( identity )
## Formula: estimate ~ 1
## Data: AB_contrast
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 2292.4 2294.4 -1145.2 2290.4 50
##
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.21810 0.06154 -19.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# again gives the same wrong answer as above
cat ("compare with analyses on individual data",sep="\n")
## compare with analyses on individual data
effectsize_IPD
## contrast estimate SE df t.ratio p.value
## A - B -1.237 0.38 98 -3.253 0.0044
## A - C -1.992 0.38 98 -5.238 <.0001
## B - C -0.755 0.38 98 -1.985 0.1212
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
#CE3
# Finally, onecan use metafor to get the same result:
library(metafor)
## Warning: package 'metafor' was built under R version 4.2.3
## Loading required package: metadat
## Warning: package 'metadat' was built under R version 4.2.3
## Loading required package: numDeriv
##
## Loading the 'metafor' package (version 4.2-0). For an
## introduction to the package please type: help(metafor)
(res_EE_AvsB = rma.uni(yi=estimate,vi=sampvar,data=AB_contrast,method="EE")) # fixed study effects
##
## Equal-Effects Model (k = 50)
##
## I^2 (total heterogeneity / total variability): 97.28%
## H^2 (total variability / sampling variability): 36.76
##
## Test for Heterogeneity:
## Q(df = 49) = 1801.4421, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## -1.2181 0.0615 -19.7929 <.0001 -1.3387 -1.0975 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest(res_EE_AvsB)
# an analysis that properly reflects heterogeneity between studies.
library(glmmTMB)
# random effects model
RM_1AvsB_1 <- glmmTMB(estimate ~ 1 + (1|Study), # Study really involves both Study and Study*Treatment
weights = weight,
family = gaussian,
REML=TRUE,
data = AB_contrast,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(RM_1AvsB_1)
## Family: gaussian ( identity )
## Formula: estimate ~ 1 + (1 | Study)
## Data: AB_contrast
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 709.8 713.6 -352.9 705.8 49
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Study (Intercept) 6.39 2.528
## Residual 1.00 1.000
## Number of obs: 50, groups: Study, 50
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2373 0.3643 -3.396 0.000684 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat ("compare with analyses on individual data",sep="\n")
## compare with analyses on individual data
effectsize_IPD
## contrast estimate SE df t.ratio p.value
## A - B -1.237 0.38 98 -3.253 0.0044
## A - C -1.992 0.38 98 -5.238 <.0001
## B - C -0.755 0.38 98 -1.985 0.1212
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
# compare this to the analysis using the raw data...point estimate is ok; standard error is a little off.
#using metafor
(res_REMLAvsB_1 = rma.uni(yi=estimate,vi=sampvar,data=AB_contrast,method="REML")) # fixed study effects
##
## Random-Effects Model (k = 50; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 6.3902 (SE = 1.3406)
## tau (square root of estimated tau^2 value): 2.5279
## I^2 (total heterogeneity / total variability): 97.11%
## H^2 (total variability / sampling variability): 34.58
##
## Test for Heterogeneity:
## Q(df = 49) = 1801.4421, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## -1.2373 0.3643 -3.3961 0.0007 -1.9514 -0.5232 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# reminder as to what the corresponding estimate/se was from analyzing the raw data
cat ("compare with analyses on individual data",sep="\n")
## compare with analyses on individual data
effectsize_IPD
## contrast estimate SE df t.ratio p.value
## A - B -1.237 0.38 98 -3.253 0.0044
## A - C -1.992 0.38 98 -5.238 <.0001
## B - C -0.755 0.38 98 -1.985 0.1212
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
RM_mvcontrast <- glmmTMB(estimate ~ contrast -1 + (1|Study/contrast), # models the effect of study and study*contrast
weights = weight,
family = gaussian,
REML=TRUE,
data = Study_specific_contrasts,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(RM_mvcontrast)
## Family: gaussian ( identity )
## Formula: estimate ~ contrast - 1 + (1 | Study/contrast)
## Data: Study_specific_contrasts
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 2137.2 2152.3 -1063.6 2127.2 148
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## contrast:Study (Intercept) 5.950 2.439
## Study (Intercept) 1.045 1.022
## Residual 1.000 1.000
## Number of obs: 150, groups: contrast:Study, 150; Study, 50
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## contrastA - B -1.2384 0.3806 -3.254 0.00114 **
## contrastA - C -1.9896 0.3806 -5.228 1.72e-07 ***
## contrastB - C -0.7522 0.3806 -1.976 0.04811 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat ("compare with analyses on individual data . Almost matches perfectly!!",sep="\n")
## compare with analyses on individual data . Almost matches perfectly!!
effectsize_IPD
## contrast estimate SE df t.ratio p.value
## A - B -1.237 0.38 98 -3.253 0.0044
## A - C -1.992 0.38 98 -5.238 <.0001
## B - C -0.755 0.38 98 -1.985 0.1212
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
foundationcontrast <- glmmTMB(estimate ~ contrast -1 + (1|Study/contrast), # models the effect of study and study*contrast
weights = weight,
family = gaussian,
REML=TRUE,
data = filter(Study_specific_contrasts,contrast != 'A - B'),
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(foundationcontrast)
## Family: gaussian ( identity )
## Formula: estimate ~ contrast - 1 + (1 | Study/contrast)
## Data: filter(Study_specific_contrasts, contrast != "A - B")
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 1413.7 1424.1 -702.8 1405.7 98
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## contrast:Study (Intercept) 3.088 1.757
## Study (Intercept) 4.208 2.051
## Residual 1.000 1.000
## Number of obs: 100, groups: contrast:Study, 100; Study, 50
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## contrastA - C -1.9914 0.3884 -5.128 2.94e-07 ***
## contrastB - C -0.7531 0.3884 -1.939 0.0525 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_foundationcontrast= emmeans(foundationcontrast,"contrast")
(summary(lsmeans_foundationcontrast))
## contrast emmean SE df lower.CL upper.CL
## A - C -1.991 0.388 98 -2.76 -1.2207
## B - C -0.753 0.388 98 -1.52 0.0176
##
## Confidence level used: 0.95
pairs(lsmeans_foundationcontrast)
## contrast estimate SE df t.ratio p.value
## (A - C) - (B - C) -1.24 0.365 98 -3.392 0.0010
cat ("compare with analyses on individual data",sep="\n")
## compare with analyses on individual data
effectsize_IPD
## contrast estimate SE df t.ratio p.value
## A - B -1.237 0.38 98 -3.253 0.0044
## A - C -1.992 0.38 98 -5.238 <.0001
## B - C -0.755 0.38 98 -1.985 0.1212
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
(instead of contrasts)
A suboptimal analysis: The use of regular standard errors on means would not be advisable in this case!
#lets save the study specific means
Study_specific_means = overallcrossoverdata %>%
group_by(Study) %>%
nest() %>%
mutate(model = map(data,separate_model)) %>%
mutate(lsmeans = map(model,lsmeans)) %>%
mutate(nrec = as.numeric(map(data,nrow))) %>%
unnest(lsmeans) %>%
dplyr::select(-c(data,model)) %>%
mutate(weight = (1/SE)^2) %>% # these are not quite right!!
mutate(sampvar = SE^2) # these are not quite right!!
# but let's try them anyways
head(Study_specific_means)
## # A tibble: 6 × 10
## # Groups: Study [2]
## Study Trt emmean SE df lower.CL upper.CL nrec weight sampvar
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 A 25.7 0.757 27.3 24.1 27.2 72 1.74 0.574
## 2 1 B 29.5 0.757 27.3 27.9 31.1 72 1.74 0.574
## 3 1 C 32.9 0.757 27.3 31.3 34.4 72 1.74 0.574
## 4 2 A 29.1 0.911 25.3 27.3 31.0 63 1.21 0.829
## 5 2 B 30.3 0.911 25.3 28.5 32.2 63 1.21 0.829
## 6 2 C 31.7 0.911 25.3 29.8 33.5 63 1.21 0.829
Let’s use the suboptimal weights anyways
RM_mv_subopt <- glmmTMB(emmean ~ Trt + (1|Study/Trt), #
weights = weight,
family = gaussian,
REML=TRUE,
data = Study_specific_means,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(RM_mv_subopt)
## Family: gaussian ( identity )
## Formula: emmean ~ Trt + (1 | Study/Trt)
## Data: Study_specific_means
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 1140.4 1155.4 -565.2 1130.4 148
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Trt:Study (Intercept) 3.068 1.752
## Study (Intercept) 5.258 2.293
## Residual 1.000 1.000
## Number of obs: 150, groups: Trt:Study, 150; Study, 50
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 29.7155 0.4211 70.56 < 2e-16 ***
## TrtB 1.2509 0.3798 3.29 0.000989 ***
## TrtC 1.9979 0.3798 5.26 1.43e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_RM_mv_subopt= emmeans(RM_mv_subopt,"Trt")
(summary(lsmeans_RM_mv_subopt))
## Trt emmean SE df lower.CL upper.CL
## A 29.7 0.421 148 28.9 30.5
## B 31.0 0.421 148 30.1 31.8
## C 31.7 0.421 148 30.9 32.5
##
## Confidence level used: 0.95
pairs(lsmeans_RM_mv_subopt)
## contrast estimate SE df t.ratio p.value
## A - B -1.251 0.38 148 -3.294 0.0035
## A - C -1.998 0.38 148 -5.261 <.0001
## B - C -0.747 0.38 148 -1.967 0.1240
##
## P value adjustment: tukey method for comparing a family of 3 estimates
# not too far off from what we had with raw data analysis (?)
effectsize_IPD
## contrast estimate SE df t.ratio p.value
## A - B -1.237 0.38 98 -3.253 0.0044
## A - C -1.992 0.38 98 -5.238 <.0001
## B - C -0.755 0.38 98 -1.985 0.1212
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
Study_specific_var = overallcrossoverdata %>%
group_by(Study) %>%
nest() %>%
mutate(model = map(data,separate_model)) %>%
mutate(varcomp = map(model,varcomps)) %>%
unnest(varcomp) %>%
dplyr::select(-c(data,model))
boxplot(Study_specific_var[,-1] )
#Let's look at the relationship between SEM, SED, and the variance components
SE_compare = Study_specific_means %>%
group_by(Study) %>%
summarize(SEM = mean(SE)) %>%
full_join(distinct(Study_specific_contrasts,SE),by="Study") %>%
group_by(Study,SEM) %>%
summarize(SED=mean(SE)) %>%
full_join(Study_specific_var,by="Study") %>%
mutate(SEM_altered = SED/sqrt(2)) %>%
mutate(SEM_ratio = SEM/SEM_altered) %>%
mutate(sd_ratio = sqrt((Cow+Residual)/Residual)) %>%
mutate(weight_altered = (1/SEM_altered)^2) %>%
mutate(sampvar_altered = SEM_altered^2) %>%
mutate(weight_old = (1/SEM)^2) %>%
mutate(sampvar_old= SEM^2)
## `summarise()` has grouped output by 'Study'. You can override using the
## `.groups` argument.
head(SE_compare)
## # A tibble: 6 × 12
## # Groups: Study [6]
## Study SEM SED Cow Residual SEM_altered SEM_ratio sd_ratio weight_altered
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.757 0.379 12.0 1.72 0.268 2.83 2.83 13.9
## 2 2 0.911 0.533 14.4 2.99 0.377 2.41 2.41 7.03
## 3 3 0.985 0.497 10.2 1.48 0.351 2.80 2.80 8.10
## 4 4 0.537 0.361 6.69 1.96 0.255 2.10 2.10 15.3
## 5 5 0.611 0.405 7.01 1.97 0.286 2.14 2.14 12.2
## 6 6 0.782 0.334 11.7 1.17 0.236 3.31 3.31 17.9
## # … with 3 more variables: sampvar_altered <dbl>, weight_old <dbl>,
## # sampvar_old <dbl>
Lesson: ratio of SEM to SED/sqrt(2) is same as sqrt(cow+residual/residual VC)
ggplot(SE_compare, aes(x=weight_old, y=weight_altered)) +
geom_point()
SED_basic = SE_compare %>%
dplyr::select(c(Study,SEM,SED))
Study_specific_means2= Study_specific_means%>%
full_join(SED_basic,by="Study")%>%
mutate(SEM_altered = SED/sqrt(2)) %>%
mutate(weight_altered = (1/SEM_altered)^2) %>%
mutate(sampvar_altered = SEM_altered^2)
RM_mv2arm <- glmmTMB(emmean ~ Trt + (1|Study/Trt),
weights = weight_altered,
family = gaussian,
REML=TRUE,
data = Study_specific_means2,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(RM_mv2arm)
## Family: gaussian ( identity )
## Formula: emmean ~ Trt + (1 | Study/Trt)
## Data: Study_specific_means2
## Weights: weight_altered
##
## AIC BIC logLik deviance df.resid
## 3680.4 3695.5 -1835.2 3670.4 148
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Trt:Study (Intercept) 3.498 1.870
## Study (Intercept) 5.279 2.298
## Residual 1.000 1.000
## Number of obs: 150, groups: Trt:Study, 150; Study, 50
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 29.7210 0.4219 70.44 < 2e-16 ***
## TrtB 1.2372 0.3806 3.25 0.00115 **
## TrtC 1.9892 0.3806 5.23 1.73e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_Trt_mv2arm= emmeans(RM_mv2arm,"Trt")
( summary(lsmeans_Trt_mv2arm))
## Trt emmean SE df lower.CL upper.CL
## A 29.7 0.422 148 28.9 30.6
## B 31.0 0.422 148 30.1 31.8
## C 31.7 0.422 148 30.9 32.5
##
## Confidence level used: 0.95
pairs(lsmeans_Trt_mv2arm)
## contrast estimate SE df t.ratio p.value
## A - B -1.237 0.381 148 -3.251 0.0041
## A - C -1.989 0.381 148 -5.226 <.0001
## B - C -0.752 0.381 148 -1.976 0.1218
##
## P value adjustment: tukey method for comparing a family of 3 estimates
cat ("compare with analyses on individual data",sep="\n")
## compare with analyses on individual data
effectsize_IPD
## contrast estimate SE df t.ratio p.value
## A - B -1.237 0.38 98 -3.253 0.0044
## A - C -1.992 0.38 98 -5.238 <.0001
## B - C -0.755 0.38 98 -1.985 0.1212
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
Use same simulated datasets as above but this time just randomly choose two of three treatments from each study
trtchoose = vector(mode="list",length= nStudy) #
for (Study in 1:nStudy) {
#trtchoose[[Study]] = toupper(letters[sample(1:ntrt,(ntrt-1))])
trtchoose[[Study]] = sample(1:ntrt,(ntrt-1))
}
Trt=unlist(trtchoose)
Study = rep(1:nStudy,each=(ntrt-1))
IB_design = data.frame(Study,Trt) %>%
mutate(Trt = toupper(letters[Trt])) %>%
mutate_all(as.factor)
xtabs(~Study+Trt,data=IB_design)
## Trt
## Study A B C
## 1 0 1 1
## 2 0 1 1
## 3 1 0 1
## 4 0 1 1
## 5 1 1 0
## 6 1 0 1
## 7 1 0 1
## 8 1 1 0
## 9 1 0 1
## 10 1 1 0
## 11 0 1 1
## 12 1 0 1
## 13 1 0 1
## 14 0 1 1
## 15 0 1 1
## 16 1 0 1
## 17 0 1 1
## 18 0 1 1
## 19 1 0 1
## 20 1 0 1
## 21 0 1 1
## 22 0 1 1
## 23 1 1 0
## 24 1 1 0
## 25 1 1 0
## 26 0 1 1
## 27 0 1 1
## 28 0 1 1
## 29 1 0 1
## 30 1 1 0
## 31 1 0 1
## 32 0 1 1
## 33 0 1 1
## 34 1 1 0
## 35 0 1 1
## 36 0 1 1
## 37 1 0 1
## 38 1 1 0
## 39 0 1 1
## 40 1 0 1
## 41 1 1 0
## 42 1 1 0
## 43 0 1 1
## 44 1 0 1
## 45 1 0 1
## 46 1 0 1
## 47 1 1 0
## 48 1 1 0
## 49 1 1 0
## 50 0 1 1
IB_Data = IB_design %>%
left_join(overallcrossoverdata,by=c("Study","Trt"))
# NOTE THIS MODEL PERFECTLY MATCHES THE MODEL GENERATION PROCESS
IB_analysis = lmer(FCM~Trt+ (1|Period:Study) + (1|Cow:Study) + (1|Study/Trt),data=IB_Data) # Fit Trt as Fixed, Study and Study*Trt as random
# Cows and Periods are nested within Study. If Period represents a physiological stage (i.e. all cows start at roughly the same DIM), then treat as fixed.
# Might also fit Treatment by Period then as well.
summary(IB_analysis)
## Linear mixed model fit by REML ['lmerMod']
## Formula: FCM ~ Trt + (1 | Period:Study) + (1 | Cow:Study) + (1 | Study/Trt)
## Data: IB_Data
##
## REML criterion at convergence: 9462.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.56280 -0.47020 -0.02223 0.47362 2.48991
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cow:Study (Intercept) 7.928 2.816
## Period:Study (Intercept) 1.873 1.369
## Trt:Study (Intercept) 3.399 1.844
## Study (Intercept) 3.050 1.746
## Residual 2.015 1.419
## Number of obs: 1932, groups:
## Cow:Study, 966; Period:Study, 150; Trt:Study, 100; Study, 50
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 29.9518 0.4712 63.567
## TrtB 1.4368 0.5257 2.733
## TrtC 1.7271 0.5130 3.367
##
## Correlation of Fixed Effects:
## (Intr) TrtB
## TrtB -0.600
## TrtC -0.607 0.561
lsmeans_TrtIB= emmeans(IB_analysis,"Trt")
# so if this is taking too long, just set the emm_options(pbkrtest.limit = 500) or some reasonably low number
(IB_analysis_means = summary(lsmeans_TrtIB))
## Trt emmean SE df lower.CL upper.CL
## A 30.0 0.473 95.0 29.0 30.9
## B 31.4 0.450 91.7 30.5 32.3
## C 31.7 0.439 88.7 30.8 32.6
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
(effectsize_IB = pairs(lsmeans_TrtIB)) # overall effect sizes
## contrast estimate SE df t.ratio p.value
## A - B -1.44 0.530 59.7 -2.712 0.0234
## A - C -1.73 0.517 57.9 -3.343 0.0041
## B - C -0.29 0.490 55.5 -0.593 0.8245
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
Study_specific_contrasts_IB = IB_Data %>%
group_by(Study) %>%
nest() %>%
mutate(model = map(data,separate_model)) %>%
mutate(effectsize = map(model,contrast)) %>%
mutate(nrec = as.numeric(map(data,nrow))) %>%
unnest(effectsize) %>%
dplyr::select(-c(data,model)) %>%
mutate(weight = (1/SE)^2) %>% # What i use in the mixed model
mutate(sampvar = SE^2) %>% # What the r package metafor needs
mutate(contrast = factor(contrast))
Study_specific_contrasts_IB_AB = filter(Study_specific_contrasts_IB,contrast == 'A - B')
# random effects model
RM_1_IB <- glmmTMB(estimate ~ 1 + (1|Study),
weights = (1/SE)^2,
family = gaussian,
REML=TRUE,
data = Study_specific_contrasts_IB_AB,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(RM_1_IB)
## Family: gaussian ( identity )
## Formula: estimate ~ 1 + (1 | Study)
## Data: Study_specific_contrasts_IB_AB
## Weights: (1/SE)^2
##
## AIC BIC logLik deviance df.resid
## 157.0 158.3 -76.5 153.0 13
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Study (Intercept) 5.005 2.237
## Residual 1.000 1.000
## Number of obs: 14, groups: Study, 14
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.7025 0.6175 -2.757 0.00583 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## compare with analysis from using raw data
effectsize_IB
## contrast estimate SE df t.ratio p.value
## A - B -1.44 0.530 59.7 -2.712 0.0234
## A - C -1.73 0.517 57.9 -3.343 0.0041
## B - C -0.29 0.490 55.5 -0.593 0.8245
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
Study_specific_means_IB = IB_Data %>%
group_by(Study) %>%
nest() %>%
mutate(model = map(data,separate_model)) %>%
mutate(lsmeans = map(model,lsmeans)) %>%
mutate(nrec = map(data,nrow)) %>%
unnest(lsmeans) %>%
dplyr::select(-c(data,model))
SEDuse = Study_specific_contrasts_IB %>%
group_by(Study) %>%
summarize(SED = mean(SE)) %>%
mutate(SEM_alter = SED/sqrt(2)) %>%
mutate(weight_alter = (1/SEM_alter^2)) %>%
mutate(sampvar_alter = SEM_alter^2)
Study_specific_means_IB = Study_specific_means_IB %>%
left_join(SEDuse,by="Study")
RM_arm_IB <- glmmTMB(emmean ~ Trt + (1|Study/Trt),
weights = weight_alter,
family = gaussian,
REML=TRUE,
data = Study_specific_means_IB,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(RM_arm_IB)
## Family: gaussian ( identity )
## Formula: emmean ~ Trt + (1 | Study/Trt)
## Data: Study_specific_means_IB
## Weights: weight_alter
##
## AIC BIC logLik deviance df.resid
## 2608.5 2621.5 -1299.3 2598.5 98
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Trt:Study (Intercept) 3.355 1.832
## Study (Intercept) 4.164 2.041
## Residual 1.000 1.000
## Number of obs: 100, groups: Trt:Study, 100; Study, 50
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 31.3737 0.4470 70.18 < 2e-16 ***
## TrtC 0.2934 0.4840 0.61 0.54449
## TrtA -1.4131 0.5246 -2.69 0.00707 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_Trt_RM_arm_IB= emmeans(RM_arm_IB,"Trt")
( summary(lsmeans_Trt_RM_arm_IB))
## Trt emmean SE df lower.CL upper.CL
## B 31.4 0.447 98 30.5 32.3
## C 31.7 0.438 98 30.8 32.5
## A 30.0 0.470 98 29.0 30.9
##
## Confidence level used: 0.95
pairs(lsmeans_Trt_RM_arm_IB)
## contrast estimate SE df t.ratio p.value
## B - C -0.293 0.484 98 -0.606 0.8172
## B - A 1.413 0.525 98 2.694 0.0224
## C - A 1.706 0.513 98 3.325 0.0035
##
## P value adjustment: tukey method for comparing a family of 3 estimates
# compare with analyses using raw data
effectsize_IB
## contrast estimate SE df t.ratio p.value
## A - B -1.44 0.530 59.7 -2.712 0.0234
## A - C -1.73 0.517 57.9 -3.343 0.0041
## B - C -0.29 0.490 55.5 -0.593 0.8245
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates