rm(list=ls()) # always a good idea to clean up the workspace first
library(tidyverse) # ggplot2 is attached as part of the tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
We'll revisit the simulated data provided by Dr. St-Pierre in the following paper:
"Invited Review: Integrating Quantitative Findings from Multiple Studies Using Mixed Model Methodology" by N.R. St-Pierre, J. Dairy Sci. 84:741-755. https://www.sciencedirect.com/science/article/pii/S0022030201745304
The following is the original data as provided in the Appendix of St-Pierre (2001). The first few observations are printed with the head() function so we can double-check the data against the Appendix.
urlfile="https://raw.githubusercontent.com/Tempelman/Meta_analysis/main/Dataregs2.csv"
Dataregs2<-read_csv(url(urlfile)) # Data provided in Appendix of St-Pierre (2001)
## Rows: 108 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (5): Obs, Study, X, Y, AdjustedY
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(Dataregs2)
## spc_tbl_ [108 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Obs : num [1:108] 1 2 3 4 5 6 7 8 9 10 ...
## $ Study : num [1:108] 1 1 1 1 1 1 2 2 2 2 ...
## $ X : num [1:108] 1.16 1.94 2.5 3.99 4.36 ...
## $ Y : num [1:108] -2.92 -1.03 -1 1.33 1.14 ...
## $ AdjustedY: num [1:108] 0.886 0.643 3 4.358 3.814 ...
## - attr(*, "spec")=
## .. cols(
## .. Obs = col_double(),
## .. Study = col_double(),
## .. X = col_double(),
## .. Y = col_double(),
## .. AdjustedY = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
head(Dataregs2)
## # A tibble: 6 × 5
## Obs Study X Y AdjustedY
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 1.16 -2.92 0.886
## 2 2 1 1.94 -1.03 0.643
## 3 3 1 2.50 -1.00 3.00
## 4 4 1 3.99 1.33 4.36
## 5 5 1 4.36 1.14 3.81
## 6 6 1 4.99 1.59 4.36
How about a scatterplot of the data? Well, it should look like what's given in Figure 2a of St-Pierre (2001)…but it doesn't:
ggplot(Dataregs2,aes(x=X,y=Y)) + geom_point() +
scale_x_continuous(breaks=seq(0,12,2)) + scale_y_continuous(breaks=seq(-4,20,2))
Corrected data was kindly provided by Dr. White as forwarded from Dr. St-Pierre. It is also available at https://raw.githubusercontent.com/Tempelman/Meta_analysis/main/Dataregscorrected.csv
Let's display the first few records.
urlfile2="https://raw.githubusercontent.com/Tempelman/Meta_analysis/main/Dataregscorrected.csv"
Dataregs1<-read_csv(url(urlfile2)) # Revised data
## Rows: 108 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Study
## dbl (2): X, Y
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Dataregs1)
## # A tibble: 6 × 3
## Study X Y
## <chr> <dbl> <dbl>
## 1 A 1.16 -2.04
## 2 A 1.94 -2.44
## 3 A 2.50 -0.204
## 4 A 3.99 0.843
## 5 A 4.36 0.221
## 6 A 4.99 0.632
such that the corrected scatterplot looks as follows (and is precisely what is provided in Figure 2A of St-Pierre, 2001):
ggplot(Dataregs1,aes(x=X,y=Y)) + geom_point() +
scale_x_continuous(breaks=seq(0,12,2)) + scale_y_continuous(breaks=seq(-4,20,2)) +
ggtitle("Figure 2A in St-Pierre (2001)")
Ok…Dr. St-Pierre did not actually generate these data from a simple linear regression model \(y_i = \beta_0 + \beta_1{x_i} + e_i\), but from the following random coefficients model:
\[y_{ij} = \beta_0 + s_i + (\beta_1 + b_{i}) {x_{ij}} + e_{ij}; i=1,2,...,n_{study}; j=1,2,...,n_{i}\]
Note that \(i\) indexes the study (\(n_{study} = 20\) in St-Pierre's example) whereas \(j\) indexes observation \(j\) within study \(i\).
Here \(e_{ij} \sim N(0,0.25)\). Furthermore,
\[\begin{pmatrix} s_i \\ b_{i} \end{pmatrix}\sim N\left(\begin{pmatrix} 0 \\ 0 \end{pmatrix},\begin{pmatrix} 4 & 0.5\sqrt{4}\sqrt{0.04} \\ 0.5\sqrt{4}\sqrt{0.04} & 0.04 \end{pmatrix}\right)\]
In other words, the study-specific intercepts had a variance of 4 about a mean of 0 whereas the study-specific slopes had a variance of 0.04 about a mean of 1 such that there is an across-study correlation of 0.5 between intercepts and slopes.
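To make that data-generating process concrete, here is a minimal sketch of how one could simulate from this random coefficients model. The per-study sample size, the X range, and the seed are illustrative assumptions on my part, not St-Pierre's actual design:
library(MASS) # for mvrnorm()
set.seed(1)
n_study = 20; n_per = 6 # n_per = 6 records per study is an illustrative choice
G = matrix(c(4, 0.5*sqrt(4)*sqrt(0.04),
             0.5*sqrt(4)*sqrt(0.04), 0.04), nrow=2) # covariance matrix of (s_i, b_i)
sb = mvrnorm(n_study, mu=c(0,0), Sigma=G) # one (s_i, b_i) pair per study
sim = do.call(rbind, lapply(1:n_study, function(i) {
  X = runif(n_per, 0, 12) # X range chosen to mimic the scatterplots above
  Y = (0 + sb[i,1]) + (1 + sb[i,2])*X + rnorm(n_per, 0, sqrt(0.25)) # beta0 = 0, beta1 = 1
  data.frame(Study=i, X=X, Y=Y)
}))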
Let's first ignore the study-specific heterogeneity and fit a simple linear regression model \(y_i = \beta_0 + \beta_1{x_i} + e_i\) to the pooled data. In other words, we'll generate the same results as Figure 3 in St-Pierre (2001), who used SAS.
overall_regression = lm(Y~X,data=Dataregs1)
summary(overall_regression)
##
## Call:
## lm(formula = Y ~ X, data = Dataregs1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.884 -1.348 0.114 1.452 6.428
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.18762 0.56916 -9.115 5.51e-15 ***
## X 1.95949 0.09904 19.784 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.104 on 106 degrees of freedom
## Multiple R-squared: 0.7869, Adjusted R-squared: 0.7849
## F-statistic: 391.4 on 1 and 106 DF, p-value: < 2.2e-16
anova(overall_regression)
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X 1 1733.39 1733.39 391.41 < 2.2e-16 ***
## Residuals 106 469.43 4.43
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Wonderful! We get the exact same estimates as provided by St-Pierre (2001)! But as noted by St-Pierre, they sure seem a lot different from the "truth" (intercept 0, slope 1).
The following plot superimposes the line of best fit (blue) and the intercept 0, slope 1 line (dashed), just as in Figure 4 of St-Pierre (2001):
ggplot(Dataregs1, aes(x=X, y=Y)) +
geom_point()+
geom_smooth(method=lm, se=FALSE) +
geom_abline(intercept =0, slope = 1,lty=2) +
ggtitle ("Figure 4 in St-Pierre 2001")
## `geom_smooth()` using formula = 'y ~ x'
Let's also reproduce the residual plot (Figure 5 in St-Pierre, 2001):
plot(overall_regression,which=1)
title("Figure 5 in St Pierre (2001)",line=-2,adj=0.2)
So let's account for the heterogeneity, first using the fixed effects model approach presented by St-Pierre (2001) on page 745. This is not really a great idea, especially if the number of records per study is low and the distribution of Study effects is reasonably symmetric; the scope of inference is also then limited to these specific studies rather than to future studies (with implications for prediction intervals). A mixed (random) effects model analysis, with its broad scope of inference, would be much better, as we'll see later.
The following reproduces much of the first page of Figure 6 of St-Pierre (2001), based on the corner (SAS) parameterization (zero out the last factor level).
Dataregs1$Study = factor(Dataregs1$Study) # make sure Study is treated as a factor (it was read in as character)
contrasts(Dataregs1$Study) <- contr.SAS(nlevels(Dataregs1$Study)) # use the SAS parameterization for comparison with St-Pierre
fixed_model = lm(Y~X+Study+X:Study,data=Dataregs1)
summary(fixed_model) # reproduce the estimates provided on first page of Figure 6
##
## Call:
## lm(formula = Y ~ X + Study + X:Study, data = Dataregs1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9536 -0.2508 -0.0026 0.2249 1.1184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.690720 2.440294 2.742 0.007802 **
## X 1.197855 0.287972 4.160 9.15e-05 ***
## Study1 -9.768069 2.493187 -3.918 0.000210 ***
## Study2 -10.843705 2.501469 -4.335 4.93e-05 ***
## Study3 -10.766691 2.514929 -4.281 5.97e-05 ***
## Study4 -8.499209 2.714912 -3.131 0.002571 **
## Study5 -10.669785 2.785288 -3.831 0.000281 ***
## Study6 -9.427621 2.595558 -3.632 0.000540 ***
## Study7 -7.585221 2.503452 -3.030 0.003457 **
## Study8 -8.390132 2.691819 -3.117 0.002678 **
## Study9 -7.185186 2.551109 -2.816 0.006349 **
## Study10 -6.534671 2.507720 -2.606 0.011251 *
## Study11 -5.409765 2.753483 -1.965 0.053537 .
## Study12 -6.467757 2.520197 -2.566 0.012484 *
## Study13 -9.414579 2.832575 -3.324 0.001433 **
## Study14 -6.680089 2.956811 -2.259 0.027079 *
## Study15 -6.189303 2.546881 -2.430 0.017737 *
## Study16 -6.034778 2.666441 -2.263 0.026818 *
## Study17 -3.596203 3.006972 -1.196 0.235868
## Study18 -4.099714 2.621653 -1.564 0.122509
## Study19 -5.647652 2.834811 -1.992 0.050360 .
## X:Study1 -0.381141 0.323910 -1.177 0.243422
## X:Study2 -0.021995 0.320918 -0.069 0.945560
## X:Study3 0.196437 0.319985 0.614 0.541334
## X:Study4 -0.696974 0.376997 -1.849 0.068841 .
## X:Study5 0.074679 0.497844 0.150 0.881205
## X:Study6 -0.140550 0.368345 -0.382 0.703969
## X:Study7 -0.388225 0.313008 -1.240 0.219127
## X:Study8 -0.017739 0.397632 -0.045 0.964547
## X:Study9 -0.254489 0.324184 -0.785 0.435171
## X:Study10 -0.142429 0.305454 -0.466 0.642502
## X:Study11 -0.587067 0.419097 -1.401 0.165824
## X:Study12 -0.146479 0.308979 -0.474 0.636967
## X:Study13 0.264952 0.357991 0.740 0.461783
## X:Study14 0.180751 0.412285 0.438 0.662477
## X:Study15 -0.002865 0.313080 -0.009 0.992726
## X:Study16 -0.327600 0.329787 -0.993 0.324052
## X:Study17 -0.246595 0.367793 -0.670 0.504827
## X:Study18 0.011638 0.313315 0.037 0.970478
## X:Study19 0.263701 0.335030 0.787 0.433960
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5001 on 68 degrees of freedom
## Multiple R-squared: 0.9923, Adjusted R-squared: 0.9879
## F-statistic: 224.1 on 39 and 68 DF, p-value: < 2.2e-16
The following produces the least squares means for Study, evaluated at X = 0, as also provided at the top of page 747 of St-Pierre (2001):
library(emmeans)
lsmeans_study= emmeans(fixed_model,"Study")
## NOTE: Results may be misleading due to involvement in interactions
xgrid<-ref_grid(fixed_model,at=list(X=0)) # define the Study lsmeans at X = 0.
summary(xgrid)
## X Study prediction SE df
## 0 A -3.0773 0.511 68
## 0 B -4.1530 0.550 68
## 0 C -4.0760 0.608 68
## 0 D -1.8085 1.190 68
## 0 E -3.9791 1.343 68
## 0 F -2.7369 0.884 68
## 0 G -0.8945 0.559 68
## 0 H -1.6994 1.136 68
## 0 I -0.4945 0.744 68
## 0 J 0.1560 0.578 68
## 0 K 1.2810 1.275 68
## 0 L 0.2230 0.630 68
## 0 M -2.7239 1.438 68
## 0 N 0.0106 1.670 68
## 0 O 0.5014 0.729 68
## 0 P 0.6559 1.075 68
## 0 Q 3.0945 1.757 68
## 0 R 2.5910 0.958 68
## 0 S 1.0431 1.443 68
## 0 T 6.6907 2.440 68
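As an aside, and assuming I have the emmeans usage right, the same study-specific predictions at X = 0 can be requested in one step via the at argument:
emmeans(fixed_model, "Study", at = list(X = 0)) # should match the ref_grid() summary above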
The following is used to generate the study-specific slopes provided at the bottom of page 747 of St-Pierre (2001).
#Study specific slopes provided at bottom of page 747
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
# following reproduces the contrasts for the Study specific slopes in Equation [6] from St-Pierre
Slope_contrast = matrix(0, nrow=21, ncol=40) # 21 contrasts (overall slope + 20 studies) by 40 model coefficients
Slope_contrast[,2] = 1 # every contrast picks up the X coefficient
Slope_contrast[1,22:40] = 1/20 # overall slope: average the 19 X:Study deviations (Study 20's is zeroed out)
Slope_contrast[2:20,22:40] = diag(19) # Studies 1-19 each add their own X:Study deviation; Study 20 is the reference
colnames(Slope_contrast) = names(coef(fixed_model))
rownames(Slope_contrast) = c("Overall Slope",paste0('Study',1:20))
print(Slope_contrast) # these are provided in Equation [6] from St-Pierre
## (Intercept) X Study1 Study2 Study3 Study4 Study5 Study6 Study7
## Overall Slope 0 1 0 0 0 0 0 0 0
## Study1 0 1 0 0 0 0 0 0 0
## Study2 0 1 0 0 0 0 0 0 0
## Study3 0 1 0 0 0 0 0 0 0
## Study4 0 1 0 0 0 0 0 0 0
## Study5 0 1 0 0 0 0 0 0 0
## Study6 0 1 0 0 0 0 0 0 0
## Study7 0 1 0 0 0 0 0 0 0
## Study8 0 1 0 0 0 0 0 0 0
## Study9 0 1 0 0 0 0 0 0 0
## Study10 0 1 0 0 0 0 0 0 0
## Study11 0 1 0 0 0 0 0 0 0
## Study12 0 1 0 0 0 0 0 0 0
## Study13 0 1 0 0 0 0 0 0 0
## Study14 0 1 0 0 0 0 0 0 0
## Study15 0 1 0 0 0 0 0 0 0
## Study16 0 1 0 0 0 0 0 0 0
## Study17 0 1 0 0 0 0 0 0 0
## Study18 0 1 0 0 0 0 0 0 0
## Study19 0 1 0 0 0 0 0 0 0
## Study20 0 1 0 0 0 0 0 0 0
## Study8 Study9 Study10 Study11 Study12 Study13 Study14 Study15
## Overall Slope 0 0 0 0 0 0 0 0
## Study1 0 0 0 0 0 0 0 0
## Study2 0 0 0 0 0 0 0 0
## Study3 0 0 0 0 0 0 0 0
## Study4 0 0 0 0 0 0 0 0
## Study5 0 0 0 0 0 0 0 0
## Study6 0 0 0 0 0 0 0 0
## Study7 0 0 0 0 0 0 0 0
## Study8 0 0 0 0 0 0 0 0
## Study9 0 0 0 0 0 0 0 0
## Study10 0 0 0 0 0 0 0 0
## Study11 0 0 0 0 0 0 0 0
## Study12 0 0 0 0 0 0 0 0
## Study13 0 0 0 0 0 0 0 0
## Study14 0 0 0 0 0 0 0 0
## Study15 0 0 0 0 0 0 0 0
## Study16 0 0 0 0 0 0 0 0
## Study17 0 0 0 0 0 0 0 0
## Study18 0 0 0 0 0 0 0 0
## Study19 0 0 0 0 0 0 0 0
## Study20 0 0 0 0 0 0 0 0
## Study16 Study17 Study18 Study19 X:Study1 X:Study2 X:Study3
## Overall Slope 0 0 0 0 0.05 0.05 0.05
## Study1 0 0 0 0 1.00 0.00 0.00
## Study2 0 0 0 0 0.00 1.00 0.00
## Study3 0 0 0 0 0.00 0.00 1.00
## Study4 0 0 0 0 0.00 0.00 0.00
## Study5 0 0 0 0 0.00 0.00 0.00
## Study6 0 0 0 0 0.00 0.00 0.00
## Study7 0 0 0 0 0.00 0.00 0.00
## Study8 0 0 0 0 0.00 0.00 0.00
## Study9 0 0 0 0 0.00 0.00 0.00
## Study10 0 0 0 0 0.00 0.00 0.00
## Study11 0 0 0 0 0.00 0.00 0.00
## Study12 0 0 0 0 0.00 0.00 0.00
## Study13 0 0 0 0 0.00 0.00 0.00
## Study14 0 0 0 0 0.00 0.00 0.00
## Study15 0 0 0 0 0.00 0.00 0.00
## Study16 0 0 0 0 0.00 0.00 0.00
## Study17 0 0 0 0 0.00 0.00 0.00
## Study18 0 0 0 0 0.00 0.00 0.00
## Study19 0 0 0 0 0.00 0.00 0.00
## Study20 0 0 0 0 0.00 0.00 0.00
## X:Study4 X:Study5 X:Study6 X:Study7 X:Study8 X:Study9 X:Study10
## Overall Slope 0.05 0.05 0.05 0.05 0.05 0.05 0.05
## Study1 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study2 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study3 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study4 1.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study5 0.00 1.00 0.00 0.00 0.00 0.00 0.00
## Study6 0.00 0.00 1.00 0.00 0.00 0.00 0.00
## Study7 0.00 0.00 0.00 1.00 0.00 0.00 0.00
## Study8 0.00 0.00 0.00 0.00 1.00 0.00 0.00
## Study9 0.00 0.00 0.00 0.00 0.00 1.00 0.00
## Study10 0.00 0.00 0.00 0.00 0.00 0.00 1.00
## Study11 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study12 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study13 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study14 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study15 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study16 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study17 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study18 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study19 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study20 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## X:Study11 X:Study12 X:Study13 X:Study14 X:Study15 X:Study16
## Overall Slope 0.05 0.05 0.05 0.05 0.05 0.05
## Study1 0.00 0.00 0.00 0.00 0.00 0.00
## Study2 0.00 0.00 0.00 0.00 0.00 0.00
## Study3 0.00 0.00 0.00 0.00 0.00 0.00
## Study4 0.00 0.00 0.00 0.00 0.00 0.00
## Study5 0.00 0.00 0.00 0.00 0.00 0.00
## Study6 0.00 0.00 0.00 0.00 0.00 0.00
## Study7 0.00 0.00 0.00 0.00 0.00 0.00
## Study8 0.00 0.00 0.00 0.00 0.00 0.00
## Study9 0.00 0.00 0.00 0.00 0.00 0.00
## Study10 0.00 0.00 0.00 0.00 0.00 0.00
## Study11 1.00 0.00 0.00 0.00 0.00 0.00
## Study12 0.00 1.00 0.00 0.00 0.00 0.00
## Study13 0.00 0.00 1.00 0.00 0.00 0.00
## Study14 0.00 0.00 0.00 1.00 0.00 0.00
## Study15 0.00 0.00 0.00 0.00 1.00 0.00
## Study16 0.00 0.00 0.00 0.00 0.00 1.00
## Study17 0.00 0.00 0.00 0.00 0.00 0.00
## Study18 0.00 0.00 0.00 0.00 0.00 0.00
## Study19 0.00 0.00 0.00 0.00 0.00 0.00
## Study20 0.00 0.00 0.00 0.00 0.00 0.00
## X:Study17 X:Study18 X:Study19
## Overall Slope 0.05 0.05 0.05
## Study1 0.00 0.00 0.00
## Study2 0.00 0.00 0.00
## Study3 0.00 0.00 0.00
## Study4 0.00 0.00 0.00
## Study5 0.00 0.00 0.00
## Study6 0.00 0.00 0.00
## Study7 0.00 0.00 0.00
## Study8 0.00 0.00 0.00
## Study9 0.00 0.00 0.00
## Study10 0.00 0.00 0.00
## Study11 0.00 0.00 0.00
## Study12 0.00 0.00 0.00
## Study13 0.00 0.00 0.00
## Study14 0.00 0.00 0.00
## Study15 0.00 0.00 0.00
## Study16 0.00 0.00 0.00
## Study17 1.00 0.00 0.00
## Study18 0.00 1.00 0.00
## Study19 0.00 0.00 1.00
## Study20 0.00 0.00 0.00
Slopes <- glht(fixed_model, linfct = Slope_contrast)
summary(Slopes)
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: lm(formula = Y ~ X + Study + X:Study, data = Dataregs1)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## Overall Slope == 0 1.07976 0.04794 22.521 < 0.001 ***
## Study1 == 0 0.81671 0.14829 5.508 < 0.001 ***
## Study2 == 0 1.17586 0.14163 8.302 < 0.001 ***
## Study3 == 0 1.39429 0.13951 9.994 < 0.001 ***
## Study4 == 0 0.50088 0.24331 2.059 0.57016
## Study5 == 0 1.27253 0.40610 3.134 0.05004 .
## Study6 == 0 1.05731 0.22967 4.604 < 0.001 ***
## Study7 == 0 0.80963 0.12266 6.600 < 0.001 ***
## Study8 == 0 1.18012 0.27419 4.304 0.00110 **
## Study9 == 0 0.94337 0.14889 6.336 < 0.001 ***
## Study10 == 0 1.05543 0.10185 10.362 < 0.001 ***
## Study11 == 0 0.61079 0.30449 2.006 0.61302
## Study12 == 0 1.05138 0.11198 9.389 < 0.001 ***
## Study13 == 0 1.46281 0.21267 6.878 < 0.001 ***
## Study14 == 0 1.37861 0.29504 4.673 < 0.001 ***
## Study15 == 0 1.19499 0.12285 9.728 < 0.001 ***
## Study16 == 0 0.87026 0.16072 5.415 < 0.001 ***
## Study17 == 0 0.95126 0.22879 4.158 0.00193 **
## Study18 == 0 1.20949 0.12344 9.798 < 0.001 ***
## Study19 == 0 1.46156 0.17122 8.536 < 0.001 ***
## Study20 == 0 1.19786 0.28797 4.160 0.00188 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
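Simultaneous confidence intervals for these study-specific slopes are also available directly from the same glht object:
confint(Slopes) # single-step adjusted simultaneous confidence intervals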
The following prints the overall intercept reported near the bottom of Figure 6 (page 747):
Overall_int_contrast = matrix(0, nrow=1, ncol=40) # one contrast across the 40 model coefficients
Overall_int_contrast[1,1] = 1 # the (Intercept) coefficient
Overall_int_contrast[1,3:21] = 0.05 # average over the 19 Study intercept deviations (Study 20's is zeroed out)
colnames(Overall_int_contrast) = names(coef(fixed_model))
print(Overall_int_contrast)
## (Intercept) X Study1 Study2 Study3 Study4 Study5 Study6 Study7 Study8
## [1,] 1 0 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05
## Study9 Study10 Study11 Study12 Study13 Study14 Study15 Study16 Study17
## [1,] 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05
## Study18 Study19 X:Study1 X:Study2 X:Study3 X:Study4 X:Study5 X:Study6
## [1,] 0.05 0.05 0 0 0 0 0 0
## X:Study7 X:Study8 X:Study9 X:Study10 X:Study11 X:Study12 X:Study13
## [1,] 0 0 0 0 0 0 0
## X:Study14 X:Study15 X:Study16 X:Study17 X:Study18 X:Study19
## [1,] 0 0 0 0 0 0
Overall_int <- glht(fixed_model, linfct = Overall_int_contrast)
summary(Overall_int)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: lm(formula = Y ~ X + Study + X:Study, data = Dataregs1)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 1 == 0 -0.4698 0.2647 -1.775 0.0804 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
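As a cross-check (a sketch, assuming emmeans averages over the 20 studies with equal weights, just as the contrast above does), the same overall intercept can be obtained as the estimated marginal mean at X = 0:
emmeans(fixed_model, ~ 1, at = list(X = 0)) # overall prediction at X = 0, averaged over Study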
Finally, the ANOVA table as also provided at the top of page 746 of St-Pierre (2001):
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
contrasts(Dataregs1$Study) <-contr.sum(nlevels(Dataregs1$Study)) # Type 3 ANOVA involving interactions with covariates only works well with contr.sum
fixed_model = lm(Y~X+Study+X:Study,data=Dataregs1) # rerun same model again
summary(fixed_model) # notice overall estimates of intercept and x match up with what is reported here.
##
## Call:
## lm(formula = Y ~ X + Study + X:Study, data = Dataregs1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9536 -0.2508 -0.0026 0.2249 1.1184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.46979 0.26472 -1.775 0.08043 .
## X 1.07976 0.04794 22.521 < 2e-16 ***
## Study1 -2.60756 0.55220 -4.722 1.21e-05 ***
## Study2 -3.68320 0.58494 -6.297 2.56e-08 ***
## Study3 -3.60618 0.63476 -5.681 3.04e-07 ***
## Study4 -1.33870 1.15940 -1.155 0.25228
## Study5 -3.50928 1.30099 -2.697 0.00880 **
## Study6 -2.26711 0.87964 -2.577 0.01213 *
## Study7 -0.42471 0.59253 -0.717 0.47596
## Study8 -1.22963 1.10988 -1.108 0.27181
## Study9 -0.02468 0.75358 -0.033 0.97397
## Study10 0.62584 0.60855 1.028 0.30741
## Study11 1.75074 1.23857 1.414 0.16206
## Study12 0.69275 0.65330 1.060 0.29272
## Study13 -2.25407 1.38985 -1.622 0.10947
## Study14 0.48042 1.60593 0.299 0.76574
## Study15 0.97120 0.74060 1.311 0.19414
## Study16 1.12573 1.05331 1.069 0.28896
## Study17 3.56430 1.68767 2.112 0.03837 *
## Study18 3.06079 0.94673 3.233 0.00189 **
## Study19 1.51285 1.39394 1.085 0.28162
## X:Study1 -0.26304 0.14862 -1.770 0.08124 .
## X:Study2 0.09610 0.14266 0.674 0.50282
## X:Study3 0.31454 0.14077 2.234 0.02874 *
## X:Study4 -0.57887 0.23575 -2.455 0.01663 *
## X:Study5 0.19278 0.38824 0.497 0.62111
## X:Study6 -0.02245 0.22310 -0.101 0.92014
## X:Study7 -0.27013 0.12586 -2.146 0.03542 *
## X:Study8 0.10036 0.26451 0.379 0.70555
## X:Study9 -0.13639 0.14916 -0.914 0.36375
## X:Study10 -0.02433 0.10787 -0.226 0.82223
## X:Study11 -0.46897 0.29282 -1.602 0.11388
## X:Study12 -0.02838 0.11655 -0.243 0.80836
## X:Study13 0.38305 0.20738 1.847 0.06908 .
## X:Study14 0.29885 0.28398 1.052 0.29636
## X:Study15 0.11523 0.12602 0.914 0.36372
## X:Study16 -0.20950 0.15983 -1.311 0.19435
## X:Study17 -0.12850 0.22228 -0.578 0.56512
## X:Study18 0.12974 0.12654 1.025 0.30888
## X:Study19 0.38180 0.16936 2.254 0.02740 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5001 on 68 degrees of freedom
## Multiple R-squared: 0.9923, Adjusted R-squared: 0.9879
## F-statistic: 224.1 on 39 and 68 DF, p-value: < 2.2e-16
anova(fixed_model) # Type 1 (sequential) ANOVA, which is order-dependent (never look at those!)
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X 1 1733.39 1733.39 6931.2132 <2e-16 ***
## Study 19 443.34 23.33 93.3032 <2e-16 ***
## X:Study 19 9.08 0.48 1.9111 0.0273 *
## Residuals 68 17.01 0.25
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(fixed_model,type=3)
## Anova Table (Type III tests)
##
## Response: Y
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.788 1 3.1495 0.08043 .
## X 126.846 1 507.2124 < 2.2e-16 ***
## Study 32.950 19 6.9345 1.06e-09 ***
## X:Study 9.081 19 1.9111 0.02730 *
## Residuals 17.006 68
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_study= emmeans(fixed_model,"Study") # doesn't change the least squares means for study though
## NOTE: Results may be misleading due to involvement in interactions
Treating Study-specific intercepts and slopes now as random effects. This is also known as a random coefficients model, or as a random regression model by some of your animal breeding colleagues.
The following reproduces much of Figure 7:
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
mixed_model = lmer(Y~X+(1+X|Study),data=Dataregs1)
anova(mixed_model)
## Analysis of Variance Table
## npar Sum Sq Mean Sq F value
## X 1 94.645 94.645 378.01
summary(mixed_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Y ~ X + (1 + X | Study)
## Data: Dataregs1
##
## REML criterion at convergence: 270.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.88372 -0.60598 -0.09681 0.49121 2.27511
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Study (Intercept) 5.41392 2.3268
## X 0.03305 0.1818 0.46
## Residual 0.25038 0.5004
## Number of obs: 108, groups: Study, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.60846 0.56284 -1.081
## X 1.09741 0.05644 19.442
##
## Correlation of Fixed Effects:
## (Intr)
## X 0.056
vc = VarCorr(mixed_model)
print(vc,comp=c("Variance","Std.Dev")) # print the variance component estimates
## Groups Name Variance Std.Dev. Cov
## Study (Intercept) 5.413919 2.32678
## X 0.033046 0.18179 0.196
## Residual 0.250376 0.50038
coef(mixed_model) # these don't look like the bottom of page 750 or top of page 751 because they need to be centered around zero.
## $Study
## (Intercept) X
## A -3.28911461 0.8882510
## B -3.72438914 1.0647173
## C -3.21535418 1.1922856
## D -3.35297563 0.8237472
## E -3.08571480 1.0105673
## F -2.56159285 1.0143345
## G -1.32267530 0.9096754
## H -1.31595335 1.0876151
## I -0.83719211 1.0141745
## J 0.01538269 1.0806032
## K -0.38217246 1.0119980
## L 0.05088920 1.0822443
## M -0.94631057 1.1983730
## N 0.86712305 1.2223537
## O 0.56324065 1.1832327
## P -0.25555936 1.0077810
## Q 1.51550372 1.1558143
## R 2.32133132 1.2430061
## S 1.97797279 1.3476773
## T 4.80838661 1.4098375
##
## attr(,"class")
## [1] "coef.mer"
The residual plot:
plot(mixed_model,which=1,main = "Residual plot for Mixed Effects Model")
Bottom of Figure 7 from St-Pierre (2001): the Study random effects for intercept and slope, expressed as deviations from the mean.
Study_specific_int = coef(mixed_model)$Study["(Intercept)"]-mean(coef(mixed_model)$Study["(Intercept)"][,1])
Study_specific_slope = coef(mixed_model)$Study["X"]-mean(coef(mixed_model)$Study["X"][,1])
data.frame(Study_specific_int,Study_specific_slope)
## X.Intercept. X
## A -2.6806559 -0.209163467
## B -3.1159304 -0.032697176
## C -2.6068955 0.094871152
## D -2.7445169 -0.273667196
## E -2.4772561 -0.086847191
## F -1.9531341 -0.083079931
## G -0.7142166 -0.187739032
## H -0.7074946 -0.009799376
## I -0.2287334 -0.083239922
## J 0.6238414 -0.016811291
## K 0.2262863 -0.085416483
## L 0.6593479 -0.015170171
## M -0.3378519 0.100958533
## N 1.4755818 0.124939285
## O 1.1716994 0.085818242
## P 0.3528994 -0.089633453
## Q 2.1239624 0.058399874
## R 2.9297900 0.145591655
## S 2.5864315 0.250262905
## T 5.4168453 0.312423043
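For comparison, lme4's ranef() returns the study-level BLUPs directly, i.e., deviations from the fixed-effect intercept and slope; these differ slightly from the mean-centered values above because the average of the study-specific coefficients need not equal the fixed-effect estimates exactly.
ranef(mixed_model)$Study # BLUPs: deviations from the fixed effects (coef = fixef + ranef)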
Figure 10 from St-Pierre (2001)
Xmat = cbind(1,Dataregs1$X)
parmest = coef(summary(mixed_model))[,'Estimate']
y_adj=Xmat%*%parmest+resid(mixed_model)
data_adj = data.frame(Dataregs1$X,y_adj)
colnames(data_adj) = c("X","Y_adj")
ggplot(data_adj,aes(x=X,y=Y_adj)) + geom_point() +
scale_x_continuous(breaks=seq(0,12,2)) + scale_y_continuous(breaks=seq(-4,20,2))+
geom_smooth(method=lm, se=FALSE) +
geom_abline(intercept =0, slope = 1,lty=2) +
ggtitle("Figure 10 from St-Pierre 2001")
## `geom_smooth()` using formula = 'y ~ x'
We'll extend St-Pierre (2001) and conduct a separate regression analysis for each study; we first need to write a function for this (a quick alternative is also sketched below).
linear_model = function(df) {
lm(Y~X,data=df)
}
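As that quick alternative, lme4's lmList() fits a separate regression per study in one line, though the tidyverse pipeline below makes it easier to collect the estimates and standard errors into a single tibble:
separate_fits = lme4::lmList(Y ~ X | Study, data = Dataregs1) # one lm() fit per study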
library(broom) # needed for tidy function below.
Dataregs1_analysis = Dataregs1 %>%
group_by(Study) %>%
nest() %>%
mutate(model = map(data,linear_model)) %>%
mutate(tidied=map(model,tidy)) %>%
mutate(nrec = as.numeric(map(data,nrow))) %>%
unnest(tidied) %>%
mutate(term = ifelse(term=="(Intercept)","Intercept",term))%>%
dplyr::select(-c(data,model))
data.frame(Dataregs1_analysis)
## Study term estimate std.error statistic p.value nrec
## 1 A Intercept -3.07734838 0.76656078 -4.01448711 1.593713e-02 6
## 2 A X 0.81671421 0.22252478 3.67021698 2.138706e-02 6
## 3 B Intercept -4.15298434 0.63281610 -6.56270335 7.195518e-03 5
## 4 B X 1.17586084 0.16301208 7.21333571 5.492902e-03 5
## 5 C Intercept -4.07597045 0.61009504 -6.68087785 2.609649e-03 6
## 6 C X 1.39429265 0.13995723 9.96227671 5.702866e-04 6
## 7 D Intercept -1.80848842 1.09619111 -1.64979300 1.743302e-01 6
## 8 D X 0.50088152 0.22415810 2.23450111 8.916425e-02 6
## 9 E Intercept -3.97906524 NaN NaN NaN 2
## 10 E X 1.27253453 NaN NaN NaN 2
## 11 F Intercept -2.73690034 0.28862499 -9.48254800 6.901111e-04 6
## 12 F X 1.05730564 0.07496731 14.10355530 1.466964e-04 6
## 13 G Intercept -0.89450057 0.76420586 -1.17049688 3.067779e-01 6
## 14 G X 0.80963066 0.16775732 4.82620172 8.484667e-03 6
## 15 H Intercept -1.69941157 0.86675884 -1.96065097 1.889651e-01 4
## 16 H X 1.18011620 0.20917948 5.64164426 3.001152e-02 4
## 17 I Intercept -0.49446586 0.48942691 -1.01029562 3.695035e-01 6
## 18 I X 0.94336636 0.09797836 9.62831343 6.506493e-04 6
## 19 J Intercept 0.15604943 0.53619794 0.29102953 7.854956e-01 6
## 20 J X 1.05542688 0.09455099 11.16251527 3.666204e-04 6
## 21 K Intercept 1.28095536 0.82704622 1.54883165 2.191841e-01 5
## 22 K X 0.61078846 0.19745010 3.09338141 5.357182e-02 5
## 23 L Intercept 0.22296281 0.83524943 0.26694159 7.984519e-01 8
## 24 L X 1.05137687 0.14856809 7.07673413 3.991357e-04 8
## 25 M Intercept -2.72385882 0.91929825 -2.96297617 5.940186e-02 5
## 26 M X 1.46280711 0.13593939 10.76073034 1.716349e-03 5
## 27 N Intercept 0.01063077 0.96092228 0.01106309 9.929573e-01 3
## 28 N X 1.37860631 0.16980599 8.11871399 7.802089e-02 3
## 29 O Intercept 0.50141758 0.77961237 0.64316268 5.484495e-01 7
## 30 O X 1.19499058 0.13135924 9.09711861 2.686212e-04 7
## 31 P Intercept 0.65594247 0.82834030 0.79187559 4.727463e-01 6
## 32 P X 0.87025577 0.12388376 7.02477702 2.163288e-03 6
## 33 Q Intercept 3.09451685 1.43899304 2.15047382 1.206540e-01 5
## 34 Q X 0.95126025 0.18738448 5.07651558 1.476401e-02 5
## 35 R Intercept 2.59100578 0.99595024 2.60154140 3.534577e-02 9
## 36 R X 1.20949348 0.12831436 9.42601815 3.154529e-05 9
## 37 S Intercept 1.04306823 1.26255143 0.82615900 4.692870e-01 5
## 38 S X 1.46155673 0.14985181 9.75334718 2.289886e-03 5
## 39 T Intercept 6.69072017 NaN NaN NaN 2
## 40 T X 1.19785542 NaN NaN NaN 2
We probably shouldn't include studies with only 2 records: fitting a two-parameter regression to 2 points leaves zero residual degrees of freedom, so the standard errors are not defined! Remove studies 'E' and 'T'.
Let's redo the random coefficients mixed model analysis on the original raw data, but without those two studies.
We'll use this as the "gold standard" analysis.
Dataregs1a = Dataregs1 %>%
filter(!Study %in% c('E', 'T')) %>%
droplevels()
mixed_model = lmer(Y~X+(1+X|Study),data=Dataregs1a)
summary(mixed_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Y ~ X + (1 + X | Study)
## Data: Dataregs1a
##
## REML criterion at convergence: 250.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8504 -0.6395 -0.1013 0.4883 2.2890
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Study (Intercept) 3.72381 1.9297
## X 0.03215 0.1793 0.33
## Residual 0.25122 0.5012
## Number of obs: 104, groups: Study, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.78266 0.50278 -1.557
## X 1.08502 0.05775 18.787
##
## Correlation of Fixed Effects:
## (Intr)
## X -0.063
Meta-analysis software typically requires either the weights (inverse squared standard errors) or the sampling variances (squared standard errors):
Dataregs1_analysis = Dataregs1_analysis %>%
na.omit() %>%
mutate(weight = (1/std.error^2)) %>%
mutate(sampvar = std.error^2)
head(Dataregs1_analysis)
## # A tibble: 6 × 9
## # Groups: Study [3]
## Study term estimate std.error statistic p.value nrec weight sampvar
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A Intercept -3.08 0.767 -4.01 0.0159 6 1.70 0.588
## 2 A X 0.817 0.223 3.67 0.0214 6 20.2 0.0495
## 3 B Intercept -4.15 0.633 -6.56 0.00720 5 2.50 0.400
## 4 B X 1.18 0.163 7.21 0.00549 5 37.6 0.0266
## 5 C Intercept -4.08 0.610 -6.68 0.00261 6 2.69 0.372
## 6 C X 1.39 0.140 9.96 0.000570 6 51.1 0.0196
# put the study specific intercepts and study specific slopes in separate files.
Dataregs_intercepts = Dataregs1_analysis %>% filter(term=="Intercept")
Dataregs_slopes = Dataregs1_analysis %>% filter(term=="X")
head(Dataregs_intercepts)
## # A tibble: 6 × 9
## # Groups: Study [6]
## Study term estimate std.error statistic p.value nrec weight sampvar
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A Intercept -3.08 0.767 -4.01 0.0159 6 1.70 0.588
## 2 B Intercept -4.15 0.633 -6.56 0.00720 5 2.50 0.400
## 3 C Intercept -4.08 0.610 -6.68 0.00261 6 2.69 0.372
## 4 D Intercept -1.81 1.10 -1.65 0.174 6 0.832 1.20
## 5 F Intercept -2.74 0.289 -9.48 0.000690 6 12.0 0.0833
## 6 G Intercept -0.895 0.764 -1.17 0.307 6 1.71 0.584
head(Dataregs_slopes)
## # A tibble: 6 × 9
## # Groups: Study [6]
## Study term estimate std.error statistic p.value nrec weight sampvar
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A X 0.817 0.223 3.67 0.0214 6 20.2 0.0495
## 2 B X 1.18 0.163 7.21 0.00549 5 37.6 0.0266
## 3 C X 1.39 0.140 9.96 0.000570 6 51.1 0.0196
## 4 D X 0.501 0.224 2.23 0.0892 6 19.9 0.0502
## 5 F X 1.06 0.0750 14.1 0.000147 6 178. 0.00562
## 6 G X 0.810 0.168 4.83 0.00848 6 35.5 0.0281
write_csv(Dataregs_slopes,"Dataregs_slopes.csv")
The following "common effects" analysis is widely used yet is typically the wrong analysis if there is indeed study-to-study heterogeneity. Problem: it often substantially understates the uncertainty of the meta-estimates themselves.
# COMMON EFFECTS MODEL
cat("Overall estimated regression coefficient",sep='\n')
## Overall estimated regression coefficient
(weighted.mean(Dataregs_slopes$estimate,Dataregs_slopes$weight))
## [1] 1.086413
cat("Estimated Standard Error of Overall estimated regression coefficient",sep='\n')
## Estimated Standard Error of Overall estimated regression coefficient
(stderr = (sum(Dataregs_slopes$weight)^(-1/2)))
## [1] 0.03165859
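In symbols, with study-specific slope estimates \(\hat{\beta}_i\) and weights \(w_i = 1/SE_i^2\), the common effects estimator and its standard error are:
\[\hat{\beta}_{CE} = \frac{\sum_i w_i \hat{\beta}_i}{\sum_i w_i}, \qquad SE\left(\hat{\beta}_{CE}\right) = \left(\sum_i w_i\right)^{-1/2}\]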
Relative to the analysis of the original data, the point estimate seems fine, but the estimated standard error is understated.
We can use linear mixed model software (the glmmTMB package) to do the same CE analysis as above. The trick is to hold the residual variance constant at 1 and specify the weights as the inverses of the sampling variances.
For more details on how to fix variance components in linear mixed models with glmmTMB in R, refer to https://stackoverflow.com/questions/75569862/linear-mixed-model-with-fix-var-component-in-r
It is really much easier to do this sort of thing in SAS. See https://pubmed.ncbi.nlm.nih.gov/27111798/ and https://link.springer.com/article/10.1186/1471-2288-14-61
library(glmmTMB)
CE_1 <- glmmTMB(estimate ~ 1 ,
weights = weight,
family = gaussian,
REML=TRUE,
data = Dataregs_slopes,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(CE_1)
## Family: gaussian ( identity )
## Formula: estimate ~ 1
## Data: Dataregs_slopes
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 1887.5 1888.3 -942.7 1885.5 18
##
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.08641 0.03166 34.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Now using the R package metafor (https://wviechtb.github.io/metafor/):
library(metafor)
## Warning: package 'metafor' was built under R version 4.3.3
## Loading required package: metadat
## Warning: package 'metadat' was built under R version 4.3.3
## Loading required package: numDeriv
##
## Loading the 'metafor' package (version 4.6-0). For an
## introduction to the package please type: help(metafor)
##
## Attaching package: 'metafor'
## The following object is masked from 'package:car':
##
## vif
(res_EE = rma.uni(yi=estimate ,vi=sampvar,data=Dataregs_slopes,method="EE"))
##
## Equal-Effects Model (k = 18)
##
## I^2 (total heterogeneity / total variability): 63.57%
## H^2 (total variability / sampling variability): 2.75
##
## Test for Heterogeneity:
## Q(df = 17) = 46.6657, p-val = 0.0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 1.0864 0.0317 34.3165 <.0001 1.0244 1.1485 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A forest plot is a great way to visualize the uncertainty of the individual studies along with the meta-estimate and meta-CI (bottom row):
forest(res_EE)
The following suboptimal analysis uses the same weights (inverse sampling variances) but "estimates" the residual variance rather than fixing it at 1:
library(glmmTMB)
CE_suboptim <- glmmTMB(estimate ~ 1 ,
weights = weight,
family = gaussian,
REML=TRUE,
data = Dataregs_slopes
)
summary(CE_suboptim)
## Family: gaussian ( identity )
## Formula: estimate ~ 1
## Data: Dataregs_slopes
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## -212.0 -210.2 108.0 -216.0 17
##
##
## Dispersion estimate for gaussian family (sigma^2): 0.0468
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.08641 0.00685 158.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Rescaling the weights to average 1 does not fix this either; the residual variance is still "estimated", and the standard error changes yet again:
meanweight = mean(Dataregs_slopes$weight)
Dataregs_slopes1 = Dataregs_slopes %>%
mutate(scaledweight = weight/meanweight)
write_csv(Dataregs_slopes1,"Dataregs_slopes1.csv")
library(glmmTMB)
CE_suboptim2 <- glmmTMB(estimate ~ 1 ,
weights = scaledweight,
family = gaussian,
REML=TRUE,
data = Dataregs_slopes1
)
summary(CE_suboptim2)
## Family: gaussian ( identity )
## Formula: estimate ~ 1
## Data: Dataregs_slopes1
## Weights: scaledweight
##
## AIC BIC logLik deviance df.resid
## 4.0 5.8 0.0 0.0 17
##
##
## Dispersion estimate for gaussian family (sigma^2): 0.0495
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.08641 0.05245 20.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
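The reason the standard error keeps changing: in an intercept-only weighted least squares fit with weights \(w_i\), the variance of the estimator is
\[Var\left(\hat{\beta}\right) = \frac{\sigma^2}{\sum_i w_i}\]
so fixing \(\sigma^2 = 1\) with \(w_i = 1/SE_i^2\) reproduces the common effects standard error, whereas estimating \(\sigma^2\) (or rescaling the weights) rescales the standard error arbitrarily; e.g., \(\sqrt{0.0495/18} \approx 0.0524\) matches the output immediately above.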
A mixed model analysis more appropriately accounts for between-study heterogeneity in the estimated intercept and slope effects!
We fit this heterogeneous effects model using mixed model software; the trick, again, is to hold the residual variance constant at 1 (this should be done!).
Compare with the mixed model analysis on the raw data: notice that the estimated Study variance component below (0.039) closely corresponds to the slope variance component estimated from the raw data above (0.032).
RM_1 <- glmmTMB(estimate ~ 1 + (1|Study),
weights = weight,
family = gaussian,
REML=TRUE,
data = Dataregs_slopes,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
## Warning: contrasts dropped from factor Study due to missing levels
summary(RM_1)
## Family: gaussian ( identity )
## Formula: estimate ~ 1 + (1 | Study)
## Data: Dataregs_slopes
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 1879.3 1881.1 -937.7 1875.3 17
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Study (Intercept) 0.03898 0.1974
## Residual 1.00000 1.0000
## Number of obs: 18, groups: Study, 18
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0810 0.0588 18.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The above mixed effects model accounting for Study heterogeneity would not even run if you tried to estimate the residual variance (you would get an error message).
Now the heterogeneous effects model using metafor. Notice that metafor requires the specification of sampling variances rather than weights (the two are just inverses of each other):
(res_REML = rma.uni(yi=estimate,vi=sampvar,data=Dataregs_slopes,method="REML")) # default
##
## Random-Effects Model (k = 18; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 0.0390 (SE = 0.0209)
## tau (square root of estimated tau^2 value): 0.1974
## I^2 (total heterogeneity / total variability): 67.72%
## H^2 (total variability / sampling variability): 3.10
##
## Test for Heterogeneity:
## Q(df = 17) = 46.6657, p-val = 0.0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 1.0810 0.0586 18.4410 <.0001 0.9661 1.1959 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest(res_REML)
Compare the width of the confidence interval for the mixed model (immediately above, the bottom interval of [0.97, 1.20]) to that previously provided for the common effects model.
rstudent.rma.uni(res_REML) # studentized deleted residuals, one per study
##
## resid se z
## 1 -0.2749 0.3040 -0.9042
## 2 0.1010 0.2708 0.3728
## 3 0.3322 0.2425 1.3698
## 4 -0.6040 0.2889 -2.0902
## 5 -0.0243 0.2341 -0.1039
## 6 -0.2862 0.2638 -1.0849
## 7 0.1042 0.2996 0.3477
## 8 -0.1473 0.2356 -0.6253
## 9 -0.0263 0.2402 -0.1095
## 10 -0.4928 0.2717 -1.8136
## 11 -0.0304 0.2637 -0.1154
## 12 0.4044 0.2294 1.7631
## 13 0.3133 0.2654 1.1806
## 14 0.1222 0.2534 0.4825
## 15 -0.2249 0.2411 -0.9327
## 16 -0.1355 0.2840 -0.4771
## 17 0.1378 0.2512 0.5483
## 18 0.4016 0.2418 1.6608
### calculate influence diagnostics
inf <- influence(res_REML)
### plot the influence diagnostics
plot(inf)
dfbetas(res_REML)
##
## intrcpt
## 1 -0.1808
## 2 0.1005
## 3 0.3304
## 4 -0.4415
## 5 -0.0107
## 6 -0.2555
## 7 0.0834
## 8 -0.1616
## 9 -0.0123
## 10 -0.4113
## 11 -0.0137
## 12 0.4139
## 13 0.2691
## 14 0.1358
## 15 -0.2418
## 16 -0.0966
## 17 0.1527
## 18 0.3795
anova(CE_1,RM_1)
## Data: Dataregs_slopes
## Models:
## CE_1: estimate ~ 1, zi=~0, disp=~1
## RM_1: estimate ~ 1 + (1 | Study), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## CE_1 1 1887.5 1888.3 -942.73 1885.5
## RM_1 2 1879.3 1881.1 -937.67 1875.3 10.123 1 0.001464 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# this works too:
anova(res_EE,res_REML)
##
## df AIC BIC AICc logLik LRT pval QE tau^2
## Full 2 6.9167 8.5831 7.7738 -1.4583 46.6657 0.0390
## Reduced 1 15.0398 15.8730 15.3065 -6.5199 10.1231 0.0015 46.6657 0.0000
## R^2
## Full
## Reduced 0.0000%
We could also do the same thing for the intercepts. Let's do the mixed (random effects) model analysis there:
(res_int_REML = rma.uni(yi=estimate,vi=sampvar,data=Dataregs_intercepts,method="REML")) # default
##
## Random-Effects Model (k = 18; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 3.6391 (SE = 1.4859)
## tau (square root of estimated tau^2 value): 1.9076
## I^2 (total heterogeneity / total variability): 88.43%
## H^2 (total variability / sampling variability): 8.64
##
## Test for Heterogeneity:
## Q(df = 17) = 136.8109, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## -0.7864 0.4916 -1.5998 0.1096 -1.7498 0.1770
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest(res_int_REML)
Many statisticians advocate a multivariate mixed model analysis when inferring upon two or more "effect sizes" in a meta-analysis, since those effect sizes are likely to be correlated. In fact, even if you're just interested in estimating the slope (not the intercept), it may pay to conduct a multivariate analysis, as we'll see in other examples.
I can think of two possible coding strategies for this. Again, the SAS options provided by Madden et al. (2016) (https://pubmed.ncbi.nlm.nih.gov/27111798/) might be more comprehensive than what is available in R.
Using glmmTMB (mixed model analysis):
RMM_1 <- glmmTMB(estimate ~ term + (term-1|Study),
weights = weight,
family = gaussian,
REML=TRUE,
data = Dataregs1_analysis,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
## Warning: contrasts dropped from factor Study due to missing levels
summary(RMM_1)
## Family: gaussian ( identity )
## Formula: estimate ~ term + (term - 1 | Study)
## Data: Dataregs1_analysis
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 2012.0 2019.9 -1001.0 2002.0 33
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev. Corr
## Study termIntercept 3.60318 1.8982
## termX 0.03827 0.1956 -0.27
## Residual 1.00000 1.0000
## Number of obs: 36, groups: Study, 18
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7954 0.4910 -1.620 0.105249
## termX 1.8774 0.5057 3.713 0.000205 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
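Note that under R's default treatment coding, the termX row estimates the difference between the two meta-estimates rather than the slope itself: \(-0.7954 + 1.8774 = 1.0820\). The emmeans call below recovers the meta-estimate for each term.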
lsmeans_RMM_1= emmeans(RMM_1,"term")
summary(lsmeans_RMM_1)
## term emmean SE df lower.CL upper.CL
## Intercept -0.795 0.4910 33 -1.794 0.204
## X 1.082 0.0584 33 0.963 1.201
##
## Confidence level used: 0.95
Using metafor (the forest plot might need some work for better-formatted output):
(res.mv <- rma.mv(estimate, sampvar, mods = ~ term -1, random = ~ term | Study, struct="UN", data=Dataregs1_analysis))
##
## Multivariate Meta-Analysis Model (k = 36; method: REML)
##
## Variance Components:
##
## outer factor: Study (nlvls = 18)
## inner factor: term (nlvls = 2)
##
## estim sqrt k.lvl fixed level
## tau^2.1 3.6032 1.8982 18 no Intercept
## tau^2.2 0.0383 0.1956 18 no X
##
## rho.Intr rho.X Intr X
## Intercept 1 - 18
## X -0.2674 1 no -
##
## Test for Residual Heterogeneity:
## QE(df = 34) = 183.4766, p-val < .0001
##
## Test of Moderators (coefficients 1:2):
## QM(df = 2) = 349.6061, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## termIntercept -0.7954 0.4894 -1.6252 0.1041 -1.7546 0.1638
## termX 1.0820 0.0582 18.5847 <.0001 0.9679 1.1961 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest(res.mv)
Whereas confidence intervals are useful for assessing the reliability of the meta-estimate of the corresponding parameter, the prediction interval indicates the plausible range of responses in future experiments.
This may be more pertinent for an "extension"-based message, i.e., relaying to farmers, consultants, etc. what uncertainty they might anticipate with respect to their own operations.
The following is just for the slope analyses.
# confirming the metafor CI on the effect size using the mixed model analysis.
Studyvar = c(((VarCorr(RM_1))["cond"]$cond$Study))
overallest = coef(summary(RM_1))$cond
cat("Confidence Interval",sep="\n")
## Confidence Interval
(LCL = overallest[1,"Estimate"]- 1.96*overallest[1,"Std. Error"])
## [1] 0.9657192
(UCL = overallest[1,"Estimate"]+ 1.96*overallest[1,"Std. Error"])
## [1] 1.19622
# Prediction intervals
# useful for determining uncertainty for a future experiment.
cat("Prediction Interval",sep="\n")
## Prediction Interval
(LPL = overallest[1,"Estimate"]- 1.96*sqrt(overallest[1,"Std. Error"]^2 + Studyvar))
## [1] 0.6771959
(UPL = overallest[1,"Estimate"]+ 1.96*sqrt(overallest[1,"Std. Error"]^2 + Studyvar))
## [1] 1.484744
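metafor computes both intervals directly: for an rma object, predict() returns the confidence interval (ci.lb, ci.ub) along with the prediction interval (pi.lb, pi.ub), which should closely match the hand calculations above.
predict(res_REML) # returns pred, se, ci.lb, ci.ub, pi.lb, pi.ub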