rm(list=ls()) # always a good idea to clean up the workspace first
library(tidyverse) # ggplot2 is attached as part of the tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
We'll revisit the simulated data provided by Dr. St-Pierre in the following paper:
"Invited Review: Integrating Quantitative Findings from Multiple Studies Using Mixed Model Methodology" by N.R. St-Pierre, J. Dairy Sci. 84:741-755. https://www.sciencedirect.com/science/article/pii/S0022030201745304
The following is the original data as provided in the Appendix of St-Pierre (2001). The first few observations are printed with the head() function so we can double-check the data against the Appendix.
urlfile="https://raw.githubusercontent.com/Tempelman/Meta_analysis/main/Dataregs2.csv"
Dataregs2<-read_csv(url(urlfile)) # Data provided in Appendix of St-Pierre (2001)
## Rows: 108 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (5): Obs, Study, X, Y, AdjustedY
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(Dataregs2)
## spc_tbl_ [108 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Obs : num [1:108] 1 2 3 4 5 6 7 8 9 10 ...
## $ Study : num [1:108] 1 1 1 1 1 1 2 2 2 2 ...
## $ X : num [1:108] 1.16 1.94 2.5 3.99 4.36 ...
## $ Y : num [1:108] -2.92 -1.03 -1 1.33 1.14 ...
## $ AdjustedY: num [1:108] 0.886 0.643 3 4.358 3.814 ...
## - attr(*, "spec")=
## .. cols(
## .. Obs = col_double(),
## .. Study = col_double(),
## .. X = col_double(),
## .. Y = col_double(),
## .. AdjustedY = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
head(Dataregs2)
## # A tibble: 6 × 5
## Obs Study X Y AdjustedY
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 1.16 -2.92 0.886
## 2 2 1 1.94 -1.03 0.643
## 3 3 1 2.50 -1.00 3.00
## 4 4 1 3.99 1.33 4.36
## 5 5 1 4.36 1.14 3.81
## 6 6 1 4.99 1.59 4.36
How about a scatterplot of the data? Well, it should look like what's given in Figure 2a of St-Pierre (2001)…but it doesn't:
ggplot(Dataregs2,aes(x=X,y=Y)) + geom_point() +
scale_x_continuous(breaks=seq(0,12,2)) + scale_y_continuous(breaks=seq(-4,20,2))
Corrected data was kindly provided by Dr. White as forwarded from Dr. St-Pierre. It is also available at https://raw.githubusercontent.com/Tempelman/Meta_analysis/main/Dataregscorrected.csv
Let's display the first few records.
urlfile2="https://raw.githubusercontent.com/Tempelman/Meta_analysis/main/Dataregscorrected.csv"
Dataregs1<-read_csv(url(urlfile2)) # Revised data
## Rows: 108 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Study
## dbl (2): X, Y
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Dataregs1)
## # A tibble: 6 × 3
## Study X Y
## <chr> <dbl> <dbl>
## 1 A 1.16 -2.04
## 2 A 1.94 -2.44
## 3 A 2.50 -0.204
## 4 A 3.99 0.843
## 5 A 4.36 0.221
## 6 A 4.99 0.632
such that the corrected scatterplot looks as follows (and is precisely what is provided in Figure 2A of St-Pierre, 2001):
ggplot(Dataregs1,aes(x=X,y=Y)) + geom_point() +
scale_x_continuous(breaks=seq(0,12,2)) + scale_y_continuous(breaks=seq(-4,20,2)) +
ggtitle("Figure 2A in St-Pierre (2001)")
Ok…Dr. St-Pierre did not actually generate these data from a simple linear regression model \(y_i = \beta_0 + \beta_1{x_i} + e_i\), but from the following random coefficients model:
\[y_{ij} = \beta_0 + s_i + (\beta_1 + b_{i}) {x_{ij}} + e_{ij}; i=1,2,...,n_{study}; j=1,2,...,n_{i}\]
Note that \(i\) indexes the study (\(n_{study} = 20\) in St-Pierre's example) whereas \(j\) indexes observation \(j\) within study \(i\).
Here \(e_{ij} \sim N(0,0.25)\). Furthermore,
\[\begin{pmatrix} s_i \\ b_{i} \end{pmatrix}\sim N\left(\begin{pmatrix} 0 \\ 0 \end{pmatrix},\begin{pmatrix} 4 & 0.5\sqrt{4}\sqrt{0.04} \\ 0.5\sqrt{4}\sqrt{0.04} & 0.04 \end{pmatrix}\right)\]
In other words, the study-specific intercepts had a variance of 4 about a mean of 0 whereas the study-specific slopes had a variance of 0.04 about a mean of 1 such that there is an across-study correlation of 0.5 between intercepts and slopes.
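To make that data-generating process concrete, here is a minimal sketch of how one could simulate from this random coefficients model. The per-study sample size, the X range, and the seed are illustrative assumptions on my part, not St-Pierre's actual design:
library(MASS) # for mvrnorm()
set.seed(1)
n_study = 20; n_per = 6 # n_per = 6 records per study is an illustrative choice
G = matrix(c(4, 0.5*sqrt(4)*sqrt(0.04),
             0.5*sqrt(4)*sqrt(0.04), 0.04), nrow=2) # covariance matrix of (s_i, b_i)
sb = mvrnorm(n_study, mu=c(0,0), Sigma=G) # one (s_i, b_i) pair per study
sim = do.call(rbind, lapply(1:n_study, function(i) {
  X = runif(n_per, 0, 12) # X range chosen to mimic the scatterplots above
  Y = (0 + sb[i,1]) + (1 + sb[i,2])*X + rnorm(n_per, 0, sqrt(0.25)) # beta0 = 0, beta1 = 1
  data.frame(Study=i, X=X, Y=Y)
}))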
Let's first ignore the study-specific heterogeneity and fit a simple linear regression model \(y_i = \beta_0 + \beta_1{x_i} + e_i\) to the pooled data. In other words, we'll generate the same results as Figure 3 in St-Pierre (2001), who used SAS.
overall_regression = lm(Y~X,data=Dataregs1)
summary(overall_regression)
##
## Call:
## lm(formula = Y ~ X, data = Dataregs1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.884 -1.348 0.114 1.452 6.428
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.18762 0.56916 -9.115 5.51e-15 ***
## X 1.95949 0.09904 19.784 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.104 on 106 degrees of freedom
## Multiple R-squared: 0.7869, Adjusted R-squared: 0.7849
## F-statistic: 391.4 on 1 and 106 DF, p-value: < 2.2e-16
anova(overall_regression)
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X 1 1733.39 1733.39 391.41 < 2.2e-16 ***
## Residuals 106 469.43 4.43
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Wonderful! We get the exact same estimates as provided by St-Pierre (2001)! But as noted by St-Pierre, they sure seem a lot different from the "truth" (intercept 0, slope 1).
The following plot superimposes the line of best fit (blue) and the intercept 0, slope 1 line (dashed), just as in Figure 4 of St-Pierre (2001):
ggplot(Dataregs1, aes(x=X, y=Y)) +
geom_point()+
geom_smooth(method=lm, se=FALSE) +
geom_abline(intercept =0, slope = 1,lty=2) +
ggtitle ("Figure 4 in St-Pierre 2001")
## `geom_smooth()` using formula = 'y ~ x'
Let's also reproduce the residual plot (Figure 5 in St-Pierre, 2001):
plot(overall_regression,which=1)
title("Figure 5 in St Pierre (2001)",line=-2,adj=0.2)
So let's account for the heterogeneity, first using the fixed effects model approach presented by St-Pierre (2001) on page 745. This is not really a great idea, especially if the number of records per study is low and the distribution of Study effects is reasonably symmetric; the scope of inference is also then limited to these specific studies rather than to future studies (with implications for prediction intervals). A mixed (random) effects model analysis, with its broad scope of inference, would be much better, as we'll see later.
The following reproduces much of the first page of Figure 6 of St-Pierre (2001), based on the corner (SAS) parameterization (zero out the last factor level).
Dataregs1$Study = factor(Dataregs1$Study) # make sure Study is treated as a factor (it was read in as character)
contrasts(Dataregs1$Study) <- contr.SAS(nlevels(Dataregs1$Study)) # use the SAS parameterization for comparison with St-Pierre
fixed_model = lm(Y~X+Study+X:Study,data=Dataregs1)
summary(fixed_model) # reproduce the estimates provided on first page of Figure 6
##
## Call:
## lm(formula = Y ~ X + Study + X:Study, data = Dataregs1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9536 -0.2508 -0.0026 0.2249 1.1184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.690720 2.440294 2.742 0.007802 **
## X 1.197855 0.287972 4.160 9.15e-05 ***
## Study1 -9.768069 2.493187 -3.918 0.000210 ***
## Study2 -10.843705 2.501469 -4.335 4.93e-05 ***
## Study3 -10.766691 2.514929 -4.281 5.97e-05 ***
## Study4 -8.499209 2.714912 -3.131 0.002571 **
## Study5 -10.669785 2.785288 -3.831 0.000281 ***
## Study6 -9.427621 2.595558 -3.632 0.000540 ***
## Study7 -7.585221 2.503452 -3.030 0.003457 **
## Study8 -8.390132 2.691819 -3.117 0.002678 **
## Study9 -7.185186 2.551109 -2.816 0.006349 **
## Study10 -6.534671 2.507720 -2.606 0.011251 *
## Study11 -5.409765 2.753483 -1.965 0.053537 .
## Study12 -6.467757 2.520197 -2.566 0.012484 *
## Study13 -9.414579 2.832575 -3.324 0.001433 **
## Study14 -6.680089 2.956811 -2.259 0.027079 *
## Study15 -6.189303 2.546881 -2.430 0.017737 *
## Study16 -6.034778 2.666441 -2.263 0.026818 *
## Study17 -3.596203 3.006972 -1.196 0.235868
## Study18 -4.099714 2.621653 -1.564 0.122509
## Study19 -5.647652 2.834811 -1.992 0.050360 .
## X:Study1 -0.381141 0.323910 -1.177 0.243422
## X:Study2 -0.021995 0.320918 -0.069 0.945560
## X:Study3 0.196437 0.319985 0.614 0.541334
## X:Study4 -0.696974 0.376997 -1.849 0.068841 .
## X:Study5 0.074679 0.497844 0.150 0.881205
## X:Study6 -0.140550 0.368345 -0.382 0.703969
## X:Study7 -0.388225 0.313008 -1.240 0.219127
## X:Study8 -0.017739 0.397632 -0.045 0.964547
## X:Study9 -0.254489 0.324184 -0.785 0.435171
## X:Study10 -0.142429 0.305454 -0.466 0.642502
## X:Study11 -0.587067 0.419097 -1.401 0.165824
## X:Study12 -0.146479 0.308979 -0.474 0.636967
## X:Study13 0.264952 0.357991 0.740 0.461783
## X:Study14 0.180751 0.412285 0.438 0.662477
## X:Study15 -0.002865 0.313080 -0.009 0.992726
## X:Study16 -0.327600 0.329787 -0.993 0.324052
## X:Study17 -0.246595 0.367793 -0.670 0.504827
## X:Study18 0.011638 0.313315 0.037 0.970478
## X:Study19 0.263701 0.335030 0.787 0.433960
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5001 on 68 degrees of freedom
## Multiple R-squared: 0.9923, Adjusted R-squared: 0.9879
## F-statistic: 224.1 on 39 and 68 DF, p-value: < 2.2e-16
The following produces the least squares means for Study, evaluated at X = 0, as also provided at the top of page 747 of St-Pierre (2001):
library(emmeans)
lsmeans_study= emmeans(fixed_model,"Study")
## NOTE: Results may be misleading due to involvement in interactions
xgrid<-ref_grid(fixed_model,at=list(X=0)) # define the Study lsmeans at X = 0.
summary(xgrid)
## X Study prediction SE df
## 0 A -3.0773 0.511 68
## 0 B -4.1530 0.550 68
## 0 C -4.0760 0.608 68
## 0 D -1.8085 1.190 68
## 0 E -3.9791 1.343 68
## 0 F -2.7369 0.884 68
## 0 G -0.8945 0.559 68
## 0 H -1.6994 1.136 68
## 0 I -0.4945 0.744 68
## 0 J 0.1560 0.578 68
## 0 K 1.2810 1.275 68
## 0 L 0.2230 0.630 68
## 0 M -2.7239 1.438 68
## 0 N 0.0106 1.670 68
## 0 O 0.5014 0.729 68
## 0 P 0.6559 1.075 68
## 0 Q 3.0945 1.757 68
## 0 R 2.5910 0.958 68
## 0 S 1.0431 1.443 68
## 0 T 6.6907 2.440 68
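As an aside, and assuming I have the emmeans usage right, the same study-specific predictions at X = 0 can be requested in one step via the at argument:
emmeans(fixed_model, "Study", at = list(X = 0)) # should match the ref_grid() summary above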
The following is used to generate the study-specific slopes provided at the bottom of page 747 of St-Pierre (2001).
#Study specific slopes provided at bottom of page 747
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
# following reproduces the contrasts for the Study specific slopes in Equation [6] from St-Pierre
Slope_contrast = matrix(0, nrow=21, ncol=40) # 21 contrasts (overall slope + 20 studies) by 40 model coefficients
Slope_contrast[,2] = 1 # every contrast picks up the X coefficient
Slope_contrast[1,22:40] = 1/20 # overall slope: average the 19 X:Study deviations (Study 20's is zeroed out)
Slope_contrast[2:20,22:40] = diag(19) # Studies 1-19 each add their own X:Study deviation; Study 20 is the reference
colnames(Slope_contrast) = names(coef(fixed_model))
rownames(Slope_contrast) = c("Overall Slope",paste0('Study',1:20))
print(Slope_contrast) # these are provided in Equation [6] from St-Pierre
## (Intercept) X Study1 Study2 Study3 Study4 Study5 Study6 Study7
## Overall Slope 0 1 0 0 0 0 0 0 0
## Study1 0 1 0 0 0 0 0 0 0
## Study2 0 1 0 0 0 0 0 0 0
## Study3 0 1 0 0 0 0 0 0 0
## Study4 0 1 0 0 0 0 0 0 0
## Study5 0 1 0 0 0 0 0 0 0
## Study6 0 1 0 0 0 0 0 0 0
## Study7 0 1 0 0 0 0 0 0 0
## Study8 0 1 0 0 0 0 0 0 0
## Study9 0 1 0 0 0 0 0 0 0
## Study10 0 1 0 0 0 0 0 0 0
## Study11 0 1 0 0 0 0 0 0 0
## Study12 0 1 0 0 0 0 0 0 0
## Study13 0 1 0 0 0 0 0 0 0
## Study14 0 1 0 0 0 0 0 0 0
## Study15 0 1 0 0 0 0 0 0 0
## Study16 0 1 0 0 0 0 0 0 0
## Study17 0 1 0 0 0 0 0 0 0
## Study18 0 1 0 0 0 0 0 0 0
## Study19 0 1 0 0 0 0 0 0 0
## Study20 0 1 0 0 0 0 0 0 0
## Study8 Study9 Study10 Study11 Study12 Study13 Study14 Study15
## Overall Slope 0 0 0 0 0 0 0 0
## Study1 0 0 0 0 0 0 0 0
## Study2 0 0 0 0 0 0 0 0
## Study3 0 0 0 0 0 0 0 0
## Study4 0 0 0 0 0 0 0 0
## Study5 0 0 0 0 0 0 0 0
## Study6 0 0 0 0 0 0 0 0
## Study7 0 0 0 0 0 0 0 0
## Study8 0 0 0 0 0 0 0 0
## Study9 0 0 0 0 0 0 0 0
## Study10 0 0 0 0 0 0 0 0
## Study11 0 0 0 0 0 0 0 0
## Study12 0 0 0 0 0 0 0 0
## Study13 0 0 0 0 0 0 0 0
## Study14 0 0 0 0 0 0 0 0
## Study15 0 0 0 0 0 0 0 0
## Study16 0 0 0 0 0 0 0 0
## Study17 0 0 0 0 0 0 0 0
## Study18 0 0 0 0 0 0 0 0
## Study19 0 0 0 0 0 0 0 0
## Study20 0 0 0 0 0 0 0 0
## Study16 Study17 Study18 Study19 X:Study1 X:Study2 X:Study3
## Overall Slope 0 0 0 0 0.05 0.05 0.05
## Study1 0 0 0 0 1.00 0.00 0.00
## Study2 0 0 0 0 0.00 1.00 0.00
## Study3 0 0 0 0 0.00 0.00 1.00
## Study4 0 0 0 0 0.00 0.00 0.00
## Study5 0 0 0 0 0.00 0.00 0.00
## Study6 0 0 0 0 0.00 0.00 0.00
## Study7 0 0 0 0 0.00 0.00 0.00
## Study8 0 0 0 0 0.00 0.00 0.00
## Study9 0 0 0 0 0.00 0.00 0.00
## Study10 0 0 0 0 0.00 0.00 0.00
## Study11 0 0 0 0 0.00 0.00 0.00
## Study12 0 0 0 0 0.00 0.00 0.00
## Study13 0 0 0 0 0.00 0.00 0.00
## Study14 0 0 0 0 0.00 0.00 0.00
## Study15 0 0 0 0 0.00 0.00 0.00
## Study16 0 0 0 0 0.00 0.00 0.00
## Study17 0 0 0 0 0.00 0.00 0.00
## Study18 0 0 0 0 0.00 0.00 0.00
## Study19 0 0 0 0 0.00 0.00 0.00
## Study20 0 0 0 0 0.00 0.00 0.00
## X:Study4 X:Study5 X:Study6 X:Study7 X:Study8 X:Study9 X:Study10
## Overall Slope 0.05 0.05 0.05 0.05 0.05 0.05 0.05
## Study1 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study2 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study3 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study4 1.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study5 0.00 1.00 0.00 0.00 0.00 0.00 0.00
## Study6 0.00 0.00 1.00 0.00 0.00 0.00 0.00
## Study7 0.00 0.00 0.00 1.00 0.00 0.00 0.00
## Study8 0.00 0.00 0.00 0.00 1.00 0.00 0.00
## Study9 0.00 0.00 0.00 0.00 0.00 1.00 0.00
## Study10 0.00 0.00 0.00 0.00 0.00 0.00 1.00
## Study11 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study12 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study13 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study14 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study15 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study16 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study17 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study18 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study19 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Study20 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## X:Study11 X:Study12 X:Study13 X:Study14 X:Study15 X:Study16
## Overall Slope 0.05 0.05 0.05 0.05 0.05 0.05
## Study1 0.00 0.00 0.00 0.00 0.00 0.00
## Study2 0.00 0.00 0.00 0.00 0.00 0.00
## Study3 0.00 0.00 0.00 0.00 0.00 0.00
## Study4 0.00 0.00 0.00 0.00 0.00 0.00
## Study5 0.00 0.00 0.00 0.00 0.00 0.00
## Study6 0.00 0.00 0.00 0.00 0.00 0.00
## Study7 0.00 0.00 0.00 0.00 0.00 0.00
## Study8 0.00 0.00 0.00 0.00 0.00 0.00
## Study9 0.00 0.00 0.00 0.00 0.00 0.00
## Study10 0.00 0.00 0.00 0.00 0.00 0.00
## Study11 1.00 0.00 0.00 0.00 0.00 0.00
## Study12 0.00 1.00 0.00 0.00 0.00 0.00
## Study13 0.00 0.00 1.00 0.00 0.00 0.00
## Study14 0.00 0.00 0.00 1.00 0.00 0.00
## Study15 0.00 0.00 0.00 0.00 1.00 0.00
## Study16 0.00 0.00 0.00 0.00 0.00 1.00
## Study17 0.00 0.00 0.00 0.00 0.00 0.00
## Study18 0.00 0.00 0.00 0.00 0.00 0.00
## Study19 0.00 0.00 0.00 0.00 0.00 0.00
## Study20 0.00 0.00 0.00 0.00 0.00 0.00
## X:Study17 X:Study18 X:Study19
## Overall Slope 0.05 0.05 0.05
## Study1 0.00 0.00 0.00
## Study2 0.00 0.00 0.00
## Study3 0.00 0.00 0.00
## Study4 0.00 0.00 0.00
## Study5 0.00 0.00 0.00
## Study6 0.00 0.00 0.00
## Study7 0.00 0.00 0.00
## Study8 0.00 0.00 0.00
## Study9 0.00 0.00 0.00
## Study10 0.00 0.00 0.00
## Study11 0.00 0.00 0.00
## Study12 0.00 0.00 0.00
## Study13 0.00 0.00 0.00
## Study14 0.00 0.00 0.00
## Study15 0.00 0.00 0.00
## Study16 0.00 0.00 0.00
## Study17 1.00 0.00 0.00
## Study18 0.00 1.00 0.00
## Study19 0.00 0.00 1.00
## Study20 0.00 0.00 0.00
Slopes <- glht(fixed_model, linfct = Slope_contrast)
summary(Slopes)
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: lm(formula = Y ~ X + Study + X:Study, data = Dataregs1)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## Overall Slope == 0 1.07976 0.04794 22.521 < 0.001 ***
## Study1 == 0 0.81671 0.14829 5.508 < 0.001 ***
## Study2 == 0 1.17586 0.14163 8.302 < 0.001 ***
## Study3 == 0 1.39429 0.13951 9.994 < 0.001 ***
## Study4 == 0 0.50088 0.24331 2.059 0.57016
## Study5 == 0 1.27253 0.40610 3.134 0.05004 .
## Study6 == 0 1.05731 0.22967 4.604 < 0.001 ***
## Study7 == 0 0.80963 0.12266 6.600 < 0.001 ***
## Study8 == 0 1.18012 0.27419 4.304 0.00110 **
## Study9 == 0 0.94337 0.14889 6.336 < 0.001 ***
## Study10 == 0 1.05543 0.10185 10.362 < 0.001 ***
## Study11 == 0 0.61079 0.30449 2.006 0.61302
## Study12 == 0 1.05138 0.11198 9.389 < 0.001 ***
## Study13 == 0 1.46281 0.21267 6.878 < 0.001 ***
## Study14 == 0 1.37861 0.29504 4.673 < 0.001 ***
## Study15 == 0 1.19499 0.12285 9.728 < 0.001 ***
## Study16 == 0 0.87026 0.16072 5.415 < 0.001 ***
## Study17 == 0 0.95126 0.22879 4.158 0.00193 **
## Study18 == 0 1.20949 0.12344 9.798 < 0.001 ***
## Study19 == 0 1.46156 0.17122 8.536 < 0.001 ***
## Study20 == 0 1.19786 0.28797 4.160 0.00188 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
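Simultaneous confidence intervals for these study-specific slopes are also available directly from the same glht object:
confint(Slopes) # single-step adjusted simultaneous confidence intervals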
The following prints the overall intercept reported near the bottom of Figure 6 (page 747):
Overall_int_contrast = matrix(0, nrow=1, ncol=40) # one contrast across the 40 model coefficients
Overall_int_contrast[1,1] = 1 # the (Intercept) coefficient
Overall_int_contrast[1,3:21] = 0.05 # average over the 19 Study intercept deviations (Study 20's is zeroed out)
colnames(Overall_int_contrast) = names(coef(fixed_model))
print(Overall_int_contrast)
## (Intercept) X Study1 Study2 Study3 Study4 Study5 Study6 Study7 Study8
## [1,] 1 0 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05
## Study9 Study10 Study11 Study12 Study13 Study14 Study15 Study16 Study17
## [1,] 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05
## Study18 Study19 X:Study1 X:Study2 X:Study3 X:Study4 X:Study5 X:Study6
## [1,] 0.05 0.05 0 0 0 0 0 0
## X:Study7 X:Study8 X:Study9 X:Study10 X:Study11 X:Study12 X:Study13
## [1,] 0 0 0 0 0 0 0
## X:Study14 X:Study15 X:Study16 X:Study17 X:Study18 X:Study19
## [1,] 0 0 0 0 0 0
Overall_int <- glht(fixed_model, linfct = Overall_int_contrast)
summary(Overall_int)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: lm(formula = Y ~ X + Study + X:Study, data = Dataregs1)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 1 == 0 -0.4698 0.2647 -1.775 0.0804 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
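As a cross-check (a sketch, assuming emmeans averages over the 20 studies with equal weights, just as the contrast above does), the same overall intercept can be obtained as the estimated marginal mean at X = 0:
emmeans(fixed_model, ~ 1, at = list(X = 0)) # overall prediction at X = 0, averaged over Study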
Finally, the ANOVA table as also provided at the top of page 746 of St-Pierre (2001):
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
contrasts(Dataregs1$Study) <-contr.sum(nlevels(Dataregs1$Study)) # Type 3 ANOVA involving interactions with covariates only works well with contr.sum
fixed_model = lm(Y~X+Study+X:Study,data=Dataregs1) # rerun same model again
summary(fixed_model) # notice overall estimates of intercept and x match up with what is reported here.
##
## Call:
## lm(formula = Y ~ X + Study + X:Study, data = Dataregs1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9536 -0.2508 -0.0026 0.2249 1.1184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.46979 0.26472 -1.775 0.08043 .
## X 1.07976 0.04794 22.521 < 2e-16 ***
## Study1 -2.60756 0.55220 -4.722 1.21e-05 ***
## Study2 -3.68320 0.58494 -6.297 2.56e-08 ***
## Study3 -3.60618 0.63476 -5.681 3.04e-07 ***
## Study4 -1.33870 1.15940 -1.155 0.25228
## Study5 -3.50928 1.30099 -2.697 0.00880 **
## Study6 -2.26711 0.87964 -2.577 0.01213 *
## Study7 -0.42471 0.59253 -0.717 0.47596
## Study8 -1.22963 1.10988 -1.108 0.27181
## Study9 -0.02468 0.75358 -0.033 0.97397
## Study10 0.62584 0.60855 1.028 0.30741
## Study11 1.75074 1.23857 1.414 0.16206
## Study12 0.69275 0.65330 1.060 0.29272
## Study13 -2.25407 1.38985 -1.622 0.10947
## Study14 0.48042 1.60593 0.299 0.76574
## Study15 0.97120 0.74060 1.311 0.19414
## Study16 1.12573 1.05331 1.069 0.28896
## Study17 3.56430 1.68767 2.112 0.03837 *
## Study18 3.06079 0.94673 3.233 0.00189 **
## Study19 1.51285 1.39394 1.085 0.28162
## X:Study1 -0.26304 0.14862 -1.770 0.08124 .
## X:Study2 0.09610 0.14266 0.674 0.50282
## X:Study3 0.31454 0.14077 2.234 0.02874 *
## X:Study4 -0.57887 0.23575 -2.455 0.01663 *
## X:Study5 0.19278 0.38824 0.497 0.62111
## X:Study6 -0.02245 0.22310 -0.101 0.92014
## X:Study7 -0.27013 0.12586 -2.146 0.03542 *
## X:Study8 0.10036 0.26451 0.379 0.70555
## X:Study9 -0.13639 0.14916 -0.914 0.36375
## X:Study10 -0.02433 0.10787 -0.226 0.82223
## X:Study11 -0.46897 0.29282 -1.602 0.11388
## X:Study12 -0.02838 0.11655 -0.243 0.80836
## X:Study13 0.38305 0.20738 1.847 0.06908 .
## X:Study14 0.29885 0.28398 1.052 0.29636
## X:Study15 0.11523 0.12602 0.914 0.36372
## X:Study16 -0.20950 0.15983 -1.311 0.19435
## X:Study17 -0.12850 0.22228 -0.578 0.56512
## X:Study18 0.12974 0.12654 1.025 0.30888
## X:Study19 0.38180 0.16936 2.254 0.02740 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5001 on 68 degrees of freedom
## Multiple R-squared: 0.9923, Adjusted R-squared: 0.9879
## F-statistic: 224.1 on 39 and 68 DF, p-value: < 2.2e-16
anova(fixed_model) # Type 1 (sequential) ANOVA, which is order-dependent (never look at those!)
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X 1 1733.39 1733.39 6931.2132 <2e-16 ***
## Study 19 443.34 23.33 93.3032 <2e-16 ***
## X:Study 19 9.08 0.48 1.9111 0.0273 *
## Residuals 68 17.01 0.25
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(fixed_model,type=3)
## Anova Table (Type III tests)
##
## Response: Y
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.788 1 3.1495 0.08043 .
## X 126.846 1 507.2124 < 2.2e-16 ***
## Study 32.950 19 6.9345 1.06e-09 ***
## X:Study 9.081 19 1.9111 0.02730 *
## Residuals 17.006 68
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lsmeans_study= emmeans(fixed_model,"Study") # doesn't change the least squares means for study though
## NOTE: Results may be misleading due to involvement in interactions
Treating Study-specific intercepts and slopes now as random effects. This is also known as a random coefficients model, or as a random regression model by some of your animal breeding colleagues.
The following reproduces much of Figure 7:
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
mixed_model = lmer(Y~X+(1+X|Study),data=Dataregs1)
anova(mixed_model)
## Analysis of Variance Table
## npar Sum Sq Mean Sq F value
## X 1 94.645 94.645 378.01
summary(mixed_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Y ~ X + (1 + X | Study)
## Data: Dataregs1
##
## REML criterion at convergence: 270.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.88372 -0.60598 -0.09681 0.49121 2.27511
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Study (Intercept) 5.41392 2.3268
## X 0.03305 0.1818 0.46
## Residual 0.25038 0.5004
## Number of obs: 108, groups: Study, 20
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.60846 0.56284 -1.081
## X 1.09741 0.05644 19.442
##
## Correlation of Fixed Effects:
## (Intr)
## X 0.056
vc = VarCorr(mixed_model)
print(vc,comp=c("Variance","Std.Dev")) # print the variance component estimates
## Groups Name Variance Std.Dev. Cov
## Study (Intercept) 5.413919 2.32678
## X 0.033046 0.18179 0.196
## Residual 0.250376 0.50038
coef(mixed_model) # these don't look like the bottom of page 750 or top of page 751 because they need to be centered around zero.
## $Study
## (Intercept) X
## A -3.28911461 0.8882510
## B -3.72438914 1.0647173
## C -3.21535418 1.1922856
## D -3.35297563 0.8237472
## E -3.08571480 1.0105673
## F -2.56159285 1.0143345
## G -1.32267530 0.9096754
## H -1.31595335 1.0876151
## I -0.83719211 1.0141745
## J 0.01538269 1.0806032
## K -0.38217246 1.0119980
## L 0.05088920 1.0822443
## M -0.94631057 1.1983730
## N 0.86712305 1.2223537
## O 0.56324065 1.1832327
## P -0.25555936 1.0077810
## Q 1.51550372 1.1558143
## R 2.32133132 1.2430061
## S 1.97797279 1.3476773
## T 4.80838661 1.4098375
##
## attr(,"class")
## [1] "coef.mer"
The residual plot:
plot(mixed_model,which=1,main = "Residual plot for Mixed Effects Model")
Bottom of Figure 7 from St-Pierre (2001): the Study random effects for intercept and slope, expressed as deviations from the mean.
Study_specific_int = coef(mixed_model)$Study["(Intercept)"]-mean(coef(mixed_model)$Study["(Intercept)"][,1])
Study_specific_slope = coef(mixed_model)$Study["X"]-mean(coef(mixed_model)$Study["X"][,1])
data.frame(Study_specific_int,Study_specific_slope)
## X.Intercept. X
## A -2.6806559 -0.209163467
## B -3.1159304 -0.032697176
## C -2.6068955 0.094871152
## D -2.7445169 -0.273667196
## E -2.4772561 -0.086847191
## F -1.9531341 -0.083079931
## G -0.7142166 -0.187739032
## H -0.7074946 -0.009799376
## I -0.2287334 -0.083239922
## J 0.6238414 -0.016811291
## K 0.2262863 -0.085416483
## L 0.6593479 -0.015170171
## M -0.3378519 0.100958533
## N 1.4755818 0.124939285
## O 1.1716994 0.085818242
## P 0.3528994 -0.089633453
## Q 2.1239624 0.058399874
## R 2.9297900 0.145591655
## S 2.5864315 0.250262905
## T 5.4168453 0.312423043
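For comparison, lme4's ranef() returns the study-level BLUPs directly, i.e., deviations from the fixed-effect intercept and slope; these differ slightly from the mean-centered values above because the average of the study-specific coefficients need not equal the fixed-effect estimates exactly.
ranef(mixed_model)$Study # BLUPs: deviations from the fixed effects (coef = fixef + ranef)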
Figure 10 from St-Pierre (2001)
Xmat = cbind(1,Dataregs1$X)
parmest = coef(summary(mixed_model))[,'Estimate']
y_adj=Xmat%*%parmest+resid(mixed_model)
data_adj = data.frame(Dataregs1$X,y_adj)
colnames(data_adj) = c("X","Y_adj")
ggplot(data_adj,aes(x=X,y=Y_adj)) + geom_point() +
scale_x_continuous(breaks=seq(0,12,2)) + scale_y_continuous(breaks=seq(-4,20,2))+
geom_smooth(method=lm, se=FALSE) +
geom_abline(intercept =0, slope = 1,lty=2) +
ggtitle("Figure 10 from St-Pierre 2001")
## `geom_smooth()` using formula = 'y ~ x'
We'll extend St-Pierre (2001) and conduct a separate regression analysis for each study; we first need to write a function for this (a quick alternative is also sketched below).
linear_model = function(df) {
lm(Y~X,data=df)
}
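As that quick alternative, lme4's lmList() fits a separate regression per study in one line, though the tidyverse pipeline below makes it easier to collect the estimates and standard errors into a single tibble:
separate_fits = lme4::lmList(Y ~ X | Study, data = Dataregs1) # one lm() fit per study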
library(broom) # needed for tidy function below.
Dataregs1_analysis = Dataregs1 %>%
group_by(Study) %>%
nest() %>%
mutate(model = map(data,linear_model)) %>%
mutate(tidied=map(model,tidy)) %>%
mutate(nrec = as.numeric(map(data,nrow))) %>%
unnest(tidied) %>%
mutate(term = ifelse(term=="(Intercept)","Intercept",term))%>%
dplyr::select(-c(data,model))
data.frame(Dataregs1_analysis)
## Study term estimate std.error statistic p.value nrec
## 1 A Intercept -3.07734838 0.76656078 -4.01448711 1.593713e-02 6
## 2 A X 0.81671421 0.22252478 3.67021698 2.138706e-02 6
## 3 B Intercept -4.15298434 0.63281610 -6.56270335 7.195518e-03 5
## 4 B X 1.17586084 0.16301208 7.21333571 5.492902e-03 5
## 5 C Intercept -4.07597045 0.61009504 -6.68087785 2.609649e-03 6
## 6 C X 1.39429265 0.13995723 9.96227671 5.702866e-04 6
## 7 D Intercept -1.80848842 1.09619111 -1.64979300 1.743302e-01 6
## 8 D X 0.50088152 0.22415810 2.23450111 8.916425e-02 6
## 9 E Intercept -3.97906524 NaN NaN NaN 2
## 10 E X 1.27253453 NaN NaN NaN 2
## 11 F Intercept -2.73690034 0.28862499 -9.48254800 6.901111e-04 6
## 12 F X 1.05730564 0.07496731 14.10355530 1.466964e-04 6
## 13 G Intercept -0.89450057 0.76420586 -1.17049688 3.067779e-01 6
## 14 G X 0.80963066 0.16775732 4.82620172 8.484667e-03 6
## 15 H Intercept -1.69941157 0.86675884 -1.96065097 1.889651e-01 4
## 16 H X 1.18011620 0.20917948 5.64164426 3.001152e-02 4
## 17 I Intercept -0.49446586 0.48942691 -1.01029562 3.695035e-01 6
## 18 I X 0.94336636 0.09797836 9.62831343 6.506493e-04 6
## 19 J Intercept 0.15604943 0.53619794 0.29102953 7.854956e-01 6
## 20 J X 1.05542688 0.09455099 11.16251527 3.666204e-04 6
## 21 K Intercept 1.28095536 0.82704622 1.54883165 2.191841e-01 5
## 22 K X 0.61078846 0.19745010 3.09338141 5.357182e-02 5
## 23 L Intercept 0.22296281 0.83524943 0.26694159 7.984519e-01 8
## 24 L X 1.05137687 0.14856809 7.07673413 3.991357e-04 8
## 25 M Intercept -2.72385882 0.91929825 -2.96297617 5.940186e-02 5
## 26 M X 1.46280711 0.13593939 10.76073034 1.716349e-03 5
## 27 N Intercept 0.01063077 0.96092228 0.01106309 9.929573e-01 3
## 28 N X 1.37860631 0.16980599 8.11871399 7.802089e-02 3
## 29 O Intercept 0.50141758 0.77961237 0.64316268 5.484495e-01 7
## 30 O X 1.19499058 0.13135924 9.09711861 2.686212e-04 7
## 31 P Intercept 0.65594247 0.82834030 0.79187559 4.727463e-01 6
## 32 P X 0.87025577 0.12388376 7.02477702 2.163288e-03 6
## 33 Q Intercept 3.09451685 1.43899304 2.15047382 1.206540e-01 5
## 34 Q X 0.95126025 0.18738448 5.07651558 1.476401e-02 5
## 35 R Intercept 2.59100578 0.99595024 2.60154140 3.534577e-02 9
## 36 R X 1.20949348 0.12831436 9.42601815 3.154529e-05 9
## 37 S Intercept 1.04306823 1.26255143 0.82615900 4.692870e-01 5
## 38 S X 1.46155673 0.14985181 9.75334718 2.289886e-03 5
## 39 T Intercept 6.69072017 NaN NaN NaN 2
## 40 T X 1.19785542 NaN NaN NaN 2
We probably shouldn't include studies with only 2 records: fitting a two-parameter regression to 2 points leaves zero residual degrees of freedom, so the standard errors are not defined! Remove studies 'E' and 'T'.
Let's redo the random coefficients mixed model analysis on the original raw data, but without those two studies.
We'll use this as the "gold standard" analysis.
Dataregs1a = Dataregs1 %>%
filter(!Study %in% c('E', 'T')) %>%
droplevels()
mixed_model = lmer(Y~X+(1+X|Study),data=Dataregs1a)
summary(mixed_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Y ~ X + (1 + X | Study)
## Data: Dataregs1a
##
## REML criterion at convergence: 250.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8504 -0.6395 -0.1013 0.4883 2.2890
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Study (Intercept) 3.72381 1.9297
## X 0.03215 0.1793 0.33
## Residual 0.25122 0.5012
## Number of obs: 104, groups: Study, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.78266 0.50278 -1.557
## X 1.08502 0.05775 18.787
##
## Correlation of Fixed Effects:
## (Intr)
## X -0.063
Meta-analysis software typically requires either the weights (inverse squared standard errors) or the sampling variances (squared standard errors):
Dataregs1_analysis = Dataregs1_analysis %>%
na.omit() %>%
mutate(weight = (1/std.error^2)) %>%
mutate(sampvar = std.error^2)
head(Dataregs1_analysis)
## # A tibble: 6 × 9
## # Groups: Study [3]
## Study term estimate std.error statistic p.value nrec weight sampvar
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A Intercept -3.08 0.767 -4.01 0.0159 6 1.70 0.588
## 2 A X 0.817 0.223 3.67 0.0214 6 20.2 0.0495
## 3 B Intercept -4.15 0.633 -6.56 0.00720 5 2.50 0.400
## 4 B X 1.18 0.163 7.21 0.00549 5 37.6 0.0266
## 5 C Intercept -4.08 0.610 -6.68 0.00261 6 2.69 0.372
## 6 C X 1.39 0.140 9.96 0.000570 6 51.1 0.0196
# put the study specific intercepts and study specific slopes in separate files.
Dataregs_intercepts = Dataregs1_analysis %>% filter(term=="Intercept")
Dataregs_slopes = Dataregs1_analysis %>% filter(term=="X")
head(Dataregs_intercepts)
## # A tibble: 6 × 9
## # Groups: Study [6]
## Study term estimate std.error statistic p.value nrec weight sampvar
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A Intercept -3.08 0.767 -4.01 0.0159 6 1.70 0.588
## 2 B Intercept -4.15 0.633 -6.56 0.00720 5 2.50 0.400
## 3 C Intercept -4.08 0.610 -6.68 0.00261 6 2.69 0.372
## 4 D Intercept -1.81 1.10 -1.65 0.174 6 0.832 1.20
## 5 F Intercept -2.74 0.289 -9.48 0.000690 6 12.0 0.0833
## 6 G Intercept -0.895 0.764 -1.17 0.307 6 1.71 0.584
head(Dataregs_slopes)
## # A tibble: 6 × 9
## # Groups: Study [6]
## Study term estimate std.error statistic p.value nrec weight sampvar
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A X 0.817 0.223 3.67 0.0214 6 20.2 0.0495
## 2 B X 1.18 0.163 7.21 0.00549 5 37.6 0.0266
## 3 C X 1.39 0.140 9.96 0.000570 6 51.1 0.0196
## 4 D X 0.501 0.224 2.23 0.0892 6 19.9 0.0502
## 5 F X 1.06 0.0750 14.1 0.000147 6 178. 0.00562
## 6 G X 0.810 0.168 4.83 0.00848 6 35.5 0.0281
write_csv(Dataregs_slopes,"Dataregs_slopes.csv")
The following "common effects" analysis is widely used yet is typically the wrong analysis if there is indeed study-to-study heterogeneity. Problem: it often substantially understates the uncertainty of the meta-estimates themselves.
# COMMON EFFECTS MODEL
cat("Overall estimated regression coefficient",sep='\n')
## Overall estimated regression coefficient
(weighted.mean(Dataregs_slopes$estimate,Dataregs_slopes$weight))
## [1] 1.086413
cat("Estimated Standard Error of Overall estimated regression coefficient",sep='\n')
## Estimated Standard Error of Overall estimated regression coefficient
(stderr = (sum(Dataregs_slopes$weight)^(-1/2)))
## [1] 0.03165859
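In symbols, with study-specific slope estimates \(\hat{\beta}_i\) and weights \(w_i = 1/SE_i^2\), the common effects estimator and its standard error are:
\[\hat{\beta}_{CE} = \frac{\sum_i w_i \hat{\beta}_i}{\sum_i w_i}, \qquad SE\left(\hat{\beta}_{CE}\right) = \left(\sum_i w_i\right)^{-1/2}\]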
Relative to the analysis of the original data, the point estimate seems fine, but the estimated standard error is understated.
We can use linear mixed model software (the glmmTMB package) to do the same CE analysis as above. The trick is to hold the residual variance constant at 1 and specify the weights as the inverses of the sampling variances.
For more details on how to fix variance components in linear mixed models with glmmTMB in R, refer to https://stackoverflow.com/questions/75569862/linear-mixed-model-with-fix-var-component-in-r
It is really much easier to do this sort of thing in SAS. See https://pubmed.ncbi.nlm.nih.gov/27111798/ and https://link.springer.com/article/10.1186/1471-2288-14-61
library(glmmTMB)
CE_1 <- glmmTMB(estimate ~ 1 ,
weights = weight,
family = gaussian,
REML=TRUE,
data = Dataregs_slopes,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
summary(CE_1)
## Family: gaussian ( identity )
## Formula: estimate ~ 1
## Data: Dataregs_slopes
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 1887.5 1888.3 -942.7 1885.5 18
##
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.08641 0.03166 34.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Now using the R package metafor (https://wviechtb.github.io/metafor/):
library(metafor)
## Warning: package 'metafor' was built under R version 4.3.3
## Loading required package: metadat
## Warning: package 'metadat' was built under R version 4.3.3
## Loading required package: numDeriv
##
## Loading the 'metafor' package (version 4.6-0). For an
## introduction to the package please type: help(metafor)
##
## Attaching package: 'metafor'
## The following object is masked from 'package:car':
##
## vif
(res_EE = rma.uni(yi=estimate ,vi=sampvar,data=Dataregs_slopes,method="EE"))
##
## Equal-Effects Model (k = 18)
##
## I^2 (total heterogeneity / total variability): 63.57%
## H^2 (total variability / sampling variability): 2.75
##
## Test for Heterogeneity:
## Q(df = 17) = 46.6657, p-val = 0.0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 1.0864 0.0317 34.3165 <.0001 1.0244 1.1485 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A forest plot is a great way to visualize the uncertainty of the individual studies along with the meta-estimate and meta-CI (bottom row):
forest(res_EE)
The following suboptimal analysis uses the same weights (inverse sampling variances) but "estimates" the residual variance rather than fixing it at 1:
library(glmmTMB)
CE_suboptim <- glmmTMB(estimate ~ 1 ,
weights = weight,
family = gaussian,
REML=TRUE,
data = Dataregs_slopes
)
summary(CE_suboptim)
## Family: gaussian ( identity )
## Formula: estimate ~ 1
## Data: Dataregs_slopes
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## -212.0 -210.2 108.0 -216.0 17
##
##
## Dispersion estimate for gaussian family (sigma^2): 0.0468
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.08641 0.00685 158.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Rescaling the weights to average 1 does not fix this either; the residual variance is still "estimated", and the standard error changes yet again:
meanweight = mean(Dataregs_slopes$weight)
Dataregs_slopes1 = Dataregs_slopes %>%
mutate(scaledweight = weight/meanweight)
write_csv(Dataregs_slopes1,"Dataregs_slopes1.csv")
library(glmmTMB)
CE_suboptim2 <- glmmTMB(estimate ~ 1 ,
weights = scaledweight,
family = gaussian,
REML=TRUE,
data = Dataregs_slopes1
)
summary(CE_suboptim2)
## Family: gaussian ( identity )
## Formula: estimate ~ 1
## Data: Dataregs_slopes1
## Weights: scaledweight
##
## AIC BIC logLik deviance df.resid
## 4.0 5.8 0.0 0.0 17
##
##
## Dispersion estimate for gaussian family (sigma^2): 0.0495
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.08641 0.05245 20.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
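The reason the standard error keeps changing: in an intercept-only weighted least squares fit with weights \(w_i\), the variance of the estimator is
\[Var\left(\hat{\beta}\right) = \frac{\sigma^2}{\sum_i w_i}\]
so fixing \(\sigma^2 = 1\) with \(w_i = 1/SE_i^2\) reproduces the common effects standard error, whereas estimating \(\sigma^2\) (or rescaling the weights) rescales the standard error arbitrarily; e.g., \(\sqrt{0.0495/18} \approx 0.0524\) matches the output immediately above.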
A mixed model analysis more appropriately accounts for between-study heterogeneity in the estimated intercept and slope effects!
We fit this heterogeneous effects model using mixed model software; the trick, again, is to hold the residual variance constant at 1 (this should be done!).
Compare with the mixed model analysis on the raw data: notice that the estimated Study variance component below (0.039) closely corresponds to the slope variance component estimated from the raw data above (0.032).
RM_1 <- glmmTMB(estimate ~ 1 + (1|Study),
weights = weight,
family = gaussian,
REML=TRUE,
data = Dataregs_slopes,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
## Warning: contrasts dropped from factor Study due to missing levels
summary(RM_1)
## Family: gaussian ( identity )
## Formula: estimate ~ 1 + (1 | Study)
## Data: Dataregs_slopes
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 1879.3 1881.1 -937.7 1875.3 17
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Study (Intercept) 0.03898 0.1974
## Residual 1.00000 1.0000
## Number of obs: 18, groups: Study, 18
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0810 0.0588 18.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The above mixed effects model accounting for Study heterogeneity would not even run if you tried to estimate the residual variance (you would get an error message).
Now the heterogeneous effects model using metafor. Notice that metafor requires the specification of sampling variances rather than weights (the two are just inverses of each other):
(res_REML = rma.uni(yi=estimate,vi=sampvar,data=Dataregs_slopes,method="REML")) # default
##
## Random-Effects Model (k = 18; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 0.0390 (SE = 0.0209)
## tau (square root of estimated tau^2 value): 0.1974
## I^2 (total heterogeneity / total variability): 67.72%
## H^2 (total variability / sampling variability): 3.10
##
## Test for Heterogeneity:
## Q(df = 17) = 46.6657, p-val = 0.0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 1.0810 0.0586 18.4410 <.0001 0.9661 1.1959 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest(res_REML)
Compare the width of the confidence interval for the mixed model (immediately above, the bottom interval of [0.97, 1.20]) to that previously provided for the common effects model.
rstudent.rma.uni(res_REML) # studentized deleted residuals, one per study
##
## resid se z
## 1 -0.2749 0.3040 -0.9042
## 2 0.1010 0.2708 0.3728
## 3 0.3322 0.2425 1.3698
## 4 -0.6040 0.2889 -2.0902
## 5 -0.0243 0.2341 -0.1039
## 6 -0.2862 0.2638 -1.0849
## 7 0.1042 0.2996 0.3477
## 8 -0.1473 0.2356 -0.6253
## 9 -0.0263 0.2402 -0.1095
## 10 -0.4928 0.2717 -1.8136
## 11 -0.0304 0.2637 -0.1154
## 12 0.4044 0.2294 1.7631
## 13 0.3133 0.2654 1.1806
## 14 0.1222 0.2534 0.4825
## 15 -0.2249 0.2411 -0.9327
## 16 -0.1355 0.2840 -0.4771
## 17 0.1378 0.2512 0.5483
## 18 0.4016 0.2418 1.6608
### calculate influence diagnostics
inf <- influence(res_REML)
### plot the influence diagnostics
plot(inf)
dfbetas(res_REML)
##
## intrcpt
## 1 -0.1808
## 2 0.1005
## 3 0.3304
## 4 -0.4415
## 5 -0.0107
## 6 -0.2555
## 7 0.0834
## 8 -0.1616
## 9 -0.0123
## 10 -0.4113
## 11 -0.0137
## 12 0.4139
## 13 0.2691
## 14 0.1358
## 15 -0.2418
## 16 -0.0966
## 17 0.1527
## 18 0.3795
anova(CE_1,RM_1)
## Data: Dataregs_slopes
## Models:
## CE_1: estimate ~ 1, zi=~0, disp=~1
## RM_1: estimate ~ 1 + (1 | Study), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## CE_1 1 1887.5 1888.3 -942.73 1885.5
## RM_1 2 1879.3 1881.1 -937.67 1875.3 10.123 1 0.001464 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# this works too:
anova(res_EE,res_REML)
##
## df AIC BIC AICc logLik LRT pval QE tau^2
## Full 2 6.9167 8.5831 7.7738 -1.4583 46.6657 0.0390
## Reduced 1 15.0398 15.8730 15.3065 -6.5199 10.1231 0.0015 46.6657 0.0000
## R^2
## Full
## Reduced 0.0000%
We could also do the same thing for the intercepts. Let's do the mixed (random effects) model analysis there:
(res_int_REML = rma.uni(yi=estimate,vi=sampvar,data=Dataregs_intercepts,method="REML")) # default
##
## Random-Effects Model (k = 18; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 3.6391 (SE = 1.4859)
## tau (square root of estimated tau^2 value): 1.9076
## I^2 (total heterogeneity / total variability): 88.43%
## H^2 (total variability / sampling variability): 8.64
##
## Test for Heterogeneity:
## Q(df = 17) = 136.8109, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## -0.7864 0.4916 -1.5998 0.1096 -1.7498 0.1770
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest(res_int_REML)
Many statisticians advocate a multivariate mixed model analysis when inferring upon two or more "effect sizes" in a meta-analysis, since those effect sizes are likely to be correlated. In fact, even if you're just interested in estimating the slope (not the intercept), it may pay to conduct a multivariate analysis, as we'll see in other examples.
I can think of two possible coding strategies for this. Again, the SAS options provided by Madden et al. (2016) (https://pubmed.ncbi.nlm.nih.gov/27111798/) might be more comprehensive than what is available in R.
Using glmmTMB (mixed model analysis):
RMM_1 <- glmmTMB(estimate ~ term + (term-1|Study),
weights = weight,
family = gaussian,
REML=TRUE,
data = Dataregs1_analysis,
start = list(betad = log(1)), # fix residual variance = 1
map = list(betad = factor(NA))
)
## Warning: contrasts dropped from factor Study due to missing levels
summary(RMM_1)
## Family: gaussian ( identity )
## Formula: estimate ~ term + (term - 1 | Study)
## Data: Dataregs1_analysis
## Weights: weight
##
## AIC BIC logLik deviance df.resid
## 2012.0 2019.9 -1001.0 2002.0 33
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev. Corr
## Study termIntercept 3.60318 1.8982
## termX 0.03827 0.1956 -0.27
## Residual 1.00000 1.0000
## Number of obs: 36, groups: Study, 18
##
## Dispersion estimate for gaussian family (sigma^2): 1
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7954 0.4910 -1.620 0.105249
## termX 1.8774 0.5057 3.713 0.000205 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
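Note that under R's default treatment coding, the termX row estimates the difference between the two meta-estimates rather than the slope itself: \(-0.7954 + 1.8774 = 1.0820\). The emmeans call below recovers the meta-estimate for each term.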
lsmeans_RMM_1= emmeans(RMM_1,"term")
summary(lsmeans_RMM_1)
## term emmean SE df lower.CL upper.CL
## Intercept -0.795 0.4910 33 -1.794 0.204
## X 1.082 0.0584 33 0.963 1.201
##
## Confidence level used: 0.95
Using metafor (the forest plot might need some work for better-formatted output):
(res.mv <- rma.mv(estimate, sampvar, mods = ~ term -1, random = ~ term | Study, struct="UN", data=Dataregs1_analysis))
##
## Multivariate Meta-Analysis Model (k = 36; method: REML)
##
## Variance Components:
##
## outer factor: Study (nlvls = 18)
## inner factor: term (nlvls = 2)
##
## estim sqrt k.lvl fixed level
## tau^2.1 3.6032 1.8982 18 no Intercept
## tau^2.2 0.0383 0.1956 18 no X
##
## rho.Intr rho.X Intr X
## Intercept 1 - 18
## X -0.2674 1 no -
##
## Test for Residual Heterogeneity:
## QE(df = 34) = 183.4766, p-val < .0001
##
## Test of Moderators (coefficients 1:2):
## QM(df = 2) = 349.6061, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## termIntercept -0.7954 0.4894 -1.6252 0.1041 -1.7546 0.1638
## termX 1.0820 0.0582 18.5847 <.0001 0.9679 1.1961 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest(res.mv)
Whereas confidence intervals are useful for assessing the reliability of the meta-estimate of the corresponding parameter, the prediction interval indicates the plausible range of responses in future experiments.
This may be more pertinent for an "extension"-based message, i.e., relaying to farmers, consultants, etc. what uncertainty they might anticipate with respect to their own operations.
The following is just for the slope analyses.
# confirming the metafor CI on the effect size using the mixed model analysis.
Studyvar = c(((VarCorr(RM_1))["cond"]$cond$Study))
overallest = coef(summary(RM_1))$cond
cat("Confidence Interval",sep="\n")
## Confidence Interval
(LCL = overallest[1,"Estimate"]- 1.96*overallest[1,"Std. Error"])
## [1] 0.9657192
(UCL = overallest[1,"Estimate"]+ 1.96*overallest[1,"Std. Error"])
## [1] 1.19622
# Prediction intervals
# useful for determining uncertainty for a future experiment.
cat("Prediction Interval",sep="\n")
## Prediction Interval
(LPL = overallest[1,"Estimate"]- 1.96*sqrt(overallest[1,"Std. Error"]^2 + Studyvar))
## [1] 0.6771959
(UPL = overallest[1,"Estimate"]+ 1.96*sqrt(overallest[1,"Std. Error"]^2 + Studyvar))
## [1] 1.484744
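metafor computes both intervals directly: for an rma object, predict() returns the confidence interval (ci.lb, ci.ub) along with the prediction interval (pi.lb, pi.ub), which should closely match the hand calculations above.
predict(res_REML) # returns pred, se, ci.lb, ci.ub, pi.lb, pi.ub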