Meta analysis demo

Read data

We’re reading data from Kevin’s database, selecting only data where only CO2 was manipulated, from experiments that lasted more than 2 years, and include OTC and FACE experiments. This is a demo and we’re using just ANPP data (any of c("aboveground_production", "annual_aboveground_biomass_production", "ANPP")).

df <- read_csv("~/data/gcme/data_received_190325/NewData_wide_CORRECTED2.csv") %>%
  
  # something is wrong in columns ambient_Sd, ambient_Se, elevated...
  mutate( ambient_Sd  = as.numeric(ambient_Sd),  ambient_Se  = as.numeric(ambient_Se), 
          elevated_Sd = as.numeric(elevated_Sd), elevated_Se = as.numeric(elevated_Se) )
  
# save experiments names
df_experiments <- df %>% select(exp_nam, prev_name) %>% distinct()

# take only experiments >1 yr
list_exp_gt1yr <- df %>% 
  filter(!is.na(Year)) %>% 
  group_by(exp_nam) %>% 
  summarise(nyears=max(Year)) %>% 
  filter(nyears>1) %>% 
  select(exp_nam) %>% 
  unlist() %>% 
  unname()

selvars <- c("aboveground_production", "annual_aboveground_biomass_production", "ANPP")

df_c <- df %>%
  
  ## filter experiments with only manipulated CO2
  filter(treatment=="c") %>% 
  
  ## More than 1 year data
  filter(exp_nam %in% list_exp_gt1yr) %>% 
  
  ## Combine with experiments meta info
  left_join( 
    read_csv("~/data/gcme/data_received_190325/table_var_exp_names_experiments.csv") %>% 
      select(prev_name, Fumigation_type, Vegetation_type),
    by = c("prev_name")
  ) %>% 
  
  ## Filter only Fumigation_type OTC or FACE
  filter( Fumigation_type %in% c("OTC", "FACE") ) %>%
  
  ## reduce for demo
  dplyr::select(exp_nam, factors, treatment, Data_type, Sampling_Year, ambient, ambient_Sd = ambient_Sd, ambient_Se, elevated, elevated_Sd, elevated_Se, n_plots) %>% 
  
  ## use only ANPP data
  filter(Data_type %in% selvars) %>% 
  mutate(varnam = "anpp") %>% 
  
  ## create a unique indendifying
  mutate(id = paste0("i", 1:n()))

The data now looks like this:

df_c %>% 
  knitr::kable()

exp_nam	factors	treatment	Data_type	Sampling_Year	ambient	ambient_Sd	ambient_Se	elevated	elevated_Sd	elevated_Se	n_plots	varnam	id
EUROFACE4_pa	cf	c	aboveground_production	2002	12.760	NA	0.9100	12.420	NA	1.22000	3	anpp	i1
EUROFACE4_pe	cf	c	aboveground_production	2002	11.420	NA	1.5600	11.210	NA	2.70000	3	anpp	i2
EUROFACE4_pn	cf	c	aboveground_production	2002	11.410	NA	0.7900	13.400	NA	0.68000	3	anpp	i3
EUROFACE4_pa	cf	c	annual_aboveground_biomass_production	2002	15.830	NA	0.9300	15.280	NA	1.40000	3	anpp	i4
EUROFACE4_pa	cf	c	annual_aboveground_biomass_production	2003	24.790	NA	0.9300	24.890	NA	1.21000	3	anpp	i5
EUROFACE4_pe	cf	c	annual_aboveground_biomass_production	2002	14.930	NA	2.2400	14.660	NA	2.52000	3	anpp	i6
EUROFACE4_pe	cf	c	annual_aboveground_biomass_production	2003	24.450	NA	3.4500	23.340	NA	2.52000	3	anpp	i7
EUROFACE4_pn	cf	c	annual_aboveground_biomass_production	2002	21.400	NA	1.4000	21.780	NA	1.12000	3	anpp	i8
EUROFACE4_pn	cf	c	annual_aboveground_biomass_production	2003	33.150	NA	1.3000	32.980	NA	3.64000	3	anpp	i9
TasFACE	cw	c	ANPP	NA	142.900	NA	53.3000	150.500	NA	9.70000	3	anpp	i10
TasFACE	cw	c	ANPP	NA	35.700	NA	12.1000	129.600	NA	40.40000	3	anpp	i11
SwissFACE_lolium2	cf	c	ANPP	1993	818.900	NA	36.0000	820.400	NA	36.00000	3	anpp	i12
SwissFACE_lolium2	cf	c	ANPP	1993	844.000	NA	36.0000	908.300	NA	36.00000	3	anpp	i13
SwissFACE_lolium2	cf	c	ANPP	1994	572.600	NA	80.9000	457.200	NA	80.90000	3	anpp	i14
SwissFACE_lolium2	cf	c	ANPP	1994	885.100	NA	80.9000	850.600	NA	80.90000	3	anpp	i15
SwissFACE_lolium2	cf	c	ANPP	1995	703.500	NA	89.9000	615.000	NA	89.90000	3	anpp	i16
SwissFACE_lolium2	cf	c	ANPP	1995	1096.900	NA	89.9000	1242.100	NA	89.90000	3	anpp	i17
SwissFACE_trifolium2	cf	c	ANPP	1993	1118.600	NA	19.5000	1391.400	NA	19.50000	3	anpp	i18
SwissFACE_trifolium2	cf	c	ANPP	1993	1073.600	NA	34.5000	1182.200	NA	34.50000	3	anpp	i19
SwissFACE_trifolium2	cf	c	ANPP	1994	1123.500	NA	60.5000	1396.200	NA	60.50000	3	anpp	i20
SwissFACE_trifolium2	cf	c	ANPP	1994	1086.800	NA	43.2000	1316.400	NA	43.20000	3	anpp	i21
SwissFACE_trifolium2	cf	c	ANPP	1995	1223.000	NA	51.9000	1236.300	NA	51.90000	3	anpp	i22
SwissFACE_trifolium2	cf	c	ANPP	1995	927.100	NA	60.4000	1122.000	NA	60.40000	3	anpp	i23
BioCON	cfd	c	ANPP	NA	67.100	NA	2.9300	71.770	NA	3.64000	3	anpp	i24
BioCON	cfd	c	ANPP	NA	106.390	NA	5.6000	132.420	NA	7.55000	3	anpp	i25
BioCON	cfd	c	ANPP	NA	139.380	NA	5.6900	167.210	NA	7.22000	3	anpp	i26
BioCON	cfd	c	ANPP	NA	168.010	NA	6.1000	179.450	NA	6.06000	3	anpp	i27
MI	c	c	ANPP	1997	160.000	NA	33.5000	263.300	NA	43.30000	8	anpp	i28
MI	c	c	ANPP	1998	57.000	NA	14.0000	126.800	NA	28.80000	8	anpp	i29
MI	c	c	ANPP	1999	136.100	NA	44.3000	293.400	NA	39.70000	8	anpp	i30
JRBP_FACE3	cfwi	c	ANPP	1999	392.157	NA	29.4120	500.000	NA	49.02000	6	anpp	i31
JRBP_FACE3	cfwi	c	ANPP	2000	551.613	NA	116.1290	406.452	NA	29.03200	6	anpp	i32
JRBP_FACE3	cfwi	c	ANPP	2001	551.455	NA	48.1770	550.929	NA	48.17700	6	anpp	i33
JRBP_FACE3	cfwi	c	ANPP	2002	416.023	NA	19.1960	483.289	NA	57.80000	6	anpp	i34
JRBP_FACE3	cfwi	c	ANPP	2003	500.160	NA	58.9830	471.281	NA	49.28600	6	anpp	i35
POPFACE_pa	c	c	ANPP	2000	881.000	92	NA	1033.000	204	NA	3	anpp	i36
POPFACE_pa	c	c	ANPP	2001	1163.000	8	NA	1664.000	415	NA	3	anpp	i37
POPFACE_pe	c	c	ANPP	2000	849.000	39	NA	1188.000	128	NA	3	anpp	i38
POPFACE_pe	c	c	ANPP	2001	1113.000	115	NA	1327.000	131	NA	3	anpp	i39
POPFACE_pn	c	c	ANPP	2000	1161.000	67	NA	1225.000	93	NA	3	anpp	i40
POPFACE_pn	c	c	ANPP	2001	1590.000	63	NA	1906.000	110	NA	3	anpp	i41
SwissFACE_lolium2	cf	c	ANPP	NA	401.000	NA	46.0000	392.000	NA	46.00000	3	anpp	i42
JRBP_FACE3	cfwi	c	ANPP	2004	166.522	NA	280.3042	202.449	NA	91.18491	6	anpp	i43
GiFACE	cdi	c	ANPP	1998	660.000	105	NA	672.000	62	NA	3	anpp	i44
GiFACE	cdi	c	ANPP	1999	684.000	133	NA	664.000	165	NA	3	anpp	i45
GiFACE	cdi	c	ANPP	2000	810.000	40	NA	889.000	28	NA	3	anpp	i46
GiFACE	cdi	c	ANPP	2001	691.000	48	NA	752.000	30	NA	3	anpp	i47
GiFACE	cdi	c	ANPP	2002	745.000	50	NA	838.000	20	NA	3	anpp	i48
GiFACE	cdi	c	ANPP	2003	574.000	68	NA	547.000	36	NA	3	anpp	i49
GiFACE	cdi	c	ANPP	2004	825.000	135	NA	837.000	91	NA	3	anpp	i50
GiFACE	cdi	c	ANPP	2005	740.000	58	NA	702.000	167	NA	3	anpp	i51
GiFACE	cdi	c	ANPP	2006	665.000	72	NA	742.000	34	NA	3	anpp	i52
GiFACE	cdi	c	ANPP	2007	653.000	113	NA	723.000	60	NA	3	anpp	i53
GiFACE	cdi	c	ANPP	2008	624.000	155	NA	731.000	38	NA	3	anpp	i54
EUROFACE4_pa	cf	c	ANPP	2002	15.696	NA	1.0820	15.315	NA	1.26100	3	anpp	i55
EUROFACE4_pa	cf	c	ANPP	2003	24.514	NA	1.4420	24.853	NA	1.44200	3	anpp	i56
EUROFACE4_pe	cf	c	ANPP	2002	15.465	NA	1.9820	14.724	NA	2.70100	3	anpp	i57
EUROFACE4_pe	cf	c	ANPP	2003	24.465	NA	3.6010	23.181	NA	3.05800	3	anpp	i58
EUROFACE4_pn	cf	c	ANPP	2002	21.337	NA	1.4420	21.675	NA	1.44200	3	anpp	i59
EUROFACE4_pn	cf	c	ANPP	2003	33.213	NA	1.2620	33.190	NA	3.59900	3	anpp	i60
TasFACE	cw	c	ANPP	NA	179.000	NA	27.0000	285.000	NA	40.00000	3	anpp	i61
DUKE	cf	c	ANPP	1996	1502.000	NA	28.4000	1502.000	NA	28.40000	3	anpp	i62
DUKE	cf	c	ANPP	1996	1502.000	NA	28.4000	1502.000	NA	28.40000	3	anpp	i63
DUKE	cf	c	ANPP	1997	1536.700	NA	23.3000	1814.000	NA	26.00000	3	anpp	i64
DUKE	cf	c	ANPP	1997	1536.700	NA	23.3000	1814.000	NA	26.00000	3	anpp	i65
DUKE	cf	c	ANPP	1998	1527.400	NA	49.7000	1877.800	NA	43.80000	3	anpp	i66
DUKE	cf	c	ANPP	1998	1527.400	NA	49.7000	1877.800	NA	43.80000	3	anpp	i67
DUKE	cf	c	ANPP	1999	1629.100	NA	20.5000	1941.600	NA	17.50000	3	anpp	i68
DUKE	cf	c	ANPP	1999	1629.100	NA	20.5000	1941.600	NA	17.50000	3	anpp	i69
DUKE	cf	c	ANPP	2000	1596.600	NA	40.8000	1891.500	NA	40.80000	3	anpp	i70
DUKE	cf	c	ANPP	2000	1596.600	NA	40.8000	1891.500	NA	40.80000	3	anpp	i71
DUKE	cf	c	ANPP	2001	1397.600	NA	35.0000	1645.700	NA	35.10000	3	anpp	i72
DUKE	cf	c	ANPP	2001	1397.600	NA	35.0000	1645.700	NA	35.10000	3	anpp	i73
DUKE	cf	c	ANPP	2002	1148.900	NA	43.8000	1429.200	NA	35.00000	3	anpp	i74
DUKE	cf	c	ANPP	2002	1148.900	NA	43.8000	1429.200	NA	35.00000	3	anpp	i75

Response ratio

Calculate log response ratio within each plot, using metafor::escalc(measure = "ROM", ...).

Questions:

Do you calculate the response ratio before aggregating (done in the next step)?
What’s the standard error of the response ratio?

df_c <- df_c %>%         
  
  mutate( 
    ambient_Sd = ifelse( is.na(ambient_Sd),  ambient_Se  * sqrt(n_plots), ambient_Sd ),
    elevated_Sd  = ifelse( is.na(elevated_Sd), elevated_Se * sqrt(n_plots), elevated_Sd )
    ) %>%

  ## Get logarithm of response ratio and its variance
  metafor::escalc( 
    measure = "ROM", 
    m1i = elevated, sd1i = elevated_Sd, n1i = n_plots, 
    m2i = ambient,  sd2i = ambient_Sd,  n2i = n_plots, 
    data=., 
    append = TRUE, var.names = c("logr", "logr_var") ) %>% 
  as_tibble() %>% 
  
  ## calculate standard error of response ratio
  mutate( logr_se = sqrt(logr_var)/sqrt(n_plots) )

Aggregate

Aggregate data by experiment (and variable name - here only ANPP), pooling all measurements (multiple years, sampling dates and plots). For this we’re using MAd::agg() with a correlation among within-study outcome variables of 0.5 (cor = 0.5) is assumed as per the function’s default.

In the data we always have the same number of plots in ambient and elevated and calculate standard error of effect size from its standard deviation (sqrt(logr_var)) and N.

df_c_agg <- df_c %>% 
  
  filter(!is.na(logr_var) & !is.na(logr)) %>% 
  mutate( id = exp_nam ) %>% 
  
  MAd::agg( id = id, es = logr, var = logr_var, n.1 = n_plots, n.2 = n_plots, cor = 0.5, method = "BHHR", data = . ) %>% 
  
  rename(exp_nam = id,
         logr = es, 
         logr_var = var) %>% 

  ## add number of plots column and varnam
  left_join( df_c %>% 
               ungroup() %>% 
               group_by( exp_nam, varnam ) %>%
               summarise( n_plots = sum(n_plots) ) %>% 
               select( exp_nam, varnam, n_plots ),
             by = "exp_nam" ) %>% 
  
  ## calculate standard error of effect size
  mutate( logr_se = sqrt(logr_var)/sqrt(n_plots) )

## `summarise()` has grouped output by 'exp_nam'. You can override using the `.groups` argument.

Yunke: Here you wanted to ask the question why agg() doesn’t account for the variances, yielding identical resuls as a simple mean(), right? Please modify code to demonstrate that.

This now yields a value and standard error for each experiment.

df_c_agg %>% 
  knitr::kable()

exp_nam	logr	logr_var	varnam	n_plots	logr_se
EUROFACE4_pa	-0.0138365	0.0056138	anpp	15	0.0193456
EUROFACE4_pe	-0.0372565	0.0293724	anpp	15	0.0442511
EUROFACE4_pn	0.0376497	0.0059229	anpp	15	0.0198710
TasFACE	0.6020745	0.0827716	anpp	9	0.0959002
SwissFACE_lolium2	-0.0317718	0.0106935	anpp	21	0.0225658
SwissFACE_trifolium2	0.1541973	0.0018035	anpp	18	0.0100097
BioCON	0.1335173	0.0025240	anpp	12	0.0145028
MI	0.6886090	0.0675434	anpp	24	0.0530501
JRBP_FACE3	0.0370620	0.1304988	anpp	36	0.0602077
POPFACE_pa	0.2586932	0.0139906	anpp	6	0.0482883
POPFACE_pe	0.2559145	0.0042399	anpp	6	0.0265827
POPFACE_pn	0.1174660	0.0017225	anpp	6	0.0169437
GiFACE	0.0515306	0.0052676	anpp	33	0.0126343
DUKE	0.1570184	0.0005076	anpp	42	0.0034765

Meta analysis

Now, we summarise that across multiple experiments to get a meta analysis of effect sizes, treating experiments as a random factor, and no moderators accounted for.

out_meta <- df_c_agg %>% 
  metafor::rma.mv( logr, logr_var, method = "REML", random = ~ 1 | exp_nam, slab = exp_nam, control = list(stepadj=0.3), data = . )

Yunke: Note, we treat experiments as random factor here, not year.

Standard “forest plot”.

metafor::forest(out_meta, xlab="Log Response Ratio", mlab="", xlim=c(-1,1), cex=0.5)

Some other nice plot, showing a box with the confidence interval of the meta-analytic estimate.

out_meta_scaled <- predict( out_meta, transf=exp )

df_box <- tibble(
  varnam = "anpp", 
  middle = out_meta$b[1,1], 
  ymin   = out_meta$ci.lb, 
  ymax   = out_meta$ci.ub,
  
  middle_scaled = out_meta_scaled$pred, 
  ymin_scaled   = out_meta_scaled$ci.lb, 
  ymax_scaled   = out_meta_scaled$ci.ub
)

df_c_agg %>%
  ggplot( aes(x = varnam, y = logr)) +
  geom_jitter( color = rgb(0,0,0,0.3), aes( size = 1/logr_se ), position = position_jitter(w = 0.2, h = 0) ) +
  geom_crossbar( data = df_box, aes(x = varnam, y = middle, ymin = ymin, ymax = ymax), fill = "grey80", alpha = 0.6, width = 0.5 ) +
  geom_hline( yintercept = 0.0, size = 0.5 ) +
  labs(x = "", y = "Log Response Ratio", size = expression(paste("Error"^{-1}))) +
  coord_flip() +
  ylim(-1,1)

Meta analysis demo

Beni Stocker

12/3/2021

Read data

Response ratio

Aggregate

Meta analysis