library(tidyverse) # for working with ease
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(metafor) # for meta analysis
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading required package: metadat
##
## Loading the 'metafor' package (version 3.8-1). For an
## introduction to the package please type: help(metafor)
library(MAd) # for meta analysis
The intention is to use the MESI db to perform a meta analysis of variables under N fertilisation:
Load data files.
I first exported each excel tab to a separate CSV file and then combined them into a single big table. Respective code is commented out below and the combined file is read directly.
# files <- list.files(path = "~/data/nfert_liang/data_csv/", full.names = TRUE)
# df <- purrr::map_dfr(as.list(files), ~read_csv(.))
# use_names <- c(
# "variable", "reference", "lat", "lon", "mat",
# "map", "ph_soil", "species", "family", "genus",
# "photopathway", "legume", "managed", "growthform", "functionaltype",
# "broadleaf_conifer", "evergreen_deciduous","ninorg_form", "ndep_kg_ha_yr", "duration_yr",
# "load", "value_t", "value_c", "n_t", "n_c",
# "sd_t", "sd_c", "ln_rr", "v", "w",
# "cluster", "w_dash"
# )
# names(df) <- use_names
# write_csv(df, file = "~/data/nfert_liang/nfert_liang_combined.csv")
# modify nfert_liang_combined.csv by hand to have nice column names.
df <- read_csv("~/data/nfert_liang/nfert_liang_combined.csv")
## New names:
## Rows: 2683 Columns: 48
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (15): variable, reference, lat, lon, species, family, genus, photopathwa... dbl
## (17): mat, map, ph_soil, ndep_kg_ha_yr, duration_yr, load, value_t, valu... lgl
## (16): ...33, ...34, ...35, ...36, ...37, ...38, ...39, ...40, ...41, ......
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
## • `` -> `...36`
## • `` -> `...37`
## • `` -> `...38`
## • `` -> `...39`
## • `` -> `...40`
## • `` -> `...41`
## • `` -> `...42`
## • `` -> `...43`
## • `` -> `...44`
## • `` -> `...45`
## • `` -> `...46`
## • `` -> `...47`
## • `` -> `...48`
Make data (more) conform with MESI.
df <- df %>%
mutate(db = "liang",
treatment = "f") %>%
rename(citation = reference,
response = variable,
x_c = value_c,
x_t = value_t,
rep_c = n_c,
rep_t = n_t,
growth_form = growthform
) %>%
mutate(growth_form = ifelse("Woody", # Liang name
"woody", # MESI name
ifelse("Herb", # Liang name
"herbaceous" # MESI name
)))
# ## growth forms in Liang and in MESI
# df$growthform %>% unique()
# df$growth_form %>% unique()
Missing info in Liang et al.: - elevation (elv
)
Issues
Let’s use just data for above specified variables.
use_vars <- c("Area", "Vcmax", "gs", "inst-WUE", "Narea", "Nmass", "SLA", "LMA", "LAI", "Leaf area per plant", "Leaf biomass")
df2 <- df %>%
filter(response %in% use_vars)
Test analysis and plot of ANPP data. Calculate the response ratio of
ANPP (mean and variance) for each experiment. To get that, we first need
to calcuate the means and standard deviation for the ambient and
elevated levels, pooling multiple measurements (years, sampling dates),
each given with mean \(\mu_i\), number
\(N_i\) (replicates/plots), and
standard deviation \(\sigma_i\) or
standard error. For the function metafor::escalc()
, we need
standard deviations (\(SD\)). Calculate
them for those rows where only standard errors \(SE\) are given as: \[
SD = SE \sqrt{N}
\]
Calculate "ROM"
- the log transformed ratio of means
(Hedges et al., 1999; Lajeunesse, 2011) for each observation pair
(ambient and elevated).
df3 <- df2 %>%
mutate(id = 1:n()) %>%
filter(sd_c > 0, sd_t > 0) %>%
## keep only essential variables and drop rows containing missing values for essential variables
select(id, exp = citation, response, x_t, x_c, sd_t, sd_c, rep_t, rep_c) %>%
drop_na() %>%
## Get logarithm of response ratio and its variance
metafor::escalc(
measure = "ROM",
m1i = x_t, sd1i = sd_t, n1i = rep_t,
m2i = x_c, sd2i = sd_c, n2i = rep_c,
data = .,
append = TRUE,
var.names = c("logr", "logr_var")
) %>%
## to keep the output readable from the console output
as_tibble() %>%
## get standard error
mutate( logr_se = sqrt(logr_var) / sqrt(rep_t) )
Aggregate all measurements (species, multiple years, sampling dates and plots) by experiment (and response variable - although here only one) for meta analysis.
df4 <- df3 %>%
filter(!is.na(logr_var) & !is.na(logr)) %>%
# re-create ID (common ID per experiment and response variable)
select(-id) %>%
mutate( id = paste(exp, response, sep = "_XXX_")) %>%
MAd::agg(
id = id,
es = logr,
var = logr_var,
cor = 1.0,
method = "BHHR",
data = .
) %>%
## to keep the output readable from the console output
as_tibble() %>%
# separate ID again for ease of data use
mutate( id = str_split(id, "_XXX_") ) %>%
mutate( exp = purrr::map_chr(id, 1),
response = purrr::map_chr(id, 2) ) %>%
## rename again
select(exp, response, logr = es, logr_var = var) %>%
## add number of observations (sum of plots and repeated samplings)
left_join(
df3 %>%
group_by(exp, response) %>%
summarise(n_c = sum(rep_c), n_t = sum(rep_t)),
by = c("exp", "response")
) %>%
## get standard error. Verify if number available observations are identical
## for ambient and elevated. Use N from control here (n_c).
mutate( logr_se = sqrt(logr_var) / sqrt(n_c) )
## `summarise()` has grouped output by 'exp'. You can override using the `.groups`
## argument.
Aggregate log-ratios across multiple experiments, taking into account their respective variance and using the experiment identity as a grouping factor for random intercepts.
source("~/lt_cn_review/R/analyse_meta.R")
out <- purrr::map(as.list(use_vars), ~analyse_meta(df4 %>% rename(var = response), nam_target = .))
## Warning in analyse_meta(df4 %>% rename(var = response), nam_target = .): Too
## little data for meta analysis for Area
names(out) <- use_vars
df_box <- purrr::map_dfr(out, "df_box")
Plot dots and my box.
df4 %>%
## give it a nice order (for plotting)
mutate(response = factor(response, levels = rev(c("Area", "Vcmax", "gs", "inst-WUE", "Narea", "Nmass", "SLA", "LMA", "LAI", "Leaf area per plant", "Leaf biomass")))) %>%
ggplot( aes(x = response, y = logr)) +
geom_jitter( color = rgb(0,0,0,0.3), aes( size = 1/logr_se ), position = position_jitter(w = 0.2, h = 0) ) +
geom_crossbar( data = df_box, aes(x = var, y = middle, ymin = ymin, ymax = ymax), fill = "tomato", color = "black", alpha = 0.6, width = 0.5 ) +
geom_hline( yintercept = 0.0, size = 0.5) +
labs(x = "Variable", y = "Log Response Ratio", size = expression(paste("Error"^{-1}))) +
coord_flip() +
ylim(-1, 1) +
labs(title = "Liang et al. 2020 data", subtitle = "Response to N-fertilisation")
## Warning: Removed 3 rows containing missing values (geom_point).
Findings:
Below: experiment-level meta analysis plots. Note that variable names written to (console) output are pasted in abvoe the respective “forest plot”.
# Vcmax
metafor::forest(out$Vcmax$modl)
# Narea
metafor::forest(out$Narea$modl)
# Nmass
metafor::forest(out$Nmass$modl)
# LMA
metafor::forest(out$LMA$modl)
# SLA
metafor::forest(out$SLA$modl)