df <- read_csv("~/data/gcme/data_received_190325/NewData_wide_CORRECTED2.csv") %>%
# something is wrong in columns ambient_Sd, ambient_Se, elevated...
mutate( ambient_Sd = as.numeric(ambient_Sd), ambient_Se = as.numeric(ambient_Se),
elevated_Sd = as.numeric(elevated_Sd), elevated_Se = as.numeric(elevated_Se) )
# save experiments names
df_experiments <- df %>% select(exp_nam, prev_name) %>% distinct()
# take only experiments >1 yr
list_exp_gt1yr <- df %>%
filter(!is.na(Year)) %>%
group_by(exp_nam) %>%
summarise(nyears=max(Year)) %>%
filter(nyears>1) %>%
select(exp_nam) %>%
unlist() %>%
unname()
df_c <- df %>%
# ## Take this info from experiments below
# select(-Fumigation_type, -Vegetation_type) %>%
# ## Add prev_name back
# left_join( df_experiments, by = "exp_nam") %>%
## filter experiments with only manipulated CO2
## (no other factors manipulated, strong reduction of data)
filter(treatment=="c") %>%
## More than 1 year data
filter(exp_nam %in% list_exp_gt1yr) %>%
## Combine with experiments meta info
left_join(
read_csv("~/data/gcme/data_received_190325/table_var_exp_names_experiments.csv") %>%
select(prev_name, Fumigation_type=my_fumigation_type, Vegetation_type),
by = c("prev_name")
) %>%
## Filter only Fumigation_type OTC or FACE
filter( Fumigation_type %in% c("OTC", "FACE") ) %>%
{.}
Complement based on Kevin’s comments:
df_c %>%
filter(exp_nam == "MI") %>%
filter(Data_type %in% c("ANPP", "BNPP"))
## # A tibble: 3 × 53
## ALIAS exp_nam prev_name factors treatment TT_Nut_Detail co2_a co2_e fert_a
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 25213 MI MI_c c c c 410 760 0
## 2 25214 MI MI_c c c c 410 760 0
## 3 25215 MI MI_c c c c 410 760 0
## # … with 44 more variables: fert_e <dbl>, nfert_a <dbl>, nfert_e <dbl>,
## # nfertQ_a <dbl>, nfertQ_e <dbl>, pfert_a <dbl>, pfert_e <dbl>,
## # pfertQ_a <dbl>, pfertQ_e <dbl>, kfert_a <dbl>, kfert_e <dbl>,
## # kfertQ_a <dbl>, kfertQ_e <dbl>, lime_a <dbl>, lime_e <dbl>, foth_a <dbl>,
## # foth_e <dbl>, warm_a <dbl>, warm_e <dbl>, warmQ_e <dbl>, irr_a <dbl>,
## # irr_e <dbl>, d_a <dbl>, d_e <dbl>, ozone_a <dbl>, ozone_e <dbl>,
## # swarm_a <dbl>, swarm_e <dbl>, swarmQ_e <dbl>, Data_type <chr>, …
df_c <- df_c %>%
filter(exp_nam != "POPFACE_pooled")
# df_c %>%
# pull(exp_nam) %>%
# unique() %>%
# sort()
df_c <- df_c %>%
mutate(exp_nam = ifelse(exp_nam == "Durham_DukeFACE", "DUKE", exp_nam))
Look at available variables. They are a bit inconsistently named. Make a sound selection.
all_selvars <- c()
# df_c$Data_type %>% unique()
selvars <- c("NPP")
all_selvars <- c(all_selvars, selvars)
df_c_sub <- df_c %>%
filter(Data_type %in% selvars) %>%
mutate(varnam = "npp")
selvars <- c("aboveground_production", "annual_aboveground_biomass_production", "ANPP")
all_selvars <- c(all_selvars, selvars)
df_c_sub <- df_c_sub %>%
bind_rows(
df_c %>%
filter(Data_type %in% selvars) %>%
mutate(varnam = "anpp")
)
selvars <- c("fine_root_production", "BNPP")
all_selvars <- c(all_selvars, selvars)
df_c_sub <- df_c_sub %>%
bind_rows(
df_c %>%
filter(Data_type %in% selvars) %>%
mutate(varnam = "bnpp")
)
selvars <- c("LAI", "max_LAI", "maximum_LAI", "maximum_leaf_area_index")
all_selvars <- c(all_selvars, selvars)
df_c_sub <- df_c_sub %>%
bind_rows(
df_c %>%
filter(Data_type %in% selvars) %>%
mutate(varnam = "lai")
)
selvars <- c("Vcmax", "Vcmax25", "Vcmax,_maximum_carboxylation_rate_of_Rubisco")
all_selvars <- c(all_selvars, selvars)
df_c_sub <- df_c_sub %>%
bind_rows(
df_c %>%
filter(Data_type %in% selvars) %>%
mutate(varnam = "vcmax")
)
selvars <- c("mineral_soil_N", "soil_mineral_N", "soil_NH4-N", "soil_NO3-N", "soil_NO3-N_", "soil_solution_mineral_N", "soil_solution_NH4+", "soil_solution_NO3-", "soil_NO3-_", "soil_NO3-", "soil_NH4+", "soil_NH4+-N", "resin_N", "resin_NH4+", "resin_NO3-", "rhizosphere_NO3-N", "rhizosphere_NH4-N")
all_selvars <- c(all_selvars, selvars)
df_c_sub <- df_c_sub %>%
bind_rows(
df_c %>%
filter(Data_type %in% selvars) %>%
mutate(varnam = "ninorg")
)
Everything that indicates more investment belowground, including the ratio ANPP:BNPP
selvars <- c("root-shoot_ratio")
all_selvars <- c(all_selvars, selvars)
df_c_sub <- df_c_sub %>%
bind_rows(
df_c %>%
filter(Data_type %in% selvars) %>%
mutate(varnam = "rootshoot")
)
Select experiments where both are available. Yields eight experiments.
# exp_selected <- df_c_agg %>%
# select(exp_nam, varnam, logr) %>%
# tidyr::spread( varnam, logr ) %>%
# filter(!is.na(anpp) & !is.na(bnpp)) %>%
# pull(exp_nam)
# ## problem: multiple data entries for the same sampling date: unclear how many plots are sampled, therefore, aggregate first, and ignore SE
# df_c_sub %>%
# filter(exp_nam %in% exp_selected & Data_type %in% c("ANPP", "BNPP")) %>%
# group_by(exp_nam, Data_type, Year) %>%
# summarise(ambient = mean(ambient)) %>%
# filter(exp_nam == "ORNERP_liqui2") %>%
# View()
# THIS IS VERY CRUDE. SHOULD DO IT BY SAMPLING DATE, AND NOT AGGREGATE FIRST.
df_c_bnpp_anpp <- df_c_sub %>%
filter(varnam %in% c("anpp", "bnpp")) %>%
filter(!is.na(ambient)) %>%
group_by(exp_nam, varnam) %>%
summarise(ambient = mean(ambient, na.rm = TRUE)) %>%
pivot_wider(
id_cols = c(exp_nam, varnam),
names_from = varnam,
values_from = ambient
) %>%
mutate(bnpp_anpp = bnpp / anpp) %>%
select(-anpp, -bnpp) %>%
rename(ambient = bnpp_anpp) %>%
## add elevated
left_join(
df_c_sub %>%
filter(varnam %in% c("anpp", "bnpp")) %>%
filter(!is.na(ambient)) %>%
group_by(exp_nam, varnam) %>%
summarise(elevated = mean(elevated, na.rm = TRUE)) %>%
pivot_wider(
id_cols = c(exp_nam, varnam),
names_from = varnam,
values_from = elevated
) %>%
mutate(bnpp_anpp = bnpp / anpp) %>%
select(-anpp, -bnpp) %>%
rename(elevated = bnpp_anpp),
by = c("exp_nam")
) %>%
mutate(logr = log(elevated / ambient))
## `summarise()` has grouped output by 'exp_nam'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'exp_nam'. You can override using the `.groups` argument.
Available data.
df_c_sub %>%
group_by(varnam) %>%
summarise(n_plots = sum(n_plots), n_exp = n()) %>%
knitr::kable()
varnam | n_plots | n_exp |
---|---|---|
anpp | 300 | 96 |
bnpp | 356 | 134 |
lai | 357 | 115 |
ninorg | 776 | 255 |
npp | 400 | 128 |
rootshoot | 100 | 32 |
vcmax | 596 | 183 |
All meaningful variables selected?
all_vars <- df_c$Data_type %>% unique() %>% sort()
# all_vars[!(all_vars %in% all_selvars)]
# all_vars[str_detect(all_vars, "root")]
Aggregate by variable and experiment, pooling multiple years, sampling dates, and plots/replicates and calculate log response ratio
df_c_sub <- df_c_sub %>%
# get standard deviation for all data
mutate(
my_ambient_sd = ambient_Sd,
my_elevated_sd = elevated_Sd
) %>%
rowwise() %>%
mutate(
my_ambient_sd = ifelse( is.na(my_ambient_sd), ambient_Se * sqrt(n_plots), my_ambient_sd ),
my_elevated_sd = ifelse( is.na(my_elevated_sd), elevated_Se * sqrt(n_plots), my_elevated_sd )
) %>%
## Get logarithm of response ratio and its variance
metafor::escalc(
measure = "ROM",
m1i = elevated, sd1i = my_elevated_sd, n1i = n_plots,
m2i = ambient, sd2i = my_ambient_sd, n2i = n_plots,
data=.,
append = TRUE, var.names = c("logr", "logr_var") ) %>%
as_tibble() %>%
mutate( logr_se = sqrt(logr_var)/sqrt(n_plots) )
## Warning: Some 'yi' and/or 'vi' values equal to +-Inf. Recoded to NAs.
write_csv(df_c_sub, file = "data/df_c_sub.csv")
Pool all measurements (multiple years, sampling dates and plots) by variable and experiment for meta analysis
df_c_agg <- df_c_sub %>%
filter(!is.na(logr_var) & !is.na(logr)) %>%
mutate( id = paste(exp_nam, varnam, sep = "_XXX_")) %>%
MAd::agg( id = id, es = logr, var = logr_var, n.1 = n_plots, n.2 = n_plots, cor = 1.0, method = "BHHR", data = . ) %>%
as_tibble() %>%
mutate( id = str_split(id, "_XXX_") ) %>%
mutate( exp_nam = purrr::map_chr(id, 1),
varnam = purrr::map_chr(id, 2) ) %>%
select(exp_nam, varnam, es, var) %>%
## add number of plots column and varnam
left_join( df_c_sub %>%
group_by( exp_nam, varnam ) %>%
summarise( n_plots = sum(n_plots) ) %>%
select( exp_nam, varnam, n_plots ),
by = c("exp_nam", "varnam") ) %>%
rename( logr = es, logr_var = var ) %>%
mutate( logr_se = sqrt(logr_var)/sqrt(n_plots) ) %>%
# left_join( df_varnams, by = "varnam" ) %>%
## filter NA for exp_nam due to unidentified experiment name in soil decomposition dataset
filter(exp_nam!="NA" & !is.na(exp_nam))
## `summarise()` has grouped output by 'exp_nam'. You can override using the `.groups` argument.
write_csv(df_c_agg, file = "data/df_c_agg.csv")
Aggregate by variable, doing a meta-analysis of the log response ratios, with experiment as random factor (method=“DL”)
agg_meta <- function(df, groupvar){
# print(groupvar)
out_meta <- df %>%
dplyr::filter(varnam == eval(parse_character(groupvar))) %>%
# main meta analysis function call, adjusted step size (see http://www.metafor-project.org/doku.php/tips:convergence_problems_rma)
# metafor::rma( logr, logr_var, method = "REML", slab = exp_nam, control = list(stepadj=0.3), data = . )
metafor::rma.mv( logr, logr_var, method = "REML", random = ~ 1 | exp_nam, slab = exp_nam, control = list(stepadj=0.3), data = . )
# transform back
out_meta_scaled <- predict( out_meta, transf=exp )
df_box <- tibble(
varnam = groupvar,
middle = out_meta$b[1,1],
ymin = out_meta$ci.lb,
ymax = out_meta$ci.ub,
middle_scaled = out_meta_scaled$pred,
ymin_scaled = out_meta_scaled$ci.lb,
ymax_scaled = out_meta_scaled$ci.ub
)
# print("... done.")
return(list(df_box = df_box, out_meta = out_meta))
}
# do meta-analysis on all variables
varlist <- unique(df_c_agg$varnam)
## do meta analysis for each variable
list_meta <- purrr::map(as.list(varlist), ~agg_meta(df_c_agg, .))
## extract box information
df_metabox <- purrr::map_dfr(list_meta, "df_box")
# left_join( df_varnams, by = "varnam" )
names(list_meta) <- varlist
df_c_agg %>%
# arrange(logr) %>%
# mutate( my_lab = factor(my_lab, levels=rev(c("Asat", "Leaf Nmass", "Leaf Narea", "ANPP", "N uptake", "N availability", "root:shoot ratio", "BNPP", "Litter", "Soil decomposition rate")))) %>%
ggplot( aes(x = varnam, y = logr)) +
geom_jitter( color = rgb(0,0,0,0.3), aes( size = 1/logr_se ), position = position_jitter(w = 0.2, h = 0) ) +
geom_crossbar( data = df_metabox, aes(x = varnam, y = middle, ymin = ymin, ymax = ymax), fill = "grey80", alpha = 0.6, width = 0.5 ) +
geom_hline( yintercept = 0.0, size = 0.5 ) +
labs(x = "", y = "Log Response Ratio", size = expression(paste("Error"^{-1}))) +
coord_flip() +
ylim(-1,1)
## Warning: Removed 4 rows containing missing values (geom_point).
A focus for additional data search should be the NA
entries in the following table
## make a wide table with ANPP and BNPP
df_c_wide <- df_c_agg %>%
select(exp_nam, varnam, logr) %>%
tidyr::spread( varnam, logr )
## add standard error
df_c_wide_se <- df_c_agg %>%
select(exp_nam, varnam, logr_se) %>%
tidyr::spread( varnam, logr_se)
write_csv(df_c_wide, file = "data/df_c_wide.csv")
write_csv(df_c_wide_se, file = "data/df_c_wide_se.csv")
df_c_wide %>%
knitr::kable()
exp_nam | anpp | bnpp | lai | ninorg | npp | rootshoot | vcmax |
---|---|---|---|---|---|---|---|
BioCON | 0.1335173 | NA | NA | -0.0734438 | NA | NA | -0.1023943 |
Brandbjerg | NA | 0.4139760 | NA | -0.4722798 | NA | NA | -0.2151340 |
Christchurch_pr | NA | 0.0865558 | NA | NA | NA | NA | NA |
DRI | NA | 0.3315263 | NA | -0.4276045 | NA | 0.0066907 | NA |
DUKE | 0.1570184 | 0.2866832 | NA | 0.0631468 | 0.1879209 | NA | 0.0187608 |
DUKE2 | NA | NA | NA | -0.1154478 | 0.2388914 | NA | -0.0849925 |
EucFACE | NA | NA | NA | 0.0237287 | NA | NA | 0.0297513 |
EUROFACE4_pa | -0.0138365 | NA | 0.0813199 | NA | NA | -0.0142906 | 0.0086040 |
EUROFACE4_pe | -0.0372565 | NA | 0.1266585 | NA | NA | 0.2452287 | 0.0275880 |
EUROFACE4_pn | 0.0376497 | NA | 0.0391570 | NA | NA | -0.0085656 | -0.0591629 |
EUROFACE7_pooled | NA | NA | 0.0590475 | -0.3136739 | NA | NA | NA |
GiFACE | 0.0515306 | NA | -0.0008307 | -0.0691125 | NA | NA | 0.0260297 |
Headley_fe | NA | NA | NA | -1.2325691 | NA | NA | NA |
Headley_ps | NA | NA | NA | -1.2325691 | NA | NA | NA |
Headley_qp | NA | NA | NA | -1.2325691 | NA | NA | NA |
JRBP_FACE3 | 0.0370620 | -0.8754687 | NA | NA | -0.2448323 | NA | NA |
JRBP_OTCsand | NA | NA | NA | NA | NA | 1.0929466 | NA |
JRBP_OTCser | NA | NA | NA | NA | NA | 0.8987976 | NA |
MBS_pe | NA | NA | NA | NA | NA | 0.0459851 | NA |
MBS_pt | NA | NA | NA | NA | NA | -0.1236670 | NA |
MI | 0.6886090 | 0.2238135 | NA | -0.6366194 | NA | NA | NA |
ORNERP_acer_pooled | NA | 0.6507337 | NA | NA | NA | NA | NA |
ORNERP_liqui | 0.1512726 | 0.7110731 | NA | NA | 0.1894697 | NA | NA |
ORNERP_liqui2 | 0.7968220 | 0.4652327 | 0.0358496 | 0.2375596 | 0.1777542 | NA | NA |
ORNERP_liriodendron | NA | NA | NA | NA | NA | -0.0163598 | NA |
ORNERP_quercus | NA | NA | NA | NA | NA | 0.0211276 | NA |
PHACE | NA | NA | NA | NA | NA | -0.1286968 | NA |
POPFACE_pa | 0.2586932 | 0.4233322 | 0.1464495 | -0.5297140 | 0.2382158 | 0.0819704 | -0.0779061 |
POPFACE_pe | 0.2559145 | 0.5910167 | 0.2224824 | -0.4258935 | 0.2466410 | 0.1763620 | -0.1380225 |
POPFACE_pn | 0.1174660 | 0.7075088 | 0.1450379 | -0.6787293 | 0.1398347 | 0.1969598 | -0.2251562 |
RiceFACE_China_32N_120E_Or_A | NA | NA | 0.1144037 | NA | NA | NA | NA |
RiceFACE_China_32N_120E_Tr_1 | NA | NA | NA | -0.1313938 | NA | NA | NA |
SCBG | NA | NA | NA | NA | NA | 0.1835359 | NA |
SwissFACE_lolium2 | -0.0317718 | NA | NA | 0.0592422 | NA | 0.0560216 | NA |
SwissFACE_trifolium2 | 0.1541973 | NA | NA | NA | NA | NA | -0.1325882 |
TasFACE | 0.6020745 | NA | NA | -0.1053724 | NA | NA | NA |
UA_OTC | NA | NA | NA | NA | NA | 0.3997112 | NA |
USDA_citrus | NA | NA | NA | NA | NA | 0.0063707 | NA |
tmp <- df_c_wide %>%
filter(!is.na(vcmax) & !is.na(lai)) %>%
left_join(
df_c_wide_se %>%
select(exp_nam, vcmax_se = vcmax, lai_se = lai) %>%
filter(!is.na(vcmax_se) & !is.na(lai_se)),
by = "exp_nam"
) %>%
mutate(certainty = 1/(vcmax_se * lai_se))
tmp %>%
ggplot(aes(x = lai, y = vcmax, label = exp_nam, size = certainty)) +
geom_point(color = "red") +
# geom_smooth(method = "lm", se = FALSE) +
xlim(-0.1, 0.4) + ylim(-0.4, 0.2) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_vline(xintercept = 0, linetype = "dotted") +
coord_equal() +
theme_classic() +
geom_text_repel(size = 2, color = "grey30") +
labs(title = expression("CO"[2] ~ "experiments"),
subtitle = paste("N = ", nrow(tmp)),
x = "d ln LAI",
y = "d ln Vcmax"
)
tmp <- df_c_wide %>%
filter(!is.na(vcmax) & !is.na(anpp)) %>%
left_join(
df_c_wide_se %>%
select(exp_nam, vcmax_se = vcmax, anpp_se = anpp) %>%
filter(!is.na(vcmax_se) & !is.na(anpp_se)),
by = "exp_nam"
) %>%
mutate(certainty = 1/(vcmax_se * anpp_se))
tmp %>%
ggplot(aes(x = anpp, y = vcmax, label = exp_nam, size = certainty)) +
geom_point(color = "red") +
# geom_smooth(method = "lm", se = FALSE) +
xlim(-0.1, 0.4) + ylim(-0.4, 0.2) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_vline(xintercept = 0, linetype = "dotted") +
coord_equal() +
theme_classic() +
geom_text_repel(size = 2, color = "grey30") +
labs(title = expression("CO"[2] ~ "experiments"),
subtitle = paste("N = ", nrow(tmp)),
x = "d ln ANPP",
y = "d ln Vcmax"
)
### Vcmax - NPP
tmp <- df_c_wide %>%
filter(!is.na(vcmax) & !is.na(npp)) %>%
left_join(
df_c_wide_se %>%
select(exp_nam, vcmax_se = vcmax, npp_se = npp) %>%
filter(!is.na(vcmax_se) & !is.na(npp_se)),
by = "exp_nam"
) %>%
mutate(certainty = 1/(vcmax_se * npp_se))
tmp %>%
ggplot(aes(x = npp, y = vcmax, label = exp_nam, size = certainty)) +
geom_point(color = "red") +
# geom_smooth(method = "lm", se = FALSE) +
xlim(-0.1, 0.4) + ylim(-0.4, 0.2) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_vline(xintercept = 0, linetype = "dotted") +
coord_equal() +
theme_classic() +
geom_text_repel(size = 2, color = "grey30") +
labs(title = expression("CO"[2] ~ "experiments"),
subtitle = paste("N = ", nrow(tmp)),
x = "d ln NPP",
y = "d ln Vcmax"
)
tmp <- df_c_wide %>%
filter(!is.na(vcmax) & !is.na(ninorg)) %>%
left_join(
df_c_wide_se %>%
select(exp_nam, vcmax_se = vcmax, ninorg_se = ninorg) %>%
filter(!is.na(vcmax_se) & !is.na(ninorg_se)),
by = "exp_nam"
) %>%
mutate(certainty = 1/(vcmax_se * ninorg_se))
tmp %>%
ggplot(aes(x = ninorg, y = vcmax, label = exp_nam, size = certainty)) +
geom_point(color = "red") +
# geom_smooth(method = "lm", se = FALSE) +
xlim(-0.8, 0.1) +
ylim(-0.3, 0.1) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_vline(xintercept = 0, linetype = "dotted") +
coord_equal() +
theme_classic() +
geom_text_repel(size = 2, color = "grey30") +
labs(title = expression("CO"[2] ~ "experiments"),
subtitle = paste("N = ", nrow(tmp)),
x = "d ln Ninorg",
y = "d ln Vcmax"
)
tmp <- df_c_wide %>%
filter(!is.na(vcmax) & !is.na(rootshoot)) %>%
left_join(
df_c_wide_se %>%
select(exp_nam, vcmax_se = vcmax, rootshoot_se = rootshoot) %>%
filter(!is.na(vcmax_se) & !is.na(rootshoot_se)),
by = "exp_nam"
) %>%
mutate(certainty = 1/(vcmax_se * rootshoot_se))
tmp %>%
ggplot(aes(x = rootshoot, y = vcmax, label = exp_nam, size = certainty)) +
geom_point(color = "red") +
# geom_smooth(method = "lm", se = FALSE) +
# xlim(-0.8, 0.1) +
# ylim(-0.3, 0.1) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_vline(xintercept = 0, linetype = "dotted") +
coord_equal() +
theme_classic() +
geom_text_repel(size = 2, color = "grey30") +
labs(title = expression("CO"[2] ~ "experiments"),
subtitle = paste("N = ", nrow(tmp)),
x = "d ln root:shoot ratio",
y = "d ln Vcmax"
)
tmp <- df_c_wide %>%
left_join(
df_c_bnpp_anpp,
by = "exp_nam"
) %>%
rename(bnpp_anpp = logr) %>%
filter(!is.na(vcmax) & !is.na(bnpp_anpp))
tmp %>%
ggplot(aes(x = bnpp_anpp, y = vcmax, label = exp_nam)) +
geom_point(color = "red") +
# geom_smooth(method = "lm", se = FALSE) +
# xlim(-0.8, 0.1) +
# ylim(-0.3, 0.1) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_vline(xintercept = 0, linetype = "dotted") +
coord_equal() +
theme_classic() +
geom_text_repel(size = 2, color = "grey30") +
labs(title = expression("CO"[2] ~ "experiments"),
subtitle = paste("N = ", nrow(tmp)),
x = "d ln BNPP:ANPP",
y = "d ln Vcmax"
)