The data used here is this leaf photosynthetic traits and N dataset:
Data reference
Dong, Ning, Prentice, Iain Colin, Wright, Ian, Wang, Han, Atkin,Owen, Bloomfield, Keith, Domingues, Tomas, Gleason, Sean, Maire, Vincent, Onoda, Yusuke, Poorter, Hendrik, & Smith, Nicholas. (2022). dataset for paper “Leaf nitrogen from the perspective of optimal plant function” (Version v1) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.6831903
Paper reference
Dong, N., Prentice, I. C., Wright, I. J., Wang, H., Atkin, O. K., Bloomfield, K. J., Domingues, T. F., Gleason, S. M., Maire, V., Onoda, Y., Poorter, H., & Smith, N. G. (2022). Leaf nitrogen from the perspective of optimal plant function. Journal of Ecology, 00, 1– 18. https://doi.org/10.1111/1365-2745.13967
df <- read_csv("~/data/leafn_vcmax_ning_dong/data_leafn_vcmax_ning_dong.csv") %>%
rename(lat = Latitude, lon = longitude)
## Rows: 3107 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Genus Species, Family, woody, DE, Legume, Dataset, Contributor, Re...
## dbl (25): sample_ID, site_id, Latitude, longitude, elv, CO2, fapar, LMA, Nar...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Quick overview of data.
skim(df)
Name | df |
Number of rows | 3107 |
Number of columns | 33 |
_______________________ | |
Column type frequency: | |
character | 8 |
numeric | 25 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
Genus Species | 0 | 1.00 | 4 | 44 | 0 | 2078 | 0 |
Family | 0 | 1.00 | 1 | 18 | 0 | 196 | 0 |
woody | 0 | 1.00 | 5 | 15 | 0 | 3 | 0 |
DE | 46 | 0.99 | 1 | 19 | 0 | 4 | 0 |
Legume | 0 | 1.00 | 2 | 3 | 0 | 2 | 0 |
Dataset | 0 | 1.00 | 2 | 14 | 0 | 15 | 0 |
Contributor | 0 | 1.00 | 3 | 28 | 0 | 55 | 0 |
Reference | 0 | 1.00 | 11 | 34 | 0 | 22 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
sample_ID | 0 | 1.00 | 1554.00 | 897.06 | 1.00 | 777.50 | 1554.00 | 2330.50 | 3107.00 | ▇▇▇▇▇ |
site_id | 0 | 1.00 | 162.52 | 85.99 | 1.00 | 74.00 | 169.00 | 234.00 | 315.00 | ▅▇▇▇▆ |
lat | 0 | 1.00 | 5.81 | 27.07 | -65.60 | -14.99 | 5.16 | 26.25 | 67.50 | ▁▇▅▆▂ |
lon | 0 | 1.00 | 13.90 | 96.02 | -159.58 | -71.56 | -5.88 | 108.53 | 172.30 | ▁▇▂▂▅ |
elv | 0 | 1.00 | 606.94 | 682.62 | 1.00 | 150.00 | 319.00 | 818.00 | 3379.00 | ▇▂▁▁▁ |
CO2 | 0 | 1.00 | 383.14 | 13.02 | 338.99 | 377.44 | 386.95 | 392.00 | 402.30 | ▁▂▃▇▇ |
fapar | 0 | 1.00 | 0.63 | 0.22 | 0.08 | 0.46 | 0.63 | 0.86 | 0.93 | ▂▂▆▃▇ |
LMA | 6 | 1.00 | 121.76 | 83.48 | 14.24 | 69.04 | 102.46 | 149.43 | 914.86 | ▇▁▁▁▁ |
Narea | 6 | 1.00 | 2.03 | 1.18 | 0.29 | 1.34 | 1.83 | 2.41 | 18.48 | ▇▁▁▁▁ |
Nmass | 6 | 1.00 | 0.02 | 0.01 | 0.00 | 0.01 | 0.02 | 0.02 | 0.08 | ▇▅▁▁▁ |
vcmax25_obs | 3 | 1.00 | 45.72 | 24.99 | 1.06 | 27.62 | 41.13 | 58.80 | 231.77 | ▇▅▁▁▁ |
vcmax_obs | 3 | 1.00 | 36.13 | 25.12 | 0.74 | 17.45 | 31.04 | 48.89 | 205.50 | ▇▃▁▁▁ |
lnD | 0 | 1.00 | -0.68 | 0.66 | -3.62 | -0.99 | -0.59 | -0.28 | 1.10 | ▁▁▃▇▁ |
mgdd0 | 13 | 1.00 | 21.57 | 5.87 | 0.26 | 16.10 | 22.96 | 26.82 | 33.85 | ▁▂▆▇▅ |
lnppfd | 0 | 1.00 | 6.66 | 0.15 | 6.08 | 6.53 | 6.67 | 6.75 | 7.01 | ▁▁▆▇▂ |
lnppfm | 0 | 1.00 | 2.30 | 0.34 | 1.69 | 2.02 | 2.32 | 2.56 | 3.09 | ▆▅▇▆▂ |
alpha | 2 | 1.00 | 0.91 | 0.24 | 0.27 | 0.82 | 0.93 | 1.10 | 1.25 | ▂▂▃▇▇ |
gdday | 0 | 1.00 | 347.13 | 46.43 | 0.00 | 365.00 | 365.00 | 365.00 | 365.00 | ▁▁▁▁▇ |
soil_clay | 2 | 1.00 | 24.87 | 8.59 | 5.00 | 19.00 | 23.00 | 31.00 | 43.00 | ▁▇▅▅▃ |
soil_ph | 2 | 1.00 | 5.78 | 0.88 | 4.30 | 5.10 | 5.40 | 6.30 | 8.20 | ▆▇▆▂▂ |
soil_cn | 2 | 1.00 | 12.29 | 1.99 | 9.00 | 11.00 | 12.00 | 13.00 | 24.00 | ▇▂▁▁▁ |
vcmax_predicted | 0 | 1.00 | 36.90 | 16.92 | 10.14 | 26.54 | 32.34 | 41.40 | 151.00 | ▇▃▁▁▁ |
vcmax25_predicted | 0 | 1.00 | 49.42 | 21.09 | 20.85 | 29.35 | 45.79 | 63.17 | 144.83 | ▇▆▂▁▁ |
lnLMA_predicted | 703 | 0.77 | 4.68 | 0.33 | 3.79 | 4.47 | 4.69 | 4.89 | 5.48 | ▁▃▇▅▂ |
Na_predicted | 703 | 0.77 | 1.98 | 0.47 | 1.36 | 1.62 | 1.90 | 2.23 | 3.69 | ▇▅▂▁▁ |
gg1 <- df %>%
ggplot(aes(vcmax25_obs)) +
geom_density() +
labs(title = "Vcmax25")
gg2 <- df %>%
ggplot(aes(Narea)) +
geom_density() +
labs(title = "Narea")
gg3 <- df %>%
ggplot(aes(Nmass)) +
geom_density() +
labs(title = "Nmass")
gg4 <- df %>%
ggplot(aes(LMA)) +
geom_density() +
labs(title = "LMA")
(gg1 + gg2) /
(gg3 + gg4)
Log-transform all variables to make them closer to normally distributed.
df <- df %>%
mutate(log_vcmax25_obs = log(vcmax25_obs),
log_Narea = log(Narea),
log_Nmass = log(Nmass),
log_LMA = log(LMA),
)
gg1 <- df %>%
ggplot(aes(log_vcmax25_obs)) +
geom_density() +
labs(title = "Nmass")
gg2 <- df %>%
ggplot(aes(log_Narea)) +
geom_density() +
labs(title = "Narea")
gg3 <- df %>%
ggplot(aes(log_Nmass)) +
geom_density() +
labs(title = "Nmass")
gg4 <- df %>%
ggplot(aes(log_LMA)) +
geom_density() +
labs(title = "LMA")
(gg1 + gg2) /
(gg3 + gg4)
This uses the soil C:N data as in Ning’s dataset.
gg1 <- df %>%
ggplot(aes(soil_cn, log_vcmax25_obs)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg2 <- df %>%
ggplot(aes(soil_cn, log_Narea)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg3 <- df %>%
ggplot(aes(soil_cn, log_Nmass)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg4 <- df %>%
ggplot(aes(soil_cn, log_LMA)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg1 + gg2 + gg3 + gg4
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
Issues
Taking it from ISRIC WISE30sec using ingestr. This also extracts soil pH.
filn <- "../data/data_leafn_vcmax_ning_dong_ALTCNPH.csv"
if (!file.exists(filn)){
library(ingestr)
settings_wise <- get_settings_wise(varnam = c("CNrt", "PHAQ"), layer = 1:7)
df_sites <- df %>%
select(sitename = site_id, lon, lat) %>%
distinct()
df_wise <- ingest(
df_sites,
source = "wise",
settings = settings_wise,
dir = "~/data/soil/wise/"
) %>%
unnest(data)
df <- df %>%
left_join(
df_wise %>%
rename(site_id = sitename),
by = "site_id"
)
write_csv(df, file = filn)
} else {
df <- read_csv(filn)
}
## Rows: 3117 Columns: 39
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Genus Species, Family, woody, DE, Legume, Dataset, Contributor, Re...
## dbl (31): sample_ID, site_id, lat, lon, elv, CO2, fapar, LMA, Narea, Nmass, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Plot with alternative C:N.
gg1 <- df %>%
ggplot(aes(CNrt, log_vcmax25_obs)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg2 <- df %>%
ggplot(aes(CNrt, log_Narea)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg3 <- df %>%
ggplot(aes(CNrt, log_Nmass)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg4 <- df %>%
ggplot(aes(CNrt, log_LMA)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg1 + gg2 + gg3 + gg4
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
Several climatic variables are provided in the dataset. To make analyses at different levels consistent - using the same climate and soil data sources - we re-extract this information here following this ingestr workflow, collecting the following variables:
filn <- "../data/data_leafn_vcmax_ning_dong_ALTCLIM.csv"
ingest_wc_bychunk <- function(idxs){
print(idxs)
## collect WC data for idxs sites
out <- ingest(
df_sites |>
slice(idxs),
source = "worldclim",
settings = settings_wc,
dir = "~/data/worldclim/"
)
## get growting-season means
out <- out |>
unnest(data) |>
## add latitude
left_join(df_sites, by = "sitename") |>
## vapour pressure kPa -> Pa
mutate(vapr = vapr * 1e3) |>
## PPFD from solar radiation: kJ m-2 day-1 -> mol m−2 s−1 PAR
mutate(ppfd = 1e3 * srad * kfFEC * 1.0e-6 / (60 * 60 * 24)) |>
## calculate VPD (Pa) based on tmin and tmax
rowwise() |>
mutate(vpd = ingestr::calc_vpd(eact = vapr, tmin = tmin, tmax = tmax)) |>
## calculate growth temperature (average daytime temperature)
mutate(doy = lubridate::yday(lubridate::ymd("2001-01-15") + months(month - 1))) |>
mutate(tgrowth = ingestr::calc_tgrowth(tmin, tmax, lat, doy)) |>
## average over growing season (where Tgrowth > 0 deg C)
group_by(sitename) |>
nest() |>
mutate(data_growingseason = purrr::map(data, ~get_growingseasonmean(.))) |>
unnest(data_growingseason) |>
select(-data)
return(out)
}
get_growingseasonmean <- function(df){
df |>
filter(tgrowth > 0) |>
ungroup() |>
summarise(across(c(tgrowth, vpd, ppfd), mean))
}
kfFEC <- 2.04
if (!file.exists(filn)){
library(ingestr)
## Collect climatic variables from WorldClim----------------------------------
df_sites <- df %>%
select(sitename = site_id, lon, lat) %>%
distinct()
settings_wc <- list(varnam = c("tmin", "tmax", "vapr", "srad"))
## do this in rounds, otherwise too memory demanding
nchunk <- nrow(df_sites) # only this extreme variant seems to work
nrows_chunk <- ceiling(nrow(df_sites)/nchunk)
irow_chunk <- split(seq(nrow(df_sites)), ceiling(seq_along(seq(nrow(df_sites)))/nrows_chunk))
df_wc <- purrr::map_dfr(
irow_chunk,
~ingest_wc_bychunk(.)
)
write_csv(df_wc, file = "../data/df_wc.csv")
## Collect N deposition-------------------------------------------------------
df_ndep <- ingest(df_sites |>
mutate(sitename = as.character(sitename)) |>
mutate(year_start = 1990, year_end = 2009),
source = "ndep",
timescale = "y",
dir = "~/data/ndep_lamarque/",
verbose = FALSE
) |>
unnest(cols = data) |>
group_by(sitename) |>
summarise(noy = mean(noy), nhx = mean(nhx)) |>
mutate(ndep = noy + nhx) |>
select(-noy, -nhx)
## Combine data---------------------------------------------------------------
df <- df %>%
left_join(
df_wc %>%
rename(site_id = sitename),
by = "site_id"
) |>
left_join(
df_ndep %>%
mutate(site_id = as.numeric(sitename)),
by = "site_id"
)
write_csv(df, file = filn)
} else {
df <- read_csv(filn)
}
## Rows: 3143 Columns: 44
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Genus Species, Family, woody, DE, Legume, Dataset, Contributor, Re...
## dbl (36): sample_ID, site_id, lat, lon, elv, CO2, fapar, LMA, Narea, Nmass, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Check distribution of predictor variables.
# looks alright
df |>
ggplot(aes(tgrowth)) +
geom_density() +
labs(title = "Tgrowth")
## Warning: Removed 3 rows containing non-finite values (stat_density).
# vpd needs to be transformed
df |>
ggplot(aes(log(vpd))) +
geom_density() +
labs(title = "VPD")
## Warning: Removed 3 rows containing non-finite values (stat_density).
df <- df |>
mutate(log_vpd = log(vpd))
# ppfd looks ok
df |>
ggplot(aes(ppfd)) +
geom_density() +
labs(title = "PPFD")
## Warning: Removed 3 rows containing non-finite values (stat_density).
# pH looks ok
df |>
ggplot(aes(PHAQ)) +
geom_density() +
labs(title = "pH")
## Warning: Removed 3 rows containing non-finite values (stat_density).
# ndep needs to be tranformed
df |>
ggplot(aes(log(ndep))) +
geom_density() +
labs(title = "N deposition")
df <- df |>
mutate(log_ndep = log(ndep))
Fit multivariate models.
# removed: mgdd0 (don't know what it is)
linmod_vcmax <- lm(log_vcmax25_obs ~ log_vpd + ppfd + alpha + tgrowth + PHAQ + CNrt + elv + log_ndep, data = df)
linmod_narea <- lm(log_Narea ~ log_vpd + ppfd + alpha + tgrowth + PHAQ + CNrt + elv + log_ndep, data = df)
linmod_nmass <- lm(log_Nmass ~ log_vpd + ppfd + alpha + tgrowth + PHAQ + CNrt + elv + log_ndep, data = df)
linmod_lma <- lm(log_LMA ~ log_vpd + ppfd + alpha + tgrowth + PHAQ + CNrt + elv + log_ndep, data = df)
Partial relationship analysis with soil C:N.
gg_vcmax <- visreg(linmod_vcmax, "CNrt", gg = TRUE)
gg_narea <- visreg(linmod_narea, "CNrt", gg = TRUE)
gg_nmass <- visreg(linmod_nmass, "CNrt", gg = TRUE)
gg_lma <- visreg(linmod_lma, "CNrt", gg = TRUE)
gg_vcmax <- gg_vcmax +
theme_classic()
gg_narea <- gg_narea +
theme_classic()
gg_nmass <- gg_nmass +
theme_classic()
gg_lma <- gg_lma +
theme_classic()
(gg_vcmax + gg_narea) /
(gg_nmass + gg_lma)
Partial relationship analysis with N deposition
gg_vcmax <- visreg(linmod_vcmax, "log_ndep", gg = TRUE)
gg_narea <- visreg(linmod_narea, "log_ndep", gg = TRUE)
gg_nmass <- visreg(linmod_nmass, "log_ndep", gg = TRUE)
gg_lma <- visreg(linmod_lma, "log_ndep", gg = TRUE)
gg_vcmax <- gg_vcmax +
theme_classic()
gg_narea <- gg_narea +
theme_classic()
gg_nmass <- gg_nmass +
theme_classic()
gg_lma <- gg_lma +
theme_classic()
(gg_vcmax + gg_narea) /
(gg_nmass + gg_lma)
# removed: mgdd0 (don't know what it is)
linmod_vcmax <- lm(log_vcmax25_obs ~ lnD + lnppfd + alpha + gdday + soil_ph + CNrt + elv, data = df)
linmod_narea <- lm(log_Narea ~ lnD + lnppfd + alpha + gdday + soil_ph + CNrt + elv, data = df)
linmod_nmass <- lm(log_Nmass ~ lnD + lnppfd + alpha + gdday + soil_ph + CNrt + elv, data = df)
linmod_lma <- lm(log_LMA ~ lnD + lnppfd + alpha + gdday + soil_ph + CNrt + elv, data = df)
library(visreg)
gg_vcmax <- visreg(linmod_vcmax, "CNrt", gg = TRUE)
gg_narea <- visreg(linmod_narea, "CNrt", gg = TRUE)
gg_nmass <- visreg(linmod_nmass, "CNrt", gg = TRUE)
gg_lma <- visreg(linmod_lma, "CNrt", gg = TRUE)
gg_vcmax <- gg_vcmax +
theme_classic()
gg_narea <- gg_narea +
theme_classic()
gg_nmass <- gg_nmass +
theme_classic()
gg_lma <- gg_lma +
theme_classic()
(gg_vcmax + gg_narea) /
(gg_nmass + gg_lma)
Findings:
Issues:
Consider leaf Narea as consisting of a metabolic \(N_v\) and a structural \(N_s\) leaf N component: \[ N_\text{area} = N_v + N_s \] and that \(N_v\) is proportional to \(V_\text{cmax25}\), while \(N_s\) is proportional to LMA (with a zero-y-axis intercept).
linmod_leafn = lm(Narea ~ vcmax25_obs + LMA + 0, data = df)
summary(linmod_leafn)
##
## Call:
## lm(formula = Narea ~ vcmax25_obs + LMA + 0, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4153 -0.3177 0.0516 0.4428 10.3225
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## vcmax25_obs 0.0136818 0.0004412 31.01 <2e-16 ***
## LMA 0.0109072 0.0001553 70.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.818 on 3132 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.8793, Adjusted R-squared: 0.8792
## F-statistic: 1.141e+04 on 2 and 3132 DF, p-value: < 2.2e-16
These coefficients can now be used in the CN-model for modelling leaf C:N with prescribed species-specific LMA.
For reference, the following values are returned:
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# vcmax25_obs 0.0136818 0.0004412 31.01 <2e-16 ***
# LMA 0.0109072 0.0001553 70.22 <2e-16 ***
The coefficient for vcmax25_obs is in units of gN m-2 / (micro-mol m-2 s-1).
The coefficient for LMA is in units of gN m-2 / (gDM m-2).
Visualise model fit.
df <- df |>
mutate(narea_pred = predict(linmod_leafn, newdata = df))
out <- df |>
analyse_modobs2(mod = "narea_pred", obs = "Narea")
## Loading required package: LSD
##
## Attaching package: 'LSD'
## The following objects are masked from 'package:rbeni':
##
## colorpalette, complementarycolor, convertcolor, convertgrey,
## daltonize, disco, distinctcolors, heatpairs, heatscatter,
## heatscatterpoints
## Loading required package: ggthemes
## Loading required package: RColorBrewer
out$gg +
xlim(0, 18) + ylim(0, 18)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
TO EXPLORE. BUT LOW PRIORITY.
The data used here is this leaf photosynthetic traits and N dataset:
Data reference
Ellsworth, David; Wright, Ian; Crous, Kristine Y.; Goll, Daniel S; Zaehle, Sönke; Cernusak, Lucas A.; et al. (2022): Convergence in phosphorus constraints to photosynthesis dataset. figshare. Dataset. https://doi.org/10.6084/m9.figshare.20010485.v1
Paper reference
Ellsworth, D.S., Crous, K.Y., De Kauwe, M.G. et al. Convergence in phosphorus constraints to photosynthesis in forests around the world. Nat Commun 13, 5005 (2022). https://doi.org/10.1038/s41467-022-32545-0
df_ellsworth <- read_csv("~/data/leafnp_vcmax_ellsworth/Ellsworth_NCOMMS_Figure1and2_fulldata.csv")
## Rows: 447 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Species, Family, Sitename, Continent, Plim_status
## dbl (24): Site_No, Cont_No, Species_No, Vcmax, Jmax, Rd, Vcmax_SE, Jmax_SE, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
skim(df_ellsworth)
Name | df_ellsworth |
Number of rows | 447 |
Number of columns | 29 |
_______________________ | |
Column type frequency: | |
character | 5 |
numeric | 24 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
Species | 0 | 1.00 | 4 | 44 | 0 | 402 | 0 |
Family | 7 | 0.98 | 7 | 17 | 0 | 80 | 0 |
Sitename | 0 | 1.00 | 4 | 22 | 0 | 52 | 0 |
Continent | 0 | 1.00 | 4 | 9 | 0 | 4 | 0 |
Plim_status | 0 | 1.00 | 5 | 6 | 0 | 2 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Site_No | 0 | 1.00 | 34.37 | 19.12 | 1.00 | 15.00 | 39.00 | 51.00 | 64.00 | ▇▂▅▇▆ |
Cont_No | 0 | 1.00 | 235.55 | 138.52 | 1.00 | 119.50 | 230.00 | 354.50 | 474.00 | ▇▇▇▆▇ |
Species_No | 7 | 0.98 | 44.85 | 22.46 | 1.00 | 27.75 | 45.00 | 59.00 | 87.00 | ▅▆▇▇▆ |
Vcmax | 0 | 1.00 | 47.55 | 18.31 | 10.50 | 34.90 | 44.80 | 58.40 | 112.30 | ▃▇▅▂▁ |
Jmax | 0 | 1.00 | 95.33 | 34.82 | 29.30 | 71.70 | 90.10 | 112.70 | 243.40 | ▃▇▃▁▁ |
Rd | 0 | 1.00 | 0.56 | 0.70 | -1.55 | 0.18 | 0.53 | 0.92 | 3.29 | ▁▆▇▂▁ |
Vcmax_SE | 0 | 1.00 | 3.08 | 2.54 | 0.30 | 1.40 | 2.30 | 3.90 | 17.90 | ▇▂▁▁▁ |
Jmax_SE | 8 | 0.98 | 3.17 | 2.33 | 0.30 | 1.80 | 2.60 | 3.90 | 27.80 | ▇▁▁▁▁ |
Rd_SE | 0 | 1.00 | 0.47 | 0.26 | 0.04 | 0.29 | 0.40 | 0.59 | 1.64 | ▇▇▂▁▁ |
Photo | 0 | 1.00 | 11.02 | 4.19 | 2.40 | 7.90 | 10.60 | 13.45 | 25.00 | ▃▇▅▂▁ |
Tleaf | 0 | 1.00 | 29.67 | 3.00 | 18.70 | 28.10 | 29.90 | 31.15 | 36.80 | ▁▁▇▇▂ |
CO2S | 0 | 1.00 | 384.52 | 8.88 | 350.00 | 381.00 | 386.00 | 390.00 | 404.00 | ▁▁▃▇▂ |
PARi | 0 | 1.00 | 1747.93 | 297.97 | 998.00 | 1702.00 | 1800.00 | 2000.00 | 2001.00 | ▂▁▂▆▇ |
LMA | 0 | 1.00 | 126.05 | 44.58 | 27.45 | 95.81 | 120.70 | 150.93 | 316.77 | ▂▇▃▁▁ |
Perc_N | 0 | 1.00 | 1.83 | 0.65 | 0.54 | 1.38 | 1.76 | 2.17 | 4.28 | ▃▇▃▁▁ |
Perc_P | 0 | 1.00 | 0.10 | 0.06 | 0.01 | 0.06 | 0.09 | 0.13 | 0.44 | ▇▆▁▁▁ |
Narea | 0 | 1.00 | 2.12 | 0.61 | 0.68 | 1.71 | 2.05 | 2.48 | 5.30 | ▂▇▂▁▁ |
Parea | 0 | 1.00 | 0.12 | 0.06 | 0.01 | 0.08 | 0.11 | 0.14 | 0.43 | ▇▇▂▁▁ |
Photomass | 0 | 1.00 | 98.54 | 55.57 | 16.90 | 62.45 | 86.50 | 122.15 | 435.20 | ▇▃▁▁▁ |
Jmaxmass | 0 | 1.00 | 843.02 | 435.97 | 189.70 | 565.00 | 754.40 | 992.75 | 3389.10 | ▇▅▁▁▁ |
Vcmaxmass | 0 | 1.00 | 419.06 | 217.20 | 72.70 | 273.70 | 376.30 | 519.10 | 1643.10 | ▇▆▁▁▁ |
Nmass | 0 | 1.00 | 18.34 | 6.47 | 5.39 | 13.84 | 17.59 | 21.69 | 42.83 | ▃▇▃▁▁ |
Pmass | 0 | 1.00 | 1.02 | 0.57 | 0.08 | 0.60 | 0.94 | 1.28 | 4.40 | ▇▆▁▁▁ |
NPratio | 0 | 1.00 | 21.49 | 11.13 | 2.60 | 14.85 | 19.00 | 24.65 | 119.60 | ▇▂▁▁▁ |
Issues
df_trobit <- read_csv("~/data/leaf_traits/leaf_soil.csv")
## New names:
## Rows: 2757 Columns: 35
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): source, site, species, Family, Genus, Date, author dbl (26): ...1, lon_2,
## lat_2, lon, lat, z, start_yr, end_yr, Tleaf, narea, p... lgl (2): Author, rep
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
## 81 sites like this:
df_sites_trobit <- df_trobit %>%
select(site, lon, lat, lon_2, lat_2, z, species, Vcmax25, narea, lma, Jmax25, nmass) %>%
distinct(site, .keep_all = TRUE)
nrow(df_sites_trobit)
## [1] 81
## 68 sites when using lon_2 and lat_2:
df_sites_trobit_test <- df_trobit %>%
select(site, lon, lat, lon_2, lat_2, z, species, Vcmax25, narea, lma, Jmax25, nmass) %>%
distinct(lon_2, lat_2, .keep_all = TRUE)
nrow(df_sites_trobit_test)
## [1] 68
## 89 sites when using lon_2, lat_2, and z (elevation):
df_sites_trobit_test2 <- df_trobit %>%
select(site, lon, lat, lon_2, lat_2, z, species, Vcmax25, narea, lma, Jmax25, nmass) %>%
distinct(lon_2, lat_2, z, .keep_all = TRUE)
nrow(df_sites_trobit_test2)
## [1] 89
Issues
site
to identify
site ID, there may still be entries taken from distinct elevations. But
in general, elevational differences are small. Therefore use
site
for aggregation.duplicated_sites <- df_sites_trobit_test2 %>%
group_by(site) %>%
summarise(n=n()) %>%
filter(n>1) %>%
pull(site)
## note differences in elevation (column z)
df_sites_trobit_test2 %>%
filter(site %in% duplicated_sites)
## # A tibble: 22 × 12
## site lon lat lon_2 lat_2 z species Vcmax25 narea lma Jmax25
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 JEN-12 -73.6 -4.90 -73.6 -4.9 135 Micrandra sp… 31.0 1.93 123. 66.2
## 2 JEN-12 -73.6 -4.90 -73.6 -4.9 124 Alchornea tr… 49.1 3.4 107. NA
## 3 JEN-11 -73.6 -4.88 -73.6 -4.88 131 Sloanea brev… 30.3 1.19 101. 56.8
## 4 JEN-11 -73.6 -4.88 -73.6 -4.88 124 Pouteria cus… NA 3.54 232. NA
## 5 ALP-40 -73.4 -3.94 -73.4 -3.94 142 Emmotum flor… 21.4 1.97 261. NA
## 6 ALP-40 -73.4 -3.94 -73.4 -3.94 120 Cupania diph… 7.1 1.5 202. NA
## 7 SUC-01 -72.9 -3.25 -72.9 -3.25 117 Sloanea glad… 17.1 0.903 127. 40.8
## 8 SUC-01 -72.9 -3.25 -72.9 -3.25 111 Buchenavia t… 24.7 2.44 141 NA
## 9 SUC-05 -72.9 -3.26 -72.9 -3.26 132 Osteophloeum… NA 1.87 122. NA
## 10 SUC-05 -72.9 -3.26 -72.9 -3.26 111 Naucleopsis … 20.3 2.41 125. NA
## # … with 12 more rows, and 1 more variable: nmass <dbl>
Aggregate by site (mean) and take log of variables.
df_trobit_sitemean <- df_trobit %>%
group_by(site) %>%
summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)))
# log-transform
df_trobit_sitemean <- df_trobit_sitemean %>%
mutate(across(
c("Vcmax25", "Jmax25", "narea", "nmass", "leafCN", "lma"),
.fns = log,
.names = "log_{.col}")) %>%
rename(sitename = site) # for later use in ingestr
This uses observed soil C:N data. Do a simple regression (plotting against soil C:N), not controlling for other factors.
gg1 <- df_trobit_sitemean %>%
ggplot(aes(CN, log_Vcmax25)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg2 <- df_trobit_sitemean %>%
ggplot(aes(CN, log_narea)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg3 <- df_trobit_sitemean %>%
ggplot(aes(CN, log_nmass)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg4 <- df_trobit_sitemean %>%
ggplot(aes(CN, log_leafCN)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg5 <- df_trobit_sitemean %>%
ggplot(aes(CN, log_lma)) +
geom_point() +
geom_smooth(color = "red", method = "lm") +
theme_classic()
gg1 + gg2 + gg3 + gg4 + gg5
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
Several climatic variables are provided in the dataset. To make analyses at different levels consistent - using the same climate and soil data sources - we re-extract this information here following this ingestr workflow, collecting the following variables:
filn <- "../data/data_leafn_vcmax_trobit_ALTCLIM.csv"
get_growingseasonmean <- function(df){
df |>
filter(tgrowth > 0) |>
ungroup() |>
summarise(across(c(tgrowth, vpd, ppfd), mean))
}
kfFEC <- 2.04
if (!file.exists(filn)){
## Collect climatic variables from WorldClim----------------------------------
df_sites <- df_trobit_sitemean %>%
select(sitename, lon, lat, elv = z) %>%
distinct() |>
## at least one sitename is missing!
drop_na(sitename)
settings_wc <- list(varnam = c("tmin", "tmax", "vapr", "srad"))
## collect WC data for idxs sites
df_wc <- ingest(
df_sites,
source = "worldclim",
settings = settings_wc,
dir = "~/data/worldclim/"
)
## get growting-season means
df_wc <- df_wc |>
unnest(data) |>
## add latitude
left_join(df_sites, by = "sitename") |>
## vapour pressure kPa -> Pa
mutate(vapr = vapr * 1e3) |>
## PPFD from solar radiation: kJ m-2 day-1 -> mol m−2 s−1 PAR
mutate(ppfd = 1e3 * srad * kfFEC * 1.0e-6 / (60 * 60 * 24)) |>
## calculate VPD (Pa) based on tmin and tmax
rowwise() |>
mutate(vpd = ingestr::calc_vpd(eact = vapr, tmin = tmin, tmax = tmax)) |>
## calculate growth temperature (average daytime temperature)
mutate(doy = lubridate::yday(lubridate::ymd("2001-01-15") + months(month - 1))) |>
mutate(tgrowth = ingestr::calc_tgrowth(tmin, tmax, lat, doy)) |>
## average over growing season (where Tgrowth > 0 deg C)
group_by(sitename) |>
nest() |>
mutate(data_growingseason = purrr::map(data, ~get_growingseasonmean(.))) |>
unnest(data_growingseason) |>
select(-data)
## Collect N deposition-------------------------------------------------------
df_ndep <- ingest(
df_sites |>
mutate(year_start = 1990, year_end = 2009),
source = "ndep",
timescale = "y",
dir = "~/data/ndep_lamarque/",
verbose = FALSE
) |>
unnest(cols = data) |>
group_by(sitename) |>
summarise(noy = mean(noy), nhx = mean(nhx)) |>
mutate(ndep = noy + nhx) |>
select(-noy, -nhx)
## Combine data---------------------------------------------------------------
df_trobit_sitemean <- df_trobit_sitemean %>%
left_join(
df_wc,
by = "sitename"
) |>
left_join(
df_ndep,
by = "sitename"
)
write_csv(df_trobit_sitemean, file = filn)
} else {
df_trobit_sitemean <- read_csv(filn)
}
## New names:
## Rows: 81 Columns: 37
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): sitename dbl (36): ...2, lon_2, lat_2, lon, lat, z, start_yr, end_yr,
## Tleaf, narea, p...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `...1` -> `...2`
Missing: alpha
df_trobit_sitemean <- df_trobit_sitemean |>
mutate(log_vpd = log(vpd),
log_ndep = log(ndep))
# removed: mgdd0 (don't know what it is)
linmod_vcmax <- lm(log_Vcmax25 ~ log_vpd + ppfd + tgrowth + pH + CN + z + log_ndep,
data = df_trobit_sitemean)
linmod_narea <- lm(log_narea ~ log_vpd + ppfd + tgrowth + pH + CN + z + log_ndep,
data = df_trobit_sitemean)
linmod_nmass <- lm(log_nmass ~ log_vpd + ppfd + tgrowth + pH + CN + z + log_ndep,
data = df_trobit_sitemean)
linmod_lma <- lm(log_lma ~ log_vpd + ppfd + tgrowth + pH + CN + z + log_ndep,
data = df_trobit_sitemean)
linmod_cn <- lm(log_leafCN ~ log_vpd + ppfd + tgrowth + pH + CN + z + log_ndep,
data = df_trobit_sitemean)
Partial relationship analysis with soil C:N.
gg_vcmax <- visreg(linmod_vcmax, "CN", gg = TRUE)
gg_narea <- visreg(linmod_narea, "CN", gg = TRUE)
gg_nmass <- visreg(linmod_nmass, "CN", gg = TRUE)
gg_lma <- visreg(linmod_lma, "CN", gg = TRUE)
gg_cn <- visreg(linmod_cn, "CN", gg = TRUE)
gg_vcmax <- gg_vcmax +
theme_classic()
gg_narea <- gg_narea +
theme_classic()
gg_nmass <- gg_nmass +
theme_classic()
gg_lma <- gg_lma +
theme_classic()
gg_cn <- gg_cn +
theme_classic()
(gg_vcmax + gg_narea) /
(gg_nmass + gg_lma) /
(gg_cn + gg_cn)
Partial relationship analysis with N deposition.
gg_vcmax <- visreg(linmod_vcmax, "log_ndep", gg = TRUE)
gg_narea <- visreg(linmod_narea, "log_ndep", gg = TRUE)
gg_nmass <- visreg(linmod_nmass, "log_ndep", gg = TRUE)
gg_lma <- visreg(linmod_lma, "log_ndep", gg = TRUE)
gg_cn <- visreg(linmod_cn, "log_ndep", gg = TRUE)
gg_vcmax <- gg_vcmax +
theme_classic()
gg_narea <- gg_narea +
theme_classic()
gg_nmass <- gg_nmass +
theme_classic()
gg_lma <- gg_lma +
theme_classic()
gg_cn <- gg_cn +
theme_classic()
(gg_vcmax + gg_narea) /
(gg_nmass + gg_lma) /
(gg_cn + gg_cn)
Paper reference
Kattge, J., Bönisch, G., Díaz, S., Lavorel, S., Prentice, I. C., Leadley, P., . . . Wirth, C. (2020). TRY plant trait database – enhanced coverage and open access. Global Change Biology, 26(1), 119-188. doi:10.1111/gcb.14904
df <- read_csv("~/scratch/try_kattge_2020/data_cn_pdf.csv")
## Rows: 6129 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): AccSpeciesName, GrowthForm, LeafPhenology, Woodiness, LeafType, Pho...
## dbl (8): ID, lon, lat, CN, SLA, nmass, site_id, BIOME
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df |>
ggplot(aes(PFTs, CN)) +
geom_boxplot(fill = "azure3") +
coord_flip() +
theme_classic() +
labs(x = "PFT", y = expression(paste("C:N"[leaf])))