Leaf photosynthetic traits and N from field observations

Ning Dong’s data

The data used here is this leaf photosynthetic traits and N dataset:

Data reference

Dong, Ning, Prentice, Iain Colin, Wright, Ian, Wang, Han, Atkin,Owen, Bloomfield, Keith, Domingues, Tomas, Gleason, Sean, Maire, Vincent, Onoda, Yusuke, Poorter, Hendrik, & Smith, Nicholas. (2022). dataset for paper “Leaf nitrogen from the perspective of optimal plant function” (Version v1) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.6831903

Paper reference

Dong, N., Prentice, I. C., Wright, I. J., Wang, H., Atkin, O. K., Bloomfield, K. J., Domingues, T. F., Gleason, S. M., Maire, V., Onoda, Y., Poorter, H., & Smith, N. G. (2022). Leaf nitrogen from the perspective of optimal plant function. Journal of Ecology, 00, 1– 18. https://doi.org/10.1111/1365-2745.13967

df <- read_csv("~/data/leafn_vcmax_ning_dong/data_leafn_vcmax_ning_dong.csv") %>% 
  rename(lat = Latitude, lon = longitude)

## Rows: 3107 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): Genus Species, Family, woody, DE, Legume, Dataset, Contributor, Re...
## dbl (25): sample_ID, site_id, Latitude, longitude, elv, CO2, fapar, LMA, Nar...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Quick overview of data.

skim(df)

Data summary
Name	df
Number of rows	3107
Number of columns	33
_______________________
Column type frequency:
character	8
numeric	25
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
Genus Species	0	1.00	4	44	2078
Family	0	1.00	1	18	196
woody	0	1.00	5	15	3
DE	46	0.99	1	19	4
Legume	0	1.00	2	3	2
Dataset	0	1.00	2	14	15
Contributor	0	1.00	3	28	55
Reference	0	1.00	11	34	22

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
sample_ID	0	1.00	1554.00	897.06	1.00	777.50	1554.00	2330.50	3107.00	▇▇▇▇▇
site_id	0	1.00	162.52	85.99	1.00	74.00	169.00	234.00	315.00	▅▇▇▇▆
lat	0	1.00	5.81	27.07	-65.60	-14.99	5.16	26.25	67.50	▁▇▅▆▂
lon	0	1.00	13.90	96.02	-159.58	-71.56	-5.88	108.53	172.30	▁▇▂▂▅
elv	0	1.00	606.94	682.62	1.00	150.00	319.00	818.00	3379.00	▇▂▁▁▁
CO2	0	1.00	383.14	13.02	338.99	377.44	386.95	392.00	402.30	▁▂▃▇▇
fapar	0	1.00	0.63	0.22	0.08	0.46	0.63	0.86	0.93	▂▂▆▃▇
LMA	6	1.00	121.76	83.48	14.24	69.04	102.46	149.43	914.86	▇▁▁▁▁
Narea	6	1.00	2.03	1.18	0.29	1.34	1.83	2.41	18.48	▇▁▁▁▁
Nmass	6	1.00	0.02	0.01	0.00	0.01	0.02	0.02	0.08	▇▅▁▁▁
vcmax25_obs	3	1.00	45.72	24.99	1.06	27.62	41.13	58.80	231.77	▇▅▁▁▁
vcmax_obs	3	1.00	36.13	25.12	0.74	17.45	31.04	48.89	205.50	▇▃▁▁▁
lnD	0	1.00	-0.68	0.66	-3.62	-0.99	-0.59	-0.28	1.10	▁▁▃▇▁
mgdd0	13	1.00	21.57	5.87	0.26	16.10	22.96	26.82	33.85	▁▂▆▇▅
lnppfd	0	1.00	6.66	0.15	6.08	6.53	6.67	6.75	7.01	▁▁▆▇▂
lnppfm	0	1.00	2.30	0.34	1.69	2.02	2.32	2.56	3.09	▆▅▇▆▂
alpha	2	1.00	0.91	0.24	0.27	0.82	0.93	1.10	1.25	▂▂▃▇▇
gdday	0	1.00	347.13	46.43	0.00	365.00	365.00	365.00	365.00	▁▁▁▁▇
soil_clay	2	1.00	24.87	8.59	5.00	19.00	23.00	31.00	43.00	▁▇▅▅▃
soil_ph	2	1.00	5.78	0.88	4.30	5.10	5.40	6.30	8.20	▆▇▆▂▂
soil_cn	2	1.00	12.29	1.99	9.00	11.00	12.00	13.00	24.00	▇▂▁▁▁
vcmax_predicted	0	1.00	36.90	16.92	10.14	26.54	32.34	41.40	151.00	▇▃▁▁▁
vcmax25_predicted	0	1.00	49.42	21.09	20.85	29.35	45.79	63.17	144.83	▇▆▂▁▁
lnLMA_predicted	703	0.77	4.68	0.33	3.79	4.47	4.69	4.89	5.48	▁▃▇▅▂
Na_predicted	703	0.77	1.98	0.47	1.36	1.62	1.90	2.23	3.69	▇▅▂▁▁

Distribution of data

gg1 <- df %>% 
  ggplot(aes(vcmax25_obs)) +
  geom_density() +
  labs(title = "Vcmax25")

gg2 <- df %>% 
  ggplot(aes(Narea)) +
  geom_density() +
  labs(title = "Narea")

gg3 <- df %>% 
  ggplot(aes(Nmass)) +
  geom_density() +
  labs(title = "Nmass")

gg4 <- df %>% 
  ggplot(aes(LMA)) +
  geom_density() +
  labs(title = "LMA")

(gg1 + gg2) /
  (gg3 + gg4)

Log-transform all variables to make them closer to normally distributed.

df <- df %>% 
  mutate(log_vcmax25_obs = log(vcmax25_obs),
         log_Narea = log(Narea),
         log_Nmass = log(Nmass),
         log_LMA = log(LMA),
         )

gg1 <- df %>% 
  ggplot(aes(log_vcmax25_obs)) +
  geom_density() +
  labs(title = "Nmass")

gg2 <- df %>% 
  ggplot(aes(log_Narea)) +
  geom_density() +
  labs(title = "Narea")

gg3 <- df %>% 
  ggplot(aes(log_Nmass)) +
  geom_density() +
  labs(title = "Nmass")

gg4 <- df %>% 
  ggplot(aes(log_LMA)) +
  geom_density() +
  labs(title = "LMA")

(gg1 + gg2) /
  (gg3 + gg4)

Analysis vs soil C:N

This uses the soil C:N data as in Ning’s dataset.

gg1 <- df %>% 
  ggplot(aes(soil_cn, log_vcmax25_obs)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg2 <- df %>% 
  ggplot(aes(soil_cn, log_Narea)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg3 <- df %>% 
  ggplot(aes(soil_cn, log_Nmass)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg4 <- df %>% 
  ggplot(aes(soil_cn, log_LMA)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg1 + gg2 + gg3 + gg4

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Issues

Why soil C:N in discrete values? Where is it extracted from?

Alternative soil C:N data extraction

Taking it from ISRIC WISE30sec using ingestr. This also extracts soil pH.

filn <- "../data/data_leafn_vcmax_ning_dong_ALTCNPH.csv"
if (!file.exists(filn)){
  
  library(ingestr)
  settings_wise <- get_settings_wise(varnam = c("CNrt", "PHAQ"), layer = 1:7)
  
  df_sites <- df %>% 
    select(sitename = site_id, lon, lat) %>% 
    distinct()
  
  df_wise <- ingest(
    df_sites,
    source    = "wise",
    settings  = settings_wise,
    dir       = "~/data/soil/wise/"
    ) %>% 
    unnest(data)
  
  df <- df %>% 
    left_join(
      df_wise %>% 
        rename(site_id = sitename),
      by = "site_id"
    )
  
  write_csv(df, file = filn)  
  
} else {
  
  df <- read_csv(filn)
  
}

## Rows: 3117 Columns: 39
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): Genus Species, Family, woody, DE, Legume, Dataset, Contributor, Re...
## dbl (31): sample_ID, site_id, lat, lon, elv, CO2, fapar, LMA, Narea, Nmass, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Plot with alternative C:N.

gg1 <- df %>% 
  ggplot(aes(CNrt, log_vcmax25_obs)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg2 <- df %>% 
  ggplot(aes(CNrt, log_Narea)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg3 <- df %>% 
  ggplot(aes(CNrt, log_Nmass)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg4 <- df %>% 
  ggplot(aes(CNrt, log_LMA)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg1 + gg2 + gg3 + gg4

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Complement climate variables

Several climatic variables are provided in the dataset. To make analyses at different levels consistent - using the same climate and soil data sources - we re-extract this information here following this ingestr workflow, collecting the following variables:

growth temperature
growing season mean VPD
growing season mean PPFD
N deposition

filn <- "../data/data_leafn_vcmax_ning_dong_ALTCLIM.csv"

ingest_wc_bychunk <- function(idxs){
  
  print(idxs)
  
  ## collect WC data for idxs sites
  out <- ingest(
    df_sites |> 
      slice(idxs),
    source    = "worldclim",
    settings  = settings_wc,
    dir       = "~/data/worldclim/"
    )
  
  ## get growting-season means
  out <- out |> 
  
    unnest(data) |> 
  
    ## add latitude
    left_join(df_sites, by = "sitename") |> 
    
    ## vapour pressure kPa -> Pa
    mutate(vapr = vapr * 1e3) |>
    
    ## PPFD from solar radiation: kJ m-2 day-1 -> mol m−2 s−1 PAR
    mutate(ppfd = 1e3 * srad * kfFEC * 1.0e-6 / (60 * 60 * 24)) |>
  
    ## calculate VPD (Pa) based on tmin and tmax
    rowwise() |> 
    mutate(vpd = ingestr::calc_vpd(eact = vapr, tmin = tmin, tmax = tmax)) |> 
    
    ## calculate growth temperature (average daytime temperature)
    mutate(doy = lubridate::yday(lubridate::ymd("2001-01-15") + months(month - 1))) |> 
    mutate(tgrowth = ingestr::calc_tgrowth(tmin, tmax, lat, doy)) |> 
    
    ## average over growing season (where Tgrowth > 0 deg C)
    group_by(sitename) |> 
    nest() |> 
    mutate(data_growingseason = purrr::map(data, ~get_growingseasonmean(.))) |> 
    unnest(data_growingseason) |> 
    select(-data)
  
  return(out)
}
  
get_growingseasonmean <- function(df){
  df |> 
    filter(tgrowth > 0) |> 
    ungroup() |> 
    summarise(across(c(tgrowth, vpd, ppfd), mean))
}

kfFEC <- 2.04
 
if (!file.exists(filn)){
  
  library(ingestr)

  ## Collect climatic variables from WorldClim----------------------------------  
  df_sites <- df %>% 
    select(sitename = site_id, lon, lat) %>% 
    distinct()

  settings_wc <- list(varnam = c("tmin", "tmax", "vapr", "srad"))

  ## do this in rounds, otherwise too memory demanding
  nchunk <- nrow(df_sites)  # only this extreme variant seems to work
  nrows_chunk <- ceiling(nrow(df_sites)/nchunk)
  irow_chunk <- split(seq(nrow(df_sites)), ceiling(seq_along(seq(nrow(df_sites)))/nrows_chunk))
  
  df_wc <- purrr::map_dfr(
    irow_chunk,
    ~ingest_wc_bychunk(.)
  )
  
  write_csv(df_wc, file = "../data/df_wc.csv")
  
  
  ## Collect N deposition-------------------------------------------------------  
  df_ndep <- ingest(df_sites |> 
                      mutate(sitename = as.character(sitename)) |> 
                      mutate(year_start = 1990, year_end = 2009),
                    source    = "ndep",
                    timescale = "y",
                    dir       = "~/data/ndep_lamarque/",
                    verbose   = FALSE
                    ) |> 
    unnest(cols = data) |> 
    group_by(sitename) |> 
    summarise(noy = mean(noy), nhx = mean(nhx)) |> 
    mutate(ndep = noy + nhx) |> 
    select(-noy, -nhx)
  
  ## Combine data---------------------------------------------------------------
  df <- df %>% 
    left_join(
      df_wc %>% 
        rename(site_id = sitename),
      by = "site_id"
    ) |> 
    left_join(
      df_ndep %>% 
        mutate(site_id = as.numeric(sitename)),
      by = "site_id"
    )
  
  write_csv(df, file = filn)  
  
} else {
  df <- read_csv(filn)
}

## Rows: 3143 Columns: 44
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): Genus Species, Family, woody, DE, Legume, Dataset, Contributor, Re...
## dbl (36): sample_ID, site_id, lat, lon, elv, CO2, fapar, LMA, Narea, Nmass, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Multivariate analysis

New predictors

Check distribution of predictor variables.

# looks alright
df |> 
  ggplot(aes(tgrowth)) +
  geom_density() +
  labs(title = "Tgrowth")

## Warning: Removed 3 rows containing non-finite values (stat_density).

# vpd needs to be transformed
df |> 
  ggplot(aes(log(vpd))) +
  geom_density() +
  labs(title = "VPD")

## Warning: Removed 3 rows containing non-finite values (stat_density).

df <- df |> 
  mutate(log_vpd = log(vpd))

# ppfd looks ok
df |> 
  ggplot(aes(ppfd)) +
  geom_density() +
  labs(title = "PPFD")

## Warning: Removed 3 rows containing non-finite values (stat_density).

# pH looks ok
df |> 
  ggplot(aes(PHAQ)) +
  geom_density() +
  labs(title = "pH")

## Warning: Removed 3 rows containing non-finite values (stat_density).

# ndep needs to be tranformed
df |> 
  ggplot(aes(log(ndep))) +
  geom_density() +
  labs(title = "N deposition")

df <- df |> 
  mutate(log_ndep = log(ndep))

Fit multivariate models.

# removed: mgdd0 (don't know what it is)
linmod_vcmax <- lm(log_vcmax25_obs ~ log_vpd  + ppfd + alpha + tgrowth + PHAQ + CNrt + elv + log_ndep, data = df)
linmod_narea <- lm(log_Narea ~       log_vpd  + ppfd + alpha + tgrowth + PHAQ + CNrt + elv + log_ndep, data = df)
linmod_nmass <- lm(log_Nmass ~       log_vpd  + ppfd + alpha + tgrowth + PHAQ + CNrt + elv + log_ndep, data = df)
linmod_lma   <- lm(log_LMA ~         log_vpd  + ppfd + alpha + tgrowth + PHAQ + CNrt + elv + log_ndep, data = df)

Partial relationship analysis with soil C:N.

gg_vcmax <- visreg(linmod_vcmax, "CNrt", gg = TRUE)
gg_narea <- visreg(linmod_narea, "CNrt", gg = TRUE)
gg_nmass <- visreg(linmod_nmass, "CNrt", gg = TRUE)
gg_lma   <- visreg(linmod_lma,   "CNrt", gg = TRUE)

gg_vcmax <- gg_vcmax +
  theme_classic()
gg_narea <- gg_narea +
  theme_classic()
gg_nmass <- gg_nmass +
  theme_classic()
gg_lma <- gg_lma +
  theme_classic()

(gg_vcmax + gg_narea) /
  (gg_nmass + gg_lma)

Partial relationship analysis with N deposition

gg_vcmax <- visreg(linmod_vcmax, "log_ndep", gg = TRUE)
gg_narea <- visreg(linmod_narea, "log_ndep", gg = TRUE)
gg_nmass <- visreg(linmod_nmass, "log_ndep", gg = TRUE)
gg_lma   <- visreg(linmod_lma,   "log_ndep", gg = TRUE)

gg_vcmax <- gg_vcmax +
  theme_classic()
gg_narea <- gg_narea +
  theme_classic()
gg_nmass <- gg_nmass +
  theme_classic()
gg_lma <- gg_lma +
  theme_classic()

(gg_vcmax + gg_narea) /
  (gg_nmass + gg_lma)

Old predictors

# removed: mgdd0 (don't know what it is)
linmod_vcmax <- lm(log_vcmax25_obs ~ lnD  + lnppfd + alpha + gdday + soil_ph + CNrt + elv, data = df)
linmod_narea <- lm(log_Narea ~       lnD  + lnppfd + alpha + gdday + soil_ph + CNrt + elv, data = df)
linmod_nmass <- lm(log_Nmass ~       lnD  + lnppfd + alpha + gdday + soil_ph + CNrt + elv, data = df)
linmod_lma   <- lm(log_LMA ~         lnD  + lnppfd + alpha + gdday + soil_ph + CNrt + elv, data = df)

library(visreg)
gg_vcmax <- visreg(linmod_vcmax, "CNrt", gg = TRUE)
gg_narea <- visreg(linmod_narea, "CNrt", gg = TRUE)
gg_nmass <- visreg(linmod_nmass, "CNrt", gg = TRUE)
gg_lma   <- visreg(linmod_lma,   "CNrt", gg = TRUE)

gg_vcmax <- gg_vcmax +
  theme_classic()
gg_narea <- gg_narea +
  theme_classic()
gg_nmass <- gg_nmass +
  theme_classic()
gg_lma <- gg_lma +
  theme_classic()

(gg_vcmax + gg_narea) /
  (gg_nmass + gg_lma)

Findings:

No decline in Vcmax with increasing soil C:N.
Muted response in Narea.
Clear decline in Nmass with soil C:N
Clear increase in LMA with soil C:N

Issues:

Why no MAT-like variable available? Or am I missing something?

Coefficients

Consider leaf Narea as consisting of a metabolic \(N_v\) and a structural \(N_s\) leaf N component: \[ N_\text{area} = N_v + N_s \] and that \(N_v\) is proportional to \(V_\text{cmax25}\), while \(N_s\) is proportional to LMA (with a zero-y-axis intercept).

linmod_leafn = lm(Narea ~ vcmax25_obs + LMA + 0, data = df)
summary(linmod_leafn)

## 
## Call:
## lm(formula = Narea ~ vcmax25_obs + LMA + 0, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4153 -0.3177  0.0516  0.4428 10.3225 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## vcmax25_obs 0.0136818  0.0004412   31.01   <2e-16 ***
## LMA         0.0109072  0.0001553   70.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.818 on 3132 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.8793, Adjusted R-squared:  0.8792 
## F-statistic: 1.141e+04 on 2 and 3132 DF,  p-value: < 2.2e-16

These coefficients can now be used in the CN-model for modelling leaf C:N with prescribed species-specific LMA.

For reference, the following values are returned:

# Coefficients:
#              Estimate Std. Error t value Pr(>|t|)    
# vcmax25_obs 0.0136818  0.0004412   31.01   <2e-16 ***
# LMA         0.0109072  0.0001553   70.22   <2e-16 ***

The coefficient for vcmax25_obs is in units of gN m-2 / (micro-mol m-2 s-1).

The coefficient for LMA is in units of gN m-2 / (gDM m-2).

Visualise model fit.

df <- df |> 
  mutate(narea_pred = predict(linmod_leafn, newdata = df))

out <- df |> 
  analyse_modobs2(mod = "narea_pred", obs = "Narea")

## Loading required package: LSD

## 
## Attaching package: 'LSD'

## The following objects are masked from 'package:rbeni':
## 
##     colorpalette, complementarycolor, convertcolor, convertgrey,
##     daltonize, disco, distinctcolors, heatpairs, heatscatter,
##     heatscatterpoints

## Loading required package: ggthemes

## Loading required package: RColorBrewer

out$gg +
  xlim(0, 18) + ylim(0, 18)

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

Ellsworth data

TO EXPLORE. BUT LOW PRIORITY.

The data used here is this leaf photosynthetic traits and N dataset:

Data reference

Ellsworth, David; Wright, Ian; Crous, Kristine Y.; Goll, Daniel S; Zaehle, Sönke; Cernusak, Lucas A.; et al. (2022): Convergence in phosphorus constraints to photosynthesis dataset. figshare. Dataset. https://doi.org/10.6084/m9.figshare.20010485.v1

Paper reference

Ellsworth, D.S., Crous, K.Y., De Kauwe, M.G. et al. Convergence in phosphorus constraints to photosynthesis in forests around the world. Nat Commun 13, 5005 (2022). https://doi.org/10.1038/s41467-022-32545-0

df_ellsworth <- read_csv("~/data/leafnp_vcmax_ellsworth/Ellsworth_NCOMMS_Figure1and2_fulldata.csv")

## Rows: 447 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): Species, Family, Sitename, Continent, Plim_status
## dbl (24): Site_No, Cont_No, Species_No, Vcmax, Jmax, Rd, Vcmax_SE, Jmax_SE, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

skim(df_ellsworth)

Data summary
Name	df_ellsworth
Number of rows	447
Number of columns	29
_______________________
Column type frequency:
character	5
numeric	24
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
Species	0	1.00	4	44	402
Family	7	0.98	7	17	80
Sitename	0	1.00	4	22	52
Continent	0	1.00	4	9	4
Plim_status	0	1.00	5	6	2

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
Site_No	0	1.00	34.37	19.12	1.00	15.00	39.00	51.00	64.00	▇▂▅▇▆
Cont_No	0	1.00	235.55	138.52	1.00	119.50	230.00	354.50	474.00	▇▇▇▆▇
Species_No	7	0.98	44.85	22.46	1.00	27.75	45.00	59.00	87.00	▅▆▇▇▆
Vcmax	0	1.00	47.55	18.31	10.50	34.90	44.80	58.40	112.30	▃▇▅▂▁
Jmax	0	1.00	95.33	34.82	29.30	71.70	90.10	112.70	243.40	▃▇▃▁▁
Rd	0	1.00	0.56	0.70	-1.55	0.18	0.53	0.92	3.29	▁▆▇▂▁
Vcmax_SE	0	1.00	3.08	2.54	0.30	1.40	2.30	3.90	17.90	▇▂▁▁▁
Jmax_SE	8	0.98	3.17	2.33	0.30	1.80	2.60	3.90	27.80	▇▁▁▁▁
Rd_SE	0	1.00	0.47	0.26	0.04	0.29	0.40	0.59	1.64	▇▇▂▁▁
Photo	0	1.00	11.02	4.19	2.40	7.90	10.60	13.45	25.00	▃▇▅▂▁
Tleaf	0	1.00	29.67	3.00	18.70	28.10	29.90	31.15	36.80	▁▁▇▇▂
CO2S	0	1.00	384.52	8.88	350.00	381.00	386.00	390.00	404.00	▁▁▃▇▂
PARi	0	1.00	1747.93	297.97	998.00	1702.00	1800.00	2000.00	2001.00	▂▁▂▆▇
LMA	0	1.00	126.05	44.58	27.45	95.81	120.70	150.93	316.77	▂▇▃▁▁
Perc_N	0	1.00	1.83	0.65	0.54	1.38	1.76	2.17	4.28	▃▇▃▁▁
Perc_P	0	1.00	0.10	0.06	0.01	0.06	0.09	0.13	0.44	▇▆▁▁▁
Narea	0	1.00	2.12	0.61	0.68	1.71	2.05	2.48	5.30	▂▇▂▁▁
Parea	0	1.00	0.12	0.06	0.01	0.08	0.11	0.14	0.43	▇▇▂▁▁
Photomass	0	1.00	98.54	55.57	16.90	62.45	86.50	122.15	435.20	▇▃▁▁▁
Jmaxmass	0	1.00	843.02	435.97	189.70	565.00	754.40	992.75	3389.10	▇▅▁▁▁
Vcmaxmass	0	1.00	419.06	217.20	72.70	273.70	376.30	519.10	1643.10	▇▆▁▁▁
Nmass	0	1.00	18.34	6.47	5.39	13.84	17.59	21.69	42.83	▃▇▃▁▁
Pmass	0	1.00	1.02	0.57	0.08	0.60	0.94	1.28	4.40	▇▆▁▁▁
NPratio	0	1.00	21.49	11.13	2.60	14.85	19.00	24.65	119.60	▇▂▁▁▁

Issues

No longitude and latitude information in this data. Beni will contact David Ellsworth to ask for the long and lat data.

TROBIT data

df_trobit <- read_csv("~/data/leaf_traits/leaf_soil.csv")

## New names:
## Rows: 2757 Columns: 35
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): source, site, species, Family, Genus, Date, author dbl (26): ...1, lon_2,
## lat_2, lon, lat, z, start_yr, end_yr, Tleaf, narea, p... lgl (2): Author, rep
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

## 81 sites like this:
df_sites_trobit <- df_trobit %>% 
  select(site, lon, lat, lon_2, lat_2, z, species, Vcmax25, narea, lma, Jmax25, nmass) %>% 
  distinct(site, .keep_all = TRUE)
nrow(df_sites_trobit)

## [1] 81

## 68 sites when using lon_2 and lat_2:
df_sites_trobit_test <- df_trobit %>% 
  select(site, lon, lat, lon_2, lat_2, z, species, Vcmax25, narea, lma, Jmax25, nmass) %>% 
  distinct(lon_2, lat_2, .keep_all = TRUE)
nrow(df_sites_trobit_test)

## [1] 68

## 89 sites when using lon_2, lat_2, and z (elevation):
df_sites_trobit_test2 <- df_trobit %>% 
  select(site, lon, lat, lon_2, lat_2, z, species, Vcmax25, narea, lma, Jmax25, nmass) %>% 
  distinct(lon_2, lat_2, z, .keep_all = TRUE)
nrow(df_sites_trobit_test2)

## [1] 89

Issues

When using rounded lon and lat to identify sites, their distinct elevation is not considered, When using site to identify site ID, there may still be entries taken from distinct elevations. But in general, elevational differences are small. Therefore use site for aggregation.

duplicated_sites <- df_sites_trobit_test2 %>% 
  group_by(site) %>% 
  summarise(n=n()) %>% 
  filter(n>1) %>% 
  pull(site)

## note differences in elevation (column z)
df_sites_trobit_test2 %>% 
  filter(site %in% duplicated_sites)

## # A tibble: 22 × 12
##    site     lon   lat lon_2 lat_2     z species       Vcmax25 narea   lma Jmax25
##    <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <chr>           <dbl> <dbl> <dbl>  <dbl>
##  1 JEN-12 -73.6 -4.90 -73.6 -4.9    135 Micrandra sp…    31.0 1.93   123.   66.2
##  2 JEN-12 -73.6 -4.90 -73.6 -4.9    124 Alchornea tr…    49.1 3.4    107.   NA  
##  3 JEN-11 -73.6 -4.88 -73.6 -4.88   131 Sloanea brev…    30.3 1.19   101.   56.8
##  4 JEN-11 -73.6 -4.88 -73.6 -4.88   124 Pouteria cus…    NA   3.54   232.   NA  
##  5 ALP-40 -73.4 -3.94 -73.4 -3.94   142 Emmotum flor…    21.4 1.97   261.   NA  
##  6 ALP-40 -73.4 -3.94 -73.4 -3.94   120 Cupania diph…     7.1 1.5    202.   NA  
##  7 SUC-01 -72.9 -3.25 -72.9 -3.25   117 Sloanea glad…    17.1 0.903  127.   40.8
##  8 SUC-01 -72.9 -3.25 -72.9 -3.25   111 Buchenavia t…    24.7 2.44   141    NA  
##  9 SUC-05 -72.9 -3.26 -72.9 -3.26   132 Osteophloeum…    NA   1.87   122.   NA  
## 10 SUC-05 -72.9 -3.26 -72.9 -3.26   111 Naucleopsis …    20.3 2.41   125.   NA  
## # … with 12 more rows, and 1 more variable: nmass <dbl>

Aggregate by site (mean) and take log of variables.

df_trobit_sitemean <- df_trobit %>% 
  group_by(site) %>% 
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)))

# log-transform
df_trobit_sitemean <- df_trobit_sitemean %>% 
  mutate(across(
    c("Vcmax25", "Jmax25", "narea", "nmass", "leafCN", "lma"), 
    .fns = log, 
    .names = "log_{.col}")) %>% 
  rename(sitename = site) # for later use in ingestr

Analysis vs soil C:N

This uses observed soil C:N data. Do a simple regression (plotting against soil C:N), not controlling for other factors.

gg1 <- df_trobit_sitemean %>% 
  ggplot(aes(CN, log_Vcmax25)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg2 <- df_trobit_sitemean %>% 
  ggplot(aes(CN, log_narea)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg3 <- df_trobit_sitemean %>% 
  ggplot(aes(CN, log_nmass)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg4 <- df_trobit_sitemean %>% 
  ggplot(aes(CN, log_leafCN)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg5 <- df_trobit_sitemean %>% 
  ggplot(aes(CN, log_lma)) +
  geom_point() +
  geom_smooth(color = "red", method = "lm") +
  theme_classic()

gg1 + gg2 + gg3 + gg4 + gg5

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Complement climate variables

growth temperature
growing season mean VPD
growing season mean PPFD
N deposition

filn <- "../data/data_leafn_vcmax_trobit_ALTCLIM.csv"

get_growingseasonmean <- function(df){
  df |> 
    filter(tgrowth > 0) |> 
    ungroup() |> 
    summarise(across(c(tgrowth, vpd, ppfd), mean))
}

kfFEC <- 2.04
 
if (!file.exists(filn)){
  
  ## Collect climatic variables from WorldClim----------------------------------  
  df_sites <- df_trobit_sitemean %>% 
    select(sitename, lon, lat, elv = z) %>% 
    distinct() |> 
    
    ## at least one sitename is missing!
    drop_na(sitename)

  settings_wc <- list(varnam = c("tmin", "tmax", "vapr", "srad"))

  ## collect WC data for idxs sites
  df_wc <- ingest(
    df_sites,
    source    = "worldclim",
    settings  = settings_wc,
    dir       = "~/data/worldclim/"
    )
  
  ## get growting-season means
  df_wc <- df_wc |> 
  
    unnest(data) |> 
  
    ## add latitude
    left_join(df_sites, by = "sitename") |> 
    
    ## vapour pressure kPa -> Pa
    mutate(vapr = vapr * 1e3) |>
    
    ## PPFD from solar radiation: kJ m-2 day-1 -> mol m−2 s−1 PAR
    mutate(ppfd = 1e3 * srad * kfFEC * 1.0e-6 / (60 * 60 * 24)) |>
  
    ## calculate VPD (Pa) based on tmin and tmax
    rowwise() |> 
    mutate(vpd = ingestr::calc_vpd(eact = vapr, tmin = tmin, tmax = tmax)) |> 
    
    ## calculate growth temperature (average daytime temperature)
    mutate(doy = lubridate::yday(lubridate::ymd("2001-01-15") + months(month - 1))) |> 
    mutate(tgrowth = ingestr::calc_tgrowth(tmin, tmax, lat, doy)) |> 
    
    ## average over growing season (where Tgrowth > 0 deg C)
    group_by(sitename) |> 
    nest() |> 
    mutate(data_growingseason = purrr::map(data, ~get_growingseasonmean(.))) |> 
    unnest(data_growingseason) |> 
    select(-data)  
  
  ## Collect N deposition-------------------------------------------------------  
  df_ndep <- ingest(
                    df_sites |> 
                      mutate(year_start = 1990, year_end = 2009),
                    source    = "ndep",
                    timescale = "y",
                    dir       = "~/data/ndep_lamarque/",
                    verbose   = FALSE
                    ) |> 
    unnest(cols = data) |> 
    group_by(sitename) |> 
    summarise(noy = mean(noy), nhx = mean(nhx)) |> 
    mutate(ndep = noy + nhx) |> 
    select(-noy, -nhx)
  
  ## Combine data---------------------------------------------------------------
  df_trobit_sitemean <- df_trobit_sitemean %>% 
    left_join(
      df_wc,
      by = "sitename"
    ) |> 
    left_join(
      df_ndep,
      by = "sitename"
    )
  
  write_csv(df_trobit_sitemean, file = filn)  
  
} else {
  df_trobit_sitemean <- read_csv(filn)
}

## New names:
## Rows: 81 Columns: 37
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): sitename dbl (36): ...2, lon_2, lat_2, lon, lat, z, start_yr, end_yr,
## Tleaf, narea, p...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `...1` -> `...2`

Multivariate analysis

Missing: alpha

df_trobit_sitemean <- df_trobit_sitemean |> 
  mutate(log_vpd = log(vpd),
         log_ndep = log(ndep))

# removed: mgdd0 (don't know what it is)
linmod_vcmax <- lm(log_Vcmax25 ~     log_vpd  + ppfd + tgrowth + pH + CN + z + log_ndep, 
                   data = df_trobit_sitemean)
linmod_narea <- lm(log_narea ~       log_vpd  + ppfd + tgrowth + pH + CN + z + log_ndep, 
                   data = df_trobit_sitemean)
linmod_nmass <- lm(log_nmass ~       log_vpd  + ppfd + tgrowth + pH + CN + z + log_ndep, 
                   data = df_trobit_sitemean)
linmod_lma   <- lm(log_lma ~         log_vpd  + ppfd + tgrowth + pH + CN + z + log_ndep, 
                   data = df_trobit_sitemean)
linmod_cn   <- lm(log_leafCN ~       log_vpd  + ppfd + tgrowth + pH + CN + z + log_ndep, 
                   data = df_trobit_sitemean)

Partial relationship analysis with soil C:N.

gg_vcmax <- visreg(linmod_vcmax, "CN", gg = TRUE)
gg_narea <- visreg(linmod_narea, "CN", gg = TRUE)
gg_nmass <- visreg(linmod_nmass, "CN", gg = TRUE)
gg_lma   <- visreg(linmod_lma,   "CN", gg = TRUE)
gg_cn   <- visreg(linmod_cn,     "CN", gg = TRUE)

gg_vcmax <- gg_vcmax +
  theme_classic()
gg_narea <- gg_narea +
  theme_classic()
gg_nmass <- gg_nmass +
  theme_classic()
gg_lma <- gg_lma +
  theme_classic()
gg_cn <- gg_cn +
  theme_classic()

(gg_vcmax + gg_narea) /
  (gg_nmass + gg_lma) / 
  (gg_cn + gg_cn)

Partial relationship analysis with N deposition.

gg_vcmax <- visreg(linmod_vcmax, "log_ndep", gg = TRUE)
gg_narea <- visreg(linmod_narea, "log_ndep", gg = TRUE)
gg_nmass <- visreg(linmod_nmass, "log_ndep", gg = TRUE)
gg_lma   <- visreg(linmod_lma,   "log_ndep", gg = TRUE)
gg_cn   <- visreg(linmod_cn,     "log_ndep", gg = TRUE)

gg_vcmax <- gg_vcmax +
  theme_classic()
gg_narea <- gg_narea +
  theme_classic()
gg_nmass <- gg_nmass +
  theme_classic()
gg_lma <- gg_lma +
  theme_classic()
gg_cn <- gg_cn +
  theme_classic()

(gg_vcmax + gg_narea) /
  (gg_nmass + gg_lma) / 
  (gg_cn + gg_cn)

C:N by PFT

Paper reference

Kattge, J., Bönisch, G., Díaz, S., Lavorel, S., Prentice, I. C., Leadley, P., . . . Wirth, C. (2020). TRY plant trait database – enhanced coverage and open access. Global Change Biology, 26(1), 119-188. doi:10.1111/gcb.14904

df <- read_csv("~/scratch/try_kattge_2020/data_cn_pdf.csv")

## Rows: 6129 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): AccSpeciesName, GrowthForm, LeafPhenology, Woodiness, LeafType, Pho...
## dbl (8): ID, lon, lat, CN, SLA, nmass, site_id, BIOME
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

df |> 
  ggplot(aes(PFTs, CN)) +
  geom_boxplot(fill = "azure3") +
  coord_flip() +
  theme_classic() +
  labs(x = "PFT", y = expression(paste("C:N"[leaf])))