knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(tidyr)
library(ggplot2)
library(rbeni)
library(lubridate)
library(survival)
library(SPREDA)
library(extRemes)
source("R/eva_max.R")
source("R/mct2.R")
source("R/get_plantwhc_mct_bysite.R")
source("R/convert_et.R")

The code for this analysis is available on my GitHub.

Approach

Gao et al. (2014) suggest that the plant rooting zone is adapted to the cumulative water deficit (CWD) during dry spells, and they adopt the Mass Curve Technique (MCT) to derive it. A Gumbel distribution is fit to the accumulated water deficits during dry spells, which allows for an estimate of the deficit with a given return period. The method requires precipitation (\(P\)), runoff (\(Q\)), potential evapotranspiration (\(E_p\)), and green vegetation cover (\(f_v\)) to be specified for each time step (here daily) over multiple years.

The approach implemented here (function mct()) considers temporal variations in the demand, whereas Gao et al. calculated a mean annual and a mean dry-season demand. Here, PET is calculated based on the Priestley-Taylor equation (a minimal sketch is given below), as opposed to the Hargreaves equation used by Gao et al. Limitations of the method described here include that lateral runon and runoff as well as delayed water inflow from snow melt are ignored, and that PET is assumed to drive the demand without being affected by other plant adaptations to dry conditions (reduction of stomatal conductance, internal water storage). Effects of phenological changes on water demand during droughts are accounted for by \(f_v\).
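
The Priestley-Taylor calculation used here lives elsewhere in the repository; the following is only a sketch of the standard formulation, with hypothetical argument names (netrad in J m-2 d-1, temp in degrees C), not the implementation used for the results below.

## Sketch only: Priestley-Taylor PET in mm d-1.
pet_priestley_taylor <- function(netrad, temp, alpha = 1.26){
  lambda <- 2.501e6 - 2365 * temp  ## latent heat of vaporization (J kg-1)
  s <- 4098 * 0.6108 * exp(17.27 * temp / (temp + 237.3)) / (temp + 237.3)^2  ## slope of saturation vapour pressure curve (kPa K-1)
  gamma <- 0.067                   ## psychrometric constant at sea level (kPa K-1), approximate
  alpha * (s / (s + gamma)) * netrad / lambda  ## kg m-2 d-1, i.e. mm d-1
}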

The steps for the method are:

  1. Identify events where the water deficit (\(ET - P\)) is accumulating.
  2. Fit a Gumbel distribution to the largest \(N\) events.
  3. Extract the estimated water deficit for a given return period \(T\) (see the sketch below).
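
For step 3, the fitted Gumbel distribution gives a closed-form return level. A sketch, with mu and sigma denoting the fitted location and scale parameters; this is linear in the transformed variate \(y = -\ln(-\ln(1 - 1/T))\) used further below.

## Gumbel return level for return period T (years):
## x_T = mu - sigma * ln(-ln(1 - 1/T))
gumbel_return_level <- function(mu, sigma, return_period){
  mu - sigma * log(-log(1 - 1 / return_period))
}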

Identify CWD events

Get FLUXNET data

Let’s try this out for the FLUXNET site ‘US-Ton’, for which we have these variables. Read FLUXNET meteo data from FLUXNET stations (and years), written by rscript_eval_fluxnet2015.R.

## meteo data 
load("./data/ddf_meteo.Rdata")

## re-read ET data from FLUXNET and don't remove (filter) any data
filn <- "data/ddf_eval_all.Rdata"
if (!file.exists(filn)){
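  ## NOTE: this branch only runs when the cached file is missing. ingest() is
  ## from the ingestr package (not loaded above), and a 'siteinfo' data frame
  ## must be defined beforehand (see the ingestr documentation).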
  ddf_eval <- ingest(
    siteinfo = siteinfo,
    source    = "fluxnet", 
    getvars   = list(latenth = "LE_F_MDS", latenth_qc = "LE_F_MDS_QC", gpp = "GPP_NT_VUT_REF"),
    dir       = "~/data/FLUXNET-2015_Tier1/20191024/DD/",
    settings  = list(threshold_GPP = 0.8, threshold_LE = 0.0, getswc = FALSE, filter_ntdt = TRUE, remove_neg = FALSE),
    timescale = "d"
  )
  save(ddf_eval, file = filn)
} else {
  load(filn)
}

## combine FLUXNET meteo and ET data
ddf_fluxnet <- ddf_meteo %>% 
  unnest(data) %>% 
  left_join(ddf_eval %>% 
              unnest(data), 
            by = c("sitename", "date")
            ) %>% 
  group_by(sitename) %>% 
  nest() %>% 
  
  ## nest elv into data frame
  left_join(
    readr::read_csv("~/ingestr/siteinfo_fluxnet2015.csv") %>% 
      dplyr::select(sitename, elv),
    by = "sitename"
  ) %>% 
  unnest(data) %>% 
  group_by(sitename) %>% 
  nest() %>% 
  
  ## convert units: get ET in mm d-1
  dplyr::mutate(latenth_mm = purrr::map(data, ~convert_et(.$latenth, .$temp, .$elv))) %>% 
  dplyr::mutate(latenth_mm = purrr::map(latenth_mm, ~tibble(latenth_mm = .))) %>% 
  dplyr::mutate(data       = purrr::map2(data, latenth_mm, ~bind_cols(.x, .y))) %>% 
  dplyr::select(-latenth_mm)
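
The conversion in convert_et() essentially divides the latent heat flux by the latent heat of vaporization. A hedged sketch of the idea (the actual function in R/convert_et.R also takes elevation, presumably for a pressure correction not shown here):

## Sketch only: convert latent heat flux (J m-2 d-1) to water mass flux (mm d-1).
convert_et_sketch <- function(latenth, temp){
  lambda <- 2.501e6 - 2365 * temp  ## latent heat of vaporization (J kg-1)
  latenth / lambda                 ## kg m-2 d-1, i.e. mm d-1
}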

Get ALEXI/WATCH-WFDEI data

Read ALEXI ET and WATCH-WFDEI data at FLUXNET sites, written by rscript_get_data_fluxnetsites.R.

load("data/df_alexi.Rdata")

Plot water balance components

ddf_fluxnet %>%
  filter(sitename == "US-Ton") %>% 
  unnest(data) %>% 
  ggplot(aes(date, prec)) +
  geom_line(color = "royalblue") +
  labs(x = "Date", y = expression(paste("Precipitation (mm d"^{-1}, ")")) )

#ggsave("fig/prec_example.pdf", width = 6, height = 3)

ddf_fluxnet %>% 
  filter(sitename == "US-Ton") %>% 
  unnest(data) %>% 
  ggplot(aes(date, latenth_mm)) +
  geom_line(color = "tomato") +
  labs(x = "Date", y = expression(paste("ET (J m"^{-2}, "d"^{-1}, ")")) )

#ggsave("fig/pet_example.pdf", width = 6, height = 3)

ddf_fluxnet %>% 
  filter(sitename == "US-Ton") %>% 
  unnest(data) %>% 
  mutate(latenth_mm = ifelse(is.na(latenth_mm), 0, latenth_mm)) %>% 
  ggplot(aes(date, -cumsum(prec - latenth_mm))) +
  geom_line(color = "black") +
  labs(x = "Date", y = "Cumulative water balance (mm)")

Calculate daily water balance and add to test data frame for site US-Ton.

df_example_fluxnet <- ddf_fluxnet %>% 
  filter(sitename == "US-Ton") %>% 
  unnest(data) %>% 
  mutate(latenth_mm = ifelse(is.na(latenth_mm), 0, latenth_mm)) %>% 
  mutate(bal = prec - latenth_mm) %>% 
  mutate(bal = myapprox(bal)) %>% 
  mutate(bal_cum    = cumsum(bal)) %>% 
  mutate(demand_cum = cumsum(latenth_mm)) %>% 
  mutate(supply_cum = cumsum(prec))

df_example_alexi <- df_alexi %>% 
  filter(idx == "US-Ton") %>% 
  unnest(df) %>% 
  mutate(et_mm = ifelse(is.na(et_mm), 0, et_mm)) %>% 
  mutate(bal = prec - et_mm) %>% 
  mutate(bal = myapprox(bal)) %>% 
  mutate(bal_cum    = cumsum(bal)) %>% 
  mutate(demand_cum = cumsum(et_mm)) %>% 
  mutate(supply_cum = cumsum(prec)) %>% 
  drop_na(bal) %>% 
  
  ## merge FLUXNET GPP and PPFD data into the alexi data frame
  left_join(
    df_example_fluxnet %>% 
      ungroup() %>% 
      dplyr::select(date, gpp, ppfd),
    by = "date"
  )

CWD events

Get events of consecutive deficit using the mct() function. This function identifies events as periods of consecutive days on which the water balance \(P - E\) is negative. The maximum CWD of each event is recorded. The “consecutiveness” of days with a negative water balance is slightly relaxed: an event is terminated only once the cumulative water deficit has fallen below thresh_terminate times the maximum CWD of the respective event (see argument thresh_terminate). Additionally, all days between that end-of-event day and the day on which the CWD first fell below thresh_drop times the maximum CWD are dropped from the event, so that days with precipitation towards the end of an event are excluded. This sounds a bit complicated, but the plots below (and the sketch that follows) illustrate it.
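
To make the event definition concrete, here is a strongly simplified sketch of the accumulation-and-termination logic (not the actual mct() implementation, which lives in R/mct2.R; the thresh_drop trimming of event ends is omitted for brevity):

## Simplified sketch of CWD event identification: the deficit accumulates
## while the water balance bal = P - ET is negative, and an event is
## terminated once the deficit recovers below thresh_terminate times the
## event's maximum CWD.
identify_cwd_events_sketch <- function(bal, thresh_terminate = 0.1){
  n <- length(bal)
  deficit <- numeric(n)
  iinst   <- rep(NA_integer_, n)
  k <- 0; active <- FALSE; max_def <- 0
  for (i in seq_len(n)){
    d_prev <- if (i == 1) 0 else deficit[i - 1]
    d <- max(d_prev - bal[i], 0)  ## deficit grows when bal < 0
    if (!active && d > 0){ active <- TRUE; k <- k + 1; max_def <- 0 }
    if (active){
      max_def <- max(max_def, d)
      if (d < thresh_terminate * max_def){
        active <- FALSE
        d <- 0  ## event terminated
      } else {
        iinst[i] <- k
      }
    }
    deficit[i] <- d
  }
  tibble::tibble(deficit = deficit, iinst = iinst)
}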

out_mct_fluxnet <- mct(df_example_fluxnet, varname_wbal = "bal", thresh_terminate = 0.1, thresh_drop = 0.9 )
out_mct_alexi   <- mct(df_example_alexi,   varname_wbal = "bal", thresh_terminate = 0.1, thresh_drop = 0.9 )

Plot the cumulative deficit and rain events, with blue for precipitation, reddish for the cumulative water deficit, and black for GPP/PPFD.

## cumulative deficits
## FLUXNET data
ggplot() +
  geom_rect(
    data=out_mct_fluxnet$inst, 
    aes(xmin=date_start, xmax=date_end, ymin=-99, ymax=99999), 
    fill=rgb(0,0,0,0.3), 
    color=NA) +
  geom_line(data = out_mct_fluxnet$df, aes(date, prec), size = 0.3, color="royalblue") +
  geom_line(data = out_mct_fluxnet$df, aes(date, deficit), color="tomato") +
  geom_line(data = out_mct_fluxnet$df, aes(date, 100 * (gpp/ppfd))) +
  coord_cartesian(ylim=c(0, 200), xlim = c(ymd("2002-01-01"), ymd("2010-12-01"))) +
  theme_classic() +
  labs(title = "US-Ton", subtitle = "ET and precipitation: FLUXNET", x = "Date", y = "Cumulative water deficit (mm)")

ggsave("fig/cwd_example_fluxnet.pdf", width = 6, height = 3)
## ALEXI data
ggplot() +
  geom_rect(
    data=out_mct_alexi$inst, 
    aes(xmin=date_start, xmax=date_end, ymin=-99, ymax=99999), 
    fill=rgb(0,0,0,0.3), 
    color=NA) +
  geom_line(data = out_mct_alexi$df, aes(date, prec), size = 0.3, color="royalblue") +
  geom_line(data = out_mct_alexi$df, aes(date, deficit), color="tomato") +
  geom_line(data = out_mct_fluxnet$df, aes(date, 100 * (gpp/ppfd))) +
  coord_cartesian(ylim=c(0, 300)) + # , xlim = c(ymd("2002-01-01"), ymd("2010-12-01"))
  theme_classic() +
  labs(title = "US-Ton", subtitle = "ET: ALEXI, precipitation: WATCH-WFDEI", x = "Date", y = "Cumulative water deficit (mm)")

ggsave("fig/cwd_example_alexi.pdf", width = 6, height = 3)
## ALEXI and FLUXNET data on top of each other
ggplot() +
  geom_line(data = out_mct_fluxnet$df, aes(date, deficit), color="black") +
  geom_line(data = out_mct_alexi$df, aes(date, deficit), color="tomato")

LUE vs. CWD

This shows that GPP/PAR declines as the CWD increases during large CWD events. Let’s retain only data from the largest \(N\) events and plot GPP/PAR vs. CWD.

For FLUXNET data:

## retain only data from largest instances
nyears <- out_mct_fluxnet$df %>% 
  mutate(year = year(date)) %>% 
  pull(year) %>% 
  unique() %>% 
  length()
biginstances <- out_mct_fluxnet$inst %>% 
  arrange(-deficit) %>% 
  slice(1:nyears) %>% 
  pull(iinst)

## get linear fit with outliers
linmod <- lm(lue ~ deficit, data = out_mct_fluxnet$df %>% mutate(lue = gpp/ppfd))
idx_drop <- which(is.na(remove_outliers(linmod$residuals, coef = 1.5)))
out_mct_fluxnet$df_clean <- out_mct_fluxnet$df %>% 
  mutate(lue = gpp/ppfd)
out_mct_fluxnet$df_clean$lue[idx_drop] <- NA

## get linear fit without outliers
linmod <- lm(lue ~ deficit, data = out_mct_fluxnet$df_clean)

## test: is slope negative?
is_neg <- coef(linmod)["deficit"] < 0.0

## test: is slope significantly (5% level) different from zero (t-test)?
coef(summary(linmod))["deficit", "Pr(>|t|)"] < 0.05
## [1] TRUE
## get x-axis cutoff
lue0 <- - coef(linmod)["(Intercept)"] / coef(linmod)["deficit"]
df_fit = data.frame(y = predict(linmod, newdata = out_mct_fluxnet$df_clean), x = out_mct_fluxnet$df_clean$deficit)

## plot
out_mct_fluxnet$df_clean %>% 
  filter(iinst %in% biginstances) %>% 
  ggplot(aes(x = deficit, y = gpp/ppfd)) +
  geom_point(alpha = 0.5) +
  labs(title = "US-Ton", x = "Cumulative water deficit (mm)", y = "GPP/PPFD", subtitle = "ET: FLUXNET, precipitation: FLUXNET, GPP: FLUXNET, PPFD: FLUXNET") +
  geom_vline(xintercept = lue0, linetype = "dotted") +
  geom_line(data = df_fit, aes(x, y), col = "red")

For ALEXI data:

nyears <- out_mct_alexi$df %>% 
  mutate(year = year(date)) %>% 
  pull(year) %>% 
  unique() %>% 
  length()
biginstances <- out_mct_alexi$inst %>% 
  arrange(-deficit) %>% 
  slice(1:nyears) %>% 
  pull(iinst)

## get linear fit with outliers
linmod <- lm(lue ~ deficit, data = out_mct_alexi$df %>% mutate(lue = gpp/ppfd))
idx_drop <- which(is.na(remove_outliers(linmod$residuals, coef = 1.5)))
out_mct_alexi$df_clean <- out_mct_alexi$df %>% 
  mutate(lue = gpp/ppfd)
out_mct_alexi$df_clean$lue[idx_drop] <- NA

## get linear fit without outliers
linmod <- lm(lue ~ deficit, data = out_mct_alexi$df_clean)

## test: is slope negative?
is_neg <- coef(linmod)["deficit"] < 0.0

## test: is slope significantly (5% level) different from zero (t-test)?
coef(summary(linmod))["deficit", "Pr(>|t|)"] < 0.05
## [1] TRUE
## get x-axis cutoff
lue0 <- - coef(linmod)["(Intercept)"] / coef(linmod)["deficit"]
df_fit = data.frame(y = predict(linmod, newdata = out_mct_alexi$df_clean), x = out_mct_alexi$df_clean$deficit)

## plot
out_mct_alexi$df_clean %>% 
  filter(iinst %in% biginstances) %>% 
  ggplot(aes(x = deficit, y = gpp/ppfd)) +
  geom_point(alpha = 0.5) +
  labs(title = "US-Ton", x = "Cumulative water deficit (mm)", y = "GPP/PPFD", subtitle = "ET: ALEXI, precipitation: WATCH-WFDEI, GPP: FLUXNET, PPFD: FLUXNET") +
  geom_vline(xintercept = lue0, linetype = "dotted") +
  geom_line(data = df_fit, aes(x, y), col = "red")

I’ve fitted a linear regression above. Admittedly, the relationship looks more like an exponential decay with CWD (which surprises me). Fitting an exponential instead remains to be done; a sketch of how this could look is given below.
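
A sketch of what such an exponential fit could look like, using nls() on the cleaned FLUXNET data (object and column names follow those used above; the starting values are ad hoc):

## Sketch only: exponential decay of LUE with CWD, lue = a * exp(-k * deficit).
df_nls <- out_mct_fluxnet$df_clean %>% 
  dplyr::filter(!is.na(lue), !is.na(deficit), lue > 0)
expmod <- nls(
  lue ~ a * exp(-k * deficit),
  data  = df_nls,
  start = list(a = max(df_nls$lue), k = 0.01)
)
summary(expmod)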

Distribution of CWD events

Plot the distribution of cumulative deficits (this is actually the maximum CWD attained during each event). Grey for CWD based on ET and P from FLUXNET data, reddish from ALEXI (ET) and WATCH-WFDEI (P).

## note: fill is mapped inside aes() so that scale_fill_manual() produces a legend
ggplot() +
  geom_histogram(
    data = out_mct_fluxnet$inst,
    aes(x = deficit, y = ..density.., fill = "FLUXNET"),
    color = "black", alpha = 0.5,
    position = "identity") +
  geom_histogram(
    data = out_mct_alexi$inst,
    aes(x = deficit, y = ..density.., fill = "ALEXI/WATCH-WFDEI"),
    color = "black", alpha = 0.5,
    position = "identity") +
  scale_fill_manual(name = "", values = c("FLUXNET" = "black", "ALEXI/WATCH-WFDEI" = "tomato")) +
  labs(title = "US-Ton", x = "Cumulative water deficit (mm)")

Fit an extreme value distribution

To estimate the probability of extreme values, we fit a Gumbel distribution using the extRemes R package (described in a dedicated Journal of Statistical Software paper). The distribution above looks a bit odd: there is a large number of events with a low CWD, then a gap, and then a few very large events (roughly one per year). We try to retain only these few large events and fit a Gumbel distribution to them. Filtering is done here by taking the largest \(N\) events, where \(N\) corresponds to the length of the time series in years. An alternative (possibly better) approach would be to take all events above some quantile threshold, for example the 95th percentile; a sketch of this is included in the code below.

## Take only the N largest instances (deficits), where N is given by the number of years available in the data
nyears <- year(range(out_mct_alexi$df$date)[2]) - year(range(out_mct_alexi$df$date)[1]) + 1
vals <- out_mct_alexi$inst %>% 
  arrange(desc(deficit)) %>% 
  dplyr::slice(1:nyears) %>%  # keep only the N largest events
  dplyr::select(deficit) %>% 
  pull()
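
## Alternative (sketch only, not used below): retain all events above the
## 95th percentile instead of the N largest.
vals_alt <- out_mct_alexi$inst %>% 
  dplyr::filter(deficit > quantile(deficit, probs = 0.95, na.rm = TRUE)) %>% 
  dplyr::pull(deficit)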

gumbi <- extRemes::fevd(x=vals, type="Gumbel", method="MLE")
summary(gumbi)
## 
## extRemes::fevd(x = vals, type = "Gumbel", method = "MLE")
## 
## [1] "Estimation Method used: MLE"
## 
## 
##  Negative Log-Likelihood Value:  92.70421 
## 
## 
##  Estimated parameters:
## location    scale 
## 190.9592  79.2806 
## 
##  Standard Error Estimates:
## location    scale 
## 21.16152 12.82352 
## 
##  Estimated parameter covariance matrix.
##           location     scale
## location 447.81003  95.38675
## scale     95.38675 164.44273
## 
##  AIC = 189.4084 
## 
##  BIC = 190.9536
pdf("fig/gumbi_example.pdf", width = 8, height = 7)
pl <- plot(gumbi)
dev.off()
# extract MLEs (these are needed for the remaining part of the analysis)
muG <- gumbi$results$par[1]
sigmaG <- gumbi$results$par[2]

## unclear which package provides probplot(), QQplot(), and PPplot() (possibly SPREDA, loaded above)
probplot(values=vals, model=gumbi, varname="Deficit (mm)", alpha=1-0.95, dist="gumbel")

#cprobplot(values=vals, model=gumbi, varname="Deficit (mm)", alpha=1-0.95, dist="gumbel")

QQplot(values=vals, mu=muG, sigma=sigmaG, dist="gumbel")

PPplot(values=vals, mu=muG, sigma=sigmaG, dist="gumbel")

# all plots ("primary")
# extRemes::plot.fevd(gumbi)

# only the return level ("rl") plot; plot(gumbi, type = "rl") dispatches to the same method
plot.fevd.mle(gumbi, 
      # type = c("primary", "probprob", "qq", "qq2", "Zplot", "hist", "density", "rl", "trace"),
      type = c("rl"),
      rperiods = c(2, 5, 10, 20, 50, 80, 100, 120, 200, 250, 300, 500, 800),
      a = 0, hist.args = NULL, density.args = NULL, d = NULL )

# source("R/get_return_period.R")
# df_test <- get_return_period(gumbi)
# with(df_test, plot(trans_period, return_values, pch=16, col="red"))

Get return levels for given return periods

The plot created by plot.fevd.mle() shows the return level for a given return period. These values are also returned by the function extRemes::return.level().

The transformed variate of the return period \(T\) is defined in Gao et al. as \[ y = - \ln ( -\ln (1-1/T) ) \] The return level \(X\) has a linear relationship with the transformed return period \(y\).

## get return levels for a given vector of return periods
return_period <- c(2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 200, 250, 300, 500, 800)

return_level <- extRemes::return.level(
  gumbi, 
  return.period = return_period
  )
df_return <- tibble( 
  return_period = return_period, 
  return_level = unname(c(return_level)), 
  trans_period = -log( -log(1 - 1/return_period)) )

df_return %>% 
  ggplot(aes(trans_period, return_level)) +
  geom_point()

Nice functions

Fitting the extreme value distribution and estimating the event size with return period \(T\) are done in a single function, get_plantwhc_mct_bysite().
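
A hedged sketch of what such a wrapper plausibly does (the actual implementation is in R/get_plantwhc_mct_bysite.R and may differ in details):

## Sketch only: combine event identification, Gumbel fit, and return levels.
get_plantwhc_mct_bysite_sketch <- function(df, varname_wbal = "bal",
                                           return_period = c(2, 5, 10, 20, 50, 100)){
  out_mct <- mct(df, varname_wbal = varname_wbal)
  nyears  <- length(unique(lubridate::year(df$date)))
  vals <- out_mct$inst %>% 
    dplyr::arrange(dplyr::desc(deficit)) %>% 
    dplyr::slice(1:nyears) %>% 
    dplyr::pull(deficit)
  gumbi <- extRemes::fevd(x = vals, type = "Gumbel", method = "MLE")
  tibble::tibble(
    return_period = return_period,
    return_level  = unname(c(extRemes::return.level(gumbi, return.period = return_period)))
  )
}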

# FLUXNET data
result <- get_plantwhc_mct_bysite(df_example_fluxnet, varname_wbal = "bal")
result
## # A tibble: 18 x 2
##    return_period return_level
##            <dbl>        <dbl>
##  1             2         127.
##  2             5         146.
##  3            10         158.
##  4            20         170.
##  5            30         177.
##  6            40         182.
##  7            50         185.
##  8            60         188.
##  9            70         191.
## 10            80         193.
## 11            90         195.
## 12           100         197.
## 13           120         200.
## 14           200         208.
## 15           250         212.
## 16           300         215.
## 17           500         223.
## 18           800         231.
# ALEXI/WATCH-WFDEI data
result <- get_plantwhc_mct_bysite(df_example_alexi, varname_wbal = "bal")
result
## # A tibble: 18 x 2
##    return_period return_level
##            <dbl>        <dbl>
##  1             2         220.
##  2             5         309.
##  3            10         369.
##  4            20         426.
##  5            30         459.
##  6            40         482.
##  7            50         500.
##  8            60         514.
##  9            70         527.
## 10            80         537.
## 11            90         547.
## 12           100         555.
## 13           120         570.
## 14           200         610.
## 15           250         628.
## 16           300         642.
## 17           500         683.
## 18           800         720.

Let’s visualise the estimated event size with a return period of \(T = 20\) y on top of the distribution of cumulative water deficit events.

mct_alexi   <- get_plantwhc_mct_bysite(df_example_alexi,   varname_wbal = "bal", thresh_terminate = 0.2, thresh_drop = 0.8)
mct_fluxnet <- get_plantwhc_mct_bysite(df_example_fluxnet, varname_wbal = "bal", thresh_terminate = 0.2, thresh_drop = 0.8)

## note: fill is mapped inside aes() so that scale_fill_manual() produces a legend
ggplot() +
  geom_histogram(
    data = out_mct_fluxnet$inst,
    aes(x = deficit, y = ..density.., fill = "FLUXNET"),
    color = "black", alpha = 0.5,
    position = "identity") +
  geom_histogram(
    data = out_mct_alexi$inst,
    aes(x = deficit, y = ..density.., fill = "ALEXI/WATCH-WFDEI"),
    color = "black", alpha = 0.5,
    position = "identity") +
  scale_fill_manual(name = "", values = c("FLUXNET" = "black", "ALEXI/WATCH-WFDEI" = "tomato")) +
  labs(title = "US-Ton", x = "Cumulative water deficit (mm)") +
  geom_vline(xintercept = mct_fluxnet %>% filter(return_period == 20) %>% pull(return_level), col = "black") +
  geom_vline(xintercept = mct_alexi %>% filter(return_period == 20) %>% pull(return_level), col = "tomato")

LUE vs. CWD with gumbi line

We expect that LUE \(=\) GPP\(/\)PAR declines with increasing CWD. Let’s plot LUE against CWD during the largest few CWD events for one site, together with the event size estimated for a 20-year return period (the “gumbi” line).

ALEXI/WATCH-WFDEI data

nyears <- out_mct_alexi$df %>% 
  mutate(year = year(date)) %>% 
  pull(year) %>% 
  unique() %>% 
  length()
biginstances <- out_mct_alexi$inst %>% 
  arrange(-deficit) %>% 
  slice(1:nyears) %>% 
  pull(iinst)

out <- out_mct_alexi$df %>% 
  filter(iinst %in% biginstances) %>% 
  ggplot(aes(x = deficit, y = gpp/ppfd)) +
  geom_point(alpha = 0.5) +
  labs(title = "US-Ton", x = "Cumulative water deficit (mm)", y = "GPP/PPFD", subtitle = "ET: ALEXI, precipitation: WATCH-WFDEI, GPP: FLUXNET, PPFD: FLUXNET") +
  geom_smooth(aes(y =  gpp/ppfd), color = 'red', method = 'lm', se = TRUE)
sitename <- "US-Ton"  ## site label used in the warnings below
if (!identical(mct_alexi, NA)){
  mct20 <- mct_alexi %>% filter(return_period == 20) %>% pull(return_level)
  if (!is.na(mct20)){
    out +
      geom_vline(xintercept = mct20, col = "tomato")
  } else {
    rlang::warn(paste("No ALEXI MCT outputs for site", sitename))
  }
} else {
  rlang::warn(paste("No ALEXI MCT outputs for site", sitename))
}

out

Methods evaluation at all sites

All of the steps evaluated above are implemented in a single function, test_mct_bysite(), which produces plot files for each site separately.

filn <- "data/out.Rdata"
if (!file.exists(filn)){
  allsites <- ddf_fluxnet %>% 
    pull(sitename)
  out <- list()
  for (site in allsites){
   out[[site]] <- test_mct_bysite(
     site,
     ddf_fluxnet %>% 
      filter(sitename == site), 
     df_alexi %>% 
       filter(idx == site),
     thresh_terminate = 0.1, 
     thresh_drop = 0.9,
     use_return_period = 10
   )
  }
  save(out, file = filn)
} else {
  load(filn)
}

## for some reason, purrr::map() did not work here.

Some evaluations across all sites. This shows a good correlation between the CWD event size with a 20-year return period and the CWD magnitude at which LUE drops to zero. This may indicate that plants size their rooting zone so as to withstand water deficits that occur, on average, more often than once every ~20 years.

allsites <- ddf_fluxnet %>% 
  pull(sitename)
df_corr_fluxnet <- tibble(lue0 = purrr::map_dbl(out, "lue0_fluxnet"), mct20 = purrr::map_dbl(out, "mct20_fluxnet")) %>% 
  mutate(sitename = allsites)
df_corr_alexi <- tibble(lue0 = purrr::map_dbl(out, "lue0_alexi"), mct20 = purrr::map_dbl(out, "mct20_alexi")) %>% 
  mutate(sitename = allsites)

df_corr_fluxnet %>% 
  filter(lue0 < 1000 & mct20 < 1000) %>% 
  rbeni::analyse_modobs2("lue0", "mct20")
## $df_metrics
## # A tibble: 10 x 3
##    .metric  .estimator .estimate
##    <chr>    <chr>          <dbl>
##  1 rmse     standard     119.   
##  2 rsq      standard       0.678
##  3 mae      standard      85.3  
##  4 n        standard      24    
##  5 slope    standard       0.604
##  6 mean_obs standard     144.   
##  7 prmse    standard       0.824
##  8 pmae     standard       0.591
##  9 bias     standard      77.1  
## 10 pbias    standard       0.757
## 
## $gg
## `geom_smooth()` using formula 'y ~ x'

## 
## $linmod
## 
## Call:
## lm(formula = obs ~ mod, data = df)
## 
## Coefficients:
## (Intercept)          mod  
##     10.6740       0.6036  
## 
## 
## $results
## # A tibble: 1 x 6
##     rsq  rmse   mae  bias slope     n
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.678  119.  85.3  77.1 0.604    24
df_corr_alexi %>% 
  filter(lue0 < 1000 & mct20 < 1000) %>% 
  rbeni::analyse_modobs2("lue0", "mct20")
## $df_metrics
## # A tibble: 10 x 3
##    .metric  .estimator .estimate
##    <chr>    <chr>          <dbl>
##  1 rmse     standard      55.5  
##  2 rsq      standard       0.903
##  3 mae      standard      33.9  
##  4 n        standard      94    
##  5 slope    standard       0.976
##  6 mean_obs standard     152.   
##  7 prmse    standard       0.365
##  8 pmae     standard       0.223
##  9 bias     standard      12.0  
## 10 pbias    standard       0.239
## 
## $gg
## `geom_smooth()` using formula 'y ~ x'

## 
## $linmod
## 
## Call:
## lm(formula = obs ~ mod, data = df)
## 
## Coefficients:
## (Intercept)          mod  
##     -8.0485       0.9761  
## 
## 
## $results
## # A tibble: 1 x 6
##     rsq  rmse   mae  bias slope     n
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.903  55.5  33.9  12.0 0.976    94