Calibrate rsofun with photocold branch

This was done with rsofun, branch photocold at commit ad1b734fec1e5371212ce767b611cbaaede2b02a.

Load rsofun

Get rsofun, photocold branch, and install it.

# devtools::install_github("computationales/rsofun@v4.2")
library(rsofun)

Description

This is to run the same evaluation of GPP simulated by the P-model as done for Stocker et al. (2020), using data from the FLUXNET2015 Tier 1 ensemble. Model forcing and observational GPP data are prepared as detailed in the vignette prepare_inputs_rsofun.Rmd. Respective files are available on Euler (~/data/rsofun_benchmarking/df_drivers_fluxnet2015.Rdata)

This assumes that the model is already calibrated (calibratable parameters are prescribed).

Note: For simulations used in Stocker et al. (2020), forcing data was written to files and read by Fortran. With the updated rsofun model, this is passed through R, using an object formatted like rsofun::df_drivers.

Load data

Load drivers data frame (created by prepare_inputs_FLUXNET2015_ensemble.Rmd).

load("~/data/rsofun_benchmarking/df_drivers_fluxnet2015.Rdata")

There seem to be some leap year dates which create problems for rsofun. Drop Feb. 29 dates.

df_drivers_fluxnet2015 <- df_drivers_fluxnet2015 %>% 
  dplyr::select(sitename, forcing) %>% 
  unnest(forcing) %>% 
  dplyr::filter(!(month(date)==2 & mday(date)==29)) %>% 
  
  ## model requires flux per seconds now
  mutate(prec = prec / (60*60*24), ppfd = ppfd / (60*60*24)) %>% 
  
  ## assuming all precipitation in liquid form
  mutate(rainf = prec, snowf = 0) %>% 

  ## required for new version, but not used because   
  mutate(tmin = temp, tmax = temp) %>% 

  group_by(sitename) %>% 
  nest() %>%
  rename(forcing = data) %>% 
  right_join(
    df_drivers_fluxnet2015 %>% 
      dplyr::select(-forcing),
    by = "sitename"
  ) %>% 
  ungroup() %>% 
  rename(site_info = siteinfo, params_soil = df_soiltexture)

## change name to make compatible
df_drivers_fluxnet2015 <- df_drivers_fluxnet2015 %>% 
  mutate(forcing = purrr::map(forcing, ~rename(., rain = rainf, snow = snowf)))


# save(df_drivers_fluxnet2015, file = "~/data/rsofun_benchmarking/df_drivers_fluxnet2015.Rdata")

Calibrate model

Define calibration sites.

flue_sites <- readr::read_csv( "~/data/flue/flue_stocker18nphyt.csv" ) %>%
              dplyr::filter( !is.na(cluster) ) %>% 
              distinct(site) %>% 
              pull(site)

# calibsites <- siteinfo_fluxnet2015 %>% 
#   dplyr::filter(!(sitename %in% c("DE-Akm", "IT-Ro1"))) %>%  # excluded because fapar data could not be downloaded (WEIRD)
#   # dplyr::filter(!(sitename %in% c("AU-Wom"))) %>%  # excluded because no GPP data was found in FLUXNET file
#   dplyr::filter(sitename != "FI-Sod") %>%  # excluded because some temperature data is missing
#   dplyr::filter( c4 %in% c(FALSE, NA) & classid != "CRO" & classid != "WET" ) %>%
#   dplyr::filter( sitename %in% flue_sites ) %>%
#   pull(sitename)

calibsites <- "DK-Sor"

Use the ingestr package once again, now for collecting calibration target data. I.e., GPP based on the nighttime flux decomposition method.

filn <- "~/data/rsofun_benchmarking/ddf_fluxnet_gpp.Rdata"
if (!file.exists(filn)){
  settings_ingestr_fluxnet <- list(
    dir_hh = "~/data/FLUXNET-2015_Tier1/20191024/HH/", 
    getswc = FALSE,
    filter_ntdt = TRUE,
    threshold_GPP = 0.8,
    remove_neg = FALSE
    )

  ddf_fluxnet_gpp <- ingestr::ingest(
    siteinfo = siteinfo_fluxnet2015 %>% 
      dplyr::filter(sitename %in% calibsites),
    source    = "fluxnet",
    getvars = list(gpp = "GPP_NT_VUT_REF",
                   gpp_unc = "GPP_NT_VUT_SE"),
    dir = "~/data/FLUXNET-2015_Tier1/20191024/DD/",
    settings = settings_ingestr_fluxnet,
    timescale = "d"
    )
  save(ddf_fluxnet_gpp, file = filn)
} else {
  load(filn)
}

Define calibration settings.

settings <- list(
  method      = "bayesiantools",
  targetvars  = c("gpp"),
  timescale   = list(targets_obs = "y"),
  metric      = cost_mse_photocold,
  dir_results = "./",
  name        = "photocold",
  control     = list(
    sampler = "DEzs",
    settings = list(
    burnin = 1500,
    iterations = 10000
    )),
  par = list(
    kphio       = list(lower=0.03, upper=0.2,  init = 0.09),
    
    soilm_par_a = list(lower=0.00, upper=1.0,  init = 0.33),
    soilm_par_b = list(lower=0.00, upper=10.0, init = 1.5),
    
    kphio_par_a = list(lower=-10,  upper=10,   init = 5.00),
    kphio_par_b = list(lower=0.1,  upper=10.0, init = 0.50),
    
    kphio_par_c = list(lower=1,    upper=200,  init = 50.0),
    kphio_par_d = list(lower=0.01, upper=10,   init = 0.1),
    kphio_par_e = list(lower=0   , upper=10,   init = 5)
    )
)

Calibrate the model.

overwrite <- TRUE
filn <- "../data/pars_calib_photocold.rds"
if (!file.exists(filn) || overwrite){
  set.seed(1982)
  pars <- calib_sofun(
    drivers  = df_drivers_fluxnet2015 %>% 
      dplyr::filter(sitename %in% calibsites),
    obs      = ddf_fluxnet_gpp %>% 
      dplyr::filter(sitename %in% calibsites),
    settings = settings
    )
  saveRDS(pars, file = filn)
} else {
  pars <- read_rds(filn)
}

## 
 Running DEzs-MCMC, chain  1 iteration 300 of 10002 . Current logp  -15.5958 -15.59799 -15.65309 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 600 of 10002 . Current logp  -15.55213 -15.69172 -15.62202 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 900 of 10002 . Current logp  -15.68108 -15.70155 -15.70449 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 1200 of 10002 . Current logp  -15.6113 -15.70264 -15.6602 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 1500 of 10002 . Current logp  -15.68773 -15.69169 -15.69553 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 1800 of 10002 . Current logp  -15.57389 -15.57387 -15.70715 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 2100 of 10002 . Current logp  -15.68721 -15.70316 -15.7108 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 2400 of 10002 . Current logp  -15.70078 -15.70264 -15.67519 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 2700 of 10002 . Current logp  -15.69168 -15.70673 -15.62534 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 3000 of 10002 . Current logp  -15.68621 -15.59564 -15.70767 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 3300 of 10002 . Current logp  -15.68409 -15.68829 -15.71042 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 3600 of 10002 . Current logp  -15.7095 -15.60977 -15.60139 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 3900 of 10002 . Current logp  -15.68121 -15.71072 -15.70169 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 4200 of 10002 . Current logp  -15.70844 -15.69423 -15.67598 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 4500 of 10002 . Current logp  -15.69992 -15.70654 -15.57865 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 4800 of 10002 . Current logp  -15.584 -15.70571 -15.70466 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 5100 of 10002 . Current logp  -15.63408 -15.63002 -15.58358 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 5400 of 10002 . Current logp  -15.69888 -15.69959 -15.69174 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 5700 of 10002 . Current logp  -15.61297 -15.69203 -15.67236 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 6000 of 10002 . Current logp  -15.70844 -15.60642 -15.69075 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 6300 of 10002 . Current logp  -15.70681 -15.70661 -15.69677 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 6600 of 10002 . Current logp  -15.7089 -15.65582 -15.70603 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 6900 of 10002 . Current logp  -15.62579 -15.70713 -15.69086 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 7200 of 10002 . Current logp  -15.68807 -15.70475 -15.67837 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 7500 of 10002 . Current logp  -15.68036 -15.64433 -15.65382 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 7800 of 10002 . Current logp  -15.64559 -15.58647 -15.69991 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 8100 of 10002 . Current logp  -15.66933 -15.60849 -15.66458 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 8400 of 10002 . Current logp  -15.70201 -15.67235 -15.68018 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 8700 of 10002 . Current logp  -15.58988 -15.58542 -15.65022 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 9000 of 10002 . Current logp  -15.62457 -15.6621 -15.70385 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 9300 of 10002 . Current logp  -15.69501 -15.6795 -15.69301 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 9600 of 10002 . Current logp  -15.66162 -15.70901 -15.67302 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 9900 of 10002 . Current logp  -15.65353 -15.60493 -15.70861 . Please wait! 

 Running DEzs-MCMC, chain  1 iteration 10002 of 10002 . Current logp  -15.6993 -15.60502 -15.69924 . Please wait!

The calibrated parameters are returned by calib_sofun() as part of the list:

pars

## $par
##        kphio  soilm_par_a  soilm_par_b  kphio_par_a  kphio_par_b  kphio_par_c 
##   0.07032196   0.89026009   4.24783206   1.46286160   0.41185385 143.79587834 
##  kphio_par_d  kphio_par_e 
##   1.13910311   2.58289924

Update model parameters.

params_modl <- list(
    kphio       = pars$par[1],
    
    soilm_par_a = pars$par[2],
    soilm_par_b = pars$par[3],
    
    kphio_par_a = pars$par[4],
    kphio_par_b = pars$par[5],
    kphio_par_c = pars$par[6],
    kphio_par_d = pars$par[7],
    kphio_par_e = pars$par[8]
  )

Run model

"~/data/rsofun_benchmarking/df_drivers_fluxnet2015_allsites.Rdata" is prepared by benchmarking/collect_data/prepare_inputs_FLUXNET2015_allsites.Rmd.

output <- rsofun::runread_pmodel_f(
  df_drivers_fluxnet2015 %>% 
      dplyr::filter(sitename %in% calibsites) %>% 
      mutate(forcing = purrr::map(forcing, 
                                  ~mutate(., 
                                          snow = ifelse(temp < 1, prec, 0),
                                          prec = ifelse(temp < 1, 0, prec)))),
  par = params_modl
  )

saveRDS(output, file = "../data/output_photocold.rds")

Run evaluation

Single site

df_modobs <- output %>% 
  filter(sitename %in% calibsites) %>% 
  select(sitename, data) %>% 
  unnest(data) %>% 
  select(sitename, date, mod = gpp) %>% 
  left_join(
    ddf_fluxnet_gpp %>% 
      filter(sitename %in% calibsites) %>% 
      unnest(data) %>% 
      rename(obs = gpp),
    by = c("sitename", "date")
  )

library(rbeni)

## 
## Attaching package: 'rbeni'

## The following object is masked from 'package:ingestr':
## 
##     remove_outliers

df_modobs %>%
  # ggplot() +
  # geom_hex(aes(mod, obs))
  analyse_modobs2("mod", "obs")

## Loading required package: LSD

## 
## Attaching package: 'LSD'

## The following objects are masked from 'package:rbeni':
## 
##     colorpalette, complementarycolor, convertcolor, convertgrey,
##     daltonize, disco, distinctcolors, heatpairs, heatscatter,
##     heatscatterpoints

## Loading required package: ggthemes

## Loading required package: RColorBrewer

## $df_metrics
## # A tibble: 12 × 3
##    .metric  .estimator .estimate
##    <chr>    <chr>          <dbl>
##  1 rmse     standard       2.28 
##  2 rsq      standard       0.855
##  3 mae      standard       1.55 
##  4 n        standard    4890    
##  5 slope    standard       0.958
##  6 mean_obs standard       5.22 
##  7 prmse    standard       0.436
##  8 pmae     standard       0.296
##  9 bias     standard      -0.670
## 10 pbias    standard      -0.369
## 11 cor      standard       0.925
## 12 cor_test standard       0    
## 
## $gg

## `geom_smooth()` using formula 'y ~ x'

## 
## $linmod
## 
## Call:
## lm(formula = obs ~ mod, data = df)
## 
## Coefficients:
## (Intercept)          mod  
##      0.8599       0.9582  
## 
## 
## $results
## # A tibble: 1 × 6
##     rsq  rmse   mae   bias slope     n
##   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1 0.855  2.28  1.55 -0.670 0.958  4890

df_modobs %>% 
  ggplot() +
  geom_line(aes(date, obs)) +
  geom_line(aes(date, mod), color = "red")

df_modobs %>% 
  mutate(doy = lubridate::yday(date)) %>% 
  group_by(sitename, doy) %>% 
  summarise(obs = mean(obs, na.rm = TRUE),
            mod = mean(mod, na.rm = TRUE)) %>% 
  ggplot() +
  geom_line(aes(doy, obs)) +
  geom_line(aes(doy, mod), color = "red")

## `summarise()` has grouped output by 'sitename'. You can override using the `.groups` argument.

Multi-site

Do evaluation only for sites where simulation was run.

evalsites <- output %>% 
  mutate(ntsteps = purrr::map_dbl(data, ~nrow(.))) %>% 
  dplyr::filter(ntsteps > 0) %>% 
  pull(sitename)

Load standard benchmarking file with observational data for evaluation.

load("~/data/rsofun_benchmarking/obs_eval_fluxnet2015.Rdata")

Define evaluation settings.

settings_eval <- list(
  benchmark = list( gpp = c("fluxnet") ),
  sitenames = evalsites,
  agg       = 8
  )

And finally run the evaluation.

source("~/sofunCalVal/R/eval_sofun.R")
source("~/sofunCalVal/R/get_stats.R")
filn <- "../out_eval_photocold.rds"
overwrite <- TRUE
if (!file.exists(filn) || overwrite){
  out_eval <- eval_sofun( 
    output, 
    settings_eval, 
    settings_sims, 
    obs_eval = obs_eval, 
    overwrite = TRUE, 
    light = FALSE 
    )
  saveRDS(out_eval, file = filn)
} else {
  out_eval <- read_rds(filn)
}

## Error in approx(seq(length(vec)), vec, xout = seq(length(vec))) : 
##   need at least two non-NA values to interpolate
## Error in approx(seq(length(vec)), vec, xout = seq(length(vec))) : 
##   need at least two non-NA values to interpolate
## Error in approx(seq(length(vec)), vec, xout = seq(length(vec))) : 
##   need at least two non-NA values to interpolate
## Error in approx(seq(length(vec)), vec, xout = seq(length(vec))) : 
##   need at least two non-NA values to interpolate
## Error in approx(seq(length(vec)), vec, xout = seq(length(vec))) : 
##   need at least two non-NA values to interpolate
## Error in approx(seq(length(vec)), vec, xout = seq(length(vec))) : 
##   need at least two non-NA values to interpolate
## Error in approx(seq(length(vec)), vec, xout = seq(length(vec))) : 
##   need at least two non-NA values to interpolate
## Error in approx(seq(length(vec)), vec, xout = seq(length(vec))) : 
##   need at least two non-NA values to interpolate
## Error in approx(seq(length(vec)), vec, xout = seq(length(vec))) : 
##   need at least two non-NA values to interpolate
## Error in approx(seq(length(vec)), vec, xout = seq(length(vec))) : 
##   need at least two non-NA values to interpolate
## Error in approx(seq(length(vec)), vec, xout = seq(length(vec))) : 
##   need at least two non-NA values to interpolate
## Error in approx(seq(length(vec)), vec, xout = seq(length(vec))) : 
##   need at least two non-NA values to interpolate

Evaluation results

Metrics table

out_eval$gpp$fluxnet$metrics %>% 
  bind_rows(.id = "Level") %>% 
  kable

Level	rsq	rmse	slope	bias	nvals
daily_pooled	0.8562269	2.279257	0.9577751	-0.6712623	4932
xdaily_pooled	0.9167928	1.795237	1.0422885	-0.6885430	667
annual_pooled	0.1771821	246.137325	0.4430771	-225.3698006	13
monthly_pooled	0.9570822	1.379389	1.0801793	-0.6675326	162
spatial	0.0000000	232.835203	NA	-232.8352028	1
anomalies_annual	0.1771821	99.235920	0.4430771	7.4654022	13
meandoy	0.9661538	1.320187	1.0985886	-0.6648781	365
anomalies_daily	0.4761007	1.898195	0.4305972	-0.0331886	4928
meanxoy	0.9761579	1.213910	1.1093916	-0.6720466	46
anomalies_xdaily	0.3465137	1.313884	0.3987394	-0.0017270	667

Visualisations

Correlations

out_eval$gpp$fluxnet$plot$gg_modobs_xdaily

out_eval$gpp$fluxnet$plot$gg_modobs_spatial_annual

Mean seasonal cycle

## plot
out_eval$gpp$fluxnet$data$meandoydf_byclim %>% 
  dplyr::filter(climatezone %in% c("Aw south", "BSk north", "Cfa north", "Cfb north", "Cfb south", "Csa north", "Csb north", "Dfb north", "Dfc north")) %>%
  dplyr::filter(koeppen_code != "-") %>% 
  pivot_longer(c(obs_mean, mod_mean), names_to = "source", values_to = "gpp") %>% 
  ggplot() +
  geom_ribbon(
    aes(x = doy, ymin = obs_min, ymax = obs_max), 
    fill = "black", 
    alpha = 0.2
    ) +
  geom_line(aes(x = doy, y = gpp, color = source), size = 0.4) +
  labs(y = expression( paste("Simulated GPP (g C m"^-2, " d"^-1, ")" ) ), 
       x = "DOY") +
  facet_wrap( ~climatezone ) +    # , labeller = labeller(climatezone = list_rosetta)
  theme_gray() +
  theme(legend.position = "bottom") +
  scale_color_manual(
    name="Setup: ",
    values=c("red", "black")
    # values=c("FULL" = "#DE1A1A", "Observed" = "black")
    )