Overview for package rsofun

Benjamin D. Stocker

2019-01-16

Environment

Load the package. This contains all the necessary wrapper functions to set up and run SOFUN and read its output.

library(rsofun)
## Warning: replacing previous import 'dplyr::intersect' by
## 'lubridate::intersect' when loading 'rsofun'
## Warning: replacing previous import 'dplyr::union' by 'lubridate::union'
## when loading 'rsofun'
## Warning: replacing previous import 'dplyr::setdiff' by 'lubridate::setdiff'
## when loading 'rsofun'
## Warning: replacing previous import 'purrr::invoke' by 'rlang::invoke' when
## loading 'rsofun'
## Warning: replacing previous import 'purrr::flatten_int' by
## 'rlang::flatten_int' when loading 'rsofun'
## Warning: replacing previous import 'purrr::flatten_dbl' by
## 'rlang::flatten_dbl' when loading 'rsofun'
## Warning: replacing previous import 'purrr::list_along' by
## 'rlang::list_along' when loading 'rsofun'
## Warning: replacing previous import 'purrr::%||%' by 'rlang::%||%' when
## loading 'rsofun'
## Warning: replacing previous import 'purrr::flatten_chr' by
## 'rlang::flatten_chr' when loading 'rsofun'
## Warning: replacing previous import 'purrr::prepend' by 'rlang::prepend'
## when loading 'rsofun'
## Warning: replacing previous import 'purrr::flatten' by 'rlang::flatten'
## when loading 'rsofun'
## Warning: replacing previous import 'Metrics::ll' by 'rlang::ll' when
## loading 'rsofun'
## Warning: replacing previous import 'purrr::rep_along' by 'rlang::rep_along'
## when loading 'rsofun'
## Warning: replacing previous import 'purrr::splice' by 'rlang::splice' when
## loading 'rsofun'
## Warning: replacing previous import 'purrr::modify' by 'rlang::modify' when
## loading 'rsofun'
## Warning: replacing previous import 'purrr::flatten_lgl' by
## 'rlang::flatten_lgl' when loading 'rsofun'
## Warning: replacing previous import 'purrr::as_function' by
## 'rlang::as_function' when loading 'rsofun'
## Warning: replacing previous import 'purrr::%@%' by 'rlang::%@%' when
## loading 'rsofun'
## load all rsofun dependencies
load_dependencies_rsofun()

## other crap
systr <- "''"    # for Mac
knitr::opts_knit$set( root.dir = rprojroot::find_rstudio_root_file() ) # does not work properly
## for developer setup
if (!file.exists("bash"))    system("ln -s inst/bash bash")
if (!file.exists("extdata")) system("ln -s inst/extdata extdata")

# if (is.null(options()$rsofun.dir.sofun)) rlang::abort( "Option rsofun.dir.sofun not set. Do so by `options( list( rsofun.dir.sofun=string_path_where_sofun_is ))`" )  
options( list( rsofun.dir.sofun="~/sofun/trunk/" ) )

Simplest setup

This is the simplest setup, where the SOFUN is executed as a function for a single time step and location.

Simulation settings

## Set the option specifying the location of SOFUN manually it it's not set
settings_sims_simple <- list( 
  setup = "simple", 
  implementation  = "fortran", 
  dir_sofun = options()$rsofun.dir.sofun 
  )

Model setup

Use the demo_pmodel compilation for the simple setup

setup_sofun_simple <- list(
  model      = "demo_pmodel",
  dir        = options()$rsofun.dir.sofun,
  do_compile = FALSE,
  simsuite   = FALSE
  )

Prepare setup

This the simple setup, this just checks whether the model code is in place.

settings_sims_simple <- prepare_setup_sofun( settings = settings_sims_simple, setup = setup_sofun_simple )
## [1] "Copying executable provided by rsofun and compiled on a 64-bit UNIX machine with gfortran into the SOFUN run directory..."

Run the model

Run SOFUN in the simple setup.

## First update calibratable parameters.
knitr::opts_knit$set( root.dir = rprojroot::find_rstudio_root_file() )
params_opt <- readr::read_csv( paste0( path.package("rsofun"), "/extdata/params_opt_kphio_soilm_global.csv" ) ) # as an example
nothing <- update_params( params_opt, settings_sims_simple$dir_sofun )

## Check if executable is available, otherwise copy it from the R package's external data
## This is not done inside the function 'pmodel()'. It would slow it down unnecessarily
if (!file.exists( paste0( settings_sims_simple$dir_sofun, "/rundemo_pmodel"))){
  system( paste0( "cp extdata/rundemo_pmodel ", settings_sims_simple$dir_sofun ) )
}

## Run for different temperatures and plot
ppfd <- 800
vpd  <- 1000
co2  <- 400
elv  <- 0
fapar <- 1

## Fortran
## update parameter
params_opt <- dplyr::tibble( kphio=0.05 ) # , temp_ramp_edge=NA, soilm_par_a=NA, soilm_par_b=NA
nothing <- update_params( params_opt, options()$rsofun.dir.sofun )

ptm <- proc.time()
pmodel_fortran <- purrr::map( as.list( seq( 0, 35, length.out = 100 ) ), 
  ~pmodel( temp = ., vpd = vpd, co2 = co2, ppfd = ppfd, fapar = fapar, elv = elv, implementation = "fortran", sofundir = settings_sims_simple$dir_sofun ) )
time_for <- proc.time() - ptm

## R
ptm <- proc.time()
pmodel_R <- purrr::map( as.list( seq( 0, 35, length.out = 100 ) ), 
  ~rpmodel( tc = ., vpd = vpd, co2 = co2, elv = elv, kphio = 0.05, fapar = fapar, ppfd = ppfd, method_optci="prentice14", method_jmaxlim = "wang17", do_ftemp_kphio = FALSE ) 
  )
time_R <- proc.time() - ptm

plot(  seq( 0, 35, length.out = 100 ), pmodel_fortran %>% purrr::map_dbl("gpp"), ylim = c(0,300), type = "l", xlab = "Temperature (deg C)", ylab = "GPP (gC/d/m2)" )
lines( seq( 0, 35, length.out = 100 ), pmodel_R %>% purrr::map_dbl("gpp"), col = "red" )
legend( "topright", c("Fortran", "R"), lty = 1, col = c("black", "red"), bty = "n")

Site-scale simulations

The examples shown below are for site-scale simulations, i.e., where one model run covers a single point only. Note that calibration and evaluation steps (described further below) are currently available only for site-scale simulations.

Simulation settings

The simulation settings define the model simulations for each site and the the user-defined local environment. Simulation settings are defined as a named list. The user specifies only variables that are identical across simulations within an ensemble. Other variables that differ between simulation within an ensemble (e.g. start and end year of the simulation, vegetation type C3 or C4, longitude and latitude of the site, soil type, etc.) are to be specified in the meta info file path_siteinfo (a CSV file created separately). The function prepare_setup_sofun() complements the simulation settings by these additional components as lists with elements for each site, nested within the simulation settings list.

In the example given here, we use a site meta information file for FLUXNET 2015 Tier 1 sites, provided along with the rsofun package. It’s located at the path /rsofun/extdata/siteinfo_example.csv. For demonstration, we define a set of site-scale simulations of length one. That is, for only one site, here “FR-Pue”. Note that FLUXNET 2015 data and MODIS FPAR data is provided along with the package rsofun for this one site for demonstration purposes only and provides data only for a reduced number of years (2007-2014). The site meta information file looks like this:

siteinfo <- readr::read_csv( paste0( path.package("rsofun"), "/extdata/siteinfo_example.csv" ) )
print( siteinfo )
## # A tibble: 1 x 13
##   sitename    lon     lat   elv year_start year_end years_data classid
##      <chr>  <dbl>   <dbl> <int>      <int>    <int>      <int>   <chr>
## 1   FR-Pue 3.5958 43.7414   270       2007     2014          8     EBF
## # ... with 5 more variables: whc <dbl>, c4 <chr>, koeppen_code <chr>,
## #   kgnumber <int>, koeppen_code_extr <chr>

To conduct P-model simulations with the Fortran implementation, respective model code needs to be downloaded separately from the Github sofun repository and placed in a separate directory (dir_sofun below). Builds of this Fortran code are available for public download at XXX in order to avoid having to compile from source.

settings_sims_sitescale <- list(
  path_siteinfo   = paste0( path.package("rsofun"), "/extdata/siteinfo_example.csv"),
  ensemble        = TRUE,
  setup           = "site",
  name            = "example",
  dir_sofun       = options()$rsofun.dir.sofun,
  path_output     = "~/sofun/output_example_sofun/",
  path_output_nc  = "~/sofun/output_nc_example_sofun/",
  path_input      = "~/sofun/input_example_sofun/",
  grid            = NA,
  implementation  = "fortran",
  in_ppfd         = TRUE,
  in_netrad       = FALSE,
  recycle         = 1,
  spinupyears     = 10,
  calibvars       = c("gpp"),  # needed later for calibration setup, any of "gpp", "fapar", and "transp"
  soilmstress     = TRUE,
  tempstress      = TRUE,
  loutdgpp        = TRUE,
  loutdwcont      = TRUE,
  loutdaet        = TRUE,
  loutdpet        = TRUE,
  loutdalpha      = TRUE
  )
  • path_siteinfo: Path (character string) of a CSV file that contains all the information needed for defining simulations. One row for each simulation in an ensemble, typically sites. Specifying this element is required if ensemble = TRUE. Columns are as follows:
    • site name, must be: column number 1
    • longitude of site, column must be named ‘lon’
    • latitude of site, column must be named ‘lat’
    • elevation of site, column must be named ‘elv’
    • years for which simulation is to be done (corresponding to data availability from site), requires two columns named ‘year_start’ and ‘year_end’.
  • ensemble: TRUE if an ensemble of site-level simulations are to be run (This may become obsolte, this information is given already if the number of rows in the CSV file path_siteinfo is larger than one.)
  • setup: String. One of "simple" (SOFUN used as a function, single time step, single location), "site" (site-scale simulation), or "lonlat" (spatial simulation on a longitude-latitude grid).
  • name: a character string specifying the name of the ensemble (e.g. ‘fluxnet2015’) or of a single simulation.
  • dir_sofun: Path (character string) where the model sits (corresponding to the parent directory of the respective git repository).
  • path_output: Path (character string) where model output (ascii text files) is written to.
  • path_output_nc: Path (character string) where NetCDF model output is written to.
  • path_input: Path (character string) where model input is located.
  • grid: Character string defining the type of grid used, e.g. halfdeg for half-degree resolution in lon. and lat. (only used in lonlat setup).
  • implementation: Character string specifying whether Fortran (implementation= "fortran") or Python (implementation= "python") version is to be used.
  • in_ppfd: Switch (TRUE of FALSE) whether PPFD should be read from data (prescribed) or simulated online using SPLASH and prescribed fractional cloud cover data.
  • recycle: Periodicity (integer, number of years) of repeating forcing years during spinup (e.g. if the simulation start year is 1982 and recycle=3, then forcing years 1979-1981 are repeated during the duration of the model spinup, so that the last year of the spinup is forcing year 1981. For recycle=1 and simulation start year 1982, forcing year 1981 is used for all years of the model spinup. Here, ‘forcing year’ refers to the year AD in the climate, CO2, fAPAR, etc. data used as model forcing.
  • spinupyears: Integer, number of model spinup years before the transient simulation. Use spinupyears > 0 if the model contains pool variables that that are simulated by dynamics that depend on their current state (typically soil water storage, or plant and soil carbon pools). Typically spinupyears = 10 is sufficient to bring soil water pools to equilibrium across the globe (unless you chose a large soil water holding capacity).
  • calibvars: A vector of strings specifying which variables are to be calibrated. Use c() (used as default) for a standard model setup. See vignette calib_sofun.pdf for an example of a model calibration setup.
  • soilmstress: Switch (TRUE of FALSE) defining whether soil moisture stress function is to be applied to GPP.
  • tempstress: Switch (TRUE of FALSE) defining whether temperature stress function is to be applied to GPP.
  • lout<var>: Switch (TRUE of FALSE) whether variable <var> is to be written to output (ascii text file for time series). To be anbandoned, only NetCDF output should be maintained.
  • lout<var>: Switch (TRUE of FALSE) whether variable <var> is to be written to output (NetCDF file).

Input settings

Input settings define data sources used for SOFUN input. Two modes of specifying inputs are available. Either providing the input data directly (element data, not available for "lonlat" simulations), or by specifying a keyword for the source dataset as strings. E.g. "fluxnet2015" triggers specific functions that read input data from specifically formatted files, here all the site-level meteo data from FLUXNET 2015. Define the input settings as a list.

settings_input_sitescale <-  list(
  data                     = NA,
  temperature              = "fluxnet2015",
  precipitation            = "fluxnet2015",
  vpd                      = "fluxnet2015",
  ppfd                     = "fluxnet2015",
  netrad                   = "fluxnet2015",
  cloudcover               = NA,
  fapar                    = "MODIS_FPAR_MCD15A3H",
  splined_fapar            = TRUE,
  path_co2                 = paste0( path.package("rsofun"), "/extdata/cCO2_rcp85_const850-1765.dat" ),
  path_fluxnet2015         = paste0( path.package("rsofun"), "/extdata/" ),
  path_MODIS_FPAR_MCD15A3H = paste0( path.package("rsofun"), "/extdata/" ),

  ## below is information for downloading input data from remote server
  get_from_remote          = TRUE,
  path_remote_watch_wfdei  = "/work/bstocker/labprentice/data/watch_wfdei/",
  path_remote_co2          = "/work/bstocker/labprentice/data/co2/cCO2_rcp85_const850-1765.dat",
  path_remote_cru          = "/work/bstocker/labprentice/data/cru/ts_4.01/",
  path_remote_MODIS_FPAR_MCD15A3H  = "/work/bstocker/labprentice/data/fapar_MODIS_FPAR_MCD15A3H_fluxnet2015_gee_subset/",
  path_remote_MODIS_EVI_MOD13Q1 = "/work/bstocker/labprentice/data/evi_MODIS_EVI_MOD13Q1_fluxnet2015_gee_subset/",
  path_remote_fluxnet2015  = "/work/bstocker/labprentice/data/FLUXNET-2015_Tier1/20160128/point-scale_none_1d/original/unpacked/",
  path_remote_watch_wfdei  = "/work/bstocker/labprentice/data/watch_wfdei/",
  uname_remote             = "bstocker",
  address_remote           = "login.cx1.hpc.ic.ac.uk"
  )
  • data: A named list of data frames, each containing the input data for one site with a column "date" specifying the date, and columns fapar, tempvarnam, precvarnam, vpdvarnam, ppfdvarnam, and netradvarnam specifying the fraction of absorbed photosynthetically active radiation, temperature, precipitation, vapour pressure deficit, photosynthetic photon flux density, and net radiation, respictively. Elements of the list data must be named according to site names. If data = NA, data will be read from files. If data is provided, all other elements of the input settings will be ignored.
  • temperature: A character string specifying the source for temperature data. Any of "fluxnet2015", "watch_wfdei", and/or "cru". This can also be a vector of strings (e.g. c("fluxnet2015", "watch_wfdei"), to specify priorities: first take FLUXNET 2015 data for periods where data is available. For remaining years (given by date_start and date_end), use WATCH-WFDEI data. If "fluxnet2015" is specified for any of temperature, precipitation, vpd, ppfd, or netrad, then path_fluxnet2015 must be specified as well in the settings.
  • precipitation: See temperature.
  • vpd: See temperature.
  • ppfd: See temperature.
  • netrad: See temperature.
  • fapar: A character string specifying the type of fAPAR data used as input. Implemented for use of data from CX1 are "MODIS_FPAR_MCD15A3H" and "MODIS_EVI_MOD13Q1". Use NA in case no fAPAR data is used as forcing (internally simulated fAPAR).
  • splined_fapar: Logical defining whether splined fAPAR data is to be used. If FALSE, linearly interpolated fAPAR data is used.
  • co2: A character string specifying which CO\(_2\) file should be used (globally uniform and identical for each site in an ensemble). All available CO\(_2\) forcing files are located on CX1 at /work/bstocker/labprentice/data/co2/. co2="cmip" specifies that the CMIP-standard CO2 file cCO2_rcp85_const850-1765.dat should be used.
  • path_fluxnet2015: A character string specifying the path where standard FLUXNET 2015 CSV files are located.
  • path_watch_wfdei: A character string specifying the path where standard WATCH-WFDEI NetCDF files are located.
  • path_cru_ts4_01: A character string specifying the path where standard CRU NetCDF files are located.
  • path<fapar>: A character string specifying the path where site-specific fapar files are located. This element is named according to the fapar setting (element fapar). E.g., an element named path_MODIS_FPAR_MCD15A3H is required if fapar = MODIS_FPAR_MCD15A3H.

Model setup

Define model setup as a list.

setup_sofun_sitescale <- list(
  model      = "pmodel",
  dir        = options()$rsofun.dir.sofun,
  do_compile = FALSE,
  simsuite   = FALSE
  )
  • model: For Fortran version: A character string specifying the compilation option. The name of the executable is derived from this as "run<setup_sofun$model>".
  • dir: A path (character) specifying the directory of where the executables are located (corresponds to the parent directory of the model git repository).
  • do_compile: If TRUE, the model code is compiled as make <setup_sofun$model>. If FALSE, compiled executables are used (compiled with gfortran on a Mac 64-bit).
  • simsuite: If TRUE, the SOFUN option for running an entire simulation suite (ensemble of simulations) with a single executable. (In the Fortran implementation, this this requires the main program file sofun_simsuite.f90 to be compiled instead of sofun.f90). This is to be preferred over a set of individual runs submitted individually when doing calibration runs because the cost across the entire ensemble (mod-obs error) can thus be calculated online.

Workflow

The example shown below is for a set of site-scale simulations.

Prepare simulation setup

Create a run directory with all the simulation parameter files (defining simulation years, length, etc.). This returs the settings_sims list, complemented by additional information. Calibration settings are an optional argument. When passed on, simulation parameter files will contain information which target variables are to be written to special calibration output files (a single file written for an entire ensemble).

settings_sims_sitescale <- prepare_setup_sofun(
  settings = settings_sims_sitescale,
    setup = setup_sofun_sitescale,
  write_paramfils = TRUE
  )
## [1] "Copying executable provided by rsofun and compiled on a 64-bit UNIX machine with gfortran into the SOFUN run directory..."
## [1] "writing site parameter files..."
## [1] "writing simulation parameter files..."

Prepare inputs

Prepare SOFUN input (climate input, CO2, etc.). Complements settings_input. This will require inputs from the user through the prompt, entered in the console to specify whether data files should be downloaded from Imperial CX1. In case you chose to download, you must have access to CX1 and be connected to the Imperial VPN. Once asked (see console!), enter your user name on CX1. This also requires that no additional entering of the password is required. In order to set this up, you need to generate an SSH key pair beforehand (see here.

inputdata <- prepare_input_sofun(
  settings_input = settings_input_sitescale,
  settings_sims = settings_sims_sitescale,
  return_data = TRUE,
  overwrite_climate = TRUE,
  overwrite_fapar = TRUE,
  verbose = TRUE
  )

Calibrate the model

Define the calibration settings as.

## Define calibration settings common for all setups
settings_calib <- list(
  name             = "example",
  method           = "optimr",
  targetvars       = c("gpp"),
  timescale        = list( gpp = "d" ),
  par              = list( kphio = list( lower=0.01, upper=0.12, init=0.05 ) ),
  datasource       = list( gpp = "fluxnet2015_NT" ),
  path_fluxnet2015 = paste0( path.package("rsofun"), "/extdata/" ),
  path_gepisat     = paste0( path.package("rsofun"), "/extdata/" ),
  maxit            = 50,
  sitenames        = "FR-Pue",  # dplyr::filter(siteinfo$light, c4 %in% c(FALSE, NA) )$mysitename, # calibrate for non-C4 sites
  filter_temp_min  = 10.0,
  filter_temp_max  = 35.0,
  filter_soilm_min = NA,     # this is dangerous: removes entire sites where no soil moisture is measured
  filter_drought   = FALSE,
  metric           = "rmse",
  dir_results      = "/Users/benjaminstocker/rsofun/calib_results" # use absolute path!
 )

Do the calibration

Run the model

Run SOFUN with calibrated parameters.

params_opt <- readr::read_csv( paste0( settings_calib$dir_results, "/params_opt_", settings_calib$name, ".csv" ) ) # as an example
nothing <- update_params( params_opt, settings_sims_sitescale$dir_sofun )
mod <- runread_sofun(
  settings = settings_sims_sitescale,
  setup = setup_sofun_sitescale
  )
## [1] "processing NetCDF outputs..."
## [1] "processing FR-Pue..."
## [1] "reading from daily NetCDF files..."
## [1] "Reading NetCDF for FR-Pue"

Evaluate the model

Define model evaluation settings as a list.

settings_eval <- list(
  sitenames = "FR-Pue",
  benchmark = list( gpp = c("fluxnet2015_NT") ),
  sitenames_siteplots = "FR-Pue",
  agg = 5,
  path_fluxnet2015_d = paste0( path.package("rsofun"), "/extdata/" ),
  path_fluxnet2015_w = "",
  path_fluxnet2015_m = paste0( path.package("rsofun"), "/extdata/" ),
  path_fluxnet2015_y = paste0( path.package("rsofun"), "/extdata/" ),
  path_gepisat_d     = "",
  dir_figs           = "~/rsofun/fig/"
  )
## Get observational data for evaluation
obs_eval <- get_obs_eval( settings_eval = settings_eval, settings_sims = settings_sims_sitescale, overwrite = TRUE )
## [1] "getting annual FLUXNET-2015_Tier1 data..."
## [1] "/Library/Frameworks/R.framework/Versions/3.3/Resources/library/rsofun/extdata/"
## [1] "getting monthly FLUXNET-2015_Tier1 data..."
## [1] "getting daily FLUXNET-2015_Tier1 data..."
out_eval <- eval_sofun( mod, settings_eval, settings_sims_sitescale, obs_eval = obs_eval, overwrite = TRUE )
## [1] "Aggregating model outputs..."
## [1] "Evaluate annual values..."
## [1] "Evaluate monthly values..."
## [1] "Evaluate spatial values..."
## [1] "Evaluate interannual variability..."
## [1] "Evaluate mean seasonal cycle..."
## [1] "Evaluate mean seasonal cycle by climate zones..."
## [1] "Evaluate inter-day variability..."
## [1] "Evaluate mean seasonal cycle by X-day periods..."
## [1] "Evaluate inter-X-day variability..."
## [1] "Done with eval_sofun()."

The object returned by eval_sofun() includes a set of standard functions that can be applied to the evaluation data itself. Here is one example for observed versus modelled values.

modobs_daily   <- out_eval$plot$modobs_daily()

modobs_xdf     <- out_eval$plot$modobs_xdaily()

modobs_mdf     <- out_eval$plot$modobs_monthly()

modobs_meandoy <- out_eval$plot$modobs_meandoy()

modobs_meandoy <- out_eval$plot$modobs_annual()

out_eval$plot$by_doy_allsites()

out_eval$plot$by_xoy_allsites()

modobs_anomalies_daily  <- out_eval$plot$modobs_anomalies_daily()

modobs_anomalies_xdaily <- out_eval$plot$modobs_anomalies_xdaily()

modobs_anomalies_annual <- out_eval$plot$modobs_anomalies_annual()

modobs_meanxoy <- out_eval$plot$modobs_meanxoy()

out_eval$plot$by_doy_allzones()

#out_eval$plot$modobs_spatial_annual()
modobs_spatial <- out_eval$plot$modobs_spatial()