library(tidyverse)   # for working with ease

Read data

The MESI data.

df <- read_csv("../data/mesi_main.csv")
## Warning: One or more parsing issues, see `problems()` for details

The Liang et al. data can be obtained as an Excel file from here: https://doi.org/10.1111/gcb.15071. I first exported each excel tab to a separate CSV file and then combined them into a single big table. Respective code is commented out below and the combined file is read directly.

# files <- list.files(path = "~/data/nfert_liang/data_csv/", full.names = TRUE)
# df <- purrr::map_dfr(as.list(files), ~read_csv(.))
# use_names <- c(
#   "variable",           "reference",          "lat",                "lon",                "mat",               
#   "map",                "ph_soil",            "species",            "family",             "genus",             
#   "photopathway",       "legume",             "managed",            "growthform",         "functionaltype",    
#   "broadleaf_conifer",  "evergreen_deciduous","ninorg_form",        "ndep_kg_ha_yr",      "duration_yr",       
#   "load",               "value_t",            "value_c",            "n_t",                "n_c",               
#   "sd_t",               "sd_c",               "ln_rr",              "v",                  "w",                 
#   "cluster",            "w_dash"
#   )
# names(df) <- use_names
# write_csv(df, file = "~/data/nfert_liang/nfert_liang_combined.csv")
  
# modify nfert_liang_combined.csv by hand to have nice column names.
df_liang <- read_csv("../data-raw/nfert_liang_combined.csv")
## New names:
## Rows: 2683 Columns: 48
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (15): variable, reference, lat, lon, species, family, genus, photopathwa... dbl
## (17): mat, map, ph_soil, ndep_kg_ha_yr, duration_yr, load, value_t, valu... lgl
## (16): ...33, ...34, ...35, ...36, ...37, ...38, ...39, ...40, ...41, ......
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
## • `` -> `...36`
## • `` -> `...37`
## • `` -> `...38`
## • `` -> `...39`
## • `` -> `...40`
## • `` -> `...41`
## • `` -> `...42`
## • `` -> `...43`
## • `` -> `...44`
## • `` -> `...45`
## • `` -> `...46`
## • `` -> `...47`
## • `` -> `...48`

Example for how the two look like (Nmass).

MESI:

df %>% 
  filter(response == "leaf_n") %>% 
  head() %>% 
  knitr::kable()
db id duplicate_id citation response site study exp lat lon elevation mat map ecosystem_type vegetation_type experiment_type community_type dominant_species growth_form age disturbance_type treatment npk w_t1 c_c c_t d_t d_t2 n_c n_t p_c p_t k_c k_t i_c i_t i_t2 s_c s_t w_t2 w_t3 start_year duration treatment_duration fumigation_type start_treatment end_treatment sampling_year sampling_depth aggregation_level x_c x_t x_units sd_c sd_t se_c se_t rep_c rep_t sampling_date comment
antwerp p27451 p27451 agrell_et_al_2005 leaf_n aspenface aspenface_bp aspenface_bp_c 45.70 -89.50 490 4.9 810 forest NA field planted betula_papyrifera NA NA NA c _000 NA 367 560 0 0 0 0 0 0 0 0 0 0 0 NA NA 0 0 1998 NA NA face NA NA NA NA NA 2.600 2.8 g_100g 0.3464102 0.3464102 0.200 0.200 3 3 NA we_determined_nitrogen_concentrations_by_hightemperature_combustion_followed_by_thermoconductometric_detection_(leco_fp528_nitrogen_analyzer_st_joseph_mi_usa)with_glycine_p-toluenesulfonate_used_as_the_reference_standard(5.667%_n_hach_co._lovelan
antwerp p27452 p27452 agrell_et_al_2005 leaf_n aspenface aspenface_bp aspenface_bp_c 45.70 -89.50 490 4.9 810 forest NA field planted betula_papyrifera NA NA NA c _000 NA 367 560 0 0 0 0 0 0 0 0 0 0 0 NA NA 0 0 1998 NA NA face NA NA NA NA NA 2.600 2.5 g_100g 0.5196152 0.3464102 0.300 0.200 3 3 NA we_determined_nitrogen_concentrations_by_hightemperature_combustion_followed_by_thermoconductometric_detection_(leco_fp528_nitrogen_analyzer_st_joseph_mi_usa)with_glycine_p-toluenesulfonate_used_as_the_reference_standard(5.667%_n_hach_co._lovelan
antwerp p27453 p27453 agrell_et_al_2005 leaf_n aspenface aspenface_bp aspenface_bp_c 45.70 -89.50 490 4.9 810 forest NA field planted betula_papyrifera NA NA NA c _000 NA 367 560 0 0 0 0 0 0 0 0 0 0 0 NA NA 0 0 1998 NA NA face NA NA NA NA NA 3.500 3.3 g_100g 0.1732051 0.1732051 0.100 0.100 3 3 NA we_determined_nitrogen_concentrations_by_hightemperature_combustion_followed_by_thermoconductometric_detection_(leco_fp528_nitrogen_analyzer_st_joseph_mi_usa)with_glycine_p-toluenesulfonate_used_as_the_reference_standard(5.667%_n_hach_co._lovelan
antwerp p27468 p27468 andresen_et_al_2009 leaf_n climaite climaite climaite_c 55.88 11.97 79 8.2 603 shrubland temperate_heathland field natural NA NA NA NA c _000 _0000 380 510 0 0 0 0 0 0 0 0 0 0 0 NA NA 0 0 2006 NA NA face NA NA 1 NA NA 1.595 1.34 g_100g 0.4629536 0.3355801 0.189 0.137 6 6 28_06_2006 NA
antwerp p27469 p27469 andresen_et_al_2009 leaf_n climaite climaite climaite_cd 55.88 11.97 79 8.2 603 shrubland temperate_heathland field natural NA NA NA NA cd _000 _0000 380 510 1 NA 0 0 0 0 0 0 0 0 0 NA NA 0 0 2006 NA 35 face 30_06_2006 04_08_2006 1 NA NA 1.595 1.545 g_100g 0.4629536 0.7715893 0.189 0.315 6 6 28_06_2006 water_info:_5_weeks_until_early_august
antwerp p27470 p27470 andresen_et_al_2009 leaf_n climaite climaite climaite_cw 55.88 11.97 79 8.2 603 shrubland temperate_heathland field natural NA NA NA NA cw _000 _0001 380 510 0 0 0 0 0 0 0 0 0 0 0 NA NA 2 1 2006 NA NA face NA NA 1 NA NA 1.595 1.44 g_100g 0.4629536 0.3282316 0.189 0.134 6 6 28_06_2006 NA

Liang et al.:

df_liang %>% 
  filter(variable == "Nmass") %>% 
  head()
## # A tibble: 6 × 48
##   variable refere…¹ lat   lon     mat   map ph_soil species family genus photo…²
##   <chr>    <chr>    <chr> <chr> <dbl> <dbl>   <dbl> <chr>   <chr>  <chr> <chr>  
## 1 Nmass    Bagherz… 51.7… 9.62E   6.5 1050     3.69 Fagus … Fagac… Fagus C3     
## 2 Nmass    Bai-201… 43.6… 116.…   0.3  340     7.5  Leymus… Poace… Leym… C3     
## 3 Nmass    Bai-201… 41.7… 111.…  NA    311.    8.1  Stipa … Poace… Stipa C3     
## 4 Nmass    Barton-… 31.9… 115.…  18.6  746     4.7  Pennis… Poace… Penn… C4     
## 5 Nmass    Barton-… 31.9… 115.…  18.6  746     4.7  Pennis… Poace… Penn… C4     
## 6 Nmass    Barton-… 31.9… 115.…  18.6  746     4.7  Pennis… Poace… Penn… C4     
## # … with 37 more variables: legume <chr>, managed <chr>, growthform <chr>,
## #   functionaltype <chr>, broadleaf_conifer <chr>, evergreen_deciduous <chr>,
## #   ninorg_form <chr>, ndep_kg_ha_yr <dbl>, duration_yr <dbl>, load <dbl>,
## #   value_t <dbl>, value_c <dbl>, n_t <dbl>, n_c <dbl>, sd_t <dbl>, sd_c <dbl>,
## #   ln_rr <dbl>, v <dbl>, w <dbl>, cluster <dbl>, w_dash <dbl>, ...33 <lgl>,
## #   ...34 <lgl>, ...35 <lgl>, ...36 <lgl>, ...37 <lgl>, ...38 <lgl>,
## #   ...39 <lgl>, ...40 <lgl>, ...41 <lgl>, ...42 <lgl>, ...43 <lgl>, …

The goal is to make the Liang et al. data (more) conform with MESI. Different steps need to be taken:

Simple reformatting of column names and entries.

# ## look at growth growth forms in Liang and in MESI
# df$growthform %>% unique()
# df$growth_form %>% unique()

mesi_make_conform_liang <- function(df_liang){
  
  df_liang <- df_liang %>% 
    
    ## add columns
    mutate(db = "liang",
           treatment = "f") %>% 
    
    ## Correct weird longitude and latitude specification
    mutate(lon = ifelse(str_ends(lon, "W"), -as.numeric(str_remove(lon, "W")), as.numeric(str_remove(lon, "W"))),
           lat = ifelse(str_ends(lat, "N"), as.numeric(str_remove(lat, "N")), -as.numeric(str_remove(lat, "N")))) %>% 
    
    ## Some columns can just be renamed
    rename(citation = reference,
           response = variable,
           x_c = value_c,
           x_t = value_t,
           rep_c = n_c,
           rep_t = n_t,
           growth_form = growthform
           ) %>% 
    
    ## Entries have to conform the entries in MESI.
    ## ... for growth form
    mutate(growth_form = ifelse("Woody", # Liang name
                                "woody", # MESI name
                                ifelse("Herb",      # Liang name
                                       "herbaceous" # MESI name
                                       )))
  
    ## .. for variable name
    ## XXX add your code here or find equivalent solution XXX
    
}

df_liang <- mesi_make_conform_liang(df_liang)
## Warning in ifelse(str_ends(lon, "W"), -as.numeric(str_remove(lon, "W")), : NAs
## introduced by coercion

## Warning in ifelse(str_ends(lon, "W"), -as.numeric(str_remove(lon, "W")), : NAs
## introduced by coercion
## Warning in ifelse(str_ends(lat, "N"), as.numeric(str_remove(lat, "N")), : NAs
## introduced by coercion

## Warning in ifelse(str_ends(lat, "N"), as.numeric(str_remove(lat, "N")), : NAs
## introduced by coercion

Add info

Missing info in Liang et al.:

Identify duplicates

This is challenging. Here are some starting points.

Based on strings (e.g., experiment name):

Or it could be done based on longitude and latitude of the site (but missing for many!). This gives an idea of how a solution could be found, but this doesn’t show any duplicates.

df_liang %>% 
  select(citation, lon, lat) %>%
  distinct() %>% 
  
  ## round to two digits
  mutate(lon = format(lon, digits = 2), lat = format(lat, digits = 2)) %>% 
  
  ## join with MESI based on lon and lat rounded to two digits
  left_join(
    df %>% 
      select(site, exp, citation, lon, lat) %>% 
      mutate(lon = format(lon, digits = 2), lat = format(lat, digits = 2)) %>% 
      distinct(),
    by = c("lon", "lat")
  ) %>% 
  drop_na(site, exp) %>% 
  knitr::kable()
citation.x lon lat site exp citation.y