library(tidyverse)   # for working with ease

Read data

The MESI data.

df <- read_csv("../data/mesi_main.csv")

## Warning: One or more parsing issues, see `problems()` for details

The Liang et al. data can be obtained as an Excel file from here: https://doi.org/10.1111/gcb.15071. I first exported each excel tab to a separate CSV file and then combined them into a single big table. Respective code is commented out below and the combined file is read directly.

# files <- list.files(path = "~/data/nfert_liang/data_csv/", full.names = TRUE)
# df <- purrr::map_dfr(as.list(files), ~read_csv(.))
# use_names <- c(
#   "variable",           "reference",          "lat",                "lon",                "mat",               
#   "map",                "ph_soil",            "species",            "family",             "genus",             
#   "photopathway",       "legume",             "managed",            "growthform",         "functionaltype",    
#   "broadleaf_conifer",  "evergreen_deciduous","ninorg_form",        "ndep_kg_ha_yr",      "duration_yr",       
#   "load",               "value_t",            "value_c",            "n_t",                "n_c",               
#   "sd_t",               "sd_c",               "ln_rr",              "v",                  "w",                 
#   "cluster",            "w_dash"
#   )
# names(df) <- use_names
# write_csv(df, file = "~/data/nfert_liang/nfert_liang_combined.csv")
  
# modify nfert_liang_combined.csv by hand to have nice column names.
df_liang <- read_csv("../data-raw/nfert_liang_combined.csv")

## New names:
## Rows: 2683 Columns: 48
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (15): variable, reference, lat, lon, species, family, genus, photopathwa... dbl
## (17): mat, map, ph_soil, ndep_kg_ha_yr, duration_yr, load, value_t, valu... lgl
## (16): ...33, ...34, ...35, ...36, ...37, ...38, ...39, ...40, ...41, ......
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
## • `` -> `...36`
## • `` -> `...37`
## • `` -> `...38`
## • `` -> `...39`
## • `` -> `...40`
## • `` -> `...41`
## • `` -> `...42`
## • `` -> `...43`
## • `` -> `...44`
## • `` -> `...45`
## • `` -> `...46`
## • `` -> `...47`
## • `` -> `...48`

Example for how the two look like (Nmass).

MESI:

df %>% 
  filter(response == "leaf_n") %>% 
  head() %>% 
  knitr::kable()

db	id	duplicate_id	citation	response	site	study	exp	lat	lon	elevation	mat	map	ecosystem_type	vegetation_type	experiment_type	community_type	dominant_species	growth_form	age	disturbance_type	treatment	npk	w_t1	c_c	c_t	d_t	d_t2	s_c	s_t	w_t2	w_t3	start_year	duration	treatment_duration	fumigation_type	start_treatment	end_treatment	sampling_year	sampling_depth	aggregation_level	x_c	x_t	x_units	sd_c	sd_t	se_c	se_t	rep_c	rep_t	sampling_date	comment
antwerp	p27451	p27451	agrell_et_al_2005	leaf_n	aspenface	aspenface_bp	aspenface_bp_c	45.70	-89.50	490	4.9	810	forest	NA	field	planted	betula_papyrifera	NA	NA	NA	c	_000	NA	367	560	0	0	NA	NA	0	0	1998	NA	NA	face	NA	NA	NA	NA	NA	2.600	2.8	g_100g	0.3464102	0.3464102	0.200	0.200	3	3	NA	we_determined_nitrogen_concentrations_by_hightemperature_combustion_followed_by_thermoconductometric_detection_(leco_fp528_nitrogen_analyzer_st_joseph_mi_usa)with_glycine_p-toluenesulfonate_used_as_the_reference_standard(5.667%_n_hach_co._lovelan
antwerp	p27452	p27452	agrell_et_al_2005	leaf_n	aspenface	aspenface_bp	aspenface_bp_c	45.70	-89.50	490	4.9	810	forest	NA	field	planted	betula_papyrifera	NA	NA	NA	c	_000	NA	367	560	0	0	NA	NA	0	0	1998	NA	NA	face	NA	NA	NA	NA	NA	2.600	2.5	g_100g	0.5196152	0.3464102	0.300	0.200	3	3	NA	we_determined_nitrogen_concentrations_by_hightemperature_combustion_followed_by_thermoconductometric_detection_(leco_fp528_nitrogen_analyzer_st_joseph_mi_usa)with_glycine_p-toluenesulfonate_used_as_the_reference_standard(5.667%_n_hach_co._lovelan
antwerp	p27453	p27453	agrell_et_al_2005	leaf_n	aspenface	aspenface_bp	aspenface_bp_c	45.70	-89.50	490	4.9	810	forest	NA	field	planted	betula_papyrifera	NA	NA	NA	c	_000	NA	367	560	0	0	NA	NA	0	0	1998	NA	NA	face	NA	NA	NA	NA	NA	3.500	3.3	g_100g	0.1732051	0.1732051	0.100	0.100	3	3	NA	we_determined_nitrogen_concentrations_by_hightemperature_combustion_followed_by_thermoconductometric_detection_(leco_fp528_nitrogen_analyzer_st_joseph_mi_usa)with_glycine_p-toluenesulfonate_used_as_the_reference_standard(5.667%_n_hach_co._lovelan
antwerp	p27468	p27468	andresen_et_al_2009	leaf_n	climaite	climaite	climaite_c	55.88	11.97	79	8.2	603	shrubland	temperate_heathland	field	natural	NA	NA	NA	NA	c	_000	_0000	380	510	0	0	NA	NA	0	0	2006	NA	NA	face	NA	NA	1	NA	NA	1.595	1.34	g_100g	0.4629536	0.3355801	0.189	0.137	6	6	28_06_2006	NA
antwerp	p27469	p27469	andresen_et_al_2009	leaf_n	climaite	climaite	climaite_cd	55.88	11.97	79	8.2	603	shrubland	temperate_heathland	field	natural	NA	NA	NA	NA	cd	_000	_0000	380	510	1	NA	NA	NA	0	0	2006	NA	35	face	30_06_2006	04_08_2006	1	NA	NA	1.595	1.545	g_100g	0.4629536	0.7715893	0.189	0.315	6	6	28_06_2006	water_info:_5_weeks_until_early_august
antwerp	p27470	p27470	andresen_et_al_2009	leaf_n	climaite	climaite	climaite_cw	55.88	11.97	79	8.2	603	shrubland	temperate_heathland	field	natural	NA	NA	NA	NA	cw	_000	_0001	380	510	0	0	NA	NA	2	1	2006	NA	NA	face	NA	NA	1	NA	NA	1.595	1.44	g_100g	0.4629536	0.3282316	0.189	0.134	6	6	28_06_2006	NA

Liang et al.:

df_liang %>% 
  filter(variable == "Nmass") %>% 
  head()

## # A tibble: 6 × 48
##   variable refere…¹ lat   lon     mat   map ph_soil species family genus photo…²
##   <chr>    <chr>    <chr> <chr> <dbl> <dbl>   <dbl> <chr>   <chr>  <chr> <chr>  
## 1 Nmass    Bagherz… 51.7… 9.62E   6.5 1050     3.69 Fagus … Fagac… Fagus C3     
## 2 Nmass    Bai-201… 43.6… 116.…   0.3  340     7.5  Leymus… Poace… Leym… C3     
## 3 Nmass    Bai-201… 41.7… 111.…  NA    311.    8.1  Stipa … Poace… Stipa C3     
## 4 Nmass    Barton-… 31.9… 115.…  18.6  746     4.7  Pennis… Poace… Penn… C4     
## 5 Nmass    Barton-… 31.9… 115.…  18.6  746     4.7  Pennis… Poace… Penn… C4     
## 6 Nmass    Barton-… 31.9… 115.…  18.6  746     4.7  Pennis… Poace… Penn… C4     
## # … with 37 more variables: legume <chr>, managed <chr>, growthform <chr>,
## #   functionaltype <chr>, broadleaf_conifer <chr>, evergreen_deciduous <chr>,
## #   ninorg_form <chr>, ndep_kg_ha_yr <dbl>, duration_yr <dbl>, load <dbl>,
## #   value_t <dbl>, value_c <dbl>, n_t <dbl>, n_c <dbl>, sd_t <dbl>, sd_c <dbl>,
## #   ln_rr <dbl>, v <dbl>, w <dbl>, cluster <dbl>, w_dash <dbl>, ...33 <lgl>,
## #   ...34 <lgl>, ...35 <lgl>, ...36 <lgl>, ...37 <lgl>, ...38 <lgl>,
## #   ...39 <lgl>, ...40 <lgl>, ...41 <lgl>, ...42 <lgl>, ...43 <lgl>, …

The goal is to make the Liang et al. data (more) conform with MESI. Different steps need to be taken:

Simple reformatting of column names and entries.

# ## look at growth growth forms in Liang and in MESI
# df$growthform %>% unique()
# df$growth_form %>% unique()

mesi_make_conform_liang <- function(df_liang){
  
  df_liang <- df_liang %>% 
    
    ## add columns
    mutate(db = "liang",
           treatment = "f") %>% 
    
    ## Correct weird longitude and latitude specification
    mutate(lon = ifelse(str_ends(lon, "W"), -as.numeric(str_remove(lon, "W")), as.numeric(str_remove(lon, "W"))),
           lat = ifelse(str_ends(lat, "N"), as.numeric(str_remove(lat, "N")), -as.numeric(str_remove(lat, "N")))) %>% 
    
    ## Some columns can just be renamed
    rename(citation = reference,
           response = variable,
           x_c = value_c,
           x_t = value_t,
           rep_c = n_c,
           rep_t = n_t,
           growth_form = growthform
           ) %>% 
    
    ## Entries have to conform the entries in MESI.
    ## ... for growth form
    mutate(growth_form = ifelse("Woody", # Liang name
                                "woody", # MESI name
                                ifelse("Herb",      # Liang name
                                       "herbaceous" # MESI name
                                       )))
  
    ## .. for variable name
    ## XXX add your code here or find equivalent solution XXX
    
}

df_liang <- mesi_make_conform_liang(df_liang)

## Warning in ifelse(str_ends(lon, "W"), -as.numeric(str_remove(lon, "W")), : NAs
## introduced by coercion

## Warning in ifelse(str_ends(lon, "W"), -as.numeric(str_remove(lon, "W")), : NAs
## introduced by coercion

## Warning in ifelse(str_ends(lat, "N"), as.numeric(str_remove(lat, "N")), : NAs
## introduced by coercion

## Warning in ifelse(str_ends(lat, "N"), as.numeric(str_remove(lat, "N")), : NAs
## introduced by coercion

Add info

Missing info in Liang et al.:

elevation (elv). May use ingestr R package (see here: https://computationales.github.io/ingestr/articles/example.html#etopo1).

Identify duplicates

This is challenging. Here are some starting points.

Based on strings (e.g., experiment name):

Or it could be done based on longitude and latitude of the site (but missing for many!). This gives an idea of how a solution could be found, but this doesn’t show any duplicates.

df_liang %>% 
  select(citation, lon, lat) %>%
  distinct() %>% 
  
  ## round to two digits
  mutate(lon = format(lon, digits = 2), lat = format(lat, digits = 2)) %>% 
  
  ## join with MESI based on lon and lat rounded to two digits
  left_join(
    df %>% 
      select(site, exp, citation, lon, lat) %>% 
      mutate(lon = format(lon, digits = 2), lat = format(lat, digits = 2)) %>% 
      distinct(),
    by = c("lon", "lat")
  ) %>% 
  drop_na(site, exp) %>% 
  knitr::kable()

citation.x	lon	lat	site	exp	citation.y

Add data from Liang et al. to MESI

Beni Stocker

2022-11-02

Read data

Simple reformatting of column names and entries.

Add info

Identify duplicates