library(tidyverse) # for working with ease
The MESI data.
df <- read_csv("../data/mesi_main.csv")
## Warning: One or more parsing issues, see `problems()` for details
The Liang et al. data can be obtained as an Excel file from here: https://doi.org/10.1111/gcb.15071. I first exported each excel tab to a separate CSV file and then combined them into a single big table. Respective code is commented out below and the combined file is read directly.
# files <- list.files(path = "~/data/nfert_liang/data_csv/", full.names = TRUE)
# df <- purrr::map_dfr(as.list(files), ~read_csv(.))
# use_names <- c(
# "variable", "reference", "lat", "lon", "mat",
# "map", "ph_soil", "species", "family", "genus",
# "photopathway", "legume", "managed", "growthform", "functionaltype",
# "broadleaf_conifer", "evergreen_deciduous","ninorg_form", "ndep_kg_ha_yr", "duration_yr",
# "load", "value_t", "value_c", "n_t", "n_c",
# "sd_t", "sd_c", "ln_rr", "v", "w",
# "cluster", "w_dash"
# )
# names(df) <- use_names
# write_csv(df, file = "~/data/nfert_liang/nfert_liang_combined.csv")
# modify nfert_liang_combined.csv by hand to have nice column names.
df_liang <- read_csv("../data-raw/nfert_liang_combined.csv")
## New names:
## Rows: 2683 Columns: 48
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (15): variable, reference, lat, lon, species, family, genus, photopathwa... dbl
## (17): mat, map, ph_soil, ndep_kg_ha_yr, duration_yr, load, value_t, valu... lgl
## (16): ...33, ...34, ...35, ...36, ...37, ...38, ...39, ...40, ...41, ......
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
## • `` -> `...36`
## • `` -> `...37`
## • `` -> `...38`
## • `` -> `...39`
## • `` -> `...40`
## • `` -> `...41`
## • `` -> `...42`
## • `` -> `...43`
## • `` -> `...44`
## • `` -> `...45`
## • `` -> `...46`
## • `` -> `...47`
## • `` -> `...48`
Example for how the two look like (Nmass).
MESI:
df %>%
filter(response == "leaf_n") %>%
head() %>%
knitr::kable()
db | id | duplicate_id | citation | response | site | study | exp | lat | lon | elevation | mat | map | ecosystem_type | vegetation_type | experiment_type | community_type | dominant_species | growth_form | age | disturbance_type | treatment | npk | w_t1 | c_c | c_t | d_t | d_t2 | n_c | n_t | p_c | p_t | k_c | k_t | i_c | i_t | i_t2 | s_c | s_t | w_t2 | w_t3 | start_year | duration | treatment_duration | fumigation_type | start_treatment | end_treatment | sampling_year | sampling_depth | aggregation_level | x_c | x_t | x_units | sd_c | sd_t | se_c | se_t | rep_c | rep_t | sampling_date | comment |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
antwerp | p27451 | p27451 | agrell_et_al_2005 | leaf_n | aspenface | aspenface_bp | aspenface_bp_c | 45.70 | -89.50 | 490 | 4.9 | 810 | forest | NA | field | planted | betula_papyrifera | NA | NA | NA | c | _000 | NA | 367 | 560 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NA | NA | 0 | 0 | 1998 | NA | NA | face | NA | NA | NA | NA | NA | 2.600 | 2.8 | g_100g | 0.3464102 | 0.3464102 | 0.200 | 0.200 | 3 | 3 | NA | we_determined_nitrogen_concentrations_by_hightemperature_combustion_followed_by_thermoconductometric_detection_(leco_fp528_nitrogen_analyzer_st_joseph_mi_usa)with_glycine_p-toluenesulfonate_used_as_the_reference_standard(5.667%_n_hach_co._lovelan |
antwerp | p27452 | p27452 | agrell_et_al_2005 | leaf_n | aspenface | aspenface_bp | aspenface_bp_c | 45.70 | -89.50 | 490 | 4.9 | 810 | forest | NA | field | planted | betula_papyrifera | NA | NA | NA | c | _000 | NA | 367 | 560 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NA | NA | 0 | 0 | 1998 | NA | NA | face | NA | NA | NA | NA | NA | 2.600 | 2.5 | g_100g | 0.5196152 | 0.3464102 | 0.300 | 0.200 | 3 | 3 | NA | we_determined_nitrogen_concentrations_by_hightemperature_combustion_followed_by_thermoconductometric_detection_(leco_fp528_nitrogen_analyzer_st_joseph_mi_usa)with_glycine_p-toluenesulfonate_used_as_the_reference_standard(5.667%_n_hach_co._lovelan |
antwerp | p27453 | p27453 | agrell_et_al_2005 | leaf_n | aspenface | aspenface_bp | aspenface_bp_c | 45.70 | -89.50 | 490 | 4.9 | 810 | forest | NA | field | planted | betula_papyrifera | NA | NA | NA | c | _000 | NA | 367 | 560 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NA | NA | 0 | 0 | 1998 | NA | NA | face | NA | NA | NA | NA | NA | 3.500 | 3.3 | g_100g | 0.1732051 | 0.1732051 | 0.100 | 0.100 | 3 | 3 | NA | we_determined_nitrogen_concentrations_by_hightemperature_combustion_followed_by_thermoconductometric_detection_(leco_fp528_nitrogen_analyzer_st_joseph_mi_usa)with_glycine_p-toluenesulfonate_used_as_the_reference_standard(5.667%_n_hach_co._lovelan |
antwerp | p27468 | p27468 | andresen_et_al_2009 | leaf_n | climaite | climaite | climaite_c | 55.88 | 11.97 | 79 | 8.2 | 603 | shrubland | temperate_heathland | field | natural | NA | NA | NA | NA | c | _000 | _0000 | 380 | 510 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NA | NA | 0 | 0 | 2006 | NA | NA | face | NA | NA | 1 | NA | NA | 1.595 | 1.34 | g_100g | 0.4629536 | 0.3355801 | 0.189 | 0.137 | 6 | 6 | 28_06_2006 | NA |
antwerp | p27469 | p27469 | andresen_et_al_2009 | leaf_n | climaite | climaite | climaite_cd | 55.88 | 11.97 | 79 | 8.2 | 603 | shrubland | temperate_heathland | field | natural | NA | NA | NA | NA | cd | _000 | _0000 | 380 | 510 | 1 | NA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NA | NA | 0 | 0 | 2006 | NA | 35 | face | 30_06_2006 | 04_08_2006 | 1 | NA | NA | 1.595 | 1.545 | g_100g | 0.4629536 | 0.7715893 | 0.189 | 0.315 | 6 | 6 | 28_06_2006 | water_info:_5_weeks_until_early_august |
antwerp | p27470 | p27470 | andresen_et_al_2009 | leaf_n | climaite | climaite | climaite_cw | 55.88 | 11.97 | 79 | 8.2 | 603 | shrubland | temperate_heathland | field | natural | NA | NA | NA | NA | cw | _000 | _0001 | 380 | 510 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NA | NA | 2 | 1 | 2006 | NA | NA | face | NA | NA | 1 | NA | NA | 1.595 | 1.44 | g_100g | 0.4629536 | 0.3282316 | 0.189 | 0.134 | 6 | 6 | 28_06_2006 | NA |
Liang et al.:
df_liang %>%
filter(variable == "Nmass") %>%
head()
## # A tibble: 6 × 48
## variable refere…¹ lat lon mat map ph_soil species family genus photo…²
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 Nmass Bagherz… 51.7… 9.62E 6.5 1050 3.69 Fagus … Fagac… Fagus C3
## 2 Nmass Bai-201… 43.6… 116.… 0.3 340 7.5 Leymus… Poace… Leym… C3
## 3 Nmass Bai-201… 41.7… 111.… NA 311. 8.1 Stipa … Poace… Stipa C3
## 4 Nmass Barton-… 31.9… 115.… 18.6 746 4.7 Pennis… Poace… Penn… C4
## 5 Nmass Barton-… 31.9… 115.… 18.6 746 4.7 Pennis… Poace… Penn… C4
## 6 Nmass Barton-… 31.9… 115.… 18.6 746 4.7 Pennis… Poace… Penn… C4
## # … with 37 more variables: legume <chr>, managed <chr>, growthform <chr>,
## # functionaltype <chr>, broadleaf_conifer <chr>, evergreen_deciduous <chr>,
## # ninorg_form <chr>, ndep_kg_ha_yr <dbl>, duration_yr <dbl>, load <dbl>,
## # value_t <dbl>, value_c <dbl>, n_t <dbl>, n_c <dbl>, sd_t <dbl>, sd_c <dbl>,
## # ln_rr <dbl>, v <dbl>, w <dbl>, cluster <dbl>, w_dash <dbl>, ...33 <lgl>,
## # ...34 <lgl>, ...35 <lgl>, ...36 <lgl>, ...37 <lgl>, ...38 <lgl>,
## # ...39 <lgl>, ...40 <lgl>, ...41 <lgl>, ...42 <lgl>, ...43 <lgl>, …
The goal is to make the Liang et al. data (more) conform with MESI. Different steps need to be taken:
# ## look at growth growth forms in Liang and in MESI
# df$growthform %>% unique()
# df$growth_form %>% unique()
mesi_make_conform_liang <- function(df_liang){
df_liang <- df_liang %>%
## add columns
mutate(db = "liang",
treatment = "f") %>%
## Correct weird longitude and latitude specification
mutate(lon = ifelse(str_ends(lon, "W"), -as.numeric(str_remove(lon, "W")), as.numeric(str_remove(lon, "W"))),
lat = ifelse(str_ends(lat, "N"), as.numeric(str_remove(lat, "N")), -as.numeric(str_remove(lat, "N")))) %>%
## Some columns can just be renamed
rename(citation = reference,
response = variable,
x_c = value_c,
x_t = value_t,
rep_c = n_c,
rep_t = n_t,
growth_form = growthform
) %>%
## Entries have to conform the entries in MESI.
## ... for growth form
mutate(growth_form = ifelse("Woody", # Liang name
"woody", # MESI name
ifelse("Herb", # Liang name
"herbaceous" # MESI name
)))
## .. for variable name
## XXX add your code here or find equivalent solution XXX
}
df_liang <- mesi_make_conform_liang(df_liang)
## Warning in ifelse(str_ends(lon, "W"), -as.numeric(str_remove(lon, "W")), : NAs
## introduced by coercion
## Warning in ifelse(str_ends(lon, "W"), -as.numeric(str_remove(lon, "W")), : NAs
## introduced by coercion
## Warning in ifelse(str_ends(lat, "N"), as.numeric(str_remove(lat, "N")), : NAs
## introduced by coercion
## Warning in ifelse(str_ends(lat, "N"), as.numeric(str_remove(lat, "N")), : NAs
## introduced by coercion
Missing info in Liang et al.:
elv
). May use ingestr R package (see here:
https://computationales.github.io/ingestr/articles/example.html#etopo1).This is challenging. Here are some starting points.
Based on strings (e.g., experiment name):
Or it could be done based on longitude and latitude of the site (but missing for many!). This gives an idea of how a solution could be found, but this doesn’t show any duplicates.
df_liang %>%
select(citation, lon, lat) %>%
distinct() %>%
## round to two digits
mutate(lon = format(lon, digits = 2), lat = format(lat, digits = 2)) %>%
## join with MESI based on lon and lat rounded to two digits
left_join(
df %>%
select(site, exp, citation, lon, lat) %>%
mutate(lon = format(lon, digits = 2), lat = format(lat, digits = 2)) %>%
distinct(),
by = c("lon", "lat")
) %>%
drop_na(site, exp) %>%
knitr::kable()
citation.x | lon | lat | site | exp | citation.y |
---|