Trait gradient analysis of rooting depth

The data is downloaded from the google spreadsheet [RSIP Working Copy], tab ‘Analysis sheet’, into a CSV (19.9.2019) and saved as data/RSIP_Analysis_sheet.csv.

Use gsheet::gsheet2tbl

Read the data.

df <- read_csv("~/data/rootingdepth/rsip/RSIP_Analysis_sheet_210409.csv") %>%      # previously done with "data/RSIP_Analysis_sheet.csv"
# df <- read_csv("~/data/rootingdepth/rsip/RSIP_Analysis_sheet_210721.csv") %>% 
  rename(lon = Long, lat = Lat) %>% 
  rowid_to_column(var = "id") %>% 
  
  ## problem: some have a reference error
  dplyr::filter(lon != "#REF!") %>% 
  mutate(lon = as.numeric(lon), lat = as.numeric(lat), 
         Dr = as.numeric(Dr),
         wtd = as.numeric(Water_Table_Depth_Fan))

## Rows: 5997 Columns: 135

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (82): ID_Section, ID_Number, Reference, Species, Synonym, Family, Life_s...
## dbl (51): Water_Table_Depth_Fan, LOG10_Dr, LOG10_Lr, LOG10_Sr, LOG10_Dl, LOG...
## lgl  (2): LOG10_Niklas_Vca, LOG10_Niklas_RS_ratio

## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

print(paste("Total number of entries:", nrow(df)))

## [1] "Total number of entries: 5524"

Trait gradient analysis

First, remove data from herbaceous plants that occurr at sites together with woody plants. Group grasses and forbs into “herbaceous” and trees, shrubs, and semi-shrubs into “woody”.

df2 <- df %>% 
  mutate(sitename = paste0("i_", as.character(lon), "_", as.character(lat))) %>% 
  mutate(type = ifelse(Growth_form %in% c("Forb", "Grass"), "herb",
                       ifelse(Growth_form %in% c("Tree", "Shrub", "Semi-shrub"), "woody", NA))) %>% 
  filter(!is.na(type))

find_coexisting <- function(vec){
  vec <- unique(vec)
  if (length(vec) > 1){
    return(TRUE)
  } else {
    return(FALSE)
  }
}

tmp <- df2 %>% 
  group_by(sitename) %>% 
  summarise(coexisting = find_coexisting(type))

df3 <- df2 %>% 
  left_join(tmp, by = "sitename") %>% 
  filter(!(type == "herb" & coexisting))

Plot depth heigh relationship

Remove the significant effect of shoot height. Note that 3230 data points have missing Hs. The filtering here reduces the dataset to 2287 entries.

df4 <- df3 %>% 
  mutate(Hs = as.numeric(Hs)) %>% 
  filter(Hs > 0 & Dr > 0)

df4 %>% 
  ggplot(aes(Hs, Dr)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  scale_y_log10() +
  geom_smooth(method = "lm") +
  theme_classic() +
  labs(x = expression(italic("H")[s] ~ "(m)"),
       y = expression(italic("z")[r] ~ "(m)"))

## `geom_smooth()` using formula 'y ~ x'

ggsave("fig/height_depth.pdf", width = 6, height = 5)

## `geom_smooth()` using formula 'y ~ x'

ggsave("fig/height_depth.png", width = 6, height = 5)

## `geom_smooth()` using formula 'y ~ x'

linmod <- lm(log(Dr) ~ log(Hs), 
             data = df4, 
             na.action = "na.omit")

plot(linmod)

hist(linmod$residuals)

Consider residuals for the TGA.

df4 <- df4 %>% 
  mutate(Dr_res = linmod$residuals)

Aggregate to sites

df_sites2 <- df4 %>% 
  mutate(sitename = paste0("i_", as.character(lon), "_", as.character(lat))) %>% 
  group_by(sitename) %>% 
  summarise(Dr = mean(Dr, na.rm = TRUE),
            Dr_res = mean(Dr_res, na.rm = TRUE))

## add WWF biome info
df_wwf_sites2 <- ingest(
  df4 %>% 
    distinct(lon, lat) %>% 
    mutate(sitename = paste0("i_", as.character(lon), "_", as.character(lat))),
  source = "wwf",
  dir = "~/data/biomes/wwf_ecoregions/official/",
  settings = list(layer = "wwf_terr_ecos")
  )%>% 
  mutate(data = purrr::map(data, ~slice(., 1))) %>% 
  unnest(data)

## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/benjaminstocker/data/biomes/wwf_ecoregions/official", layer: "wwf_terr_ecos"
## with 14458 features
## It has 21 fields

## Warning in sp::proj4string(shp): CRS object has comment, which is lost in output

df_sites2 <- df_sites2 %>% 
  left_join(
    df_wwf_sites2 %>% 
      dplyr::select(sitename, biome = BIOME, biome_name = BIOME_NAME),
    by = "sitename"
  )

df_sites2 %>%
  separate(sitename, into = c(NA, "lon", "lat"), sep = "_") %>% 
  mutate(lon = as.numeric(lon), lat = as.numeric(lat)) %>% 
  write_csv(file = "data/df_sites_rsip_tga.csv")

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

Add site mean to full data

df4 <- df_sites2 %>% 
  dplyr::select(sitename, Dr_sitemean = Dr, Dr_res_sitemean = Dr_res) %>% 
  right_join(
    df4 %>% 
      mutate(sitename = paste0("i_", as.character(lon), "_", as.character(lat))), 
    by = "sitename"
    )

Filter data

Use data only for sites where at least 3 data points are available. Reduces data from 1,497 to 1,197 points.

use_sites <- df4 %>% 
  dplyr::select(sitename, Species) %>% 
  group_by(sitename) %>% 
  summarise(n = n()) %>% 
  dplyr::filter(n >= 3) %>% 
  pull(sitename)

df5 <- df4 %>% 
  dplyr::filter(sitename %in% use_sites)

Use data only for species that appear in at least 3 sites => 35 species and 166 data points.

use_species <- df5 %>%
  filter(Species != "NA") %>% 
  dplyr::select(sitename, Species) %>% 
  distinct() %>% 
  group_by(Species) %>% 
  summarise(n = n()) %>% 
  dplyr::filter(n >= 3) %>% 
  pull(Species)

df6 <- df5 %>% 
  dplyr::filter(Species %in% use_species)

# test : number of species per site
df6 %>% 
  dplyr::select(sitename, Species) %>% 
  distinct() %>% 
  group_by(sitename) %>% 
  summarise(n = n())

## # A tibble: 55 × 2
##    sitename            n
##    <chr>           <int>
##  1 i_-105.06_39.12     4
##  2 i_-105.08_39.16     4
##  3 i_-105.09_39.1      4
##  4 i_-106.31_35.86     3
##  5 i_-106.44_32.37     1
##  6 i_-108.37_50.68     1
##  7 i_-117.76_34.2      1
##  8 i_-118.93_34.12     1
##  9 i_-119.42_36.78     1
## 10 i_-96.8_46.9        1
## # … with 45 more rows

# test : number of sites per species
df6 %>% 
  dplyr::select(sitename, Species) %>% 
  distinct() %>% 
  group_by(Species) %>% 
  summarise(n = n()) %>% 
  arrange(n)

## # A tibble: 30 × 2
##    Species                     n
##    <chr>                   <int>
##  1 Adenostoma fasciculatum     3
##  2 Alnus incana                3
##  3 Beta vulgaris               3
##  4 Calluna vulgaris            3
##  5 Carpinus betulus            3
##  6 Cucurbita pepo              3
##  7 Fagus sylvatica             3
##  8 Fraxinus excelsior          3
##  9 Halothamnus subaphyllus     3
## 10 Hordeum vulgare             3
## # … with 20 more rows

saveRDS(df6, file = "data/df_tga.rds")

TGA

Plot.

df6 %>% 
  ggplot(aes(x = Dr_res_sitemean, y = Dr_res)) +  # , color = Species
  geom_point() +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted")

Plot just the lines

gg1 <- df6 %>% 
  group_by(Family) %>% 
  ggplot(aes(x = Dr_res_sitemean, y = Dr_res, group = Species, color = Family)) +
  geom_smooth(method = "lm", se = FALSE, size = 0.5, alpha = 0.2) +
  geom_abline(intercept=0, slope=1, linetype="dotted") +
  theme_classic() +
  geom_point(alpha = 0.3) +
  labs(x = expression("Site mean" ~ italic("z")[r] ~ "(m)"),
       y = expression(italic("z")[r] ~ "(m)")) +
  theme(legend.title = element_blank())

gg1

## `geom_smooth()` using formula 'y ~ x'

ggsave("fig/tga_rsip.pdf", width = 6, height = 4)

## `geom_smooth()` using formula 'y ~ x'

ggsave("fig/tga_rsip.png", width = 6, height = 4)

## `geom_smooth()` using formula 'y ~ x'

Fit linear regressions by species for Dr_res (residual of Dr from model log(Dr) ~ log(Hs)).

get_width_res <- function(df){
  df %>% pull(Dr_res) %>% range() %>% diff()
}

df_tga_res <- df5 %>% 
  dplyr::filter(Species %in% use_species & sitename %in% use_sites) %>% 
  group_by(Species) %>% 
  nest() %>% 
  mutate(linmod = purrr::map(data, ~lm(Dr_res ~ Dr_res_sitemean, data = .)),
         width = purrr::map_dbl(data, ~get_width_res(.))) %>% 
  mutate(slope = purrr::map_dbl(linmod, ~coef(.)[2])) %>% 
  left_join(df5 %>% 
              dplyr::select(Species, Family) %>% 
              distinct(), 
            by = "Species") %>% 
  
  ## remove outlier slope
  filter(slope < 30) %>% 
  mutate(n = purrr::map_int(data, ~nrow(.)))

gg2 <- df_tga_res %>% 
  ggplot() +
  geom_histogram(aes(slope, ..count..), binwidth = 0.4, alpha = 0.7) +
  # geom_density(aes(slope, ..density..), color = "red") +
  geom_vline(xintercept = 1.0, linetype = "dotted") +
  theme_classic() +
  labs(x = "Slope", y = "Count")


df_tga_res %>% 
  ggplot(aes(width, slope)) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_hline(yintercept = 1, linetype = "dotted")

## `geom_smooth()` using formula 'y ~ x'

df_tga_res %>% 
  ggplot(aes(width, abs(slope))) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_hline(yintercept = 1, linetype = "dotted")

## `geom_smooth()` using formula 'y ~ x'

linmod2 <- lm(slope ~ width, data = df_tga_res)
summary(linmod2)

## 
## Call:
## lm(formula = slope ~ width, data = df_tga_res)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1382 -0.2927  0.0310  0.2253  3.4486 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.4467     0.4345   1.028    0.313
## width         0.3052     0.3059   0.998    0.327
## 
## Residual standard error: 1.109 on 28 degrees of freedom
## Multiple R-squared:  0.03433,    Adjusted R-squared:  -0.0001562 
## F-statistic: 0.9955 on 1 and 28 DF,  p-value: 0.327

## species-level
gg3 <- df_tga_res %>% 
  drop_na() %>% 
  ggplot(aes(forcats::fct_reorder(Species, slope), slope)) +
  geom_bar(stat = "identity") +
  geom_hline(yintercept = 1.0, linetype = "dotted") +
  coord_flip() +
  labs(y = "Slope", x = "")

## family level
df_tga_res %>% 
  group_by(Family) %>% 
  summarise(slope = mean(slope), n = n()) %>% 
  mutate(family_n = paste0(Family, " (", as.character(n), ")")) %>% 
  mutate(family_n = forcats::fct_reorder(family_n, slope)) %>% 
  drop_na() %>% 
  ggplot(aes(family_n, slope)) +
  geom_bar(stat = "identity") +
  geom_hline(yintercept = 1.0, linetype = "dotted") +
  coord_flip()

Publication figures

bottom_row <- plot_grid(gg2, gg3, ncol = 4, labels = c('b', 'c'), rel_widths = c(0.3, 0.7))
plot_grid(gg1, bottom_row, ncol = 1, labels = c('a', ''))

## `geom_smooth()` using formula 'y ~ x'

top_row <- plot_grid(gg1, gg2, ncol = 2, labels = c('a', 'b'), rel_widths = c(0.7, 0.3))

## `geom_smooth()` using formula 'y ~ x'

plot_grid(top_row, gg3, nrow = 2, labels = c('', 'c'), rel_heights = c(1, 1.5) )

ggsave("fig/tga.pdf", width = 12, height = 9)
ggsave("fig/tga.png", width = 12, height = 9)