The data is downloaded from the google spreadsheet [RSIP Working Copy], tab ‘Analysis sheet’, into a CSV (19.9.2019) and saved as data/RSIP_Analysis_sheet.csv
.
Use gsheet::gsheet2tbl
Read the data.
df <- read_csv("~/data/rootingdepth/rsip/RSIP_Analysis_sheet_210409.csv") %>% # previously done with "data/RSIP_Analysis_sheet.csv"
# df <- read_csv("~/data/rootingdepth/rsip/RSIP_Analysis_sheet_210721.csv") %>%
rename(lon = Long, lat = Lat) %>%
rowid_to_column(var = "id") %>%
## problem: some have a reference error
dplyr::filter(lon != "#REF!") %>%
mutate(lon = as.numeric(lon), lat = as.numeric(lat),
Dr = as.numeric(Dr),
wtd = as.numeric(Water_Table_Depth_Fan))
## Rows: 5997 Columns: 135
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (82): ID_Section, ID_Number, Reference, Species, Synonym, Family, Life_s...
## dbl (51): Water_Table_Depth_Fan, LOG10_Dr, LOG10_Lr, LOG10_Sr, LOG10_Dl, LOG...
## lgl (2): LOG10_Niklas_Vca, LOG10_Niklas_RS_ratio
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(paste("Total number of entries:", nrow(df)))
## [1] "Total number of entries: 5524"
First, remove data from herbaceous plants that occurr at sites together with woody plants. Group grasses and forbs into “herbaceous” and trees, shrubs, and semi-shrubs into “woody”.
df2 <- df %>%
mutate(sitename = paste0("i_", as.character(lon), "_", as.character(lat))) %>%
mutate(type = ifelse(Growth_form %in% c("Forb", "Grass"), "herb",
ifelse(Growth_form %in% c("Tree", "Shrub", "Semi-shrub"), "woody", NA))) %>%
filter(!is.na(type))
find_coexisting <- function(vec){
vec <- unique(vec)
if (length(vec) > 1){
return(TRUE)
} else {
return(FALSE)
}
}
tmp <- df2 %>%
group_by(sitename) %>%
summarise(coexisting = find_coexisting(type))
df3 <- df2 %>%
left_join(tmp, by = "sitename") %>%
filter(!(type == "herb" & coexisting))
Remove the significant effect of shoot height. Note that 3230 data points have missing Hs. The filtering here reduces the dataset to 2287 entries.
df4 <- df3 %>%
mutate(Hs = as.numeric(Hs)) %>%
filter(Hs > 0 & Dr > 0)
df4 %>%
ggplot(aes(Hs, Dr)) +
geom_point(alpha = 0.5) +
scale_x_log10() +
scale_y_log10() +
geom_smooth(method = "lm") +
theme_classic() +
labs(x = expression(italic("H")[s] ~ "(m)"),
y = expression(italic("z")[r] ~ "(m)"))
## `geom_smooth()` using formula 'y ~ x'
ggsave("fig/height_depth.pdf", width = 6, height = 5)
## `geom_smooth()` using formula 'y ~ x'
ggsave("fig/height_depth.png", width = 6, height = 5)
## `geom_smooth()` using formula 'y ~ x'
linmod <- lm(log(Dr) ~ log(Hs),
data = df4,
na.action = "na.omit")
plot(linmod)
hist(linmod$residuals)
Consider residuals for the TGA.
df4 <- df4 %>%
mutate(Dr_res = linmod$residuals)
df_sites2 <- df4 %>%
mutate(sitename = paste0("i_", as.character(lon), "_", as.character(lat))) %>%
group_by(sitename) %>%
summarise(Dr = mean(Dr, na.rm = TRUE),
Dr_res = mean(Dr_res, na.rm = TRUE))
## add WWF biome info
df_wwf_sites2 <- ingest(
df4 %>%
distinct(lon, lat) %>%
mutate(sitename = paste0("i_", as.character(lon), "_", as.character(lat))),
source = "wwf",
dir = "~/data/biomes/wwf_ecoregions/official/",
settings = list(layer = "wwf_terr_ecos")
)%>%
mutate(data = purrr::map(data, ~slice(., 1))) %>%
unnest(data)
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/benjaminstocker/data/biomes/wwf_ecoregions/official", layer: "wwf_terr_ecos"
## with 14458 features
## It has 21 fields
## Warning in sp::proj4string(shp): CRS object has comment, which is lost in output
df_sites2 <- df_sites2 %>%
left_join(
df_wwf_sites2 %>%
dplyr::select(sitename, biome = BIOME, biome_name = BIOME_NAME),
by = "sitename"
)
df_sites2 %>%
separate(sitename, into = c(NA, "lon", "lat"), sep = "_") %>%
mutate(lon = as.numeric(lon), lat = as.numeric(lat)) %>%
write_csv(file = "data/df_sites_rsip_tga.csv")
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
Add site mean to full data
df4 <- df_sites2 %>%
dplyr::select(sitename, Dr_sitemean = Dr, Dr_res_sitemean = Dr_res) %>%
right_join(
df4 %>%
mutate(sitename = paste0("i_", as.character(lon), "_", as.character(lat))),
by = "sitename"
)
Use data only for sites where at least 3 data points are available. Reduces data from 1,497 to 1,197 points.
use_sites <- df4 %>%
dplyr::select(sitename, Species) %>%
group_by(sitename) %>%
summarise(n = n()) %>%
dplyr::filter(n >= 3) %>%
pull(sitename)
df5 <- df4 %>%
dplyr::filter(sitename %in% use_sites)
Use data only for species that appear in at least 3 sites => 35 species and 166 data points.
use_species <- df5 %>%
filter(Species != "NA") %>%
dplyr::select(sitename, Species) %>%
distinct() %>%
group_by(Species) %>%
summarise(n = n()) %>%
dplyr::filter(n >= 3) %>%
pull(Species)
df6 <- df5 %>%
dplyr::filter(Species %in% use_species)
# test : number of species per site
df6 %>%
dplyr::select(sitename, Species) %>%
distinct() %>%
group_by(sitename) %>%
summarise(n = n())
## # A tibble: 55 × 2
## sitename n
## <chr> <int>
## 1 i_-105.06_39.12 4
## 2 i_-105.08_39.16 4
## 3 i_-105.09_39.1 4
## 4 i_-106.31_35.86 3
## 5 i_-106.44_32.37 1
## 6 i_-108.37_50.68 1
## 7 i_-117.76_34.2 1
## 8 i_-118.93_34.12 1
## 9 i_-119.42_36.78 1
## 10 i_-96.8_46.9 1
## # … with 45 more rows
# test : number of sites per species
df6 %>%
dplyr::select(sitename, Species) %>%
distinct() %>%
group_by(Species) %>%
summarise(n = n()) %>%
arrange(n)
## # A tibble: 30 × 2
## Species n
## <chr> <int>
## 1 Adenostoma fasciculatum 3
## 2 Alnus incana 3
## 3 Beta vulgaris 3
## 4 Calluna vulgaris 3
## 5 Carpinus betulus 3
## 6 Cucurbita pepo 3
## 7 Fagus sylvatica 3
## 8 Fraxinus excelsior 3
## 9 Halothamnus subaphyllus 3
## 10 Hordeum vulgare 3
## # … with 20 more rows
saveRDS(df6, file = "data/df_tga.rds")
Plot.
df6 %>%
ggplot(aes(x = Dr_res_sitemean, y = Dr_res)) + # , color = Species
geom_point() +
geom_abline(slope = 1, intercept = 0, linetype = "dotted")
Plot just the lines
gg1 <- df6 %>%
group_by(Family) %>%
ggplot(aes(x = Dr_res_sitemean, y = Dr_res, group = Species, color = Family)) +
geom_smooth(method = "lm", se = FALSE, size = 0.5, alpha = 0.2) +
geom_abline(intercept=0, slope=1, linetype="dotted") +
theme_classic() +
geom_point(alpha = 0.3) +
labs(x = expression("Site mean" ~ italic("z")[r] ~ "(m)"),
y = expression(italic("z")[r] ~ "(m)")) +
theme(legend.title = element_blank())
gg1
## `geom_smooth()` using formula 'y ~ x'
ggsave("fig/tga_rsip.pdf", width = 6, height = 4)
## `geom_smooth()` using formula 'y ~ x'
ggsave("fig/tga_rsip.png", width = 6, height = 4)
## `geom_smooth()` using formula 'y ~ x'
Fit linear regressions by species for Dr_res
(residual of Dr from model log(Dr) ~ log(Hs)).
get_width_res <- function(df){
df %>% pull(Dr_res) %>% range() %>% diff()
}
df_tga_res <- df5 %>%
dplyr::filter(Species %in% use_species & sitename %in% use_sites) %>%
group_by(Species) %>%
nest() %>%
mutate(linmod = purrr::map(data, ~lm(Dr_res ~ Dr_res_sitemean, data = .)),
width = purrr::map_dbl(data, ~get_width_res(.))) %>%
mutate(slope = purrr::map_dbl(linmod, ~coef(.)[2])) %>%
left_join(df5 %>%
dplyr::select(Species, Family) %>%
distinct(),
by = "Species") %>%
## remove outlier slope
filter(slope < 30) %>%
mutate(n = purrr::map_int(data, ~nrow(.)))
gg2 <- df_tga_res %>%
ggplot() +
geom_histogram(aes(slope, ..count..), binwidth = 0.4, alpha = 0.7) +
# geom_density(aes(slope, ..density..), color = "red") +
geom_vline(xintercept = 1.0, linetype = "dotted") +
theme_classic() +
labs(x = "Slope", y = "Count")
df_tga_res %>%
ggplot(aes(width, slope)) +
geom_point() +
geom_smooth(method = "lm") +
geom_hline(yintercept = 1, linetype = "dotted")
## `geom_smooth()` using formula 'y ~ x'
df_tga_res %>%
ggplot(aes(width, abs(slope))) +
geom_point() +
geom_smooth(method = "lm") +
geom_hline(yintercept = 1, linetype = "dotted")
## `geom_smooth()` using formula 'y ~ x'
linmod2 <- lm(slope ~ width, data = df_tga_res)
summary(linmod2)
##
## Call:
## lm(formula = slope ~ width, data = df_tga_res)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1382 -0.2927 0.0310 0.2253 3.4486
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4467 0.4345 1.028 0.313
## width 0.3052 0.3059 0.998 0.327
##
## Residual standard error: 1.109 on 28 degrees of freedom
## Multiple R-squared: 0.03433, Adjusted R-squared: -0.0001562
## F-statistic: 0.9955 on 1 and 28 DF, p-value: 0.327
## species-level
gg3 <- df_tga_res %>%
drop_na() %>%
ggplot(aes(forcats::fct_reorder(Species, slope), slope)) +
geom_bar(stat = "identity") +
geom_hline(yintercept = 1.0, linetype = "dotted") +
coord_flip() +
labs(y = "Slope", x = "")
## family level
df_tga_res %>%
group_by(Family) %>%
summarise(slope = mean(slope), n = n()) %>%
mutate(family_n = paste0(Family, " (", as.character(n), ")")) %>%
mutate(family_n = forcats::fct_reorder(family_n, slope)) %>%
drop_na() %>%
ggplot(aes(family_n, slope)) +
geom_bar(stat = "identity") +
geom_hline(yintercept = 1.0, linetype = "dotted") +
coord_flip()
bottom_row <- plot_grid(gg2, gg3, ncol = 4, labels = c('b', 'c'), rel_widths = c(0.3, 0.7))
plot_grid(gg1, bottom_row, ncol = 1, labels = c('a', ''))
## `geom_smooth()` using formula 'y ~ x'
top_row <- plot_grid(gg1, gg2, ncol = 2, labels = c('a', 'b'), rel_widths = c(0.7, 0.3))
## `geom_smooth()` using formula 'y ~ x'
plot_grid(top_row, gg3, nrow = 2, labels = c('', 'c'), rel_heights = c(1, 1.5) )
ggsave("fig/tga.pdf", width = 12, height = 9)
ggsave("fig/tga.png", width = 12, height = 9)