1. Vegetation:
presence/absence, ordination, and diversity
fn_veg <- file.path(DATA_DIR, params$fn_veg)
need_file(fn_veg)
## [1] "/Users/rasmanns/Library/CloudStorage/Dropbox/Lavoro/Manuscripts/2025/Aurora_Urban_Ecology/data_folder/stats_figures/data//Matrice_1_Présence_non-présence plantes.csv"
veg <- read.csv(fn_veg, header = TRUE, sep = ";", na.strings = "NA", check.names = FALSE)
# Minimal checks
stopifnot(all(c("Parc","Layer") %in% names(veg)))
veg$Parc <- as.factor(veg$Parc)
veg$Layer <- as.factor(veg$Layer)
# Identify community matrix columns (assumes metadata in first 5 cols as in your script)
comm_cols <- 6:ncol(veg)
stopifnot(max(comm_cols) > 6)
comm <- veg[, comm_cols]
# Ensure binary presence/absence
comm_bin <- as.data.frame(comm > 0) * 1
# Jaccard NMDS
d <- vegdist(comm_bin, method = "jaccard", binary = TRUE)
nmds <- metaMDS(d, k = 2, trymax = 100, autotransform = FALSE, trace = 0)
# Site scores
nmds_points <- as.data.frame(scores(nmds, display = "sites"))
colnames(nmds_points) <- c("NMDS1","NMDS2")
nmds_points$Parc <- veg$Parc
nmds_points$Habitat <- plyr::revalue(
veg$Layer,
c("herb" = "Herb", "shrub" = "Shrub", "tree" = "Tree"),
warn_missing = FALSE
)
nmds_points$Richness <- vegan::specnumber(comm_bin)
# PERMANOVA (strata if available; fallback to Parc)
strata_col <- if ("Parc2" %in% names(veg)) "Parc2" else "Parc"
perm_formula <- as.formula(paste0("comm_bin ~ Layer"))
perm_out <- adonis2(comm_bin ~ Layer, data = veg, permutations = 199, strata = veg[[strata_col]])
# NMDS plot
nmds_plot <- ggplot(nmds_points, aes(NMDS1, NMDS2)) +
geom_point(aes(size = Richness, color = Parc), alpha = 0.8) +
geom_text_repel(aes(label = Parc), color = "black", vjust = 3.5, size = 3.5) +
stat_ellipse(aes(fill = Habitat), geom = "polygon", alpha = 0.2, color = NA,
type = "norm", level = 0.75) +
annotate("text", x = -3, y = max(nmds_points$NMDS2, na.rm = TRUE),
label = paste0("Stress = ", round(nmds$stress, 3)), hjust = 1.05, vjust = 1.5, size = 4) +
labs(x = "NMDS1", y = "NMDS2", color = "Parc", fill = "Habitat") +
theme(legend.position = "none")
ggsave(file.path(OUTPUT_DIR, "veg_nmds.png"), nmds_plot, width = 7, height = 6, dpi = 300)
veg$H <- vegan::diversity(comm_bin)
veg$S <- vegan::specnumber(comm_bin)
# Site-level diversity table
site_div <- tibble(
Site = if ("Elev" %in% names(veg)) veg$Elev else seq_len(nrow(veg)),
Parc = veg$Parc,
Shannon = veg$H,
Richness = veg$S
)
write.csv(site_div, file.path(OUTPUT_DIR, "site_diversity.csv"), row.names = FALSE)
# Parc-pooled presence/absence and diversity
pooled_by_parc <- rowsum(comm_bin, group = veg$Parc)
pooled_by_parc_bin <- (pooled_by_parc > 0) * 1
write.csv(pooled_by_parc_bin, file.path(OUTPUT_DIR, "pooled_by_parc.csv"))
parc_div <- tibble(
Parc = rownames(pooled_by_parc_bin),
Shannon = as.numeric(vegan::diversity(pooled_by_parc_bin)),
Richness = as.numeric(vegan::specnumber(pooled_by_parc_bin))
)
write.csv(parc_div, file.path(OUTPUT_DIR, "parc_diversity.csv"), row.names = FALSE)
# Barplot: richness by Parc
diversity_plot2 <- ggplot(parc_div, aes(y = reorder(Parc, Richness), x = Richness, fill = Parc)) +
geom_col() +
labs(x = "Species richness", y = "Parcs") +
theme(legend.position = "none",
axis.text = element_text(size = 10),
axis.title = element_text(size = 14))
ggsave(file.path(OUTPUT_DIR, "veg_richness_by_parc.png"), diversity_plot2, width = 7, height = 8, dpi = 300)
# Parc-level covariates and relationships
veg3 <- veg %>%
dplyr::select(any_of(c("Parc","Area","H","S"))) %>%
dplyr::group_by(Parc) %>%
dplyr::summarise(across(c(Area,H,S), mean, na.rm = TRUE), .groups = "drop") %>%
remove_rownames() %>%
column_to_rownames("Parc")
## Warning: There was 1 warning in `dplyr::summarise()`.
## ℹ In argument: `across(c(Area, H, S), mean, na.rm = TRUE)`.
## ℹ In group 1: `Parc = Brouette1`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
veg3$Size <- factor(ifelse(veg3$Area >= 20000, "Big", "Small"))
fit_area_S <- lm(Area ~ S, data = veg3)
area_S_plot <- ggplot(veg3, aes(x = Area, y = S, color = Size)) +
geom_point() +
geom_smooth(method = lm, se = TRUE) +
scale_color_brewer(palette = "Set2") +
labs(y = "Species richness", x = expression(paste("Area (", m^2, ")")))
ggsave(file.path(OUTPUT_DIR, "area_vs_species.png"), area_S_plot, width = 6.5, height = 5.5, dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
fig_veg <- ggarrange(
nmds_plot,
ggarrange(diversity_plot2, area_S_plot, ncol = 2, labels = c("B)", "C)")),
ncol = 1, labels = c("A)")
)
## `geom_smooth()` using formula = 'y ~ x'
ggsave(file.path(OUTPUT_DIR, "vegetation_panel.png"), fig_veg, width = 9, height = 12, dpi = 300)
fig_veg

2. VOCs (GC–MS/MZmine)
processing and ordination
fn_vocs <- file.path(DATA_DIR, params$fn_vocs)
fn_vocs_meta <- file.path(DATA_DIR, params$fn_vocs_meta)
need_file(fn_vocs); need_file(fn_vocs_meta)
## [1] "/Users/rasmanns/Library/CloudStorage/Dropbox/Lavoro/Manuscripts/2025/Aurora_Urban_Ecology/data_folder/stats_figures/data//fehti_mzmine2_worked.csv"
## [1] "/Users/rasmanns/Library/CloudStorage/Dropbox/Lavoro/Manuscripts/2025/Aurora_Urban_Ecology/data_folder/stats_figures/data//metadata_VOCs.csv"
vocs <- read.csv(fn_vocs, h=T, sep=";", na.strings = "NA", stringsAsFactors=FALSE)
metadata_vocs <- read.csv(fn_vocs_meta, h=T, sep=";", na.strings = "NA", stringsAsFactors=FALSE, row.names = 1)
# Construct sample x feature matrix
head(vocs)
names(vocs)
## [1] "row.ID" "row.m.z" "row.retention.time"
## [4] "X02.CDF.Peak.area" "X01.CDF.Peak.area" "X11.CDF.Peak.area"
## [7] "X22.CDF.Peak.area" "X23.CDF.Peak.area" "X63.CDF.Peak.area"
## [10] "X33.CDF.Peak.area" "X21.CDF.Peak.area" "X13.CDF.Peak.area"
## [13] "X15.CDF.Peak.area" "X62.CDF.Peak.area" "X61.CDF.Peak.area"
## [16] "X12.CDF.Peak.area" "X73.CDF.Peak.area" "X31.CDF.Peak.area"
## [19] "X82.CDF.Peak.area" "X113.CDF.Peak.area" "X41.CDF.Peak.area"
## [22] "X103.CDF.Peak.area" "X83.CDF.Peak.area" "X42.CDF.Peak.area"
## [25] "X102.CDF.Peak.area" "X43.CDF.Peak.area" "X81.CDF.Peak.area"
## [28] "X71.CDF.Peak.area" "X52.CDF.Peak.area" "X51.CDF.Peak.area"
## [31] "X112.CDF.Peak.area" "X93.CDF.Peak.area" "X123.CDF.Peak.area"
## [34] "X111.CDF.Peak.area" "X92.CDF.Peak.area" "X101.CDF.Peak.area"
## [37] "X121.CDF.Peak.area" "X122.CDF.Peak.area" "X91.CDF.Peak.area"
vocs2 <- vocs%>%
select(-c(2,3)) %>%
column_to_rownames(var='row.ID') %>% t
# Clean sample names to match metadata
rownames(vocs2)<- rownames(vocs2) %>% str_replace(".CDF.Peak.area*", "")
rownames(vocs2)<- rownames(vocs2) %>% str_replace(".X*", "")
rownames(metadata_vocs)
## [1] "X02.CDF.Peak.area" "X15.CDF.Peak.area" "X11.CDF.Peak.area"
## [4] "X12.CDF.Peak.area" "X13.CDF.Peak.area" "X21.CDF.Peak.area"
## [7] "X22.CDF.Peak.area" "X23.CDF.Peak.area" "X31.CDF.Peak.area"
## [10] "X32.CDF.Peak.area" "X33.CDF.Peak.area" "X41.CDF.Peak.area"
## [13] "X42.CDF.Peak.area" "X43.CDF.Peak.area" "X51.CDF.Peak.area"
## [16] "X52.CDF.Peak.area" "X53.CDF.Peak.area" "X61.CDF.Peak.area"
## [19] "X62.CDF.Peak.area" "X63.CDF.Peak.area" "X71.CDF.Peak.area"
## [22] "X72.CDF.Peak.area" "X73.CDF.Peak.area" "X81.CDF.Peak.area"
## [25] "X82.CDF.Peak.area" "X83.CDF.Peak.area" "X91.CDF.Peak.area"
## [28] "X92.CDF.Peak.area" "X93.CDF.Peak.area" "X101.CDF.Peak.area"
## [31] "X102.CDF.Peak.area" "X103.CDF.Peak.area" "X111.CDF.Peak.area"
## [34] "X112.CDF.Peak.area" "X113.CDF.Peak.area" "X121.CDF.Peak.area"
## [37] "X122.CDF.Peak.area" "X123.CDF.Peak.area"
rownames(metadata_vocs)<- rownames(metadata_vocs) %>% str_replace(".CDF.Peak.area*", "")
rownames(metadata_vocs)<- rownames(metadata_vocs) %>% str_replace(".X*", "")
# Merge
vocs3 <- merge(metadata_vocs, vocs2, by = 0)
# Optional row removals kept from original (ensure indices exist)
drop_idx <- intersect(c(1,14), seq_len(nrow(vocs3)))
vocs4 <- if (length(drop_idx)) vocs3[-drop_idx, ] else vocs3
names(vocs4)[2:4] <- c("ID_nb","Parc","Type")
vocs4$Parc <- as.factor(vocs4$Parc)
vocs4$Type <- as.factor(vocs4$Type)
# Internal-standard based scaling (keep structure from original script)
# NOTE: adapt indices if your IS column is elsewhere.
intensity <- vocs4[, 5:44]
is_col <- 25
scale_fac <- 20
vocscal <- intensity / vocs4[, is_col] * scale_fac
vocscal <- vocscal[, -21, drop = FALSE] # remove IS column from the features
# NMDS on Bray–Curtis
nmds_voc <- metaMDS(vocscal, distance = "bray", trymax = 100, autotransform = FALSE, trace = 0)
scores_voc <- as.data.frame(scores(nmds_voc, display = "sites"))
colnames(scores_voc) <- c("NMDS1","NMDS2")
scores_voc$Parc <- vocs4$Parc
scores_voc$Vegetation_type <- plyr::revalue(
vocs4$Type, c("Haut" = "Tree", "Milieu" = "Shrub", "Sol" = "Herb"), warn_missing = FALSE
)
nmdsplot_vocs <- ggplot(scores_voc, aes(NMDS1, NMDS2, colour = Parc)) +
geom_point(size = 3.5, alpha = 0.8) +
stat_ellipse(aes(fill = Vegetation_type), geom = "polygon", alpha = 0.2, color = NA,
type = "norm", level = 0.75) +
annotate("text", x = max(scores_voc$NMDS1, na.rm = TRUE), y = max(scores_voc$NMDS2, na.rm = TRUE),
label = paste0("Stress = ", round(nmds_voc$stress, 3)), hjust = 1.05, vjust = 1.5, size = 4) +
labs(color = "Parc", fill = "Habitat") +
theme(legend.position = "top")
ggsave(file.path(OUTPUT_DIR, "vocs_nmds.png"), nmdsplot_vocs, width = 7, height = 6, dpi = 300)
# PERMANOVA on Type with Parc as strata
perm_voc <- adonis2(vocscal ~ Type, data = vocs4, permutations = 199, strata = vocs4$Parc)
# Derive VOC richness per Parc (count of molecules present in any sample within Parc)
voc_by_parc <- vocscal %>%
dplyr:: mutate(Parc = vocs4$Parc) %>%
dplyr:: group_by(Parc) %>%
dplyr:: summarise(across(where(is.numeric), ~ as.integer(any(. > 0))), .groups = "drop") %>%
dplyr:: mutate(S_chem = rowSums(across(where(is.numeric))))
write.csv(voc_by_parc, file.path(OUTPUT_DIR, "voc_richness_by_parc.csv"), row.names = FALSE)
# Join VOC chemical richness to vegetation richness per Parc
vocscal2 <- cbind(vocs4[,c(3:4)], vocscal)
vocscal2$Type
## [1] Sol Milieu Haut Sol Sol Milieu Haut Milieu Sol Milieu
## [11] Haut Haut Sol Milieu Haut Sol Haut Sol Milieu Haut
## [21] Sol Milieu Sol Milieu Haut Sol Haut Sol Milieu Haut
## [31] Sol Milieu Haut
## Levels: Haut Milieu Sol
vocscal2$Parc
## [1] Valency Valency Valency Ouchy Languedoc
## [6] Languedoc Languedoc Ouchy Jeunesse Jeunesse
## [11] Jeunesse Ouchy Ficelle Ficelle Ficelle
## [16] Milan Milan Montbenon Montbenon Montbenon
## [21] Eracom Eracom Brouette Brouette Brouette
## [26] Hermitage Hermitage Monrepos Monrepos Monrepos
## [31] Derrière-Bourg Derrière-Bourg Derrière-Bourg
## 12 Levels: Brouette Derrière-Bourg Eracom Ficelle Hermitage ... Valency
vocscal2$Parc <- gsub("[0-9]", "", vocscal2$Parc)
vocscal2$Parc <- as.factor(vocscal2$Parc)
veg3_tbl <- vocscal2 %>% dplyr::select(Parc, 3:33) %>%
dplyr:: group_by(Parc) %>%
dplyr:: summarise(across(everything(), ~ sum(., na.rm = TRUE))) %>%
column_to_rownames(var='Parc')
S_chem <- vegan::specnumber(veg3_tbl)
S_veg <- veg$S
div_table <- cbind(veg3, S_chem, S_veg)
div_table$Parc <- factor(rownames(div_table))
names(div_table)[1] <- c("Area")
vocs_veg_plot <- ggplot(div_table, aes(x = S_veg, y = S_chem)) +
geom_point(aes(size = Area, color = Parc )) +
geom_smooth(method = lm, color = "darkgrey", se = TRUE) +
ylab("VOCs diversity") +
xlab("Plant diversity") +
theme_pubclean(base_size = 14) +
theme(legend.position = "right")
vocs_veg_plot
## `geom_smooth()` using formula = 'y ~ x'

ggsave(file.path(OUTPUT_DIR, "vocs_vs_veg.png"), vocs_veg_plot, width = 7, height = 6, dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
# Simple model including city section (Parc prefix) as fixed effect
div_table$Parc2 <- sub("[0-9]$", "", div_table$Parc)
fit_vocveg <- lm(S_veg ~ S_chem + Parc2, data = div_table)
summary(fit_vocveg)
##
## Call:
## lm(formula = S_veg ~ S_chem + Parc2, data = div_table)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.610 -2.396 0.000 1.949 12.976
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.1429 6.2299 -0.826 0.4176
## S_chem 0.7071 0.2805 2.521 0.0191 *
## Parc2Eracom -3.1786 3.8606 -0.823 0.4188
## Parc2Ficelle -1.2595 3.9678 -0.317 0.7538
## Parc2Hermitage -6.1619 3.9930 -1.543 0.1364
## Parc2Jeunesse 2.0000 3.8322 0.522 0.6067
## Parc2Languedoc -5.5119 3.8606 -1.428 0.1668
## Parc2Milan 1.0738 3.9678 0.271 0.7891
## Parc2Monrepos -3.1619 3.9930 -0.792 0.4365
## Parc2Montbenon -1.0000 3.8322 -0.261 0.7965
## Parc2Ouchy 2.1548 3.8606 0.558 0.5821
## Parc2Promenade -0.5929 3.9678 -0.149 0.8825
## Parc2Valency 0.8381 3.9930 0.210 0.8356
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.693 on 23 degrees of freedom
## Multiple R-squared: 0.4299, Adjusted R-squared: 0.1325
## F-statistic: 1.445 on 12 and 23 DF, p-value: 0.2159
anova(fit_vocveg)
write.csv(div_table, file.path(OUTPUT_DIR, "div_table.csv"), row.names = FALSE)
#
fig_vocs <- ggarrange(nmdsplot_vocs + theme(legend.position = "none"),
vocs_veg_plot, labels = c("A)", "B)"), ncol = 2, common.legend = TRUE)
## `geom_smooth()` using formula = 'y ~ x'
ggsave(file.path(OUTPUT_DIR, "vocs_panel.png"), fig_vocs, width = 12, height = 6, dpi = 300)
fig_vocs

4. Phylogeny and
species chemical diversity
# Species vector (de-duplicated)
r <- c(
"Acer campestre","Acer palmatum","Acer platanoides","Acer pseudoplatanus","Acer saccharum",
"Achillea millefolium","Aesculus hippocastanum","Anagallis arvensis","Anemone nemorosa",
"Anthemis tinctoria","Aquilegia vulgaris","Arrhenatherum elatius","Artemisia verlotiorum",
"Aubrieta deltoidea","Bellis perennis","Betonica officinalis","Betula pendula","Buxus sempervirens",
"Calystegia sepium","Campanula persicifolia","Campsis grandiflora","Capsella bursa-pastoris",
"Carex paniculata","Carpinus betulus","Carpinus orientalis","Cedrus atlantica","Cedrus deodara",
"Centaurea cyanus","Centaurea scabiosa","Centaurea stoebe","Cerastium fontanum",
"Cercidiphyllum japonicum","Chamaecyparis obtusa","Chimonanthus praecox","Chionanthus retusus",
"Cichorium intybus","Circaea lutetiana","Cirsium arvense","Convolvulus arvensis","Cornus controversa",
"Cornus mas","Cornus sanguinea","Cortaderia selloana","Corylus avellana","Corylus cornuta",
"Cotinus coggygria","Crataegus laevigata","Crepis aurea","Crepis capillaris","Cymbalaria muralis",
"Dactylis glomerata","Daucus carota","Delphinium elatum","Dryopteris filix-mas","Echium vulgare",
"Euonymus alatus","Euonymus europaeus","Euonymus latifolius","Euphorbia cyparissias","Fagus sylvatica",
"Fraxinus excelsior","Galium mollugo","Geranium robertianum","Geranium sanguineum","Geum urbanum",
"Ginkgo biloba","Glechoma hederacea","Gleditsia triacanthos","Hamamelis virginiana","Hedera helix",
"Holcus lanatus","Hosta sieboldiana","Hydrangea macrophylla","Hypericum androsaemum","Hypericum palustrum",
"Hypericum perforatum","Hypochaeris radicata","Iberis sempervirens","Ilex aquifolium","Ilex canariensis",
"Impatiens parviflora","Juniperus communis","Lactuca virosa","Lathyrus latifolius","Lavandula angustifolia",
"Leucanthemum vulgare","Leymus arenarius","Ligustrum vulgare","Linaria vulgaris","Liquidambar orientalis",
"Liriodendron tulipifera","Lolium multiflorum","Lolium perenne","Lonicera japonica","Lonicera xylosteum",
"Lotus corniculatus","Magnolia × soulangeana","Magnolia kobus","Malva alcea","Malva parviflora",
"Medicago sativa","Miscanthus sinensis","Nepeta racemosa","Origanum vulgare","Osmanthus heterophyllus",
"Panicum virgatum","Papaver rhoeas","Parrotia persica","Parthenocissus tricuspidata","Pastinaca sativa",
"Phleum pratense","Pinus mugo","Pinus nigra","Pinus sylvestris","Plantago lanceolata","Plantago major",
"Platanus × hispanica","Poa annua","Polygonum aviculare","Polystichum setiferum","Populus nigra",
"Potentilla reptans","Prunella vulgaris","Prunus × fruticans","Prunus avium","Prunus cerasus",
"Prunus domestica","Prunus laurocerasus","Prunus lusitanica","Prunus mahaleb","Prunus padus",
"Prunus serrulata","Prunus yedoensis","Punica granatum","Quercus ilex","Quercus petraea","Quercus robur",
"Ranunculus acris","Rosa multiflora","Rumex acetosa","Rumex obtusifolius","Salix purpurea","Salvia pratensis",
"Sambucus canadensis","Scabiosa columbaria","Sequoiadendron giganteum","Silene noctiflora","Sonchus asper",
"Sonchus oleraceus","Styphnolobium japonicum","Tamarix ramosissima","Taraxacum officinale","Taxus baccata",
"Taxus brevifolia","Taxus cuspidata","Tilia platyphyllos","Torilis arvensis","Torminalis glaberrima",
"Tradescantia virginiana","Trifolium arvense","Trifolium pratense","Trifolium repens","Trisetum flavescens",
"Valeriana officinalis","Verbascum nigrum","Verbascum pulverulentum","Verbena bonariensis","Veronica persica",
"Viburnum acerifolium","Viburnum henryi","Viburnum lantana","Viburnum opulus","Viburnum trilobum",
"Vinca major","Vinca minor","Visnaga daucoides"
)
Species <- unique(r)
# Manual corrections retained from source
Species[97] <- "Magnolia denudata"
Species[124] <- "Prunus spinosa"
Species[117] <- "Platanus orientalis"
Species[158] <- "Sorbus glaberrima"
# TNRS and OpenTree subtree
taxon_search <- tnrs_match_names(names = Species, context_name = "All life")
mu <- tibble(Species = Species,
ott_name = unique_name(taxon_search),
ott_id = taxon_search$ott_id)
ott_in_tree <- ott_id(taxon_search)[is_in_tree(ott_id(taxon_search))]
tr <- tol_induced_subtree(ott_ids = ott_in_tree)
## Progress [--------------------------------] 0/1663 ( 0%) ?sProgress [============================] 1663/1663 (100%) 0s
tr$tip.label <- strip_ott_ids(tr$tip.label, remove_underscores = TRUE)
# Build species x molecule logical matrix from HPLC-merged table (presence if >0)
# Assumes 'Species' column exists in hplc_merged metadata columns.
stopifnot("Species" %in% colnames(hplc_merged))
# Feature columns start after metadata; here we detect numeric-only columns
feature_mat <- hplc_merged %>% select(where(is.numeric))
# Collapse to species presence (any occurrence)
diversity_tbl <- hplc_merged %>%
dplyr::select(Species, 8:8738)%>%
dplyr::group_by(Species) %>%
dplyr:: summarise(across(everything(), ~ as.integer(any(. > 0, na.rm = TRUE))))%>%
dplyr:: mutate(chem_diversity = rowSums(across(2:ncol(.)))) # counts 1s
write.csv(diversity_tbl, file.path(OUTPUT_DIR, "diversity_tbl.csv"), row.names = FALSE)
# Name normalization for matching to tree tips
normalize_name <- function(x) {
x %>%
str_trim() %>%
str_replace_all("\\s+", " ") %>%
str_replace_all("×", "x") %>%
str_replace_all("[._]", " ") %>%
stringi::stri_trans_general("Latin-ASCII") %>%
tolower() %>%
str_replace_all("[^a-z0-9 ]", "") %>%
str_squish()
}
diversity_tbl <- diversity_tbl %>% mutate(name_norm = normalize_name(Species))
tips <- tr$tip.label
tips_norm <- normalize_name(tips)
idx <- match(diversity_tbl$name_norm, tips_norm)
diversity_tbl$tip_label <- ifelse(!is.na(idx), tips[idx], NA)
if (anyNA(diversity_tbl$tip_label)) {
need <- which(is.na(diversity_tbl$tip_label))
fm <- stringdist::amatch(diversity_tbl$name_norm[need], tips_norm, method = "osa", maxDist = 2)
diversity_tbl$tip_label[need] <- ifelse(!is.na(fm), tips[fm], NA)
}
unmatched <- diversity_tbl %>% filter(is.na(tip_label)) %>% select(Species)
if (nrow(unmatched) > 0) {
write.csv(unmatched, file.path(OUTPUT_DIR, "unmatched_species_phylogeny.csv"), row.names = FALSE)
message("Some species could not be matched to the phylogeny; see unmatched_species_phylogeny.csv")
}
## Some species could not be matched to the phylogeny; see unmatched_species_phylogeny.csv
traits <- diversity_tbl %>% filter(!is.na(tip_label)) %>% select(tip_label, chem_diversity)
# Example correction retained from original
if (length(tr$tip.label) >= 152) tr$tip.label[152] <- "Panicum virgatum"
# Map chemical diversity onto tree
phylo_chem_plot <- ggtree(tr, layout = "circular") %<+% traits +
geom_tippoint(aes(size = chem_diversity, color = chem_diversity), alpha = 0.6) +
scale_size_continuous(name = "Chemical diversity", range = c(1, 5)) +
scale_color_viridis_c(name = "Chemical diversity") +
theme(legend.position = "right")
## Warning: `aes_()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`
## ℹ The deprecated feature was likely used in the ggtree package.
## Please report the issue at <https://github.com/YuLab-SMU/ggtree/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • as.Date = as.Date
## • yscale_mapping = yscale_mapping
## • branch.length = branch.length
## • hang = hang
## ℹ Did you misspell an argument name?
ggsave(file.path(OUTPUT_DIR, "phylogeny_chem_diversity.png"), phylo_chem_plot, width = 8, height = 8, dpi = 300)
## Warning: Removed 78 rows containing missing values or values outside the scale range
## (`geom_point_g_gtree()`).
phylo_chem_plot
## Warning: Removed 78 rows containing missing values or values outside the scale range
## (`geom_point_g_gtree()`).

5. Virtual communities:
species vs chemical richness
# Species → primary layer assignment from vegetation matrix
meta_cols <- intersect(c("Parc2","Parc1","Parc","Area","Layer"), names(veg))
species_cols <- setdiff(names(veg), meta_cols)
layer_by_species <- purrr::map_chr(
species_cols,
\(sp) {
rows <- veg[[sp]] > 0
if (!any(rows)) return(NA_character_)
lyr <- veg$Layer[rows]
names(sort(table(lyr), decreasing = TRUE))[1]
}
) |> setNames(species_cols)
target_layers <- c("herb","shrub","tree")
species_pool <- names(layer_by_species)[layer_by_species %in% target_layers]
# Prepare chemical matrix from diversity_tbl (molecule columns start with any feature column)
mol_cols <- names(diversity_tbl)[!(names(diversity_tbl) %in% c("Species","name_norm","tip_label","chem_diversity"))]
stopifnot(length(mol_cols) > 0)
norm_name <- function(x) {
x |>
stringr::str_to_lower() |>
stringi::stri_trans_general("Latin-ASCII") |>
gsub("[^a-z]+", " ", x = _) |>
stringr::str_squish()
}
chem <- diversity_tbl
chem$species_norm <- norm_name(chem$Species)
pool_norm <- norm_name(species_pool)
chem_index <- match(pool_norm, chem$species_norm)
matched_mask <- !is.na(chem_index)
matched_species <- species_pool[matched_mask]
chem_bool <- as.matrix(chem[mol_cols] > 0)
chemical_richness <- function(species_vec) {
idx <- match(norm_name(species_vec), chem$species_norm)
idx <- idx[!is.na(idx)]
if (length(idx) == 0) return(c(chem_rich = NA_real_, n_with_chem = 0))
present_any <- matrixStats::colAnys(chem_bool[idx, , drop = FALSE])
c(chem_rich = sum(present_any), n_with_chem = length(idx))
}
# Observed layer proportions (from veg)
prop <- prop.table(table(veg$Layer[target_layers %in% veg$Layer]))
# If some layers missing, fallback to given vector from your script
if (length(prop) != 3) prop <- c(herb = 0.358, shrub = 0.328, tree = 0.313)
sp_by_layer <- split(names(layer_by_species), layer_by_species)[target_layers]
sizes <- 3:80
reps_per_size <- 100
results <- vector("list", length(sizes) * reps_per_size); k <- 1L
for (s in sizes) {
for (r in seq_len(reps_per_size)) {
counts <- round(s * prop / sum(prop))
while (sum(counts) < s) counts[which.max(prop)] <- counts[which.max(prop)] + 1
while (sum(counts) > s) counts[which.max(counts)] <- counts[which.max(counts)] - 1
comm <- character(0)
for (ly in names(counts)) {
layer_pool <- intersect(sp_by_layer[[ly]], matched_species)
n_ly <- counts[[ly]]
if (n_ly > 0 && length(layer_pool) > 0) {
pick <- if (n_ly <= length(layer_pool)) sample(layer_pool, n_ly, FALSE) else sample(layer_pool, n_ly, TRUE)
comm <- c(comm, pick)
}
}
vals <- chemical_richness(comm)
results[[k]] <- tibble(
species_richness = s,
chemical_richness = vals[["chem_rich"]],
n_with_chem_profiles = vals[["n_with_chem"]]
)
k <- k + 1L
}
}
sim_df <- bind_rows(results) |> drop_na(chemical_richness)
# Stats
pear <- cor.test(sim_df$species_richness, sim_df$chemical_richness, method = "pearson")
spear <- cor.test(sim_df$species_richness, sim_df$chemical_richness, method = "spearman")
## Warning in cor.test.default(sim_df$species_richness, sim_df$chemical_richness,
## : Cannot compute exact p-value with ties
lm_fit <- lm(chemical_richness ~ species_richness, data = sim_df)
lm_sum <- summary(lm_fit)
write.csv(sim_df, file.path(OUTPUT_DIR, "virtual_communities_results.csv"), row.names = FALSE)
# Plot
comm_chem_div_plot <- ggplot(sim_df, aes(species_richness, chemical_richness)) +
geom_point(alpha = 0.25, size = 1) +
geom_smooth(se = TRUE, color = "darkred", linewidth = 1.2, span = 0.6) +
labs(x = "Plant species richness (community size)", y = "Chemical richness (unique molecules)")
ggsave(file.path(OUTPUT_DIR, "virtual_communities_plot.png"), comm_chem_div_plot, width = 7, height = 6, dpi = 300)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Inset with phylogeny (optional combined figure)
combo <- ggdraw() +
draw_plot(comm_chem_div_plot, 0, 0, 1, 1) +
draw_plot(phylo_chem_plot + theme(legend.position = "right"), 0.55, 0.1, 0.4, 0.4)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 78 rows containing missing values or values outside the scale range
## (`geom_point_g_gtree()`).
ggsave(file.path(OUTPUT_DIR, "communities_plus_phylogeny.png"), combo, width = 10, height = 7, dpi = 300)
combo

6. Summary panels
panel_veg_vocs <- ggarrange(
nmds_plot, nmdsplot_vocs, labels = c("A) Vegetation NMDS","B) VOCs NMDS"), ncol = 2
)
ggsave(file.path(OUTPUT_DIR, "panel_veg_vocs.png"), panel_veg_vocs, width = 12, height = 6, dpi = 300)
panel_veg_vocs

7. PERMANOVA
summaries
cat("Vegetation PERMANOVA (Jaccard PA, factor Layer; strata =", strata_col, ")\\n")
## Vegetation PERMANOVA (Jaccard PA, factor Layer; strata = Parc2 )\n
print(perm_out)
## Permutation test for adonis under reduced model
## Blocks: strata
## Permutation: free
## Number of permutations: 199
##
## adonis2(formula = comm_bin ~ Layer, data = veg, permutations = 199, strata = veg[[strata_col]])
## Df SumOfSqs R2 F Pr(>F)
## Model 2 3.0999 0.20437 4.2383 0.005 **
## Residual 33 12.0681 0.79563
## Total 35 15.1680 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\\nVOCs PERMANOVA (Bray–Curtis; factor Type; strata = Parc)\\n")
## \nVOCs PERMANOVA (Bray–Curtis; factor Type; strata = Parc)\n
print(perm_voc)
## Permutation test for adonis under reduced model
## Blocks: strata
## Permutation: free
## Number of permutations: 199
##
## adonis2(formula = vocscal ~ Type, data = vocs4, permutations = 199, strata = vocs4$Parc)
## Df SumOfSqs R2 F Pr(>F)
## Model 2 0.31375 0.11772 2.0015 0.005 **
## Residual 30 2.35138 0.88228
## Total 32 2.66513 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
8. Session info
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Zurich
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] stringdist_0.9.15 rotl_3.1.0 ggtree_3.16.3 ape_5.8-1
## [5] matrixStats_1.5.0 stringi_1.8.7 cowplot_1.2.0 pheatmap_1.0.13
## [9] janitor_2.2.1 ggfortify_0.4.19 lubridate_1.9.4 forcats_1.0.0
## [13] stringr_1.5.2 dplyr_1.1.4 purrr_1.1.0 readr_2.1.5
## [17] tidyr_1.3.1 tibble_3.3.0 tidyverse_2.0.0 ggrepel_0.9.6
## [21] ggpubr_0.6.1 plyr_1.8.9 ggplot2_4.0.0 vegan_2.7-1
## [25] permute_0.9-8 igraph_2.1.4 RColorBrewer_1.1-3
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 viridisLite_0.4.2 farver_2.1.2 S7_0.2.0
## [5] fastmap_1.2.0 lazyeval_0.2.2 XML_3.99-0.19 digest_0.6.37
## [9] timechange_0.3.0 lifecycle_1.0.4 cluster_2.1.8.1 tidytree_0.4.6
## [13] magrittr_2.0.4 compiler_4.5.1 progress_1.2.3 rlang_1.1.6
## [17] sass_0.4.10 tools_4.5.1 yaml_2.3.10 knitr_1.50
## [21] ggsignif_0.6.4 labeling_0.4.3 prettyunits_1.2.0 curl_7.0.0
## [25] aplot_0.2.9 abind_1.4-8 withr_3.0.2 grid_4.5.1
## [29] scales_1.4.0 MASS_7.3-65 cli_3.6.5 crayon_1.5.3
## [33] rmarkdown_2.29 ragg_1.5.0 treeio_1.32.0 generics_0.1.4
## [37] rstudioapi_0.17.1 httr_1.4.7 tzdb_0.5.0 cachem_1.1.0
## [41] splines_4.5.1 parallel_4.5.1 ggplotify_0.1.3 vctrs_0.6.5
## [45] yulab.utils_0.2.1 Matrix_1.7-3 jsonlite_2.0.0 carData_3.0-5
## [49] car_3.1-3 patchwork_1.3.2 gridGraphics_0.5-1 hms_1.1.3
## [53] rstatix_0.7.2 Formula_1.2-5 systemfonts_1.2.3 jquerylib_0.1.4
## [57] glue_1.8.0 gtable_0.3.6 pillar_1.11.1 rappdirs_0.3.3
## [61] htmltools_0.5.8.1 R6_2.6.1 textshaping_1.0.3 evaluate_1.0.5
## [65] lattice_0.22-7 rentrez_1.2.4 backports_1.5.0 broom_1.0.10
## [69] snakecase_0.11.1 ggfun_0.2.0 bslib_0.9.0 rncl_0.8.7
## [73] Rcpp_1.1.0 gridExtra_2.3 nlme_3.1-168 mgcv_1.9-3
## [77] xfun_0.53 fs_1.6.6 pkgconfig_2.0.3
Notes for repository
upload
- Keep the
data/ directory structure consistent with
params.
- The
output/ directory contains:
- Tables:
site_diversity.csv,
pooled_by_parc.csv, parc_diversity.csv,
voc_richness_by_parc.csv, div_table.csv,
hplc_merged.csv, diversity_tbl.csv,
virtual_communities_results.csv, and optionally
unmatched_species_phylogeny.csv.
- Figures:
veg_nmds.png,
veg_richness_by_parc.png, area_vs_species.png,
vegetation_panel.png, vocs_nmds.png,
vocs_vs_veg.png, vocs_panel.png,
phylogeny_chem_diversity.png,
virtual_communities_plot.png,
communities_plus_phylogeny.png,
panel_veg_vocs.png.
- For exact reproducibility, consider archiving an
renv.lock file created with {renv} after
running renv::init() in the project root.
- This document avoids hard-coded absolute paths and provides checks
where column names were ambiguous (e.g.,
Parc2 vs
Parc).