R code fo the paper: Moreira et al…

Libraries
## Warning: package 'vegan' was built under R version 4.1.2
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.1.2
## Loading required package: lattice
## This is vegan 2.6-4
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.5.1     ✔ purrr   1.0.2
## ✔ tibble  3.2.1     ✔ dplyr   1.1.4
## ✔ tidyr   1.1.4     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## Warning: package 'ggrepel' was built under R version 4.1.2
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
## Please note that rgdal will be retired by the end of 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
## 
## rgdal: version: 1.5-27, (SVN revision 1148)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
## Overwritten PROJ_LIB was /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/proj
## Warning: multiple methods tables found for 'direction'
## Warning: multiple methods tables found for 'gridDistance'
## 
## Attaching package: 'raster'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## Warning: package 'sf' was built under R version 4.1.2
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
## Warning: package 'rmapshaper' was built under R version 4.1.2
## Warning: package 'rfPermute' was built under R version 4.1.2
## Welcome to rfPermute v2.5.1
## See rfPermuteTutorial() for a guide to the package.
## Warning: package 'randomForest' was built under R version 4.1.2
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
## Warning: package 'rpart.plot' was built under R version 4.1.2
## Loading required package: rpart

Working directory prep

wd <- list()
#  
wd$data   <- "/Users/rasmanns/Dropbox/Lavoro/Manuscripts/2024/Xoaquin_tenerife/Analyses/Data/Formatted/"
wd$output <- "/Users/rasmanns/Dropbox/Lavoro/Manuscripts/2024/Xoaquin_tenerife/Analyses/Output/"

Load the different datasets

metadata<- read.csv(paste0(wd$data, "metada_canary_corrected.csv") , h=T, sep=",", na.strings = "NA")
climate<- read.csv(paste0(wd$data, "species_climate.csv") , h=T, sep=",", na.strings = "NA")
feature_tab<- read.csv(paste0(wd$data, "FeatureList.csv") , h=T, sep=",", na.strings = "NA")
canopus <- readr::read_tsv(paste0(wd$data, "inf1000/canopus_compound_summary.tsv"))
## Rows: 23836 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (13): id, molecularFormula, adduct, precursorFormula, NPC#pathway, NPC#s...
## dbl  (9): NPC#pathway Probability, NPC#superclass Probability, NPC#class Pro...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sirius <- readr::read_tsv(paste0(wd$data, "inf1000/compound_identifications.tsv"))
## Rows: 23677 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (11): ConfidenceScore, ZodiacScore, molecularFormula, adduct, InChIkey2D...
## dbl (12): confidenceRank, structurePerIdRank, formulaRank, #adducts, #predic...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Metadata

names(metadata)
##  [1] "nb"               "row_chem_samples" "chem_samples"     "ID"              
##  [5] "species"          "native.endemic"   "plant"            "latitude"        
##  [9] "longitude"        "Height"           "Diameter"         "stem_age"        
## [13] "crown_age"        "herbivory"
metadata$species <- as.factor(metadata$species)
metadata$native.endemic <- as.factor(metadata$native.endemic)

names(metadata)
##  [1] "nb"               "row_chem_samples" "chem_samples"     "ID"              
##  [5] "species"          "native.endemic"   "plant"            "latitude"        
##  [9] "longitude"        "Height"           "Diameter"         "stem_age"        
## [13] "crown_age"        "herbivory"
str(metadata)
## 'data.frame':    265 obs. of  14 variables:
##  $ nb              : int  56 57 58 59 60 192 193 194 195 196 ...
##  $ row_chem_samples: int  40 31 23 58 29 160 252 209 186 188 ...
##  $ chem_samples    : chr  "80" "81" "82" "83" ...
##  $ ID              : int  80 81 82 83 84 303 304 305 306 307 ...
##  $ species         : Factor w/ 54 levels "Aeonium_cuneatum",..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ native.endemic  : Factor w/ 2 levels "endemic","native": 1 1 1 1 1 1 1 1 1 1 ...
##  $ plant           : int  1 2 3 4 5 1 2 3 4 5 ...
##  $ latitude        : num  28.5 28.5 28.5 28.5 28.5 ...
##  $ longitude       : num  -16.2 -16.2 -16.2 -16.2 -16.2 ...
##  $ Height          : num  23 29 35 23 37 300 280 320 250 230 ...
##  $ Diameter        : num  14.7 14.1 13.7 10.8 16 ...
##  $ stem_age        : num  0.68 0.68 0.68 0.68 0.68 1.9 1.9 1.9 1.9 1.9 ...
##  $ crown_age       : num  6.8 6.8 6.8 6.8 6.8 1.9 1.9 1.9 1.9 1.9 ...
##  $ herbivory       : num  0.025 0 0.043 0.987 0.133 ...
metadata2 <- metadata %>% dplyr::mutate(across(where(is.numeric), ~replace_na(., mean(., na.rm=TRUE))))
str(metadata2)
## 'data.frame':    265 obs. of  14 variables:
##  $ nb              : num  56 57 58 59 60 192 193 194 195 196 ...
##  $ row_chem_samples: num  40 31 23 58 29 160 252 209 186 188 ...
##  $ chem_samples    : chr  "80" "81" "82" "83" ...
##  $ ID              : num  80 81 82 83 84 303 304 305 306 307 ...
##  $ species         : Factor w/ 54 levels "Aeonium_cuneatum",..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ native.endemic  : Factor w/ 2 levels "endemic","native": 1 1 1 1 1 1 1 1 1 1 ...
##  $ plant           : num  1 2 3 4 5 1 2 3 4 5 ...
##  $ latitude        : num  28.5 28.5 28.5 28.5 28.5 ...
##  $ longitude       : num  -16.2 -16.2 -16.2 -16.2 -16.2 ...
##  $ Height          : num  23 29 35 23 37 300 280 320 250 230 ...
##  $ Diameter        : num  14.7 14.1 13.7 10.8 16 ...
##  $ stem_age        : num  0.68 0.68 0.68 0.68 0.68 1.9 1.9 1.9 1.9 1.9 ...
##  $ crown_age       : num  6.8 6.8 6.8 6.8 6.8 1.9 1.9 1.9 1.9 1.9 ...
##  $ herbivory       : num  0.025 0 0.043 0.987 0.133 ...
names(metadata2)
##  [1] "nb"               "row_chem_samples" "chem_samples"     "ID"              
##  [5] "species"          "native.endemic"   "plant"            "latitude"        
##  [9] "longitude"        "Height"           "Diameter"         "stem_age"        
## [13] "crown_age"        "herbivory"
dudi1 <- dudi.pca(metadata2[,c(10:14)], scale = T, scan = FALSE, nf = 2)
pcap<-fviz_pca_biplot(dudi1, addEllipses=TRUE, habillage =metadata2$native.endemic, ellipse.level=0.95)
pcap

Climate and map

climate$species <- as.factor(climate$species)

names(climate)
## [1] "species"       "X"             "Y"             "temperature"  
## [5] "max_temp"      "min_temp"      "precipitation"
str(climate)
## 'data.frame':    13004 obs. of  7 variables:
##  $ species      : Factor w/ 54 levels "Aeonium cuneatum Webb & Berthel.",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ X            : int  320250 320250 320750 320750 320750 321250 321250 321750 321750 374750 ...
##  $ Y            : int  3136250 3136750 3135750 3136250 3136750 3135750 3136250 3136250 3136750 3159750 ...
##  $ temperature  : num  17 17.5 16.5 17 17.8 ...
##  $ max_temp     : num  25.5 26.2 25.3 25.2 25.9 ...
##  $ min_temp     : num  10.14 10.44 9.43 10.41 11.25 ...
##  $ precipitation: int  484 501 466 454 470 478 478 440 439 524 ...
dudi1 <- dudi.pca(climate[,c(4:7)], scale = T, scan = FALSE, nf = 2)
pcap<-fviz_pca_biplot(dudi1, geom="point")
pcap

climate2 <- climate %>% dplyr::mutate(across(where(is.numeric), ~replace_na(., mean(., na.rm=TRUE))))

climate3 <- climate2 %>% 
  dplyr::  group_by(species)%>% 
  dplyr::    summarise(across(everything(), list(mean)))

spp <- word(as.character(climate3$species), 1,2, sep=" ")
climate3$spp <- sub(" ", "_", spp)

names(climate3)
## [1] "species"         "X_1"             "Y_1"             "temperature_1"  
## [5] "max_temp_1"      "min_temp_1"      "precipitation_1" "spp"
climate3$spp
##  [1] "Aeonium_cuneatum"           "Anagyris_latifolia"        
##  [3] "Apollonias_barbujana"       "Argyranthemum_adauctum"    
##  [5] "Argyranthemum_broussonetii" "Asparagus_pastorianus"     
##  [7] "Astydamia_latifolia"        "Bituminaria_bituminosa"    
##  [9] "Bosea_yerbamora"            "Bupleurum_salicifolium"    
## [11] "Campylanthus_salsoloides"   "Canarina_canariensis"      
## [13] "Carlina_salicifolia"        "Cistus_monspeliensis"      
## [15] "Convolvulus_floridus"       "Crambe_arborea"            
## [17] "Crambe_strigosa"            "Daphne_gnidium"            
## [19] "Dracunculus_canariensis"    "Echium_decaisnei"          
## [21] "Euphorbia_balsamifera"      "Gesnouinia_arborea"        
## [23] "Globularia_salicina"        "Hedera_canariensis"        
## [25] "Helianthemum_broussonetii"  "Hypericum_canariense"      
## [27] "Juniperus_cedrus"           "Kleinia_neriifolia"        
## [29] "Limonium_dendroides"        "Lobularia_canariensis"     
## [31] "Lotus_campylocladus"        "Ocotea_foetens"            
## [33] "Olea_cerasiformis"          "Pericallis_appendiculata"  
## [35] "Pericallis_cruenta"         "Pericallis_lanata"         
## [37] "Periploca_laevigata"        "Persea_indica"             
## [39] "Pistacia_lentiscus"         "Plocama_pendula"           
## [41] "Ranunculus_cortusifolius"   "Rhamnus_crenulata"         
## [43] "Rubia_fruticosa"            "Ruta_oreojasme"            
## [45] "Salvia_canariensis"         "Scrophularia_smithii"      
## [47] "Sideritis_oroteneriffae"    "Sideroxylon_canariense"    
## [49] "Silene_berthelotiana"       "Silene_nocteolens"         
## [51] "Sonchus_canariensis"        "Sonchus_congestus"         
## [53] "Teline_canariensis"         "Volutaria_canariensis"
climate3 <- as.data.frame(climate3)

###merge average climatic niche per species with metadata

#metadata$species
setdiff(levels(metadata$species), climate3$spp)
## character(0)
m1 <- merge(metadata, climate3, by.x = "species", by.y = "spp")
m2 <-   m1 #cbind(m1, df)

names(m2)
##  [1] "species"          "nb"               "row_chem_samples" "chem_samples"    
##  [5] "ID"               "native.endemic"   "plant"            "latitude"        
##  [9] "longitude"        "Height"           "Diameter"         "stem_age"        
## [13] "crown_age"        "herbivory"        "species.y"        "X_1"             
## [17] "Y_1"              "temperature_1"    "max_temp_1"       "min_temp_1"      
## [21] "precipitation_1"
m3 <- m2 %>% dplyr::select(nb, species,species.y, row_chem_samples: precipitation_1)

names(m3)
##  [1] "nb"               "species"          "species.y"        "row_chem_samples"
##  [5] "chem_samples"     "ID"               "native.endemic"   "plant"           
##  [9] "latitude"         "longitude"        "Height"           "Diameter"        
## [13] "stem_age"         "crown_age"        "herbivory"        "X_1"             
## [17] "Y_1"              "temperature_1"    "max_temp_1"       "min_temp_1"      
## [21] "precipitation_1"
####map
library(rnaturalearth)
## Warning: package 'rnaturalearth' was built under R version 4.1.2
world <- ne_countries(scale = "medium", returnclass = "sf")
class(world)
## [1] "sf"         "data.frame"
#names(m3)

m4_map <- m3 %>% dplyr::select(species, longitude, latitude,crown_age, herbivory, temperature_1, max_temp_1,min_temp_1,precipitation_1 ) %>%
                  dplyr::  group_by(species)%>% 
                   dplyr::    summarise(across(everything(), list(mean)))

colnames(m4_map)<- colnames(m4_map) %>% str_replace("_1*", "")
colnames(m4_map)[4] <- c("crown_age")

### for figure map 
map1 <- ggplot(data = world) +
  geom_sf() +
  geom_point(data = m4_map, aes(x = longitude, y = latitude), 
             shape = 20, size = m4_map$crown_age, alpha=0.2)+
  #geom_text_repel(data = m4_map, aes(x = longitude, y = latitude, label =crown_age), 
             #   fontface = "bold", nudge_x = c(1, -1.5, 2, 2, -1), size = 3, segment.colour = "grey",
               # nudge_y = c(0.25, -0.25, 0.5, 0.5, -0.5)) +
  coord_sf(xlim = c(-19,-13), ylim = c(27,29.5), expand = FALSE)+ theme_bw()

map1

Full chemical dataset

########### chemical data #####

#names(feature_tab)
lett <- paste("1",LETTERS[2:26],sep="")
roman <- paste("Sample_",as.roman(1:5), sep ="")
##remove columns that are not present in the metadata comm
feature_tab2 <-  feature_tab %>% dplyr::select(-contains(c( "blank", lett , roman )))
#names(feature_tab2)

feature_tab3 <-  feature_tab2 %>% dplyr::select(!c(row.ID, row.m.z, row.retention.time, X))
rownames(feature_tab3) <- feature_tab2$row.ID
#names(feature_tab3)

feature_tab4 <- as.data.frame(t(feature_tab3))

###remove exclusion list ####
#rownames(feature_tab4)
remove_vect <- numeric()
for (i in 1:ncol(feature_tab4)) {
  if (feature_tab4[1,i]>0) {
    remove_vect[i] <-colnames(feature_tab4[i])
  }
}

feature_tab5 <- feature_tab4 %>% 
                  dplyr::select(-remove_vect)%>% 
                    slice(-1)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(remove_vect)
## 
##   # Now:
##   data %>% select(all_of(remove_vect))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#####

rownames(feature_tab5)<- rownames(feature_tab5) %>% str_replace(".Peak.area*", "")
rownames(feature_tab5)<- rownames(feature_tab5) %>% str_replace("X*", "")
rownames(feature_tab5)<- rownames(feature_tab5) %>% str_replace("231101_AK_DDA_Phenolic_Sample_*", "")
rownames(feature_tab5)<- rownames(feature_tab5) %>% str_replace("231101_AK_DDA_Phenolic_Sample_*", "")
rownames(feature_tab5)<- rownames(feature_tab5) %>% str_replace("231101_AK_DDA_Phenolic_Sample_*", "")
rownames(feature_tab5)<- rownames(feature_tab5) %>% str_replace("231103_AK_DDA_Phenolic_Sample_*", "")
rownames(feature_tab5)<- rownames(feature_tab5) %>% str_replace("231030_AK_DDA_Phenolic_Sample_*", "")
rownames(feature_tab5)<- rownames(feature_tab5) %>% str_replace("231031_AK_DDA_Phenolic_Sample_*", "")
rownames(feature_tab5)<- rownames(feature_tab5) %>% str_replace("_nr8cent.mzML*", "")
#rownames(feature_tab5)

####keep columns with at least three values 
remove_vect3 <- numeric()

tabx <- feature_tab5[, c(1,2,3,66,534,3,32,28383,2222,31332, 31586)]
class(tabx[,c(10)])
## [1] "numeric"
as.numeric(colSums(tabx[11] != 0))
## [1] 1
for (i in 1:ncol(tabx)) {
  if ( as.numeric(colSums(tabx[i] != 0)) < 4) {
    remove_vect3[i] <-as.numeric(colnames(tabx[i]))
    print(as.numeric(colnames(tabx[i])))
  }
}
## [1] 76443
## [1] 89122
## [1] 92543
remove_vect4 <- as.vector(na.omit(as.character(remove_vect3)))

tabx2 <- tabx %>% dplyr::select(-remove_vect4)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(remove_vect4)
## 
##   # Now:
##   data %>% select(all_of(remove_vect4))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
for (i in 1:ncol(feature_tab5)) {
   if ( as.numeric(colSums(feature_tab5[i] != 0)) < 4) {
    remove_vect3[i] <-as.numeric(colnames(feature_tab5[i]))
  }
}

remove_vect4 <- as.vector(na.omit(as.character(remove_vect3)))
feature_tab6 <- feature_tab5 %>% dplyr::select(-remove_vect4)
#names(feature_tab6)
####

Annotated compounds dataset

##chemistry identification
#names(canopus)
#names(sirius)
#sirius$id
#canopus$id

compounds <-merge(sirius, canopus,  by= "featureId", all =T)
#names(compounds)

rownames(compounds) <- compounds$featureId
feature_tab7 <- as.data.frame(t(feature_tab6))
compounds2 <- merge(compounds, feature_tab7, by = 'row.names')
#names(compounds2)

compounds2$`NPC#pathway` <- as.factor(compounds2$`NPC#pathway`)
compounds2$`NPC#superclass` <- as.factor(compounds2$`NPC#superclass`)
compounds2$`NPC#class` <- as.factor(compounds2$`NPC#class`)

####
metadata4 <- m3
rownames(metadata4) <- m3$chem_samples
compounds3<- merge(metadata4, feature_tab6, by = 'row.names')

#names(compounds3)

# permanova ####
library(labdsv)
## Loading required package: mgcv
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:raster':
## 
##     getData
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-36. For overview type 'help("mgcv-package")'.
## Registered S3 method overwritten by 'labdsv':
##   method       from
##   summary.dist ade4
## This is labdsv 2.0-1
## convert existing ordinations with as.dsvord()
## 
## Attaching package: 'labdsv'
## The following object is masked from 'package:randomForest':
## 
##     importance
## The following object is masked from 'package:rfPermute':
## 
##     importance
## The following object is masked from 'package:raster':
## 
##     density
## The following object is masked from 'package:stats':
## 
##     density
chemdata <- hellinger(compounds3[,c(23:ncol(compounds3))])
fit <-adonis2(chemdata ~  species, data= compounds3, permutations = 999,method = "bray") 
fit ### species are different in their chemistry
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = chemdata ~ species, data = compounds3, permutations = 999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## species   53   91.221 0.88975 32.129  0.001 ***
## Residual 211   11.303 0.11025                  
## Total    264  102.525 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# nmds and envift model ####
nmds <- metaMDS(chemdata, trace = FALSE, distance = "bray")
ef <- envfit(nmds, compounds3[,c(14,15)], permu = 999,  na.rm = TRUE)
ef
## 
## ***VECTORS
## 
##              NMDS1    NMDS2     r2 Pr(>r)    
## stem_age  -0.41404  0.91026 0.0226  0.066 .  
## crown_age -0.79982 -0.60023 0.0622  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## 5 observations deleted due to missingness
####

## diversity indices ####
S = specnumber(chemdata)
max(S)
## [1] 2163
min(S)
## [1] 314
H = diversity(chemdata)
log(max(H))
## [1] 1.990373
log(min(H))
## [1] 1.701143
chem_div <- cbind(compounds3[,c(1:22)], S, H )
names(chem_div)
##  [1] "Row.names"        "nb"               "species"          "species.y"       
##  [5] "row_chem_samples" "chem_samples"     "ID"               "native.endemic"  
##  [9] "plant"            "latitude"         "longitude"        "Height"          
## [13] "Diameter"         "stem_age"         "crown_age"        "herbivory"       
## [17] "X_1"              "Y_1"              "temperature_1"    "max_temp_1"      
## [21] "min_temp_1"       "precipitation_1"  "S"                "H"
#write.csv(chem_div, file = paste0(wd$output, "compounds_matrix.csv"))

summary(fit<- lm(S~ crown_age + temperature_1 + max_temp_1 + min_temp_1 +precipitation_1, data = chem_div))
## 
## Call:
## lm(formula = S ~ crown_age + temperature_1 + max_temp_1 + min_temp_1 + 
##     precipitation_1, data = chem_div)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -793.91 -205.79  -35.32  170.87  864.84 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2662.2906   907.0988   2.935 0.003635 ** 
## crown_age         -5.5300     7.6263  -0.725 0.469029    
## temperature_1   -569.2657   133.9815  -4.249 3.00e-05 ***
## max_temp_1       204.8599    43.3933   4.721 3.85e-06 ***
## min_temp_1       318.4010    85.8343   3.709 0.000254 ***
## precipitation_1   -0.2410     0.2708  -0.890 0.374362    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 316.9 on 259 degrees of freedom
## Multiple R-squared:  0.1518, Adjusted R-squared:  0.1355 
## F-statistic: 9.274 on 5 and 259 DF,  p-value: 3.873e-08
summary(fit<- lm(S~ crown_age , data = chem_div))
## 
## Call:
## lm(formula = S ~ crown_age, data = chem_div)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -936.94 -218.09  -30.54  186.56  914.81 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1247.9613    32.5102  38.387   <2e-16 ***
## crown_age      0.8039     7.8938   0.102    0.919    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 341.5 on 263 degrees of freedom
## Multiple R-squared:  3.944e-05,  Adjusted R-squared:  -0.003763 
## F-statistic: 0.01037 on 1 and 263 DF,  p-value: 0.919
summary(fit<- lm(H~ crown_age + temperature_1 + max_temp_1 + min_temp_1 +precipitation_1, data = chem_div))
## 
## Call:
## lm(formula = H ~ crown_age + temperature_1 + max_temp_1 + min_temp_1 + 
##     precipitation_1, data = chem_div)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.97462 -0.15240  0.00555  0.16583  0.71877 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      8.6117508  0.8108577  10.621  < 2e-16 ***
## crown_age       -0.0010005  0.0068172  -0.147    0.883    
## temperature_1   -0.6352630  0.1197664  -5.304 2.43e-07 ***
## max_temp_1       0.2079470  0.0387894   5.361 1.83e-07 ***
## min_temp_1       0.3669891  0.0767274   4.783 2.90e-06 ***
## precipitation_1 -0.0001521  0.0002421  -0.628    0.530    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2833 on 259 degrees of freedom
## Multiple R-squared:  0.2096, Adjusted R-squared:  0.1943 
## F-statistic: 13.74 on 5 and 259 DF,  p-value: 6.651e-12
Splot <- ggplot(chem_div, aes(x=S, y=crown_age)) + 
  geom_point()+
  geom_smooth(method=lm)+ 
  labs(x = "S", y = "Crown age") +theme_bw()

Hplot <-  ggplot(chem_div, aes(x=H, y=crown_age)) + 
  geom_point()+
  geom_smooth(method=lm)+ 
  labs(x = "H", y = "Crown age") + theme_bw()

library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:raster':
## 
##     rotate
 ggarrange(Splot, Hplot, ncol = 2, labels = c("A", "B"))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

####

full_tab<- merge(m3, compounds3, by.x = "ID", by.y = "ID")
#names(full_tab)

full_tab2 <- na.omit(full_tab)

traits_clim <- full_tab2[,c(11:15, 18:21)] 

compounds_only2 <- full_tab2[,c(43:ncol(compounds3))]+ 0.001

names(traits_clim) <- c("Height", "Diameter","Stem_age", "Crown_age", "Herbivory", "Temp_avg", "Max_temp", "Min_temp", "Precip_avg")
dbRDA = vegan::dbrda(compounds_only2 ~ Crown_age+ Temp_avg+ Max_temp +Min_temp+ Precip_avg, traits_clim, dist="bray")

plot(dbRDA) # use base plot, might be done with ggplot2

anova(dbRDA) # overall test of the significant of the analysis
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
## 
## Model: vegan::dbrda(formula = compounds_only2 ~ Crown_age + Temp_avg + Max_temp + Min_temp + Precip_avg, data = traits_clim, distance = "bray")
##           Df SumOfSqs     F Pr(>F)    
## Model      5   10.417 5.315  0.001 ***
## Residual 221   86.630                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(dbRDA, by="axis", perm.max=500) # test axes for significance
## Permutation test for dbrda under reduced model
## Forward tests for axes
## Permutation: free
## Number of permutations: 999
## 
## Model: vegan::dbrda(formula = compounds_only2 ~ Crown_age + Temp_avg + Max_temp + Min_temp + Precip_avg, data = traits_clim, distance = "bray")
##           Df SumOfSqs      F Pr(>F)    
## dbRDA1     1    2.544 6.4899  0.001 ***
## dbRDA2     1    2.276 5.8069  0.001 ***
## dbRDA3     1    2.236 5.7043  0.001 ***
## dbRDA4     1    1.790 4.5652  0.001 ***
## dbRDA5     1    1.571 4.0086  0.001 ***
## Residual 221   86.630                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(dbRDA, by="terms", permu=200) # test for sign. environ. variables
## Permutation test for dbrda under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## Model: vegan::dbrda(formula = compounds_only2 ~ Crown_age + Temp_avg + Max_temp + Min_temp + Precip_avg, data = traits_clim, distance = "bray")
##             Df SumOfSqs      F Pr(>F)    
## Crown_age    1    1.948 4.9705  0.001 ***
## Temp_avg     1    2.388 6.0912  0.001 ***
## Max_temp     1    1.920 4.8982  0.001 ***
## Min_temp     1    2.349 5.9927  0.001 ***
## Precip_avg   1    1.812 4.6223  0.001 ***
## Residual   221   86.630                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following object is masked from 'package:raster':
## 
##     crosstab
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
full_tab <- clean_names(full_tab)
#names(full_tab)

tempplot <-  ggplot(full_tab, aes(x=temperature_1_x, y=crown_age_x)) + 
  geom_point()+
  geom_smooth(method=lm)+ 
  labs(x = "Temp_avg", y = "Crown age") + theme_bw()


precipplot <-  ggplot(full_tab, aes(x=precipitation_1_x, y=crown_age_x)) + 
  geom_point()+
  geom_smooth(method=lm)+ 
  labs(x = "Precip_avg", y = "Crown age") + theme_bw()



ggarrange(Splot, 
          Hplot, 
          tempplot, 
          precipplot, 
          ncol = 2, nrow =2, labels = c("A", "B", "C", "D"))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Phylogeny

library(U.PhyloMaker)
## Loading required package: ape
## Warning: package 'ape' was built under R version 4.1.2
## 
## Attaching package: 'ape'
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## The following objects are masked from 'package:raster':
## 
##     rotate, zoom
## The following object is masked from 'package:dplyr':
## 
##     where
library(tidyverse)
library(ggplot2)
library(ape)
library(phytools)
## Loading required package: maps
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
## 
## Attaching package: 'phytools'
## The following object is masked from 'package:ape':
## 
##     drop.tip.multiPhylo
## The following object is masked from 'package:vegan':
## 
##     scores
library(ggtree)
## Warning: package 'ggtree' was built under R version 4.1.2
## ggtree v3.2.1  For help: https://yulab-smu.top/treedata-book/
## 
## If you use ggtree in published research, please cite the most appropriate paper(s):
## 
## 1. Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics. 2020, 69:e96. doi:10.1002/cpbi.96
## 2. Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution. 2018, 35(12):3041-3043. doi:10.1093/molbev/msy194
## 3. Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
## 
## Attaching package: 'ggtree'
## The following object is masked from 'package:ape':
## 
##     rotate
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## The following object is masked from 'package:nlme':
## 
##     collapse
## The following object is masked from 'package:randomForest':
## 
##     margin
## The following objects are masked from 'package:raster':
## 
##     flip, rotate
## The following object is masked from 'package:tidyr':
## 
##     expand
library(vegan)


### build data matrix ####

names(chem_div)
##  [1] "Row.names"        "nb"               "species"          "species.y"       
##  [5] "row_chem_samples" "chem_samples"     "ID"               "native.endemic"  
##  [9] "plant"            "latitude"         "longitude"        "Height"          
## [13] "Diameter"         "stem_age"         "crown_age"        "herbivory"       
## [17] "X_1"              "Y_1"              "temperature_1"    "max_temp_1"      
## [21] "min_temp_1"       "precipitation_1"  "S"                "H"
data <- compounds3 %>% dplyr::select(species, Height, Diameter, crown_age, stem_age,herbivory)

data2 <- cbind(data, S, H)

data2_av <- data2 %>% 
  dplyr::  group_by(species)%>% 
  dplyr::    summarise(across(everything(), list(mean)))

data3_av <- merge(data2_av, climate3,  by.x = "species", by.y = "spp")
names(data3_av)
##  [1] "species"         "Height_1"        "Diameter_1"      "crown_age_1"    
##  [5] "stem_age_1"      "herbivory_1"     "S_1"             "H_1"            
##  [9] "species.y"       "X_1"             "Y_1"             "temperature_1"  
## [13] "max_temp_1"      "min_temp_1"      "precipitation_1"
data3_av <- data3_av[,c(1,4,5,7,8,12:15)]

colnames(data3_av)<- colnames(data3_av) %>% str_replace("_1*", "")
rownames(data3_av) <- data3_av$species
data3_av <- data3_av[,-1]
names(data3_av) <- c(  "Crown_age", "Stem_age", "S", "H", "Temperature", "Max_temp", "Min_temp", "Precipitation")

data3_av <- as.data.frame(data3_av)


###pruning using the open tree of life ####
library(rotl)
remove <-m4_map$species
remove <- as.character(remove)
remove[33] <- c("Olea_europaea")
remove[9] <- c("Bosea_yervamora")
remove2 <- gsub("_", " ",remove)

library(taxize)
## 
## Attaching package: 'taxize'
## The following objects are masked from 'package:rotl':
## 
##     synonyms, tax_name, tax_rank
spp_ott <- get_tolid(as.character(remove2), ask = TRUE,messages=T)
## ══  54 queries  ══════════════
## 
## Retrieving data for taxon 'Aeonium cuneatum'
## ✔  Found:  Aeonium cuneatum
## 
## Retrieving data for taxon 'Anagyris latifolia'
## ✔  Found:  Anagyris latifolia
## 
## Retrieving data for taxon 'Apollonias barbujana'
## ✔  Found:  Apollonias barbujana
## 
## Retrieving data for taxon 'Argyranthemum adauctum'
## ✔  Found:  Argyranthemum adauctum
## 
## Retrieving data for taxon 'Argyranthemum broussonetii'
## ✔  Found:  Argyranthemum broussonetii
## 
## Retrieving data for taxon 'Asparagus pastorianus'
## ✔  Found:  Asparagus pastorianus
## 
## Retrieving data for taxon 'Astydamia latifolia'
## ✔  Found:  Astydamia latifolia
## 
## Retrieving data for taxon 'Bituminaria bituminosa'
## ✔  Found:  Bituminaria bituminosa
## 
## Retrieving data for taxon 'Bosea yervamora'
## ✔  Found:  Bosea yervamora
## 
## Retrieving data for taxon 'Bupleurum salicifolium'
## ✔  Found:  Bupleurum salicifolium
## 
## Retrieving data for taxon 'Campylanthus salsoloides'
## ✔  Found:  Campylanthus salsoloides
## 
## Retrieving data for taxon 'Canarina canariensis'
## ✔  Found:  Canarina canariensis
## 
## Retrieving data for taxon 'Carlina salicifolia'
## ✔  Found:  Carlina salicifolia
## 
## Retrieving data for taxon 'Cistus monspeliensis'
## ✔  Found:  Cistus monspeliensis
## 
## Retrieving data for taxon 'Convolvulus floridus'
## ✔  Found:  Convolvulus floridus
## 
## Retrieving data for taxon 'Crambe arborea'
## ✔  Found:  Crambe arborea
## 
## Retrieving data for taxon 'Crambe strigosa'
## ✔  Found:  Crambe strigosa
## 
## Retrieving data for taxon 'Daphne gnidium'
## ✔  Found:  Daphne gnidium
## 
## Retrieving data for taxon 'Dracunculus canariensis'
## ✔  Found:  Dracunculus canariensis
## 
## Retrieving data for taxon 'Echium decaisnei'
## ✔  Found:  Echium decaisnei
## 
## Retrieving data for taxon 'Euphorbia balsamifera'
## ✔  Found:  Euphorbia balsamifera
## 
## Retrieving data for taxon 'Gesnouinia arborea'
## ✔  Found:  Gesnouinia arborea
## 
## Retrieving data for taxon 'Globularia salicina'
## ✔  Found:  Globularia salicina
## 
## Retrieving data for taxon 'Hedera canariensis'
## ✔  Found:  Hedera canariensis
## 
## Retrieving data for taxon 'Helianthemum broussonetii'
## ✔  Found:  Helianthemum broussonetii
## 
## Retrieving data for taxon 'Hypericum canariense'
## ✔  Found:  Hypericum canariense
## 
## Retrieving data for taxon 'Juniperus cedrus'
## ✔  Found:  Juniperus cedrus
## 
## Retrieving data for taxon 'Kleinia neriifolia'
## ✔  Found:  Kleinia neriifolia
## 
## Retrieving data for taxon 'Limonium dendroides'
## ✔  Found:  Limonium dendroides
## 
## Retrieving data for taxon 'Lobularia canariensis'
## ✔  Found:  Lobularia canariensis
## 
## Retrieving data for taxon 'Lotus campylocladus'
## ✔  Found:  Lotus campylocladus
## 
## Retrieving data for taxon 'Ocotea foetens'
## ✔  Found:  Ocotea foetens
## 
## Retrieving data for taxon 'Olea europaea'
## ✔  Found:  Olea europaea
## 
## Retrieving data for taxon 'Pericallis appendiculata'
## ✔  Found:  Pericallis appendiculata
## 
## Retrieving data for taxon 'Pericallis cruenta'
## ✔  Found:  Pericallis cruenta
## 
## Retrieving data for taxon 'Pericallis lanata'
## ✔  Found:  Pericallis lanata
## 
## Retrieving data for taxon 'Periploca laevigata'
## ✔  Found:  Periploca laevigata
## 
## Retrieving data for taxon 'Persea indica'
## ✔  Found:  Persea indica
## 
## Retrieving data for taxon 'Pistacia lentiscus'
## ✔  Found:  Pistacia lentiscus
## 
## Retrieving data for taxon 'Plocama pendula'
## ✔  Found:  Plocama pendula
## 
## Retrieving data for taxon 'Ranunculus cortusifolius'
## ✔  Found:  Ranunculus cortusifolius
## 
## Retrieving data for taxon 'Rhamnus crenulata'
## ✔  Found:  Rhamnus crenulata
## 
## Retrieving data for taxon 'Rubia fruticosa'
## ✔  Found:  Rubia fruticosa
## 
## Retrieving data for taxon 'Ruta oreojasme'
## ✔  Found:  Ruta oreojasme
## 
## Retrieving data for taxon 'Salvia canariensis'
## ✔  Found:  Salvia canariensis
## 
## Retrieving data for taxon 'Scrophularia smithii'
## ✔  Found:  Scrophularia smithii
## 
## Retrieving data for taxon 'Sideritis oroteneriffae'
## ✔  Found:  Sideritis oroteneriffae
## 
## Retrieving data for taxon 'Sideroxylon canariense'
## ✔  Found:  Sideroxylon canariense
## 
## Retrieving data for taxon 'Silene berthelotiana'
## ✔  Found:  Silene berthelotiana
## 
## Retrieving data for taxon 'Silene nocteolens'
## ✔  Found:  Silene nocteolens
## 
## Retrieving data for taxon 'Sonchus canariensis'
## ✔  Found:  Sonchus canariensis
## 
## Retrieving data for taxon 'Sonchus congestus'
## ✔  Found:  Sonchus congestus
## 
## Retrieving data for taxon 'Teline canariensis'
## ✔  Found:  Teline canariensis
## 
## Retrieving data for taxon 'Volutaria canariensis'
## ✔  Found:  Volutaria canariensis
## ══  Results  ═════════════════
## 
## • Total: 54 
## • Found: 54 
## • Not Found: 0
taxa <- tnrs_match_names(names = remove)
tree <- tol_induced_subtree(ott_ids = spp_ott[1:54])
## 
Progress [---------------------------------] 0/687 (  0) ?s
Progress [==============================] 687/687 (100)  0s
                                                            
## Warning in collapse_singles(tr, show_progress): Dropping singleton nodes
## with labels: Magnoliopsida ott99252, mrcaott2ott2645, mrcaott2ott35778,
## mrcaott2ott10930, mrcaott2ott969, mrcaott2ott62529, mrcaott2ott8379,
## mrcaott2ott141150, mrcaott2ott8384, mrcaott2ott2737, mrcaott2ott1479,
## mrcaott2ott345, mrcaott345ott55887, mrcaott345ott57970, mrcaott345ott3949,
## mrcaott345ott22364, mrcaott345ott18689, mrcaott345ott177705, mrcaott345ott38722,
## mrcaott345ott24237, mrcaott345ott30307, mrcaott345ott504, Euphorbieae
## ott520955, mrcaott345ott1030065, Euphorbia ott109494, mrcaott8051ott17906,
## mrcaott17906ott554221, mrcaott17906ott638533, mrcaott17906ott90331,
## mrcaott90331ott136209, mrcaott136209ott332887, mrcaott136209ott452092,
## mrcaott3853ott8858, mrcaott3853ott210461, mrcaott3853ott3856,
## mrcaott3853ott24235, mrcaott3853ott55182, mrcaott3853ott3858, Hypericaceae
## ott873015, mrcaott3858ott9427, Hypericeae ott1001315, mrcaott9427ott58438,
## mrcaott124561ott887076, mrcaott124561ott283621, mrcaott124561ott354150,
## mrcaott371ott2511, Rosales ott208031, mrcaott371ott63303, mrcaott371ott42100,
## mrcaott371ott22991, mrcaott22991ott39118, mrcaott22991ott23000,
## mrcaott22991ott266711, mrcaott22991ott262532, mrcaott262532ott723994,
## mrcaott262532ott431744, mrcaott262532ott472752, mrcaott262532ott6111754,
## mrcaott262532ott630642, mrcaott630642ott724001, mrcaott773ott137975,
## mrcaott773ott122906, mrcaott773ott33367, mrcaott33367ott73222,
## mrcaott73222ott88661, mrcaott73222ott228771, mrcaott228771ott349967,
## mrcaott228771ott796228, mrcaott228771ott399959, mrcaott228771ott549757,
## mrcaott228771ott394064, mrcaott228771ott549747, mrcaott228771ott549740,
## mrcaott228771ott1025788, mrcaott1025788ott5786548, Fabales ott956360,
## mrcaott579ott179386, Fabaceae ott560323, mrcaott579ott146599, mrcaott579ott730,
## mrcaott579ott37050, mrcaott579ott2644, mrcaott579ott8603, mrcaott579ott8026,
## mrcaott579ott5468, mrcaott579ott5457, mrcaott579ott5462, mrcaott579ott3942,
## mrcaott579ott5470, mrcaott579ott5091, mrcaott579ott34386, robinioid clade
## ott7055625, mrcaott34386ott72192, Loteae ott1039849, mrcaott42465ott253732,
## mrcaott42465ott868657, mrcaott42465ott45130, mrcaott45130ott70013,
## mrcaott70013ott444907, mrcaott70013ott253736, mrcaott70013ott70014,
## mrcaott70015ott119025, mrcaott70015ott72883, mrcaott70015ott72875,
## mrcaott72875ott472044, mrcaott72875ott72876, mrcaott72875ott472039,
## mrcaott472039ott492420, mrcaott5515ott44910, mrcaott5515ott170171,
## mrcaott5515ott71551, mrcaott5515ott16972, mrcaott16972ott117879,
## mrcaott16972ott931944, mrcaott16972ott997717, mrcaott16972ott222824,
## mrcaott16972ott33188, mrcaott16972ott131682, mrcaott16972ott569265,
## mrcaott16972ott73347, mrcaott16972ott48574, mrcaott16972ott21757,
## mrcaott21757ott763833, mrcaott21757ott116123, mrcaott21757ott219566,
## mrcaott21757ott48568, mrcaott21757ott119902, mrcaott21757ott161299,
## mrcaott161299ott326398, mrcaott161299ott276495, mrcaott161299ott276474,
## mrcaott276474ott807795, mrcaott5473ott26267, mrcaott5473ott590008,
## mrcaott5473ott46797, mrcaott5473ott165471, mrcaott5473ott16745,
## mrcaott16745ott33319, mrcaott16745ott50459, mrcaott16745ott181566,
## mrcaott16745ott259869, mrcaott259869ott356276, Anagyris ott259866,
## mrcaott8031ott33311, mrcaott8031ott31847, mrcaott8031ott583408,
## mrcaott8031ott113168, mrcaott8031ott62410, mrcaott8031ott53575,
## mrcaott8031ott398900, mrcaott8031ott8519, mrcaott8031ott242561,
## mrcaott8031ott33315, mrcaott71860ott204086, mrcaott71860ott255204,
## mrcaott71860ott71864, mrcaott71864ott255207, mrcaott255207ott517595,
## mrcaott255207ott461287, mrcaott255207ott517587, mrcaott711180ott1044110,
## mrcaott96ott607, mrcaott96ott14140, mrcaott96ott50744, mrcaott96ott24291,
## mrcaott96ott84975, mrcaott96ott9337, mrcaott96ott655994, mrcaott96ott982264,
## mrcaott96ott21231, mrcaott21231ott21233, mrcaott21237ott54041,
## mrcaott21237ott34792, mrcaott21237ott243064, mrcaott21237ott33358,
## mrcaott21237ott21239, mrcaott21239ott133920, mrcaott21239ott66920,
## mrcaott21239ott77288, mrcaott21239ott131835, mrcaott131835ott190736,
## mrcaott131835ott5748087, Pistacia ott200790, mrcaott131835ott565880,
## mrcaott1860ott63812, mrcaott1860ott582623, mrcaott1860ott2373,
## mrcaott1860ott11645, mrcaott19688ott81680, mrcaott19688ott260396,
## mrcaott19688ott30220, mrcaott30220ott70410, mrcaott70410ott123371, Ruta
## ott596622, mrcaott397678ott504806, mrcaott504806ott504808, mrcaott378ott29446,
## Brassicales ott8844, mrcaott378ott307071, mrcaott378ott32461,
## mrcaott378ott509555, mrcaott378ott114659, mrcaott378ott318175,
## mrcaott378ott9635, mrcaott378ott125843, mrcaott378ott509568,
## mrcaott378ott28763, mrcaott378ott261456, mrcaott378ott83547, Brassicaceae
## ott309271, mrcaott378ott4077, mrcaott4077ott58909, mrcaott4077ott4671,
## mrcaott4671ott4673, mrcaott4673ott19457, mrcaott19457ott577356, Lobularia
## (genus in kingdom Archaeplastida) ott213309, mrcaott19457ott196713,
## mrcaott4679ott4691, mrcaott4679ott76833, mrcaott4679ott11023,
## mrcaott11023ott25468, mrcaott25468ott38207, mrcaott38207ott76827,
## mrcaott76827ott97503, mrcaott97503ott233198, mrcaott233198ott355455,
## Malvales ott229284, mrcaott1697ott247869, mrcaott1697ott3190, Thymelaeaceae
## ott1015625, mrcaott1697ott191888, mrcaott1697ott141225, mrcaott1697ott179896,
## mrcaott1697ott141227, mrcaott1697ott44291, mrcaott44291ott3942990,
## mrcaott44291ott268293, mrcaott44291ott90513, mrcaott90513ott99350,
## mrcaott90513ott368588, mrcaott90513ott140095, mrcaott140095ott140098,
## mrcaott140095ott716475, mrcaott716475ott996171, mrcaott716475ott716477,
## mrcaott8010ott711767, mrcaott8010ott615554, mrcaott8010ott287352,
## mrcaott8010ott19033, Cistaceae ott343135, mrcaott8010ott714342,
## mrcaott8010ott177672, mrcaott177672ott228844, mrcaott177672ott343141,
## mrcaott177672ott228842, mrcaott177672ott177677, mrcaott177672ott177675,
## mrcaott177675ott752025, mrcaott177675ott228832, Saxifragales ott648890,
## mrcaott2464ott148590, mrcaott2464ott4115, mrcaott4115ott26027,
## mrcaott4115ott45014, Crassulaceae ott509332, mrcaott4115ott61169,
## mrcaott4115ott121860, mrcaott4115ott7978, mrcaott4115ott581648,
## mrcaott4115ott56181, mrcaott4115ott120798, mrcaott4115ott286743,
## mrcaott4115ott31402, mrcaott31402ott526119, mrcaott31402ott833582,
## mrcaott31402ott58800, mrcaott31402ott185016, mrcaott31402ott31404,
## mrcaott31404ott82926, mrcaott248ott10053, mrcaott248ott20991,
## mrcaott248ott16734, mrcaott248ott100191, mrcaott248ott979274,
## mrcaott248ott101831, mrcaott248ott26035, Lamiales ott23736, mrcaott248ott55259,
## Core_Lamiales ott5263556, mrcaott248ott55260, mrcaott248ott36730,
## mrcaott248ott54652, mrcaott248ott12399, mrcaott12535ott40245,
## mrcaott12535ott68966, mrcaott68966ott716280, mrcaott68966ott79139,
## Campylanthus ott673583, Globularieae ott182047, Globularia (genus in kingdom
## Archaeplastida) ott628260, mrcaott22352ott34574, mrcaott22352ott29664,
## mrcaott22352ott150365, mrcaott22352ott34571, mrcaott34571ott34580,
## mrcaott34571ott35629, mrcaott1016ott25224, mrcaott1016ott1452,
## mrcaott1452ott5046, mrcaott1452ott2133, mrcaott1452ott1605, Lamiaceae
## ott544714, mrcaott1605ott89672, mrcaott1605ott5950, mrcaott1605ott35635,
## mrcaott1605ott99347, mrcaott1605ott42829, mrcaott1605ott11338,
## mrcaott1605ott23545, mrcaott1605ott4689, mrcaott1605ott61906, Salvia
## ott374096, mrcaott61887ott61904, mrcaott61887ott287400, mrcaott61887ott499330,
## mrcaott61887ott61891, mrcaott61887ott523318, mrcaott61887ott61990,
## mrcaott61990ott1025149, mrcaott61990ott287416, mrcaott287416ott448587,
## mrcaott448587ott1025163, mrcaott9739ott161831, mrcaott9739ott41508,
## mrcaott9739ott960105, mrcaott9739ott107007, mrcaott9739ott92771,
## mrcaott9739ott679483, mrcaott9739ott105513, mrcaott9739ott18578, Stachydeae
## ott534796, mrcaott11341ott371758, Oleaceae ott23723, mrcaott11341ott143704,
## mrcaott11341ott15055, mrcaott15055ott220881, mrcaott15055ott89208,
## mrcaott15055ott89212, mrcaott15055ott22164, mrcaott22164ott721525,
## mrcaott22164ott115501, mrcaott22164ott721523, mrcaott22164ott115506,
## mrcaott2108ott12524, mrcaott25419ott375209, mrcaott25419ott482404,
## mrcaott25419ott336060, mrcaott25419ott150290, mrcaott25419ott27104,
## mrcaott27104ott27126, mrcaott42819ott805487, mrcaott42819ott432867,
## mrcaott42819ott156208, mrcaott42819ott57976, mrcaott57976ott854256,
## mrcaott57976ott266688, mrcaott266688ott279683, mrcaott279683ott791515, Echium
## ott411085, mrcaott1191ott381067, mrcaott1191ott2380, mrcaot
plot(tree, cex = .8, label.offset = .1, no.margin = TRUE)

is.rooted(tree)
## [1] TRUE
length(tree$tip.label)
## [1] 54
tree$tip.label
##  [1] "Euphorbia_balsamifera_ott567441"     
##  [2] "Hypericum_canariense_ott371982"      
##  [3] "Gesnouinia_arborea_ott724001"        
##  [4] "Rhamnus_crenulata_ott1025788"        
##  [5] "Lotus_campylocladus_ott472039"       
##  [6] "Bituminaria_bituminosa_ott807795"    
##  [7] "Anagyris_latifolia_ott1078490"       
##  [8] "Genista_canariensis_ott711180"       
##  [9] "Pistacia_lentiscus_ott565880"        
## [10] "Ruta_oreojasme_ott504806"            
## [11] "Lobularia_canariensis_ott196715"     
## [12] "Crambe_arborea_ott999952"            
## [13] "Crambe_strigosa_ott423022"           
## [14] "Daphne_gnidium_ott716475"            
## [15] "Cistus_monspeliensis_ott177675"      
## [16] "Helianthemum_broussonetii_ott6114584"
## [17] "Aeonium_cuneatum_ott82926"           
## [18] "Campylanthus_salsoloides_ott673575"  
## [19] "Globularia_salicina_ott682443"       
## [20] "Scrophularia_smithii_ott5232555"     
## [21] "Salvia_canariensis_ott1025163"       
## [22] "Sideritis_oroteneriffae_ott427482"   
## [23] "Olea_europaea_ott23729"              
## [24] "Echium_decaisnei_ott492069"          
## [25] "Convolvulus_floridus_ott759189"      
## [26] "Plocama_pendula_ott589085"           
## [27] "Rubia_fruticosa_ott294135"           
## [28] "Periploca_laevigata_ott270064"       
## [29] "Sonchus_congestus_ott761946"         
## [30] "Sonchus_canariensis_ott451166"       
## [31] "Kleinia_neriifolia_ott65656"         
## [32] "Pericallis_cruenta_ott81124"         
## [33] "Pericallis_lanata_ott524438"         
## [34] "Pericallis_appendiculata_ott812712"  
## [35] "Argyranthemum_adauctum_ott614013"    
## [36] "Argyranthemum_broussonetii_ott531430"
## [37] "Carlina_salicifolia_ott173354"       
## [38] "Volutaria_canariensis_ott3894373"    
## [39] "Canarina_canariensis_ott512094"      
## [40] "Bupleurum_salicifolium_ott36103"     
## [41] "Astydamia_latifolia_ott620219"       
## [42] "Hedera_canariensis_ott28373"         
## [43] "Sideroxylon_canariense_ott3901042"   
## [44] "Silene_nocteolens_ott415821"         
## [45] "Silene_berthelotiana_ott4724314"     
## [46] "Bosea_yervamora_ott674499"           
## [47] "Limonium_dendroides_ott416250"       
## [48] "Ranunculus_cortusifolius_ott856086"  
## [49] "Asparagus_pastorianus_ott3996371"    
## [50] "Dracunculus_canariensis_ott1019046"  
## [51] "Persea_indica_ott9684"               
## [52] "Apollonias_barbujana_ott297736"      
## [53] "Ocotea_foetens_ott495895"            
## [54] "Juniperus_cedrus_ott824129"
library(stringr)
new_tip_labels <- word(tree$tip.label, 1,2, sep="_")

setdiff(new_tip_labels, rownames(data3_av))
## [1] "Genista_canariensis" "Olea_europaea"       "Bosea_yervamora"
data4_av <- data3_av
rownames(data3_av)[53]
## [1] "Teline_canariensis"
rownames(data4_av)[53] <- c("Genista_canariensis")
rownames(data4_av)[33] <- c("Olea_europaea")
rownames(data4_av)[9] <- c("Bosea_yervamora")

setdiff(new_tip_labels, rownames(data4_av))
## character(0)
tree$tip.label <- new_tip_labels

library(picante)
phydatrtol<-match.phylo.data(tree, as.data.frame(data4_av))
phydatrtol$data
##                            Crown_age Stem_age       S        H Temperature
## Euphorbia_balsamifera           3.40     4.30  983.00 6.536296   20.239377
## Hypericum_canariense            1.90    10.80 1438.20 6.803109   17.822882
## Gesnouinia_arborea              0.25     1.51  704.00 6.226687   16.420971
## Rhamnus_crenulata               1.71     3.88 1372.40 6.845837   18.443485
## Lotus_campylocladus             1.28     7.86 1463.20 6.993179   14.190988
## Bituminaria_bituminosa          1.02     1.59 2018.40 7.223677   17.073511
## Anagyris_latifolia              1.90     1.90 1559.00 7.007849   17.803131
## Genista_canariensis             2.70     4.00 1327.20 6.755713   17.617273
## Pistacia_lentiscus              1.15     1.76  978.00 6.447311   18.336957
## Ruta_oreojasme                  8.10    18.00 1322.80 6.827975   19.295055
## Lobularia_canariensis           6.31       NA 1092.60 6.552086   16.570343
## Crambe_arborea                  8.20     0.98 1164.00 6.702697   17.588824
## Crambe_strigosa                 8.20     0.98 1217.40 6.740096   17.598704
## Daphne_gnidium                  1.96     4.36 1178.20 6.608149   15.949155
## Cistus_monspeliensis            0.23     0.50 1765.00 7.037673   17.470807
## Helianthemum_broussonetii       1.09     1.30 1009.20 6.496212   16.624000
## Aeonium_cuneatum                6.80     0.68  900.80 6.479980   17.227857
## Campylanthus_salsoloides        0.70     0.90  925.80 6.318969   19.413551
## Globularia_salicina             0.20     0.30 1409.80 6.776145   18.263882
## Scrophularia_smithii            3.40     5.90 1247.60 6.719808   16.345714
## Salvia_canariensis              5.54    17.14 1168.20 6.390525   19.314101
## Sideritis_oroteneriffae         3.30     1.21 1767.40 7.131125   13.252222
## Olea_europaea                   0.28     2.60 2066.80 7.271051   18.178524
## Echium_decaisnei                3.70     7.70  420.75 5.711227   19.837377
## Convolvulus_floridus            0.59     1.60 1335.20 6.773061   18.719355
## Plocama_pendula                 7.31    30.26 1203.20 6.556236   20.135959
## Rubia_fruticosa                 2.10     1.79  891.40 6.357727   18.705345
## Periploca_laevigata             0.10     0.40 1115.50 6.614944   19.167134
## Sonchus_congestus               8.50    13.20  900.60 6.368211   17.905827
## Sonchus_canariensis             8.50    13.20 1204.75 6.711835   16.443443
## Kleinia_neriifolia              1.40     3.30 1010.60 6.510741   19.137996
## Pericallis_cruenta              4.80     1.37 1716.00 7.096605   15.941905
## Pericallis_lanata               4.80     1.37 1833.00 7.161657   16.788495
## Pericallis_appendiculata        4.80     1.37 1393.80 6.926299   16.314713
## Argyranthemum_adauctum          2.48     0.20 1625.60 7.053378   14.865643
## Argyranthemum_broussonetii      2.48     0.20 1395.00 6.815979   17.266750
## Carlina_salicifolia             3.78     4.27 1216.40 6.714993   17.291975
## Volutaria_canariensis           1.10     3.00 1421.40 6.916759   20.129143
## Canarina_canariensis            0.90     6.40 1210.40 6.684933   17.459583
## Bupleurum_salicifolium          9.49    15.74 1785.60 7.038676   16.233333
## Astydamia_latifolia             1.13    33.73  543.20 5.761647   20.217895
## Hedera_canariensis              0.90     6.60 1292.40 6.773738   16.024239
## Sideroxylon_canariense          7.16    38.86 1367.20 6.828866   17.951990
## Silene_nocteolens               0.91     1.35 1300.75 6.702233    8.205102
## Silene_berthelotiana            0.91     1.35 1238.00 6.675502   11.223906
## Bosea_yervamora                 1.64     5.71 1118.80 6.608457   18.381758
## Limonium_dendroides             6.00    18.84 1116.40 6.604272   18.109048
## Ranunculus_cortusifolius        4.60     5.30  997.25 6.535248   15.684852
## Asparagus_pastorianus           1.75     2.80 1078.60 6.466651   19.157059
## Dracunculus_canariensis         2.76    12.10 1173.80 6.680532   17.598889
## Persea_indica                   0.90     2.60 1002.80 6.456685   16.174829
## Apollonias_barbujana            1.10     3.80  915.20 6.425263   17.367348
## Ocotea_foetens                  1.60     1.80  900.80 6.433766   16.655231
## Juniperus_cedrus                2.50     7.50 1480.40 6.932629   12.451898
##                            Max_temp  Min_temp Precipitation
## Euphorbia_balsamifera      27.42097 13.660511      193.2029
## Hypericum_canariense       25.81850 10.808676      452.6176
## Gesnouinia_arborea         25.00330  9.421068      634.7184
## Rhamnus_crenulata          25.88216 12.287593      409.8963
## Lotus_campylocladus        25.07163  6.032209      531.3372
## Bituminaria_bituminosa     25.96018  9.822876      468.6723
## Anagyris_latifolia         26.32864 10.954242      370.1970
## Genista_canariensis        25.13107 11.045620      486.6116
## Pistacia_lentiscus         26.09522 11.438696      291.3913
## Ruta_oreojasme             30.80451 11.410769      195.7802
## Lobularia_canariensis      25.71296  9.050976      520.4617
## Crambe_arborea             25.24647 10.994118      427.0588
## Crambe_strigosa            25.35500 11.041667      477.9259
## Daphne_gnidium             24.40465  8.862394      592.2958
## Cistus_monspeliensis       26.72961  9.977009      389.6917
## Helianthemum_broussonetii  24.27200 10.106000      557.4667
## Aeonium_cuneatum           24.70071 10.657500      500.9286
## Campylanthus_salsoloides   27.78087 12.765000      246.6594
## Globularia_salicina        25.98737 11.896743      453.6480
## Scrophularia_smithii       24.98939  9.264082      544.5714
## Salvia_canariensis         28.58815 11.531164      220.8439
## Sideritis_oroteneriffae    23.77778  4.789556      469.6444
## Olea_europaea              26.64586 11.373714      374.3905
## Echium_decaisnei           28.92930 12.151974      175.4364
## Convolvulus_floridus       26.24286 12.502028      395.7880
## Plocama_pendula            28.40855 13.249737      187.6724
## Rubia_fruticosa            26.35274 11.900195      366.3628
## Periploca_laevigata        26.71496 12.374887      335.8326
## Sonchus_congestus          25.28633 11.563237      440.5971
## Sonchus_canariensis        25.88869  8.949016      414.6721
## Kleinia_neriifolia         27.14963 12.135127      304.2373
## Pericallis_cruenta         24.76048  8.798095      522.0952
## Pericallis_lanata          25.83194  9.490215      368.0430
## Pericallis_appendiculata   24.67506  9.549885      627.3908
## Argyranthemum_adauctum     26.13414  6.090571      508.6071
## Argyranthemum_broussonetii 24.62250 10.708500      507.2250
## Carlina_salicifolia        25.94391  9.987069      410.7422
## Volutaria_canariensis      27.74600 13.061286      216.6286
## Canarina_canariensis       25.58783 10.611917      508.8083
## Bupleurum_salicifolium     25.88362  8.764396      529.3430
## Astydamia_latifolia        26.89569 13.494258      230.9378
## Hedera_canariensis         24.89315  8.998804      695.6902
## Sideroxylon_canariense     25.83188 11.475340      445.3403
## Silene_nocteolens          21.56531 -2.122041      360.2857
## Silene_berthelotiana       23.94617  1.019141      366.9688
## Bosea_yervamora            26.14956 12.004945      402.5495
## Limonium_dendroides        26.95857 11.593333      375.6190
## Ranunculus_cortusifolius   25.11834  7.868876      533.8698
## Asparagus_pastorianus      26.62176 12.696471      280.3529
## Dracunculus_canariensis    26.04759 10.630926      476.3148
## Persea_indica              25.00047  9.288077      666.2436
## Apollonias_barbujana       25.43033 10.542652      549.5746
## Ocotea_foetens             25.25108  9.779538      597.2615
## Juniperus_cedrus           24.08479  3.728054      552.0511
phydatrtol$phy
## 
## Phylogenetic tree with 54 tips and 51 internal nodes.
## 
## Tip labels:
##   Euphorbia_balsamifera, Hypericum_canariense, Gesnouinia_arborea, Rhamnus_crenulata, Lotus_campylocladus, Bituminaria_bituminosa, ...
## Node labels:
##   Spermatophyta ott10218, Mesangiospermae ott5298374, mrcaott2ott121, mrcaott2ott2441, mrcaott2ott248, mrcaott2ott2464, ...
## 
## Rooted; no branch lengths.
###phylo4d
library(phylobase)
## 
## Attaching package: 'phylobase'
## The following object is masked from 'package:taxize':
## 
##     children
## The following object is masked from 'package:ggtree':
## 
##     MRCA
## The following object is masked from 'package:phytools':
## 
##     readNexus
## The following object is masked from 'package:ape':
## 
##     edges
## The following object is masked from 'package:rpart':
## 
##     prune
p4_tree <- as(phydatrtol$phy, "phylo4")

traits <- phydatrtol$data[,c(3,4)]

traits
##                                  S        H
## Euphorbia_balsamifera       983.00 6.536296
## Hypericum_canariense       1438.20 6.803109
## Gesnouinia_arborea          704.00 6.226687
## Rhamnus_crenulata          1372.40 6.845837
## Lotus_campylocladus        1463.20 6.993179
## Bituminaria_bituminosa     2018.40 7.223677
## Anagyris_latifolia         1559.00 7.007849
## Genista_canariensis        1327.20 6.755713
## Pistacia_lentiscus          978.00 6.447311
## Ruta_oreojasme             1322.80 6.827975
## Lobularia_canariensis      1092.60 6.552086
## Crambe_arborea             1164.00 6.702697
## Crambe_strigosa            1217.40 6.740096
## Daphne_gnidium             1178.20 6.608149
## Cistus_monspeliensis       1765.00 7.037673
## Helianthemum_broussonetii  1009.20 6.496212
## Aeonium_cuneatum            900.80 6.479980
## Campylanthus_salsoloides    925.80 6.318969
## Globularia_salicina        1409.80 6.776145
## Scrophularia_smithii       1247.60 6.719808
## Salvia_canariensis         1168.20 6.390525
## Sideritis_oroteneriffae    1767.40 7.131125
## Olea_europaea              2066.80 7.271051
## Echium_decaisnei            420.75 5.711227
## Convolvulus_floridus       1335.20 6.773061
## Plocama_pendula            1203.20 6.556236
## Rubia_fruticosa             891.40 6.357727
## Periploca_laevigata        1115.50 6.614944
## Sonchus_congestus           900.60 6.368211
## Sonchus_canariensis        1204.75 6.711835
## Kleinia_neriifolia         1010.60 6.510741
## Pericallis_cruenta         1716.00 7.096605
## Pericallis_lanata          1833.00 7.161657
## Pericallis_appendiculata   1393.80 6.926299
## Argyranthemum_adauctum     1625.60 7.053378
## Argyranthemum_broussonetii 1395.00 6.815979
## Carlina_salicifolia        1216.40 6.714993
## Volutaria_canariensis      1421.40 6.916759
## Canarina_canariensis       1210.40 6.684933
## Bupleurum_salicifolium     1785.60 7.038676
## Astydamia_latifolia         543.20 5.761647
## Hedera_canariensis         1292.40 6.773738
## Sideroxylon_canariense     1367.20 6.828866
## Silene_nocteolens          1300.75 6.702233
## Silene_berthelotiana       1238.00 6.675502
## Bosea_yervamora            1118.80 6.608457
## Limonium_dendroides        1116.40 6.604272
## Ranunculus_cortusifolius    997.25 6.535248
## Asparagus_pastorianus      1078.60 6.466651
## Dracunculus_canariensis    1173.80 6.680532
## Persea_indica              1002.80 6.456685
## Apollonias_barbujana        915.20 6.425263
## Ocotea_foetens              900.80 6.433766
## Juniperus_cedrus           1480.40 6.932629
class(traits)
## [1] "data.frame"
setdiff(phydatrtol$phy$tip.label, rownames(traits))
## character(0)
tree4d <- phylo4d( phydatrtol$phy, tip.data= phydatrtol$data[,c(3,4)])

class(tree4d)
## [1] "phylo4d"
## attr(,"package")
## [1] "phylobase"
library(phylosignal)
## 
## Attaching package: 'phylosignal'
## The following object is masked from 'package:lattice':
## 
##     dotplot
barplot(tree4d, center = T, scale = T)
## Warning in asMethod(object): trees with unknown order may be unsafe in ape

###
library(adephylo)
nodesrtol <- as.matrix(distTips(phydatrtol$phy,1:3,"nNodes"))

nodesrtol2<- as.data.frame(nodesrtol[,54])

setdiff(rownames(phydatrtol$data), rownames(nodesrtol2))
## character(0)
data_nodes <- merge(phydatrtol$data, nodesrtol2, by=0, all=TRUE)
names(data_nodes)[10] <- c("nNodes")

#write.csv(data_nodes, file = paste0(wd$output, "data_nodes.csv"))


#Crown_age", "Herbivory", "S", "H", "Temperature", "Max_temp", "Min_temp", "Precipitation"

summary(fit<- lm(H~ Crown_age + Stem_age +nNodes + Temperature + Max_temp + Min_temp +Precipitation, data = data_nodes))
## 
## Call:
## lm(formula = H ~ Crown_age + Stem_age + nNodes + Temperature + 
##     Max_temp + Min_temp + Precipitation, data = data_nodes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72575 -0.10924  0.00675  0.14407  0.55500 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    8.018e+00  1.729e+00   4.638 3.04e-05 ***
## Crown_age      4.058e-03  1.651e-02   0.246  0.80691    
## Stem_age      -6.051e-03  5.393e-03  -1.122  0.26781    
## nNodes         2.675e-02  1.176e-02   2.276  0.02767 *  
## Temperature   -6.723e-01  2.586e-01  -2.599  0.01259 *  
## Max_temp       2.347e-01  8.365e-02   2.806  0.00738 ** 
## Min_temp       3.890e-01  1.658e-01   2.346  0.02344 *  
## Precipitation  8.123e-06  5.163e-04   0.016  0.98752    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2706 on 45 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3463, Adjusted R-squared:  0.2446 
## F-statistic: 3.405 on 7 and 45 DF,  p-value: 0.005283
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
Havplot <-avPlots(fit) #produce added variable plots

Havplot_tab <- do.call(data.frame, Havplot)
names(Havplot_tab)
##  [1] "Crown_age.Crown_age"         "Crown_age.H"                
##  [3] "Stem_age.Stem_age"           "Stem_age.H"                 
##  [5] "nNodes.nNodes"               "nNodes.H"                   
##  [7] "Temperature.Temperature"     "Temperature.H"              
##  [9] "Max_temp.Max_temp"           "Max_temp.H"                 
## [11] "Min_temp.Min_temp"           "Min_temp.H"                 
## [13] "Precipitation.Precipitation" "Precipitation.H"
summary(fit<- lm(S~ Crown_age + nNodes + Temperature + Max_temp + Min_temp +Precipitation, data = data_nodes))
## 
## Call:
## lm(formula = S ~ Crown_age + nNodes + Temperature + Max_temp + 
##     Min_temp + Precipitation, data = data_nodes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -626.14 -153.75  -14.15  146.12  714.00 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   2408.59543 1849.30032   1.302   0.1991   
## Crown_age      -11.41907   15.90105  -0.718   0.4762   
## nNodes          37.14520   12.54568   2.961   0.0048 **
## Temperature   -671.10070  278.41911  -2.410   0.0199 * 
## Max_temp       241.75687   91.28867   2.648   0.0110 * 
## Min_temp       380.87139  178.30612   2.136   0.0379 * 
## Precipitation   -0.09432    0.55349  -0.170   0.8654   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 296.8 on 47 degrees of freedom
## Multiple R-squared:  0.2951, Adjusted R-squared:  0.2052 
## F-statistic:  3.28 on 6 and 47 DF,  p-value: 0.008893
library(car)
Savplot <- avPlots(fit) #produce added variable plot

Savplot_tab <- do.call(data.frame, Savplot)

############### for figure 

rownames(data_nodes) <- data_nodes$Row.names
data_nodes <- data_nodes[,-1]
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
mdata_nodes<- melt(data_nodes, id.vars = c("nNodes"))

p <- ggplot(mdata_nodes, aes(y=value, x=nNodes)) + 
  geom_point()+
  labs( x = "Nb of nodes", y = "Value")

p + facet_wrap(~ variable , ncol = 2, scales = "free") + 
  theme_bw()+ 
  theme(legend.position = "none")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

###
names(data_nodes)
## [1] "Crown_age"     "Stem_age"      "S"             "H"            
## [5] "Temperature"   "Max_temp"      "Min_temp"      "Precipitation"
## [9] "nNodes"
hplot <- ggplot(data_nodes, aes(y=H, x=nNodes)) + 
  geom_point()+
  labs( x = "Nb of nodes", y = "Chemical diversity (Shannon)")+
  theme_bw()

  
splot <- ggplot(data_nodes, aes(y=S, x=nNodes)) + 
  geom_point()+
  geom_smooth(method=lm)+
  labs( x = "Nb of nodes", y = "Chemical diversity (# compounds)")+
  theme_bw()

  
  ##
library(ggpubr)

ggarrange(
           splot, 
            hplot, 
            ncol = 2, labels = c("B", "C"))
## `geom_smooth()` using formula = 'y ~ x'

summary(fit <- lm(nNodes ~ S, data = data_nodes))
## 
## Call:
## lm(formula = nNodes ~ S, data = data_nodes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.7089  -1.2544   0.1027   2.7259   4.8370 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.582374   1.728066   3.809  0.00037 ***
## S           0.002787   0.001341   2.079  0.04256 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.25 on 52 degrees of freedom
## Multiple R-squared:  0.07675,    Adjusted R-squared:  0.05899 
## F-statistic: 4.323 on 1 and 52 DF,  p-value: 0.04256
summary(fit <- lm(nNodes ~ H, data = data_nodes))
## 
## Call:
## lm(formula = nNodes ~ H, data = data_nodes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.626  -1.088   0.169   2.567   4.767 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -5.118      9.838  -0.520    0.605
## H              2.271      1.471   1.544    0.129
## 
## Residual standard error: 3.307 on 52 degrees of freedom
## Multiple R-squared:  0.04384,    Adjusted R-squared:  0.02545 
## F-statistic: 2.384 on 1 and 52 DF,  p-value: 0.1286

Superclass analysis

superclass <- compounds2[, c(31, 46:ncol(compounds2))]
superclass_av <- superclass %>% 
  dplyr::  group_by(`NPC#superclass`)%>% 
  dplyr::    summarise(across(everything(), list(mean)))


#names(superclass_av)
superclass_av3 <- superclass_av[,-1]
superclass_av3 <- as.data.frame(superclass_av3)
rownames(superclass_av3) <- superclass_av$`NPC#superclass`

superclass_av4 <- as.data.frame(t(superclass_av3))

library(stringr)
rownames(superclass_av4)<- rownames(superclass_av4) %>% str_replace("_1*", "")

superclass_tab<- merge(metadata4, superclass_av4, by = 'row.names')

names(superclass_tab)
##  [1] "Row.names"                           "nb"                                 
##  [3] "species"                             "species.y"                          
##  [5] "row_chem_samples"                    "chem_samples"                       
##  [7] "ID"                                  "native.endemic"                     
##  [9] "plant"                               "latitude"                           
## [11] "longitude"                           "Height"                             
## [13] "Diameter"                            "stem_age"                           
## [15] "crown_age"                           "herbivory"                          
## [17] "X_1"                                 "Y_1"                                
## [19] "temperature_1"                       "max_temp_1"                         
## [21] "min_temp_1"                          "precipitation_1"                    
## [23] "Alkylresorcinols"                    "Amino acid glycosides"              
## [25] "Aminosugars and aminoglycosides"     "Anthranilic acid alkaloids"         
## [27] "Apocarotenoids"                      "Aromatic polyketides"               
## [29] "Carotenoids (C40)"                   "Carotenoids (C50)"                  
## [31] "Chromanes"                           "Coumarins"                          
## [33] "Cyclic polyketides"                  "Diarylheptanoids"                   
## [35] "Diazotetronic acids and derivatives" "Diphenyl ethers (DPEs)"             
## [37] "Diterpenoids"                        "Docosanoids"                        
## [39] "Eicosanoids"                         "Fatty Acids and Conjugates"         
## [41] "Fatty acyl glycosides"               "Fatty acyls"                        
## [43] "Fatty amides"                        "Fatty esters"                       
## [45] "Flavonoids"                          "Fluorenes"                          
## [47] "Glycerolipids"                       "Glycerophospholipids"               
## [49] "Guanidine alkaloids"                 "Histidine alkaloids"                
## [51] "Isoflavonoids"                       "Lignans"                            
## [53] "Linear polyketides"                  "Lysine alkaloids"                   
## [55] "Macrolides"                          "Meroterpenoids"                     
## [57] "Mitomycin derivatives"               "Monoterpenoids"                     
## [59] "Mycosporine derivatives"             "Naphthalenes"                       
## [61] "Nicotinic acid alkaloids"            "Nucleosides"                        
## [63] "Octadecanoids"                       "Oligopeptides"                      
## [65] "Ornithine alkaloids"                 "Peptide alkaloids"                  
## [67] "Phenanthrenoids"                     "Phenolic acids (C6-C1)"             
## [69] "Phenylethanoids (C6-C2)"             "Phenylpropanoids (C6-C3)"           
## [71] "Phloroglucinols"                     "Polycyclic aromatic polyketides"    
## [73] "Polyethers"                          "Polyols"                            
## [75] "Polyprenols"                         "Proline alkaloids"                  
## [77] "Pseudoalkaloids"                     "Pseudoalkaloids (transamidation)"   
## [79] "Saccharides"                         "Serine alkaloids"                   
## [81] "Sesquiterpenoids"                    "Small peptides"                     
## [83] "Sphingolipids"                       "Steroids"                           
## [85] "Stilbenoids"                         "Styrylpyrones"                      
## [87] "Terphenyls"                          "Triterpenoids"                      
## [89] "Tropolones"                          "Tryptophan alkaloids"               
## [91] "Tyrosine alkaloids"                  "Xanthones"                          
## [93] "β-lactams"
###
superclass_means <-  aggregate(superclass_tab[,c(23:ncol(superclass_tab))], by=list( superclass_tab$species), FUN=mean, na.rm=TRUE)
rownames(superclass_means) <- superclass_means$Group.1
superclass_means <- superclass_means[,-1]


###
chunk <- function(x,n) split(x, factor(sort(rank(x)%%n)))
x <- superclass_tab$crown_age
n <- 3

superclass_tab$chunks <- cut_number(x, n)


superclass_tab2 <- clean_names(superclass_tab)
superclass_tab2$chunks <- cut_number(x, n)

superclass_means <-  aggregate(superclass_tab2[,c(23:93)], by=list(superclass_tab2$chunks), FUN=mean, na.rm=TRUE)
rownames(superclass_means) <- superclass_means$Group.1
superclass_means <- superclass_means[,-1]

library(pheatmap)


heatmapplot <-pheatmap(t(superclass_means),
         scale = "row", 
         fontsize= 8,
         cutree_rows = 7,
         cutree_cols = 2, 
         main = "Crown age")

S_crownage_p <- ggplot(data_nodes, aes(y=S, x=Crown_age)) + 
  geom_point()+
  #geom_smooth(method=lm)+
  labs( x = "Crown age", y = "Chemical diversity (# compounds)")+
  theme_bw(base_size = 16); S_crownage_p

H_crownage_p <- ggplot(data_nodes, aes(y=H, x=Crown_age)) + 
  geom_point()+
  #geom_smooth(method=lm)+
  labs( x = "Crown age", y = "Chemical diversity (Shannon))")+
  theme_bw(base_size = 16); H_crownage_p

#### for Figure  ####
ggarrange(heatmapplot[[4]],
          ggarrange(S_crownage_p, H_crownage_p,   nrow  = 2, labels = c("B", "C")),
          ncol = 2, 
          labels = "A") 

Random forests for crown_age

tabrf2 <-  superclass_tab2 %>% dplyr::select(crown_age, alkylresorcinols:b_lactams)
rf2 <- rfPermute(crown_age ~ ., data = tabrf2, ntree = 500, num.rep = 100)
pval2<-as.data.frame(rf2$pval)

import_tab <- as.data.frame(rf2$rf$importance)
rf_tab <- merge(import_tab, pval2, by = 0)

rf_tab <- clean_names(rf_tab)
names(rf_tab)
## [1] "row_names"                "percent_inc_mse"         
## [3] "inc_node_purity"          "percent_inc_mse_unscaled"
## [5] "inc_node_purity_unscaled" "percent_inc_mse_scaled"  
## [7] "inc_node_purity_scaled"
retained <- rf_tab %>% filter(percent_inc_mse_unscaled < 0.05) # filter the signifant ones
rownames(retained)
##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"
## [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30"
## [31] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42"
retained$row_names
##  [1] "alkylresorcinols"                "amino_acid_glycosides"          
##  [3] "aminosugars_and_aminoglycosides" "anthranilic_acid_alkaloids"     
##  [5] "apocarotenoids"                  "aromatic_polyketides"           
##  [7] "b_lactams"                       "chromanes"                      
##  [9] "eicosanoids"                     "fatty_acyls"                    
## [11] "fatty_esters"                    "flavonoids"                     
## [13] "fluorenes"                       "glycerolipids"                  
## [15] "guanidine_alkaloids"             "histidine_alkaloids"            
## [17] "isoflavonoids"                   "lignans"                        
## [19] "linear_polyketides"              "mitomycin_derivatives"          
## [21] "monoterpenoids"                  "naphthalenes"                   
## [23] "nucleosides"                     "octadecanoids"                  
## [25] "oligopeptides"                   "ornithine_alkaloids"            
## [27] "peptide_alkaloids"               "phenanthrenoids"                
## [29] "phenylethanoids_c6_c2"           "phenylpropanoids_c6_c3"         
## [31] "phloroglucinols"                 "polycyclic_aromatic_polyketides"
## [33] "polyols"                         "proline_alkaloids"              
## [35] "pseudoalkaloids"                 "pseudoalkaloids_transamidation" 
## [37] "serine_alkaloids"                "sesquiterpenoids"               
## [39] "styrylpyrones"                   "triterpenoids"                  
## [41] "tropolones"                      "tryptophan_alkaloids"
scatterplot_tab <- superclass_tab2 %>% dplyr:: select(crown_age, retained$row_names)
mscatterplot_tab<- melt(scatterplot_tab, id.vars = c("crown_age"))

### scatterplots all significant supercalsses with crownage ####
p2 <- ggplot(mscatterplot_tab, aes(x=crown_age, y=value)) + 
  geom_point(shape=20, color="black")+
  geom_smooth(method = "lm")+ 
  labs(x = "Crown age", y = "Value") + theme_bw()
p2 + facet_wrap(~ variable , ncol = 4, scales = "free") + 
  theme_bw()+ 
  theme(legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'

str(mscatterplot_tab)
## 'data.frame':    11130 obs. of  3 variables:
##  $ crown_age: num  4.8 0.7 0.7 0.7 0.7 0.7 0.59 0.59 0.59 0.59 ...
##  $ variable : Factor w/ 42 levels "alkylresorcinols",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ value    : num  0 0 0 0 0 0 0 0 0 0 ...
#mscatterplot_tab$variable
out <- split( mscatterplot_tab , f = mscatterplot_tab$variable)

tab_dotplot <- matrix(nrow = length(out), ncol = 3); colnames(tab_dotplot) <- c("mean", "inf", "sup") ; rownames(tab_dotplot) <- levels( mscatterplot_tab$variable)

for (i in 1:length(out)) {
 fit <- lm(crown_age ~ value , data = out[[i]])
 tab_dotplot[i,1] <- coef(fit)[[2]]
 tab_dotplot[i,2] <-confint(fit, 'value', level=0.95)[[1]]
 tab_dotplot[i,3] <-confint(fit, 'value', level=0.95)[[2]]
}

tab_dotplot <- as.data.frame(tab_dotplot)
tab_dotplot$Superclassess <- factor(rownames(tab_dotplot))

es_plot_crown <- ggplot(tab_dotplot, aes(x =Superclassess , y = mean)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.7)+ 
  geom_errorbar(aes(ymin = mean - abs(inf), 
                    ymax = mean + abs(sup)), 
                width = 0.2, linewidth = 0.7)+
  labs(x = "Superclasses of compounds", y = "Slope +/- 95% c.i.", title = "Crown age") +
   geom_hline(yintercept=0)+
   coord_flip()+ 
  theme(axis.text=element_text(size=8),
        axis.title=element_text(size=12,face="bold"))+
  theme_classic2()




# rf for temperature  #### 

retained$row_names
##  [1] "alkylresorcinols"                "amino_acid_glycosides"          
##  [3] "aminosugars_and_aminoglycosides" "anthranilic_acid_alkaloids"     
##  [5] "apocarotenoids"                  "aromatic_polyketides"           
##  [7] "b_lactams"                       "chromanes"                      
##  [9] "eicosanoids"                     "fatty_acyls"                    
## [11] "fatty_esters"                    "flavonoids"                     
## [13] "fluorenes"                       "glycerolipids"                  
## [15] "guanidine_alkaloids"             "histidine_alkaloids"            
## [17] "isoflavonoids"                   "lignans"                        
## [19] "linear_polyketides"              "mitomycin_derivatives"          
## [21] "monoterpenoids"                  "naphthalenes"                   
## [23] "nucleosides"                     "octadecanoids"                  
## [25] "oligopeptides"                   "ornithine_alkaloids"            
## [27] "peptide_alkaloids"               "phenanthrenoids"                
## [29] "phenylethanoids_c6_c2"           "phenylpropanoids_c6_c3"         
## [31] "phloroglucinols"                 "polycyclic_aromatic_polyketides"
## [33] "polyols"                         "proline_alkaloids"              
## [35] "pseudoalkaloids"                 "pseudoalkaloids_transamidation" 
## [37] "serine_alkaloids"                "sesquiterpenoids"               
## [39] "styrylpyrones"                   "triterpenoids"                  
## [41] "tropolones"                      "tryptophan_alkaloids"
names(superclass_tab2)
##  [1] "row_names"                           "nb"                                 
##  [3] "species"                             "species_y"                          
##  [5] "row_chem_samples"                    "chem_samples"                       
##  [7] "id"                                  "native_endemic"                     
##  [9] "plant"                               "latitude"                           
## [11] "longitude"                           "height"                             
## [13] "diameter"                            "stem_age"                           
## [15] "crown_age"                           "herbivory"                          
## [17] "x_1"                                 "y_1"                                
## [19] "temperature_1"                       "max_temp_1"                         
## [21] "min_temp_1"                          "precipitation_1"                    
## [23] "alkylresorcinols"                    "amino_acid_glycosides"              
## [25] "aminosugars_and_aminoglycosides"     "anthranilic_acid_alkaloids"         
## [27] "apocarotenoids"                      "aromatic_polyketides"               
## [29] "carotenoids_c40"                     "carotenoids_c50"                    
## [31] "chromanes"                           "coumarins"                          
## [33] "cyclic_polyketides"                  "diarylheptanoids"                   
## [35] "diazotetronic_acids_and_derivatives" "diphenyl_ethers_dp_es"              
## [37] "diterpenoids"                        "docosanoids"                        
## [39] "eicosanoids"                         "fatty_acids_and_conjugates"         
## [41] "fatty_acyl_glycosides"               "fatty_acyls"                        
## [43] "fatty_amides"                        "fatty_esters"                       
## [45] "flavonoids"                          "fluorenes"                          
## [47] "glycerolipids"                       "glycerophospholipids"               
## [49] "guanidine_alkaloids"                 "histidine_alkaloids"                
## [51] "isoflavonoids"                       "lignans"                            
## [53] "linear_polyketides"                  "lysine_alkaloids"                   
## [55] "macrolides"                          "meroterpenoids"                     
## [57] "mitomycin_derivatives"               "monoterpenoids"                     
## [59] "mycosporine_derivatives"             "naphthalenes"                       
## [61] "nicotinic_acid_alkaloids"            "nucleosides"                        
## [63] "octadecanoids"                       "oligopeptides"                      
## [65] "ornithine_alkaloids"                 "peptide_alkaloids"                  
## [67] "phenanthrenoids"                     "phenolic_acids_c6_c1"               
## [69] "phenylethanoids_c6_c2"               "phenylpropanoids_c6_c3"             
## [71] "phloroglucinols"                     "polycyclic_aromatic_polyketides"    
## [73] "polyethers"                          "polyols"                            
## [75] "polyprenols"                         "proline_alkaloids"                  
## [77] "pseudoalkaloids"                     "pseudoalkaloids_transamidation"     
## [79] "saccharides"                         "serine_alkaloids"                   
## [81] "sesquiterpenoids"                    "small_peptides"                     
## [83] "sphingolipids"                       "steroids"                           
## [85] "stilbenoids"                         "styrylpyrones"                      
## [87] "terphenyls"                          "triterpenoids"                      
## [89] "tropolones"                          "tryptophan_alkaloids"               
## [91] "tyrosine_alkaloids"                  "xanthones"                          
## [93] "b_lactams"                           "chunks"
scatterplot_tab_temp <- superclass_tab2 %>% dplyr:: select(temperature_1, retained$row_names)

mscatterplot_tab_temp<- melt(scatterplot_tab_temp, id.vars = c("temperature_1"))

out_temp <- split( mscatterplot_tab_temp , f = mscatterplot_tab_temp$variable)

tab_dotplot_temp <- matrix(nrow = length(out), ncol = 3); colnames(tab_dotplot_temp) <- c("mean", "inf", "sup") ; rownames(tab_dotplot_temp) <- levels( mscatterplot_tab_temp$variable)

for (i in 1:length(out)) {
  fit <- lm(temperature_1 ~ value , data = out_temp[[i]])
  tab_dotplot_temp[i,1] <- coef(fit)[[2]]
  tab_dotplot_temp[i,2] <-confint(fit, 'value', level=0.95)[[1]]
  tab_dotplot_temp[i,3] <-confint(fit, 'value', level=0.95)[[2]]
}

tab_dotplot_temp <- as.data.frame(tab_dotplot_temp)
tab_dotplot_temp$Superclassess <- factor(rownames(tab_dotplot_temp))

es_plot_temp <- ggplot(tab_dotplot_temp, aes(x =Superclassess , y = mean)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.7)+ 
  geom_errorbar(aes(ymin = mean - abs(inf), 
                    ymax = mean + abs(sup)), 
                width = 0.2, linewidth = 0.7)+
  labs(x = "Superclasses of compounds", y = "Slope +/- 95% c.i.", title = "Temperature") +
  geom_hline(yintercept=0)+
  coord_flip()+ 
  theme(axis.text=element_text(size=8),
        axis.title=element_text(size=12,face="bold"))+
  theme_classic2()


# rf for pprecipitation #### 

retained$row_names
##  [1] "alkylresorcinols"                "amino_acid_glycosides"          
##  [3] "aminosugars_and_aminoglycosides" "anthranilic_acid_alkaloids"     
##  [5] "apocarotenoids"                  "aromatic_polyketides"           
##  [7] "b_lactams"                       "chromanes"                      
##  [9] "eicosanoids"                     "fatty_acyls"                    
## [11] "fatty_esters"                    "flavonoids"                     
## [13] "fluorenes"                       "glycerolipids"                  
## [15] "guanidine_alkaloids"             "histidine_alkaloids"            
## [17] "isoflavonoids"                   "lignans"                        
## [19] "linear_polyketides"              "mitomycin_derivatives"          
## [21] "monoterpenoids"                  "naphthalenes"                   
## [23] "nucleosides"                     "octadecanoids"                  
## [25] "oligopeptides"                   "ornithine_alkaloids"            
## [27] "peptide_alkaloids"               "phenanthrenoids"                
## [29] "phenylethanoids_c6_c2"           "phenylpropanoids_c6_c3"         
## [31] "phloroglucinols"                 "polycyclic_aromatic_polyketides"
## [33] "polyols"                         "proline_alkaloids"              
## [35] "pseudoalkaloids"                 "pseudoalkaloids_transamidation" 
## [37] "serine_alkaloids"                "sesquiterpenoids"               
## [39] "styrylpyrones"                   "triterpenoids"                  
## [41] "tropolones"                      "tryptophan_alkaloids"
names(superclass_tab2)
##  [1] "row_names"                           "nb"                                 
##  [3] "species"                             "species_y"                          
##  [5] "row_chem_samples"                    "chem_samples"                       
##  [7] "id"                                  "native_endemic"                     
##  [9] "plant"                               "latitude"                           
## [11] "longitude"                           "height"                             
## [13] "diameter"                            "stem_age"                           
## [15] "crown_age"                           "herbivory"                          
## [17] "x_1"                                 "y_1"                                
## [19] "temperature_1"                       "max_temp_1"                         
## [21] "min_temp_1"                          "precipitation_1"                    
## [23] "alkylresorcinols"                    "amino_acid_glycosides"              
## [25] "aminosugars_and_aminoglycosides"     "anthranilic_acid_alkaloids"         
## [27] "apocarotenoids"                      "aromatic_polyketides"               
## [29] "carotenoids_c40"                     "carotenoids_c50"                    
## [31] "chromanes"                           "coumarins"                          
## [33] "cyclic_polyketides"                  "diarylheptanoids"                   
## [35] "diazotetronic_acids_and_derivatives" "diphenyl_ethers_dp_es"              
## [37] "diterpenoids"                        "docosanoids"                        
## [39] "eicosanoids"                         "fatty_acids_and_conjugates"         
## [41] "fatty_acyl_glycosides"               "fatty_acyls"                        
## [43] "fatty_amides"                        "fatty_esters"                       
## [45] "flavonoids"                          "fluorenes"                          
## [47] "glycerolipids"                       "glycerophospholipids"               
## [49] "guanidine_alkaloids"                 "histidine_alkaloids"                
## [51] "isoflavonoids"                       "lignans"                            
## [53] "linear_polyketides"                  "lysine_alkaloids"                   
## [55] "macrolides"                          "meroterpenoids"                     
## [57] "mitomycin_derivatives"               "monoterpenoids"                     
## [59] "mycosporine_derivatives"             "naphthalenes"                       
## [61] "nicotinic_acid_alkaloids"            "nucleosides"                        
## [63] "octadecanoids"                       "oligopeptides"                      
## [65] "ornithine_alkaloids"                 "peptide_alkaloids"                  
## [67] "phenanthrenoids"                     "phenolic_acids_c6_c1"               
## [69] "phenylethanoids_c6_c2"               "phenylpropanoids_c6_c3"             
## [71] "phloroglucinols"                     "polycyclic_aromatic_polyketides"    
## [73] "polyethers"                          "polyols"                            
## [75] "polyprenols"                         "proline_alkaloids"                  
## [77] "pseudoalkaloids"                     "pseudoalkaloids_transamidation"     
## [79] "saccharides"                         "serine_alkaloids"                   
## [81] "sesquiterpenoids"                    "small_peptides"                     
## [83] "sphingolipids"                       "steroids"                           
## [85] "stilbenoids"                         "styrylpyrones"                      
## [87] "terphenyls"                          "triterpenoids"                      
## [89] "tropolones"                          "tryptophan_alkaloids"               
## [91] "tyrosine_alkaloids"                  "xanthones"                          
## [93] "b_lactams"                           "chunks"
scatterplot_tab_precip <- superclass_tab2 %>% dplyr:: select(precipitation_1, retained$row_names)

mscatterplot_tab_precip<- melt(scatterplot_tab_precip, id.vars = c("precipitation_1"))

out_precip <- split( mscatterplot_tab_precip , f = mscatterplot_tab_precip$variable)

tab_dotplot_precip <- matrix(nrow = length(out), ncol = 3); colnames(tab_dotplot_precip) <- c("mean", "inf", "sup") ; rownames(tab_dotplot_precip) <- levels( mscatterplot_tab_precip$variable)


for (i in 1:length(out)) {
  fit <- lm(precipitation_1 ~ value , data = out_precip[[i]])
  tab_dotplot_precip[i,1] <- coef(fit)[[2]]
  tab_dotplot_precip[i,2] <-confint(fit, 'value', level=0.95)[[1]]
  tab_dotplot_precip[i,3] <-confint(fit, 'value', level=0.95)[[2]]
}

tab_dotplot_precip <- as.data.frame(tab_dotplot_precip)
tab_dotplot_precip$Superclassess <- factor(rownames(tab_dotplot_precip))

es_plot_precip <- ggplot(tab_dotplot_precip, aes(x =Superclassess , y = mean)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.7)+ 
  geom_errorbar(aes(ymin = mean - abs(inf), 
                    ymax = mean + abs(sup)), 
                width = 0.2, linewidth = 0.7)+
  labs(x = "Superclasses of compounds", y = "Slope +/- 95% c.i.", title = "Precipitation") +
  geom_hline(yintercept=0)+
  coord_flip()+ 
  theme(axis.text=element_text(size=8),
        axis.title=element_text(size=12,face="bold"))+
 theme_classic2()


### combine plots

library(ggpubr)

ggarrange(es_plot_crown, 
          es_plot_temp   + theme( axis.title.y=element_text(color="white")), 
          es_plot_precip + theme(axis.title.y=element_text(color="white")), 
          nrow = 1, ncol = 3, labels = c("A)", "B)", "C)"))
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

###
slopes_tab <- cbind.data.frame(rownames(tab_dotplot) ,tab_dotplot$mean, tab_dotplot_precip$mean, tab_dotplot_temp$mean)
names(slopes_tab) <- c("Superclasses", "Crown_age", "Precipitation", "Temperature")

Random forests for stem_age

tabrf3 <-  superclass_tab2 %>% dplyr::select(stem_age, alkylresorcinols:b_lactams)
tabrf3 <- na.omit(tabrf3)
rf3 <- rfPermute(stem_age ~ ., data = tabrf3, ntree = 500, num.rep = 100)
pval3<-as.data.frame(rf3$pval)

import_tab2 <- as.data.frame(rf3$rf$importance)
rf_tab_stem <- merge(import_tab2, pval3, by = 0)

rf_tab_stem <- clean_names(rf_tab_stem)
names(rf_tab_stem)
## [1] "row_names"                "percent_inc_mse"         
## [3] "inc_node_purity"          "percent_inc_mse_unscaled"
## [5] "inc_node_purity_unscaled" "percent_inc_mse_scaled"  
## [7] "inc_node_purity_scaled"
retained_stem <- rf_tab_stem %>% filter(percent_inc_mse_unscaled < 0.05) # filter the signifant ones
rownames(retained_stem)
##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"
## [16] "16" "17" "18" "19" "20" "21" "22"
retained_stem$row_names
##  [1] "alkylresorcinols"                "apocarotenoids"                 
##  [3] "b_lactams"                       "chromanes"                      
##  [5] "fatty_acids_and_conjugates"      "fatty_acyls"                    
##  [7] "flavonoids"                      "fluorenes"                      
##  [9] "glycerophospholipids"            "isoflavonoids"                  
## [11] "lignans"                         "peptide_alkaloids"              
## [13] "phenanthrenoids"                 "phenolic_acids_c6_c1"           
## [15] "polycyclic_aromatic_polyketides" "polyethers"                     
## [17] "proline_alkaloids"               "pseudoalkaloids_transamidation" 
## [19] "saccharides"                     "sphingolipids"                  
## [21] "triterpenoids"                   "tropolones"
scatterplot_tab_stem <- superclass_tab2 %>% dplyr:: select(stem_age, retained_stem$row_names)
mscatterplot_tab_stem<- melt(scatterplot_tab_stem, id.vars = c("stem_age"))

# scatterplots all significant supercalsses with stemage ####
p2 <- ggplot(mscatterplot_tab_stem, aes(x=stem_age, y=value)) + 
  geom_point(shape=20, color="black")+
  geom_smooth(method = "lm")+ 
  labs(x = "stem age", y = "Value") + theme_bw()
p2 + facet_wrap(~ variable , ncol = 4, scales = "free") + 
  theme_bw()+ 
  theme(legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 110 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 110 rows containing missing values or values outside the scale range
## (`geom_point()`).

out_stem <- split( mscatterplot_tab_stem , f = mscatterplot_tab_stem$variable)

tab_dotplot_stem <- matrix(nrow = length(out_stem), ncol = 3); colnames(tab_dotplot_stem) <- c("mean", "inf", "sup") ; rownames(tab_dotplot_stem) <- levels( mscatterplot_tab_stem$variable)

for (i in 1:length(out_stem)) {
 fit <- lm(stem_age ~ value , data = out_stem[[i]])
 tab_dotplot_stem[i,1] <- coef(fit)[[2]]
 tab_dotplot_stem[i,2] <-confint(fit, 'value', level=0.95)[[1]]
 tab_dotplot_stem[i,3] <-confint(fit, 'value', level=0.95)[[2]]
}

tab_dotplot_stem <- as.data.frame(tab_dotplot_stem)
tab_dotplot_stem$Superclassess <- factor(rownames(tab_dotplot_stem))

es_plot_stem <- ggplot(tab_dotplot_stem, aes(x =Superclassess , y = mean)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.7)+ 
  geom_errorbar(aes(ymin = mean - abs(inf), 
                    ymax = mean + abs(sup)), 
                width = 0.2, linewidth = 0.7)+
  labs(x = "Superclasses of compounds", y = "Slope +/- 95% c.i.", title = "stem age") +
   geom_hline(yintercept=0)+
   coord_flip()+ 
  theme(axis.text=element_text(size=8),
        axis.title=element_text(size=12,face="bold"))+
  theme_classic2()


# rf for temperature ####
retained$row_names
##  [1] "alkylresorcinols"                "amino_acid_glycosides"          
##  [3] "aminosugars_and_aminoglycosides" "anthranilic_acid_alkaloids"     
##  [5] "apocarotenoids"                  "aromatic_polyketides"           
##  [7] "b_lactams"                       "chromanes"                      
##  [9] "eicosanoids"                     "fatty_acyls"                    
## [11] "fatty_esters"                    "flavonoids"                     
## [13] "fluorenes"                       "glycerolipids"                  
## [15] "guanidine_alkaloids"             "histidine_alkaloids"            
## [17] "isoflavonoids"                   "lignans"                        
## [19] "linear_polyketides"              "mitomycin_derivatives"          
## [21] "monoterpenoids"                  "naphthalenes"                   
## [23] "nucleosides"                     "octadecanoids"                  
## [25] "oligopeptides"                   "ornithine_alkaloids"            
## [27] "peptide_alkaloids"               "phenanthrenoids"                
## [29] "phenylethanoids_c6_c2"           "phenylpropanoids_c6_c3"         
## [31] "phloroglucinols"                 "polycyclic_aromatic_polyketides"
## [33] "polyols"                         "proline_alkaloids"              
## [35] "pseudoalkaloids"                 "pseudoalkaloids_transamidation" 
## [37] "serine_alkaloids"                "sesquiterpenoids"               
## [39] "styrylpyrones"                   "triterpenoids"                  
## [41] "tropolones"                      "tryptophan_alkaloids"
names(superclass_tab2)
##  [1] "row_names"                           "nb"                                 
##  [3] "species"                             "species_y"                          
##  [5] "row_chem_samples"                    "chem_samples"                       
##  [7] "id"                                  "native_endemic"                     
##  [9] "plant"                               "latitude"                           
## [11] "longitude"                           "height"                             
## [13] "diameter"                            "stem_age"                           
## [15] "crown_age"                           "herbivory"                          
## [17] "x_1"                                 "y_1"                                
## [19] "temperature_1"                       "max_temp_1"                         
## [21] "min_temp_1"                          "precipitation_1"                    
## [23] "alkylresorcinols"                    "amino_acid_glycosides"              
## [25] "aminosugars_and_aminoglycosides"     "anthranilic_acid_alkaloids"         
## [27] "apocarotenoids"                      "aromatic_polyketides"               
## [29] "carotenoids_c40"                     "carotenoids_c50"                    
## [31] "chromanes"                           "coumarins"                          
## [33] "cyclic_polyketides"                  "diarylheptanoids"                   
## [35] "diazotetronic_acids_and_derivatives" "diphenyl_ethers_dp_es"              
## [37] "diterpenoids"                        "docosanoids"                        
## [39] "eicosanoids"                         "fatty_acids_and_conjugates"         
## [41] "fatty_acyl_glycosides"               "fatty_acyls"                        
## [43] "fatty_amides"                        "fatty_esters"                       
## [45] "flavonoids"                          "fluorenes"                          
## [47] "glycerolipids"                       "glycerophospholipids"               
## [49] "guanidine_alkaloids"                 "histidine_alkaloids"                
## [51] "isoflavonoids"                       "lignans"                            
## [53] "linear_polyketides"                  "lysine_alkaloids"                   
## [55] "macrolides"                          "meroterpenoids"                     
## [57] "mitomycin_derivatives"               "monoterpenoids"                     
## [59] "mycosporine_derivatives"             "naphthalenes"                       
## [61] "nicotinic_acid_alkaloids"            "nucleosides"                        
## [63] "octadecanoids"                       "oligopeptides"                      
## [65] "ornithine_alkaloids"                 "peptide_alkaloids"                  
## [67] "phenanthrenoids"                     "phenolic_acids_c6_c1"               
## [69] "phenylethanoids_c6_c2"               "phenylpropanoids_c6_c3"             
## [71] "phloroglucinols"                     "polycyclic_aromatic_polyketides"    
## [73] "polyethers"                          "polyols"                            
## [75] "polyprenols"                         "proline_alkaloids"                  
## [77] "pseudoalkaloids"                     "pseudoalkaloids_transamidation"     
## [79] "saccharides"                         "serine_alkaloids"                   
## [81] "sesquiterpenoids"                    "small_peptides"                     
## [83] "sphingolipids"                       "steroids"                           
## [85] "stilbenoids"                         "styrylpyrones"                      
## [87] "terphenyls"                          "triterpenoids"                      
## [89] "tropolones"                          "tryptophan_alkaloids"               
## [91] "tyrosine_alkaloids"                  "xanthones"                          
## [93] "b_lactams"                           "chunks"
scatterplot_tab_stem_temp <- superclass_tab2 %>% dplyr:: select(temperature_1, retained_stem$row_names)

mscatterplot_tab_stem_temp<- melt(scatterplot_tab_stem_temp, id.vars = c("temperature_1"))

out_stem_temp <- split( mscatterplot_tab_stem_temp , f = mscatterplot_tab_stem_temp$variable)

tab_dotplot_stem_temp <- matrix(nrow = length(out_stem), ncol = 3)
colnames(tab_dotplot_stem_temp) <- c("mean", "inf", "sup")
rownames(tab_dotplot_stem_temp) <- levels( mscatterplot_tab_stem_temp$variable)

for (i in 1:length(out_stem_temp)) {
  fit <- lm(temperature_1 ~ value , data = out_stem_temp[[i]])
  tab_dotplot_stem_temp[i,1] <- coef(fit)[[2]]
  tab_dotplot_stem_temp[i,2] <-confint(fit, 'value', level=0.95)[[1]]
  tab_dotplot_stem_temp[i,3] <-confint(fit, 'value', level=0.95)[[2]]
}

tab_dotplot_stem_temp <- as.data.frame(tab_dotplot_stem_temp)
tab_dotplot_stem_temp$Superclassess <- factor(rownames(tab_dotplot_stem_temp))

es_plot_stem_temp <- ggplot(tab_dotplot_stem_temp, aes(x =Superclassess , y = mean)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.7)+ 
  geom_errorbar(aes(ymin = mean - abs(inf), 
                    ymax = mean + abs(sup)), 
                width = 0.2, linewidth = 0.7)+
  labs(x = "Superclasses of compounds", y = "Slope +/- 95% c.i.", title = "Temperature") +
  geom_hline(yintercept=0)+
  coord_flip()+ 
  theme(axis.text=element_text(size=8),
        axis.title=element_text(size=12,face="bold"))+
  theme_classic2()




# rf for precipitation ####

scatterplot_tab_stem_precip <- superclass_tab2 %>% dplyr:: select(precipitation_1, retained_stem$row_names)

mscatterplot_tab_stem_precip<- melt(scatterplot_tab_stem_precip, id.vars = c("precipitation_1"))

out_stem_precip <- split( mscatterplot_tab_stem_precip , f = mscatterplot_tab_stem_precip$variable)

tab_dotplot_stem_precip <- matrix(nrow = length(out_stem_precip), ncol = 3)
colnames(tab_dotplot_stem_precip) <- c("mean", "inf", "sup")
rownames(tab_dotplot_stem_precip) <- levels( mscatterplot_tab_stem_precip$variable)


for (i in 1:length(out_stem_precip)) {
  fit <- lm(precipitation_1 ~ value , data = out_stem_precip[[i]])
  tab_dotplot_stem_precip[i,1] <- coef(fit)[[2]]
  tab_dotplot_stem_precip[i,2] <-confint(fit, 'value', level=0.95)[[1]]
  tab_dotplot_stem_precip[i,3] <-confint(fit, 'value', level=0.95)[[2]]
}

tab_dotplot_stem_precip <- as.data.frame(tab_dotplot_stem_precip)
tab_dotplot_stem_precip$Superclassess <- factor(rownames(tab_dotplot_stem_precip))

es_plot_stem_precip <- ggplot(tab_dotplot_stem_precip, aes(x =Superclassess , y = mean)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.7)+ 
  geom_errorbar(aes(ymin = mean - abs(inf), 
                    ymax = mean + abs(sup)), 
                width = 0.2, linewidth = 0.7)+
  labs(x = "Superclasses of compounds", y = "Slope +/- 95% c.i.", title = "Precipitation") +
  geom_hline(yintercept=0)+
  coord_flip()+ 
  theme(axis.text=element_text(size=8),
        axis.title=element_text(size=12,face="bold"))+
 theme_classic2()







# combine plots################


library(ggpubr)

ggarrange(es_plot_stem, 
          es_plot_stem_temp   + theme( axis.title.y=element_text(color="white")), 
          es_plot_stem_precip + theme(axis.title.y=element_text(color="white")), 
          nrow = 1, ncol = 3, labels = c("A)", "B)", "C)"))
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

## data table
slopes_tab_stem <- cbind.data.frame(rownames(tab_dotplot_stem_precip) ,tab_dotplot_stem$mean, tab_dotplot_stem_precip$mean, tab_dotplot_stem_temp$mean)
names(slopes_tab_stem) <- c("Superclasses", "stem_age", "Precipitation", "Temperature")