Internship progress

Abdourahmane Diallo

2024-04-25

Setting

Packages

Code
    library(tidyverse)
  # library(glme)
  # library(lsmeans)
  # library(agricolae)
  # library(RVAideMemoire)
  library(corrplot)
  # library(emmeans)
  library(lme4)
  library(multcomp)
  library(MASS)
  # library(R2WinBUGS)
  library(arm)
  # library(performance)
  # library(AER)
  # library(AICcmodavg)
  # library(MuMIn)
  library(ade4)
  library(Hmisc)
  library(labdsv)
  library(vegan)
  library(cowplot)
  library(ggpubr)
  library(rstatix)
  library(patchwork)
  library(multcompView)
  library(ggsignif)
  library(grid)
  library(FactoMineR)
  library(factoextra)
  library(explore)
  library(ggrepel)
  library(naniar)
  library(outliers)
  library(leaps)
  library(fastDummies)
  library(caret) # pour l'entrainement des models
  library(mgcv)
  library(ggeffects)
  library(gratia)
  library(GGally) # pour ggpair
  # library(caTools)
  # library(rpart)
  # library(rpart.plot)
  library(openxlsx)
  library(readxl)
  library(leaflet) # pour la carto
  library(quarto)
  library(raster)
  library(knitr)
  library(kableExtra)
  library(stringr)
  library(plotly)
  # library(PerformanceAnalytics)
  # library(usdm)
  library(vcd) # pour la distribution des var reponse
  library(prospectr)# pour split data avec kenSton()
  # library(glmnet)
  library(randomForest)
  # library(doParallel)
  library(gbm)
  library(kernlab)
  # library(e1071)
  library(ggforce)
  library(keras)
  library(tensorflow)
  library(neuralnet)
  # library(parallel)
  library(iml) # pour l'interpretabilité des models https://cran.r-project.org/web/packages/iml/vignettes/intro.html
  library(stats)
  # library(Boruta) # importance des predicteurs
  library(bestNormalize)
  library(rmarkdown)
  library(DT)
  library(gtExtras) # pour la
  library(reshape2)
  library(mapview)

Functions

Code
## Identify the NAs of a data frame --------------------------------------------
# Build a completion-rate (CR) table for the columns of `df`.
#
# Arguments:
#   df                     data frame to inspect.
#   afficher_zero_percent  if TRUE, keep only the columns whose rounded NA
#                          percentage is 0 (complete columns); otherwise keep
#                          only the columns whose rounded NA percentage is > 0.
#   seuil                  unused by this function; kept (now optional) so
#                          existing calls keep working.
#   trie                   if TRUE, sort the rows by decreasing CR.
#
# Returns a data frame with columns `Variables` and `CR` (character, suffixed
# with "%"), plus a final "Total" row for the whole table.
taux_completion <-
  function(df, afficher_zero_percent = FALSE, seuil = NULL, trie = FALSE) {
    # Percentage of NA cells, per column and over the whole table
    na_colonnes <- unname(round(colMeans(is.na(df)) * 100, 1))
    na_total <- round(sum(is.na(df)) / (nrow(df) * ncol(df)) * 100, 1)

    result <- data.frame(
      Variables = names(df),
      CR = na_colonnes,
      row.names = NULL
    )

    # Select complete or incomplete columns, then turn NA % into completion %
    garder <- if (afficher_zero_percent) result$CR == 0 else result$CR > 0
    result <- result[garder, ]
    result$CR <- 100 - result$CR

    # The "Total" row is expressed as a completion rate too, so that the CR
    # column has a single meaning; binding a data frame (not a character
    # vector) keeps CR numeric.
    result <- rbind(
      result,
      data.frame(Variables = "Total", CR = 100 - na_total)
    )
    result <- result[, c("Variables", "CR")]
    result$CR <- round(as.numeric(result$CR), 1)

    if (trie) {
      # Stable ordering: ties keep their original relative position
      result <- result[order(-result$CR), ]
    }
    result$CR <- paste0(result$CR, "%")

    result
  }
# Convert columns to numeric or factor -----------------------------------------
# Convert the columns listed in `columns_to_convert` (names or positions) of
# `data`. When `to_types` is "numeric" the columns become numeric; for any
# other value they become factors. Returns the modified data frame.
conv_col <- function (data, columns_to_convert, to_types) {
  vers_numerique <- isTRUE(to_types[1] == "numeric")

  for (col in columns_to_convert) {
    # `[[` extracts the column as a vector for data frames and tibbles alike
    valeurs <- data[[col]]
    if (vers_numerique) {
      # Go through the labels: as.numeric() on a factor would return the
      # internal level codes instead of the values.
      if (is.factor(valeurs)) {
        valeurs <- as.character(valeurs)
      }
      data[[col]] <- as.numeric(valeurs)
    } else {
      data[[col]] <- as.factor(valeurs)
    }
  }

  data
}
#data_converted <- conv_col(data, names(data [, c(1, 3)]), "factor")

# Graphical exploration of numeric variables -----------------------------------
# Draw four diagnostic plots (boxplot, Cleveland dot chart, histogram and
# normal Q-Q plot) for column `nom_col` of `df`, after dropping its NA rows and
# converting it to numeric.
#
# Arguments:
#   nom_col    name of the column to plot.
#   titre      axis label used on the plots.
#   df         data frame holding the column (defaults to the global `bdd`).
#   ligne_col  layout passed to par(mfrow = ).
#   mini,maxi  y-axis limits of the boxplot. The defaults are evaluated lazily,
#              i.e. on the filtered and converted column.
#
# The previous graphical layout is restored when the function exits.
explo_num <- function(nom_col, titre, df = bdd, ligne_col = c(2, 2),mini = min(df[[nom_col]]), maxi=max(df[[nom_col]]) ) {
  ancien_par <- par(mfrow = ligne_col)
  on.exit(par(ancien_par), add = TRUE)

  # Keep only the rows where the variable is known, then force it to numeric
  df <- df[!is.na(df[[nom_col]]), , drop = FALSE]
  df[[nom_col]] <- as.numeric(df[[nom_col]])
  valeurs <- df[[nom_col]]

  # Boxplot
  boxplot(valeurs, col = 'blue', ylab = titre, ylim = c(mini, maxi))
  # Cleveland plot
  dotchart(valeurs, pch = 16, col = 'blue', xlab = titre)
  # Histogram
  hist(valeurs, col = 'blue', xlab = titre, main = "")
  # Quantile-Quantile plot
  qqnorm(valeurs, pch = 16, col = 'blue', xlab = '')
  qqline(valeurs, col = 'red')

  invisible(NULL)
}

# Extraction des predictors + moyennes -----------------------------------------------

# Extract the values of a raster file at the GPS positions stored in `df`
# (columns `gps_x` and `gps_y`, read as longitude/latitude WGS84) and store
# them, divided by `conv`, in a new column named `nom_col`.
# Returns `df` with that extra column.
extraction <- function(nom_col, tif_file_path, df = bdd, conv = 1) {
  couche <- raster(tif_file_path)

  # Build the points in WGS84, then reproject them to the raster's CRS
  wgs84 <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
  points_wgs84 <- SpatialPoints(
    data.frame(gps_x = df$gps_x, gps_y = df$gps_y),
    proj4string = CRS(wgs84)
  )
  points_raster <- spTransform(points_wgs84, crs(couche))

  # One raster value per point, rescaled by the conversion factor
  df[[nom_col]] <- raster::extract(couche, points_raster) / conv

  return(df)
}

# Mean of the predictors --------------------------------------------------------
# Add to `df` a column `nom_col` holding, for each row, the mean of the columns
# listed in `vec_col` (NA ignored), rounded to one decimal.
# Returns the result as a data frame.
moyenne_val_extrct <- function(nom_col, vec_col, df=bdd) {
  colonnes <- as.matrix(df[, vec_col, drop = FALSE])
  df[[nom_col]] <- round(rowMeans(colonnes, na.rm = TRUE), 1)
  as.data.frame(df)
}


# Correlation tests against a threshold -----------------------------------------
# Compute the correlation of every pair of columns of `data` and collect the
# pairs whose absolute correlation reaches `seuil`.
#
# Arguments:
#   data     data frame (or matrix) of numeric columns.
#   seuil    threshold compared with the absolute value of the correlation.
#   affiche  if TRUE, print one line per retained pair.
#
# Returns a character vector where each retained pair contributes two
# consecutive column names, or NULL when no pair is retained. Pairs whose
# correlation cannot be computed (NA) are skipped.
cor_function_seuil <- function(data, seuil,affiche=FALSE) {
  n_vars <- ncol(data)
  noms <- colnames(data)
  paires <- list()

  # seq_len() yields an empty loop when there are fewer than two columns
  for (i in seq_len(max(n_vars - 1, 0))) {
    for (j in (i + 1):n_vars) {
      cor_value <- stats::cor(data[, i], data[, j], use = "na.or.complete")

      # "na.or.complete" returns NA when no complete pair of values exists
      if (is.na(cor_value) || abs(cor_value) < seuil) {
        next
      }

      if (affiche) {
        cat(
          "***",
          noms[i],
          "  __est correlee a__  ",
          noms[j],
          "avec un R =",
          cor_value,
          "\n \n \n"
        )
      }

      paires[[length(paires) + 1]] <- c(noms[i], noms[j])
    }
  }

  unlist(paires)
}


# Outlier tests -------------------------------------------------------------------
# Repeatedly apply Grubbs' test to `data[[variable]]` and drop the rows holding
# the tested extreme value while the test is significant (p < 0.05).
#
# Arguments:
#   data       data frame.
#   variable   name of the numeric column to test.
#   direction  "maxi" to test/remove the highest value, "mini" for the lowest.
#              Any other value returns `data` unchanged.
#
# Rows where the variable is NA are always kept. Returns the filtered data.
test_grub <- function(data, variable, direction = "maxi") {
  if (!direction %in% c("maxi", "mini")) {
    return(data)
  }
  cible_haute <- direction == "maxi"

  repeat {
    valeurs <- data[[variable]]
    valeurs <- valeurs[!is.na(valeurs)]

    # The test statistic needs several distinct observations
    if (length(valeurs) < 3 || stats::sd(valeurs) == 0) {
      break
    }

    # grubbs.test() tests by default the value farthest from the mean, which
    # may be the lowest one; `opposite` is set so that the tested value is
    # always the one this function is about to remove.
    centre <- mean(valeurs)
    bas_plus_extreme <- (max(valeurs) - centre) < (centre - min(valeurs))
    opposite <- if (cible_haute) bas_plus_extreme else !bas_plus_extreme

    p.value <- grubbs.test(valeurs, opposite = opposite)$p.value
    if (is.na(p.value) || p.value >= 0.05) {
      # No (more) significant outlier on the requested side
      break
    }

    extreme <- if (cible_haute) max(valeurs) else min(valeurs)
    garder <- is.na(data[[variable]]) | data[[variable]] != extreme
    data <- data[garder, , drop = FALSE]
  }

  data
}




# Boxplot --------------------------------------------------------------------------
# Build a ggplot boxplot of `y_col` by `x_col`, coloured by `x_col`.
#
# Arguments:
#   donnee         data frame to plot.
#   x_col, y_col   column names (strings) mapped to x and y.
#   x_label, y_label, title  axis labels and plot title.
#   legend_title   name given to the colour scale (the legend itself is hidden).
#   couleurs       colours passed to scale_color_manual().
#   affiche_point  if TRUE, overlay the jittered observations.
#   ymin, ymax     y limits of the visible area, applied only when `y_col` is a
#                  column of `donnee`.
#
# Returns the ggplot object; the group means are drawn as black squares.
plot_boxplot <-function(donnee,
           x_col,y_col,x_label,y_label,title,legend_title,
           couleurs,
           affiche_point = TRUE,
           ymin = min(donnee[[y_col]]),
           ymax = 1.2 * max(donnee[[y_col]])) {

  theme_boxplot <- theme(
    axis.text = element_text(size = 10),
    axis.title.y = element_text(vjust = 5, size = 12, face = "bold"),
    axis.title.x = element_text(face = "bold"),
    axis.ticks.length = unit(0.2, "cm"),
    legend.position = "none",  # hides the legend
    #legend.position = "right",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12, face = "bold"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")
  )

  graphe <- ggplot(donnee,
                   aes_string(x = x_col, y = y_col, colour = x_col)) +
    geom_boxplot(
      outlier.shape = NA,
      outlier.colour = "black",
      alpha = 0.20,
      size = 1.5
    ) +
    labs(title = title, x = x_label, y = y_label) +
    scale_color_manual(values = couleurs, name = legend_title) +
    theme_classic(base_size = 12, base_family = "Arial") +
    theme_boxplot

  if (affiche_point) {
    graphe <- graphe +
      geom_jitter(position = position_jitter(seed = 0.5), size = 0.8)
  }

  # Zoom on [ymin, ymax] without dropping observations
  if (y_col %in% names(donnee)) {
    graphe <- graphe + coord_cartesian(ylim = c(ymin, ymax))
  }

  # `fun` replaces the deprecated `fun.y` argument of stat_summary()
  graphe <- graphe + stat_summary(
    fun = mean,
    geom = "point",
    shape = 15,
    size = 1.5,
    col = "black",
    fill = "black"
  )

  return(graphe)
}



# For pairwise.t.test() -----------------------------------------------------------
# Turn a triangular table `x` (rows and columns named, NA in the empty cells)
# into a full symmetric square matrix over the union of its row and column
# names. Cells that are NA in `x` — including the diagonal — are left at 1.
tri.to.squ <- function(x) {
  rn <- row.names(x)
  cn <- colnames(x)
  an <- unique(c(cn, rn))
  mymat <-
    matrix(
      1,
      nrow = length(an),
      ncol = length(an),
      dimnames = list(an, an)
    )

  # Iterating over the names themselves is safe when a dimension is empty
  for (nom_col in cn) {
    for (nom_lig in rn) {
      valeur <- x[nom_lig, nom_col]
      if (is.na(valeur)) {
        next
      }
      # Mirror the value on both sides of the diagonal
      mymat[nom_lig, nom_col] <- valeur
      mymat[nom_col, nom_lig] <- valeur
    }
  }

  mymat
}



# Interaction screening -------------------------------
# For every pair of `explanatory_vars`, fit gam(response_var ~ v1*v2) on `df`
# and record the pair label ("v1.inter.v2"), the model's r.sq and its AIC.
# Returns one row per pair.
select_inter <- function(response_var, df, explanatory_vars) {
  paires <- combn(explanatory_vars, 2, simplify = FALSE)
  lignes <- vector("list", length(paires))

  for (k in seq_along(paires)) {
    paire <- paires[[k]]
    formule <- as.formula(
      paste(response_var, "~", paste(paire, collapse = "*"))
    )
    modele <- gam(formule, data = df)

    lignes[[k]] <- data.frame(
      "variables" = paste0(paire, collapse = ".inter."),
      "r_squared" = summary(modele)$r.sq,
      "aic" = AIC(modele)
    )
  }

  # Stack the per-pair rows, starting from an empty data frame
  results <- do.call(rbind, c(list(data.frame()), lignes))
  return(results)
}

# Comparison between predicted and observed values ------------------------------
# Plot the `Observed` and `Predicted` columns of `df` against its `observation`
# column, as points joined by lines.
#
# Arguments:
#   df           data frame with columns `observation`, `Observed`, `Predicted`.
#   ylabel       y-axis label.
#   title_class  text annotation written at x = 8, at the top of the plot.
#   legende      if FALSE, hide the legend.
#   plotly       if TRUE, return the plot converted with ggplotly().
#   xlabel       x-axis label.
#   title        plot title.
#
# Returns a ggplot object (or its ggplotly() conversion when `plotly` is TRUE).
plot_comp <- function (df,ylabel, title_class, legende = TRUE,plotly = FALSE,xlabel = "observations",title=""){

  # Common y range of both series; NA values do not invalidate the limits
  valeurs <- c(df$Predicted, df$Observed)
  y_bas <- min(valeurs, na.rm = TRUE)
  y_haut <- max(valeurs, na.rm = TRUE) + 1

  p <- ggplot(df, aes(x = observation)) +
    # observed series
    geom_point(aes(y = Observed, color = "Observed values")) +
    geom_line(aes(y = Observed, color = "Observed values")) +
    # predicted series
    geom_point(aes(y = Predicted, color = "Predicted values")) +
    geom_line(aes(y = Predicted, color = "Predicted values")) +
    theme(plot.title = element_text(hjust = 0.5)) +
    labs(title = title, x = xlabel, y = ylabel, color = "Legend :") +
    ylim(y_bas, y_haut) +
    scale_color_manual(
      values = c("Observed values" = "red", "Predicted values" = "green")
    ) +
    annotate("text", x = 8, y = y_haut,
             label = title_class, col = "black", size = 3)

  if (!legende) {
    p <- p + theme(legend.position = "none")
  }

  if (plotly) {
    p <- ggplotly(p)
  }

  p
}


# R² computation: squared correlation between `x` and `y`
calcule_R2 <- function(x, y) {
  r <- cor(x, y)
  r * r
}

Plan

  • Exploration de l’occurrence des espèces (species.html)

1 Database import

  • Import of the database LandWorm_dataset_site_V1.9.xlsx (February 22, 2024)
  • The database contains 8019 rows and 481 columns

1.1 Data selection: EcoBioSoil

Numbers
cp 227
dc 5520
gp 299
mh 867
sg 545
NA's 561
  • The database therefore changes from 8019 to 5520 observations.

2 Database exploration

  • CR = Completion rate

2.1 Complete columns

Code
# Completion rates of the fully filled columns, without the "Total" row
df_col <- taux_completion(bdd, afficher_zero_percent = TRUE, trie = FALSE)
df_col <- subset(df_col, Variables != "Total")
#print("table")
kable(df_col, caption = "", col.width = c("75%", "25%"))
Variables CR
79 ID 100%
80 Protocole 100%
82 owner 100%
83 AB_tot 100%
127 AB_Allolobophora_chlorotica_chlorotica 100%
167 AB_AD 100%
168 AB_JV 100%
169 AB_SA 100%
Code
# cat(                                                    )
# head(bdd[, "ID"])

2.2 Non-complete columns

Variables CR
93 AB_Lumbricus_castaneus 99.5%
90 AB_Aporrectodea_rosea 99%
87 AB_Aporrectodea_caliginosa 98.9%
123 AB_Lumbricus_terrestris 98.6%
121 AB_Aporrectodea_icterica 98%
145 BM_Aporrectodea_icterica 96.4%
109 BM_Lumbricus_castaneus 96.3%
1 Programme 96.1%
2 Annee 96.1%
4 ID_Site 96.1%
106 BM_Aporrectodea_rosea 96%
97 AB_Octolasion_cyaneum 95.8%
100 AB_Satchellius_mammalis 95.6%
11 clcm_lvl1 95.4%
12 clcm_lvl2 95.4%
13 clcm_lvl3 95.4%
16 code_clcm_lvl1 95.4%
17 code_clcm_lvl2 95.4%
18 code_clcm_lvl3 95.4%
113 BM_Octolasion_cyaneum 95.4%
173 AB_Aporrectodea_longa_longa 94.9%
120 AB_Aporrectodea_giardi 94.5%
116 BM_Satchellius_mammalis 94.3%
174 AB_Aporrectodea_nocturna 93.5%
147 BM_Lumbricus_terrestris 93.2%
8 gps_x 92.8%
9 gps_y 92.7%
144 BM_Aporrectodea_giardi 92.1%
197 BM_Aporrectodea_nocturna 91.5%
96 AB_Murchieona_muldali 90.7%
112 BM_Murchieona_muldali 90.5%
187 AB_Lumbricus_sp 90.5%
210 BM_Lumbricus_sp 89.7%
196 BM_Aporrectodea_longa_longa 89.4%
88 AB_Aporrectodea_caliginosa_meridionalis 89%
186 AB_Lumbricus_rubellus_castanoides 89%
209 BM_Lumbricus_rubellus_castanoides 88.8%
103 BM_Aporrectodea_caliginosa 87.9%
182 AB_indéterminable 87.3%
205 BM_indéterminable 87.3%
104 BM_Aporrectodea_caliginosa_meridionalis 86.9%
177 AB_Aporrectodea_sp 86.9%
200 BM_Aporrectodea_sp 86.3%
188 AB_Octolasion_sp 85.5%
211 BM_Octolasion_sp 85.5%
131 AB_Lumbricus_rubellus_rubellus 84.2%
151 BM_Allolobophora_chlorotica_chlorotica 83.9%
155 BM_Lumbricus_rubellus_rubellus 83.2%
81 Code_Parcelle 77.8%
180 AB_Dendrobaena_sp 77%
203 BM_Dendrobaena_sp 77%
125 AB_Lumbricus_festivus 75.5%
149 BM_Lumbricus_festivus 75.5%
170 AB_Allolobophora_chlorotica_postepheba 74.3%
193 BM_Allolobophora_chlorotica_postepheba 73.9%
89 AB_Aporrectodea_cupulifera 72.1%
105 BM_Aporrectodea_cupulifera 72.1%
94 AB_Lumbricus_friendi 72%
172 AB_Aporrectodea_indéterminable 69.2%
195 BM_Aporrectodea_indéterminable 68.9%
110 BM_Lumbricus_friendi 68.7%
141 AB_Octolasion_lacteum_lacteum 68.6%
165 BM_Octolasion_lacteum_lacteum 68.6%
135 AB_Lumbricus_centralis 67.2%
92 AB_Eiseniella_tetraedra 67%
108 BM_Eiseniella_tetraedra 66.9%
159 BM_Lumbricus_centralis 66.2%
37 clay 64.1%
181 AB_Eisenia_fetida 62.1%
204 BM_Eisenia_fetida 62.1%
175 AB_Aporrectodea_ripicola 61.9%
198 BM_Aporrectodea_ripicola 61.7%
31 fine_sand 61.6%
32 coarse_sand 61.6%
34 fine_silt 61.6%
35 coarse_silt 61.6%
225 AB_Eisenia_andrei 61.4%
230 BM_Eisenia_andrei 61.4%
184 AB_Lumbricus_castaneus_disjonctus 61.1%
207 BM_Lumbricus_castaneus_disjonctus 60.9%
237 AB_Microscolex_phosphoreus 60.3%
239 BM_Microscolex_phosphoreus 60.3%
21 ph_eau 54.1%
142 AB_Microscolex_dubius 53.6%
166 BM_Microscolex_dubius 53.6%
26 om 52.6%
171 AB_Allolobophora_sp 51.9%
194 BM_Allolobophora_sp 51.7%
224 AB_Dendrodrilus_rubidus 51.3%
229 BM_Dendrodrilus_rubidus 51.3%
245 AB_Lumbricus_friendi_lineatus 50%
253 BM_Lumbricus_friendi_lineatus 49.9%
24 n_tot 49.4%
192 AB_Scherotheca_sp 48.8%
215 BM_Scherotheca_sp 48.8%
236 AB_Aporrectodea_limicola 48.5%
238 BM_Aporrectodea_limicola 48.5%
190 AB_Prosellodrilus_sp 48.2%
213 BM_Prosellodrilus_sp 48.1%
122 AB_Aporrectodea_longa 46.8%
146 BM_Aporrectodea_longa 46.8%
226 AB_Proctodrilus_antipai_antipai 44.9%
231 BM_Proctodrilus_antipai_antipai 44.7%
232 AB_Eisenia_veneta 44.7%
233 BM_Eisenia_veneta 44.7%
183 AB_Indéterminable 44%
206 BM_Indéterminable 44%
23 c_org 43.7%
101 AB_Scherotheca_savignyi_indéterminable 42.6%
117 BM_Scherotheca_savignyi_indéterminable 42.6%
291 AB_Scherotheca_savignyi_savignyi 42.6%
293 BM_Scherotheca_savignyi_savignyi 42.6%
84 BM_tot 41.2%
10 Altitude 40.4%
246 AB_Octodrilus_complanatus 40.1%
254 BM_Octodrilus_complanatus 40%
179 AB_Aporrectodea_tuberculata 39.9%
202 BM_Aporrectodea_tuberculata 39.9%
178 AB_Aporrectodea_trapezoides 39%
201 BM_Aporrectodea_trapezoides 39%
290 AB_Allolobophora_burgondiae 37.6%
136 AB_Lumbricus_rubellus_friendoides 37.5%
295 AB_Scherotheca_aquitana 37.5%
292 BM_Allolobophora_burgondiae 37.4%
297 BM_Scherotheca_aquitana 37.4%
160 BM_Lumbricus_rubellus_friendoides 37.3%
15 land_cover_detail 36.6%
189 AB_Prosellodrilus_amplisetosus_amplisetosus 36.4%
212 BM_Prosellodrilus_amplisetosus_amplisetosus 36.4%
280 AB_Eisenia_fetida_indéterminable 35.8%
282 BM_Eisenia_fetida_indéterminable 35.8%
278 AB_Bimastos_eiseni 32%
279 BM_Bimastos_eiseni 32%
91 AB_Dendrobaena_octaedra 28.7%
107 BM_Dendrobaena_octaedra 28.7%
3 Date_Prelevement 24.4%
119 AB_Aporrectodea_caliginosa_indéterminable 22.9%
294 AB_Aporrectodea_rubra_acidicola 22.8%
296 BM_Aporrectodea_rubra_acidicola 22.8%
143 BM_Aporrectodea_caliginosa_indéterminable 22.3%
286 AB_Prosellodrilus_occidentalis_occidentalis 21.2%
289 BM_Prosellodrilus_occidentalis_occidentalis 21.2%
133 AB_Prosellodrilus_fragilis_fragilis 20.6%
157 BM_Prosellodrilus_fragilis_fragilis 20.6%
7 postal_code 20%
185 AB_Lumbricus_meliboeus 18.6%
208 BM_Lumbricus_meliboeus 18.6%
33 sand 18.2%
36 silt 18.2%
138 AB_Dendrodrilus_rubidus_subrubicundus 18.1%
162 BM_Dendrodrilus_rubidus_subrubicundus 18.1%
5 Modalite 17.4%
85 AB_STAD_X 17.1%
223 AB_Dendrobaena_attemsi 16.4%
228 BM_Dendrobaena_attemsi 16.4%
22 c_tot 15.8%
99 AB_Prosellodrilus_fragilis_indéterminable 15.2%
115 BM_Prosellodrilus_fragilis_indéterminable 15.2%
301 AB_Prosellodrilus_amplisetosus 15.1%
307 BM_Prosellodrilus_amplisetosus 15.1%
298 AB_Hemigastrodrilus_monicae 14.7%
299 AB_Octodrilus_indéterminable 14.7%
300 AB_Proctodrilus_antipai_indéterminable 14.7%
302 AB_Prosellodrilus_praticola 14.7%
303 AB_Scherotheca_porotheca 14.7%
305 BM_Octodrilus_indéterminable 14.7%
306 BM_Proctodrilus_antipai_indéterminable 14.7%
308 BM_Prosellodrilus_praticola 14.7%
309 BM_Scherotheca_porotheca 14.7%
304 BM_Hemigastrodrilus_monicae 14.6%
234 AB_Haplotaxis_sp 13.9%
235 BM_Haplotaxis_sp 13.9%
38 type_tillage 13.2%
28 cu_EDTA 12.4%
222 AB_Avelona_ligra 11.1%
227 BM_Avelona_ligra 11.1%
27 cu_tot 10.8%
263 AB_Pheretima_indéterminable 10.2%
271 BM_Pheretima_indéterminable 10.2%
51 herbicide_freq 10%
259 AB_Dendrobaena_cognettii 9.4%
267 BM_Dendrobaena_cognettii 9.4%
50 insecticide_freq 9.3%
6 Bloc 8.9%
43 fertilisation 8.4%
260 AB_Dendrobaena_hortensis 8.4%
268 BM_Dendrobaena_hortensis 8.4%
49 fungicide_freq 7.5%
284 AB_Microscolex_sp 7%
287 BM_Microscolex_sp 7%
285 AB_Pheritima_Diffringens 6.6%
288 BM_Pheritima_Diffringens 6.6%
44 ferti_min_product 6.4%
46 ferti_orga_product 6.2%
78 grassland_type 5.6%
63 rotation_plant_div 5.1%
281 AB_Lumbricus_rubellus_indéterminable 5.1%
283 BM_Lumbricus_rubellus_indéterminable 5.1%
56 tfi_herbicide 4.5%
73 herbage_use 4.3%
86 AB_Allolobophora_chlorotica_indéterminable 4%
102 BM_Allolobophora_chlorotica_indéterminable 4%
176 AB_Aporrectodea_rubra 4%
191 AB_Scherotheca_dinoscolex 4%
199 BM_Aporrectodea_rubra 4%
214 BM_Scherotheca_dinoscolex 4%
40 tillage_frequency_intra 3.5%
20 ph_kcl 3.3%
52 molluscicide_freq 3.2%
45 ferti_min_qtty 3.1%
47 ferti_orga_qtty 3.1%
66 crop_residues_management 2.7%
59 total_tfi 2.5%
75 herb_age 2%
76 animal_loading 2%
60 mecanical_weed_control 1.9%
65 rotation_grassland 1.8%
258 AB_Dendrobaena_alpina_zeugochaeta 1.8%
261 AB_Eisenia_sp 1.8%
262 AB_Flabellodrilus_bartolii 1.8%
264 AB_Prosellodrilus_pyrenaicus 1.8%
265 AB_Scherotheca_nivicola 1.8%
266 BM_Dendrobaena_alpina_zeugochaeta 1.8%
269 BM_Eisenia_sp 1.8%
270 BM_Flabellodrilus_bartolii 1.8%
272 BM_Prosellodrilus_pyrenaicus 1.8%
273 BM_Scherotheca_nivicola 1.8%
216 AB_Aporrectodea_nocturna_nocturna_cistercianus 1.6%
217 AB_Scherotheca_mifuga 1.6%
218 AB_Scherotheca_rhodana 1.6%
219 BM_Aporrectodea_nocturna_nocturna_cistercianus 1.6%
220 BM_Scherotheca_mifuga 1.6%
221 BM_Scherotheca_rhodana 1.6%
55 tfi_insecticide 1.5%
98 AB_Octolasion_lacteum 1.1%
114 BM_Octolasion_lacteum 1.1%
77 trampling_nature 1%
314 AB_Octolasion_lacteum_gracile 0.9%
315 BM_Octolasion_lacteum_gracile 0.9%
242 AB_Ethnodrilus_lydiae 0.8%
243 AB_Hemigastrodrilus_monicae_magnus 0.8%
244 AB_Hormogaster_praetiosa 0.8%
247 AB_Prosellodrilus_indéterminable 0.8%
248 AB_Scherotheca_corsicana_corsicana 0.8%
249 AB_Zophoscolex_graffi 0.8%
250 BM_Ethnodrilus_lydiae 0.8%
251 BM_Hemigastrodrilus_monicae_magnus 0.8%
252 BM_Hormogaster_praetiosa 0.8%
255 BM_Prosellodrilus_indéterminable 0.8%
256 BM_Scherotheca_corsicana_corsicana 0.8%
257 BM_Zophoscolex_graffi 0.8%
74 mowing_frequency_yr 0.7%
274 AB_Aporrectodea_georgii 0.5%
275 AB_Panoniona_leoni 0.5%
276 BM_Aporrectodea_georgii 0.5%
277 BM_Panoniona_leoni 0.5%
240 AB_Aporrectodea_balisa 0.4%
241 BM_Aporrectodea_balisa 0.4%
310 AB_Scherotheca_minor 0.4%
311 BM_Scherotheca_minor 0.4%
14 clcm_lvl4 0.3%
19 code_clcm_lvl4 0.3%
312 AB_Orodrilus_paradoxus_paradoxus 0.3%
313 BM_Orodrilus_paradoxus_paradoxus 0.3%
25 c/n 0%
29 soil_temperature 0%
30 soil_humidity 0%
39 tillage_depth 0%
41 tillage_frequency_inter 0%
42 tillage_date 0%
48 ferti_orga_freq 0%
53 nematicide_freq 0%
54 tfi_fungicide 0%
57 tfi_mollucicide 0%
58 tfi_nematicide 0%
61 thermal_weed_control 0%
62 crop_rotation_yr 0%
64 intercrop_div 0%
67 amdmt_orga_freq 0%
68 amdmt_orga_names 0%
69 amdmt_orga_qtty 0%
70 amdmt_calcic 0%
71 amdmt_calcic_names 0%
72 amdmt_calcic_qtty 0%
95 AB_Lumbricus_herculeus 0%
111 BM_Lumbricus_herculeus 0%
118 Parcelle 0%
124 AB_Allolobophora_chlorotica 0%
126 AB_A._muldali/rosea 0%
128 AB_Aporrectodea_longa/giardi 0%
129 AB_Indéterminable_epigeic 0%
130 AB_Lumbricus_friendi/centralis 0%
132 AB_Octolasion_indéterminable 0%
134 AB_Dendrobaena_pygmea 0%
137 AB_indéterminable_endogeic 0%
139 AB_Lumbricus_indéterminable_anecic 0%
140 AB_Eisenia_indéterminable 0%
148 BM_Allolobophora_chlorotica 0%
150 BM_A._muldali/rosea 0%
152 BM_Aporrectodea_longa/giardi 0%
153 BM_Indéterminable_epigeic 0%
154 BM_Lumbricus_friendi/centralis 0%
156 BM_Octolasion_indéterminable 0%
158 BM_Dendrobaena_pygmea 0%
161 BM_indéterminable_endogeic 0%
163 BM_Lumbricus_indéterminable_anecic 0%
164 BM_Eisenia_indéterminable 0%
316 AB_Ethnodrilus_zajonci 0%
317 BM_Ethnodrilus_zajonci 0%
318 AB_Hormogaster_sp 0%
319 AB_Octodrilus_lisseansis 0%
320 BM_Hormogaster_sp 0%
321 BM_Octodrilus_lisseansis 0%
322 AB_Scherotheca_michaelseni 0%
323 AB_Scherotheca_occidentalis 0%
324 AB_Scherotheca_occitanica 0%
325 AB_Aporrectodea_haymozi 0%
326 AB_Dendrobaena_alpina 0%
327 AB_Scherotheca_corsicana 0%
328 AB_Octolasion_tyrtaeum 0%
329 AB_Lumbricus_rubellus 0%
330 AB_Aporrectodea_terrestris 0%
331 AB_Aporrectodea_rubicunda 0%
332 AB_Diporodrilus_omodeoi 0%
333 AB_Eisenia_parva 0%
334 AB_Scherotheca_albomaculata 0%
335 AB_Bimastos_rubidus 0%
336 AB_Scherotheca_portonana 0%
337 AB_Scherotheca_brevisella 0%
338 AB_Proctodrilus_antipai 0%
339 AB_Octodrilus_juvyi 0%
340 AB_Dendrobaena_byblica 0%
341 AB_Dendrodrilus_subrubicundus 0%
342 AB_Prosellodrilus_albus 0%
343 AB_Kritodrilus_tetryae 0%
344 AB_Lumbricus_klarae 0%
345 AB_Aporrectodea_haymoziformis 0%
346 AB_Kritodrilus_micrurus 0%
347 AB_Allolobophora_satchelli 0%
348 AB_Ethnodrilus_aveli 0%
349 AB_Aporrectodea_zicsii 0%
350 AB_Diporodrilus_pilosus 0%
351 AB_Eumenescolex_emiliae 0%
352 AB_Dendrobaena_pantaleonis 0%
353 AB_Dendrobaena_veneta 0%
354 AB_Lumbricidae_f 0%
355 AB_Murchieona_minuscula 0%
356 BM_Lumbricidae_f 0%
357 BM_Murchieona_minuscula 0%
358 BM_Octolasion_tyrtaeum 0%
359 AB_Dendrobaena_alpina_indéterminable 0%
360 BM_Dendrobaena_alpina_indéterminable 0%
361 AB_Oligochaeta_so 0%
362 BM_Oligochaeta_so 0%
363 AB_Adult 0%
364 AB_cocon 0%
365 AB_indéterminé 0%
366 AB_Juvenile 0%
367 AB_Sub.adult 0%
368 AB_Allolobophora_delitescens 0%
369 AB_Amynthas_indicus 0%
370 AB_Aporrectodea_arverna 0%
371 AB_Aporrectodea_cuendeti 0%
372 AB_Aporrectodea_gogna 0%
373 AB_Aporrectodea_sineporis 0%
374 AB_Aporrectodea_velox 0%
375 AB_Aporrectodea_voconca 0%
376 AB_Bimastos_parvus 0%
377 AB_Boucheona_corbierensis 0%
378 AB_Boucheona_rosae 0%
379 AB_Ethnodrilus_gatesi 0%
380 AB_Ethnodrilus_setusmonsanus 0%
381 AB_Flabellodrilus_luberonensis 0%
382 AB_Gatesona_chaetophora 0%
383 AB_Gatesona_lablacherensis 0%
384 AB_Gatesona_rutena 0%
385 AB_Haplotaxis_gordioides 0%
386 AB_Helodrilus_oculatus 0%
387 AB_Hormogaster_insularis 0%
388 AB_Hormogaster_samnitica_lirapora 0%
389 AB_Kritodrilus_calarensis 0%
390 AB_Lucquesia_tiginosa 0%
391 AB_Lumbricus_bouchei 0%
392 AB_Lumbricus_improvisus 0%
393 AB_Octodrilus_hemiandrus 0%
394 AB_Panoniona_satchelli 0%
395 AB_Proctodrilus_tuberculatus 0%
396 AB_Prosellodrilus_alatus 0%
397 AB_Prosellodrilus_biserialis 0%
398 AB_Prosellodrilus_fragilis_polythecosus 0%
399 AB_Prosellodrilus_idealis 0%
400 AB_Scherotheca_altarocca 0%
401 AB_Scherotheca_betharramensis 0%
402 AB_Scherotheca_boccaverhju 0%
403 AB_Scherotheca_capcorsana 0%
404 AB_Scherotheca_chicharia 0%
405 AB_Scherotheca_darioi 0%
406 AB_Scherotheca_gigas_gigas 0%
407 AB_Scherotheca_haymozi 0%
408 AB_Scherotheca_minor_minorissima 0%
409 AB_Scherotheca_monspessulensis_idica 0%
410 AB_Scherotheca_monspessulensis_monspessulensis 0%
411 AB_Scherotheca_orbiensis 0%
412 AB_Scherotheca_pereli 0%
413 AB_Scherotheca_qiui 0%
414 AB_Scherotheca_sanaryensis 0%
415 AB_Scherotheca_trezencensis 0%
416 AB_Vignysa_callasensis 0%
417 AB_Vignysa_teres 0%
418 AB_Vosgesia_zicsii 0%
419 AB_Zophoscolex_atlanticus 0%
420 AB_Zophoscolex_micellus 0%
421 BM_Allolobophora_delitescens 0%
422 BM_Amynthas_indicus 0%
423 BM_Aporrectodea_arverna 0%
424 BM_Aporrectodea_cuendeti 0%
425 BM_Aporrectodea_gogna 0%
426 BM_Aporrectodea_sineporis 0%
427 BM_Aporrectodea_velox 0%
428 BM_Aporrectodea_voconca 0%
429 BM_Bimastos_parvus 0%
430 BM_Boucheona_corbierensis 0%
431 BM_Boucheona_rosae 0%
432 BM_Dendrobaena_byblica 0%
433 BM_Diporodrilus_omodeoi 0%
434 BM_Diporodrilus_pilosus 0%
435 BM_Ethnodrilus_aveli 0%
436 BM_Ethnodrilus_gatesi 0%
437 BM_Ethnodrilus_setusmonsanus 0%
438 BM_Flabellodrilus_luberonensis 0%
439 BM_Gatesona_chaetophora 0%
440 BM_Gatesona_lablacherensis 0%
441 BM_Gatesona_rutena 0%
442 BM_Haplotaxis_gordioides 0%
443 BM_Helodrilus_oculatus 0%
444 BM_Hormogaster_insularis 0%
445 BM_Hormogaster_samnitica_lirapora 0%
446 BM_Kritodrilus_calarensis 0%
447 BM_Lucquesia_tiginosa 0%
448 BM_Lumbricus_bouchei 0%
449 BM_Lumbricus_improvisus 0%
450 BM_Lumbricus_klarae 0%
451 BM_Octodrilus_hemiandrus 0%
452 BM_Panoniona_satchelli 0%
453 BM_Proctodrilus_tuberculatus 0%
454 BM_Prosellodrilus_alatus 0%
455 BM_Prosellodrilus_biserialis 0%
456 BM_Prosellodrilus_fragilis_polythecosus 0%
457 BM_Prosellodrilus_idealis 0%
458 BM_Scherotheca_albomaculata 0%
459 BM_Scherotheca_altarocca 0%
460 BM_Scherotheca_betharramensis 0%
461 BM_Scherotheca_boccaverhju 0%
462 BM_Scherotheca_brevisella 0%
463 BM_Scherotheca_capcorsana 0%
464 BM_Scherotheca_chicharia 0%
465 BM_Scherotheca_darioi 0%
466 BM_Scherotheca_gigas_gigas 0%
467 BM_Scherotheca_haymozi 0%
468 BM_Scherotheca_minor_minorissima 0%
469 BM_Scherotheca_monspessulensis_idica 0%
470 BM_Scherotheca_monspessulensis_monspessulensis 0%
471 BM_Scherotheca_orbiensis 0%
472 BM_Scherotheca_pereli 0%
473 BM_Scherotheca_portonana 0%
474 BM_Scherotheca_qiui 0%
475 BM_Scherotheca_sanaryensis 0%
476 BM_Scherotheca_trezencensis 0%
477 BM_Vignysa_callasensis 0%
478 BM_Vignysa_teres 0%
479 BM_Vosgesia_zicsii 0%
480 BM_Zophoscolex_atlanticus 0%
481 BM_Zophoscolex_micellus 0%

2.3 Focus on GPS coordinates

  • There are 398 NAs (CR = 92.8%) in gps_x
  • There are 401 NAs (CR = 92.7%) in gps_y
Code
# Number of rows before cleaning
n_line <- nrow(bdd)
# Strip every character that is not a digit, a dot or a minus sign, then parse
bdd$gps_x <- as.numeric(gsub("[^0-9.-]", "", bdd$gps_x))
bdd$gps_y <- as.numeric(gsub("[^0-9.-]", "", bdd$gps_y))
# Keep only the rows where both coordinates are known
bdd <- bdd[!is.na(bdd$gps_x) & !is.na(bdd$gps_y), ]
# Same condition applied again with dplyr; no NA coordinate is left at this point
bdd <- filter(bdd, !is.na(gps_x), !is.na(gps_y))
#sum(is.na(bdd$gps_x))
#sum(is.na(bdd$gps_y))
  • We delete the NA lines in the GPS coordinates
  • The database therefore changes from 5520 to 5119 observations.
  • Merging the database with the climate database
Code
# Add the climate variables (see the climate-data extraction chunk)
# NOTE(review): absolute, machine-specific path — this chunk only runs where the file exists.
chemin_fichier <- "C:/Users/diall/OneDrive/Bureau/M2_MODE/stage_abdou_m2/datas/bdd_climat_ok.rds"
# saveRDS(bdd_climat_ok, chemin_fichier)
bdd_climat_ok <- readRDS(chemin_fichier)
# Climate table without its own GPS columns
df_fusion <- subset(bdd_climat_ok, select = -c(gps_x, gps_y))

# Rows of bdd whose ID has no match in the climate table (not used further in this chunk)
rows_not_in_df_fusion <- anti_join(bdd, df_fusion, by = "ID")
# merge() with its default arguments keeps only the IDs present in both tables
merged_df <- merge(bdd, df_fusion, by = "ID")

# Rows of merged_df whose ID is absent from bdd (not used further in this chunk)
ids_not_matching <- anti_join( merged_df,bdd, by = "ID")

# The merged table becomes the working database
bdd = merged_df

#bdd <- cbind(bdd, df_fusion) # all = TRUE to keep every row

2.4 Cartography

Code
# Number of rows before removing out-of-area points
n_ligne <- nrow(bdd)

# Coordinates as numeric values, plus the row number of each point
df_coord <- bdd[, c("gps_x", "gps_y")]
df_coord <- mutate(df_coord, gps_x = as.numeric(gps_x), gps_y = as.numeric(gps_y))
df_coord$num_ligne <- seq(nrow(df_coord))

# Base map with one small blue circle per sampling point
carte <- leaflet(df_coord)
carte <- addTiles(carte)
carte <- addCircleMarkers(
  carte,
  lng = ~gps_x,
  lat = ~gps_y,
  radius = 0.8,
  fillOpacity = 0.8,
  fillColor = "blue"
)
carte
  • We delete points outside France (22)
  • The database therefore changes from 5120 to 5098 observations.

2.5 Focus on years

  • Cleaning the Annee column

  • CR of Annee = 96.1% (33 levels)

Code
# Number of observations per year
bdd$Annee <- as.factor(bdd$Annee)
summary_df <- setNames(as.data.frame(summary(bdd$Annee)), "Numbers")
kable(summary_df)
Numbers
1990 19
1991 23
1992 22
1993 15
1994 29
1995 6
1996 7
1997 8
1998 15
1999 30
2000 24
2001 10
2002 20
2004 9
2005 47
2006 57
2007 78
2008 24
2009 52
2010 67
2011 69
2012 127
2013 285
2014 542
2015 261
2016 508
2017 287
2018 372
2019 506
2020 353
2021 832
2022 344
2023 50

2.6 Focus on protocols

  • List of protocols available in the database (5 levels)
Code
# Number of observations per protocol
bdd$Protocole <- as.factor(bdd$Protocole)
summary_df <- setNames(as.data.frame(summary(bdd$Protocole)), "Numbers")
kable(summary_df, padding = 5)
Numbers
F 51
F_HS 872
HS 2940
M 1166
M_HS 69
  • Selection of protocols: F_HS, HS
Code
# Number of rows before the protocol selection
n_ligne <- nrow(bdd)
#select_protocole =c("F_HS", "FHS", "hand sorting" ,"HS")
select_protocole <- c("F_HS", "HS")
# Keep the selected protocols and drop the factor levels left empty
bdd <- droplevels(bdd[bdd$Protocole %in% select_protocole, ])
bdd$Protocole <- as.factor(bdd$Protocole)
# Counts per remaining protocol
summary_df <- setNames(as.data.frame(summary(bdd$Protocole)), "Numbers")
kable(summary_df, padding = 5)
Numbers
F_HS 872
HS 2940
  • The database therefore changes from 5098 to 3812 observations.

2.7 Focus on clcm_lvl1

  • CR of clcm_lvl1 = 95.4% (5 levels)
Code
# Counts per level of clcm_lvl1 (table display disabled)
bdd$clcm_lvl1 <- as.factor(bdd$clcm_lvl1)
summary_df <- setNames(as.data.frame(summary(bdd$clcm_lvl1)), "Numbers")
# kable(summary_df,padding = 5)
  • Merging levels
Code
# Rename the two French labels to their English equivalents
levels(bdd$clcm_lvl1) <- replace(
  levels(bdd$clcm_lvl1),
  levels(bdd$clcm_lvl1) == "1_Naturel",
  "Forest and semi natural areas"
)
levels(bdd$clcm_lvl1) <- replace(
  levels(bdd$clcm_lvl1),
  levels(bdd$clcm_lvl1) == "2_Agricole",
  "Agricultural areas"
)

# Counts per level after renaming
bdd$clcm_lvl1 <- as.factor(bdd$clcm_lvl1)
summary_df <- setNames(as.data.frame(summary(bdd$clcm_lvl1)), "Numbers")
kable(summary_df, padding = 5)
Numbers
Forest and semi natural areas 204
Agricultural areas 2732
Artificial surfaces 860
NA's 16
  • Update code_clcm_lvl1
Code
#bdd$code_clcm_lvl1 = as.factor(bdd$code_clcm_lvl1)

# Align the numeric code with the merged clcm_lvl1 labels.
# `%in%` is used instead of `==` because clcm_lvl1 contains NA: with `==` the
# test is NA on those rows and ifelse() would overwrite their existing code
# with NA. With `%in%` those rows keep whatever code they already had.
bdd$code_clcm_lvl1 <- ifelse(
  bdd$clcm_lvl1 %in% "Forest and semi natural areas", 3, bdd$code_clcm_lvl1
)

bdd$code_clcm_lvl1 <- ifelse(
  bdd$clcm_lvl1 %in% "Agricultural areas", 2, bdd$code_clcm_lvl1
)
  • For the moment, we will keep the NA of clcm_lvl1

2.8 Focus on clcm_lvl2

  • CR of clcm_lvl2 = 95.4% (11 levels)
Code
# Store clcm_lvl2 as a factor and build its count table (display disabled).
bdd$clcm_lvl2 <- as.factor(bdd$clcm_lvl2)
summary_df <- setNames(as.data.frame(summary(bdd$clcm_lvl2)), "Numbers")
# kable(summary_df, padding = 8)
  • Merging levels
Code
# Rename the legacy label; if "Arable land" already exists as a level, the two
# levels are merged.
niveaux <- levels(bdd$clcm_lvl2)
niveaux[niveaux == "21_Agricole ouvert"] <- "Arable land"
levels(bdd$clcm_lvl2) <- niveaux

# Re-tabulate the merged levels.
bdd$clcm_lvl2 <- as.factor(bdd$clcm_lvl2)
summary_df <- setNames(as.data.frame(summary(bdd$clcm_lvl2)), "Numbers")
kable(summary_df, padding = 5)
Numbers
Arable land 1496
Artificial, non-agricultural vegetated areas 667
Forests 117
Heterogeneous agricultural areas 107
Industrial, commercial and transport units 168
Mine, dump and construction sites 25
Open spaces with little or no vegetation 1
Pastures 372
Permanent crops 757
Scrub and/or herbaceous vegetation associations 85
NA's 17
  • Update code_clcm_lvl2
Code
# Align the numeric code with the merged "Arable land" label.
# `%in%` (never NA) replaces `==` so that rows whose clcm_lvl2 is NA keep their
# existing code instead of being overwritten with NA by ifelse().
bdd$code_clcm_lvl2 <- ifelse(
  bdd$clcm_lvl2 %in% "Arable land", 21, bdd$code_clcm_lvl2
)

2.9 Focus on clcm_lvl3

  • CR of clcm_lvl3 = 95.4% (23 levels)
Code
# Store clcm_lvl3 as a factor and tabulate the number of rows per level.
bdd$clcm_lvl3 <- as.factor(bdd$clcm_lvl3)
summary_df <- setNames(as.data.frame(summary(bdd$clcm_lvl3)), "Numbers")
kable(summary_df, padding = 5)
Numbers
Agro-forestry areas 89
Airports 44
Beaches, dunes, sands 1
Broad-leaved forest 25
Complex cultivation patterns 13
Coniferous forest 4
Construction sites 21
Fruit trees and berry plantations 18
Green urban areas 648
Industrial or commercial units and public facilities 10
Mixed forest 88
Moors and heathland 7
Natural grasslands 65
Non-irrigated arable land 1493
Other artificial, non-agricultural vegetated areas 12
Other heterogeneous agricultural areas 5
Other mine, dump and construction sites 4
Other scrub and/or herbaceous vegetation associations 1
Pastures, meadows and other permanent grasslands under agricultural use 372
Road and rail networks and associated land 114
Sport and leisure facilities 7
Transitional woodland-shrub 12
Vineyards 739
NA's 20

2.10 Land use selection (clcm_lvl3)

Code
# Land-use classes (clcm_lvl3 labels) retained for the analysis.
select_os <- c(
  "Broad-leaved forest",
  "Coniferous forest",
  "Mixed forest",
  "Pastures, meadows and other permanent grasslands under agricultural use",
  "Non-irrigated arable land",
  "Vineyards",
  "Green urban areas",
  "Natural grasslands"
)

# Keep only rows in the selected classes (rows with NA clcm_lvl3 are dropped
# too) and discard the factor levels that became empty.
bdd <- droplevels(bdd[is.element(bdd$clcm_lvl3, select_os), ])
bdd$clcm_lvl3 <- as.factor(bdd$clcm_lvl3)
summary_df <- setNames(as.data.frame(summary(bdd$clcm_lvl3)), "Numbers")
kable(summary_df)
Numbers
Broad-leaved forest 25
Coniferous forest 4
Green urban areas 648
Mixed forest 88
Natural grasslands 65
Non-irrigated arable land 1493
Pastures, meadows and other permanent grasslands under agricultural use 372
Vineyards 739
  • Maybe we could merge the three forest types?

2.11 Land use & protocol overview

Code
# kable (table(bdd$clcm_lvl1, bdd$Protocole,exclude = NULL), align = "c", format = "pipe", padding = 10)
# kable (table(bdd$clcm_lvl2, bdd$Protocole,exclude = NULL), align = "c", format = "pipe", padding = 10)
# Cross-tabulate land use (clcm_lvl3) against protocol; exclude = NULL keeps
# NA as a category if any is present.
tab_os_protocole <- table(bdd$clcm_lvl3, bdd$Protocole, exclude = NULL)
kable(tab_os_protocole, align = "c", format = "pipe", padding = 10)
F_HS HS
Broad-leaved forest 9 16
Coniferous forest 3 1
Green urban areas 0 648
Mixed forest 11 77
Natural grasslands 3 62
Non-irrigated arable land 276 1217
Pastures, meadows and other permanent grasslands under agricultural use 116 256
Vineyards 373 366

3 Earthworms data

3.1 Total richness calculation method

  • Removal of columns with only NA (226) and/or only 0
  • Identify columns beginning with AB_
  • Deletion of AB_ columns that are not species
  • Calculate richness by assigning 1 to each column if the value is different from 0 and NA
  • Total richness = 1 if the plot has a value in AB and/or BM

3.2 Total abundance (CR = 100 % )

3.3 Total biomass (CR = 41.2%)

3.4 Total taxonomic richness (CR = 100 % )

3.5 Plot of AB_tot outlier values

Code
# summary(AB_tot_aberant)
# Keep the rows of AB_tot_aberant whose total abundance exceeds the largest
# AB_tot still present in bdd.
AB_tot_aberant_2 <- AB_tot_aberant[AB_tot_aberant$AB_tot > max(bdd$AB_tot), ]

# Store the three land-cover levels as factors, then drop unused levels.
colonnes_clcm <- c("clcm_lvl1", "clcm_lvl2", "clcm_lvl3")
AB_tot_aberant_2[colonnes_clcm] <- lapply(AB_tot_aberant_2[colonnes_clcm], as.factor)
AB_tot_aberant_2 <- droplevels(AB_tot_aberant_2)

# One line per distinct programme / year / land-use combination.
kable(unique(AB_tot_aberant_2[, c("Programme", "Annee", "clcm_lvl3")]))
Programme Annee clcm_lvl3
10 AF 2014 Pastures, meadows and other permanent grasslands under agricultural use
467 CEREMA 2017 Green urban areas
545 Dephy Bio 2018 Non-irrigated arable land
575 Dephy 2016 Pastures, meadows and other permanent grasslands under agricultural use
644 Dephy 2020 Pastures, meadows and other permanent grasslands under agricultural use
785 ECLAS 2018 Green urban areas
999 JASSUR 2014 Green urban areas
1110 Life-PTD 2016 Pastures, meadows and other permanent grasslands under agricultural use
1111 Life-PTD 2019 Pastures, meadows and other permanent grasslands under agricultural use
1298 OPVT_BZH 2019 Green urban areas
1321 OPVT_BZH 2021 Natural grasslands
1346 OPVT_BZH 2020 Green urban areas
1361 OPVT_BZH 2020 Natural grasslands
1388 OPVT_BZH 2021 Non-irrigated arable land
1445 OPVT_IDF 2018 Green urban areas
1622 OPVT_IDF 2016 Green urban areas
1893 RMQS_BioDiv 2005 Pastures, meadows and other permanent grasslands under agricultural use
3674 SBT-ENI-TB 2021 Non-irrigated arable land
3956 Sols de Bretagne 2016 Green urban areas
3968 Sols de Bretagne 2015 Green urban areas
4511 TIGA 2021 Green urban areas
5065 ZAA_HR 2016 Pastures, meadows and other permanent grasslands under agricultural use
Code
# Plot total abundance and rescaled total richness for the outlier rows, save
# the static figure to disk, then display an interactive plotly version.
df <- AB_tot_aberant_2
# seq_len() instead of 1:nrow(df): with zero rows, 1:0 yields c(1, 0) and the
# column assignment would fail.
df$observation <- seq_len(nrow(df))
# Richness is multiplied by 100 so both series share one y axis. The column
# name keeps its "_10" suffix even though the factor is 100.
df$Richesse_tot_10 <- df$Richesse_tot * 100
g_AB_tot_aberant <- ggplot(df, aes(x = observation)) +
  geom_point(aes(y = AB_tot, color = "Abundance")) +
  geom_line(aes(y = AB_tot, color = "Abundance")) +
  geom_point(aes(y = Richesse_tot_10, color = "Richness*100")) +
  geom_line(aes(y = Richesse_tot_10, color = "Richness*100")) +
  # ggtitle(title)
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(title = "  ", x = "Observation", y = "Values", color = "Legend:") +
  scale_color_manual(values = c("Abundance" = "red", "Richness*100" = "green"))
# Written to the current working directory.
ggsave("g_AB_tot_aberant.png", plot = g_AB_tot_aberant, dpi = 300)
# The ggplot object is replaced by its plotly conversion before display.
g_AB_tot_aberant <- ggplotly(g_AB_tot_aberant)
g_AB_tot_aberant

4 Climate data extraction

4.1 The source database (CHELSA V2)

Code
# Read the "climat" sheet of the Excel workbook.
chemin_fichier_excel <- "C:/Users/diall/Downloads/datas/ODMAP.xlsx"
climat <- read.xlsx(chemin_fichier_excel, sheet = "climat")

# Visually "merge" repeated cells: within each column, every value that has
# already appeared higher up is replaced by an empty string.
climat[] <- lapply(
  climat,
  function(valeurs) ifelse(duplicated(valeurs), "", valeurs)
)

# Render the table with kableExtra. column_spec() is called on every column
# without any styling argument.
kableExtra::kable(climat) %>%
  kableExtra::kable_styling() %>%
  kableExtra::column_spec(1:ncol(climat))
Source DB.name Categories Variables Units Formats Periods Resolution References
Fourcade et al., 2022 CHELSA V2.1 climate bio1 °C .tif 1981 - 2010 30 arc-second, ~ 1 km  https://chelsa-climate.org/bioclim/
https://zenodo.org/records/2525665
https://zenodo.org/records/2525662
bio19 kg/m² https://zenodo.org/records/2525553

4.2 Extraction method

  • Link recovery ( see file link .tif )

  • Extracting variable names

  • Use of the extraction() function

  • Convert columns to correct format and unit

  • Adding variables to the LANDWORM database

Code
# Read the list of .tif links (one per line, column V1) and derive a short
# variable name from the text between "CHELSA_" and "_1981".
liens_tif <- utils::read.table(file = "C:/Users/diall/Downloads/datas/envidatS3paths.txt")
liens_tif$shortname <- str_extract(liens_tif$V1, "(?<=CHELSA_).*?(?=_1981)")

# Four layers share the short name "rsds"; give them distinct names.
# which() ignores NA short names (links that did not match the pattern), which
# would otherwise make the subscripted assignment fail. The count is checked so
# that a changed link list fails loudly instead of being mislabelled.
# NOTE(review): assumes the four rsds links are listed in max/mean/min/range
# order in the file. TODO confirm.
lignes_rsds <- which(liens_tif$shortname == "rsds")
stopifnot(length(lignes_rsds) == 4L)
liens_tif$shortname[lignes_rsds] <- c("rsds_max", "rsds_mean", "rsds_min", "rsds_range")

#all(is.na(bdd$gps_x))
#all(is.na(bdd$gps_y))

# Identifier and coordinates only; extracted variables are added column-wise.
bdd_climat <- bdd[, c("ID", "gps_x", "gps_y")]

# Timing wrapper around the extraction loop (currently disabled).
temp_1 <- Sys.time()
#for( i in 1:nrow(liens_tif)){
  #nom=liens_tif[i,c("shortname")]
  #df_ext <- extraction(nom_col = nom,df = bdd_climat,conv = 1, 
                  #tif_file_path = liens_tif[i,c("V1")] ) 
  #bdd_climat[[nom]] <- df_ext [,nom]
  #rm("df_ext","nom")
  #cat("Extraction: ",i,"/",nrow(liens_tif), "\n")
#}
temp_2 <- Sys.time()
duree <- difftime(temp_2, temp_1)

# Path of the cached extraction result (save/load currently disabled).
chemin_fichier <- "C:/Users/diall/OneDrive/Bureau/M2_MODE/stage_abdou_m2/datas/bdd_climat.rds"
# saveRDS(bdd_climat, chemin_fichier)
#bdd_climat <- readRDS(chemin_fichier)

# Start of conversion ----------------------------------------------------------
# Lookup table with one row per extracted layer, holding the unit divisor,
# scale factor and offset. The (disabled) loop below applies them as
# ((raw / unit) * scale) + offset.
conv_df_climat <- data.frame(shortname = liens_tif$shortname)

# Unit divisor: 100 for bio4, 1 for everything else.
unit_100 <- c("bio4")
conv_df_climat$unit <- ifelse(conv_df_climat$shortname %in% unit_100, 100, 1)

# scale = 1
scale_1 <- c(
  "fcf", "fgd", "gddlgd0", "gddlgd5", "gddlgd10", "gdgfgd0", "gdgfgd5",
  "gdgfgd10", "gsl", "kg0", "kg1", "kg2", "kg3", "kg4", "kg5", "lgd",
  "ngd0", "ngd5", "ngd10", "scd"
)

# scale = 0.01
scale_01 <- c(
  "hurs_max", "hurs_mean", "hurs_min", "hurs_range",
  "pet_penman_max", "pet_penman_mean", "pet_penman_min", "pet_penman_range"
)

# scale = 0.001
# The pet_penman_* names used to be listed here as well. They were unreachable
# because scale_01 is tested first, so removing them leaves the scales
# unchanged. "rsds" only matches if the four rsds layers were not renamed
# upstream.
scale_001 <- c(
  "rsds", "sfcWind_max", "sfcWind_mean", "sfcWind_min", "sfcWind_range",
  "rsds_max", "rsds_mean", "rsds_min", "rsds_range"
)

# Scale priority: scale_1, then scale_01, then scale_001, otherwise 0.1.
conv_df_climat$scale <- ifelse(conv_df_climat$shortname %in% scale_1, 1,
  ifelse(conv_df_climat$shortname %in% scale_01, 0.01,
    ifelse(conv_df_climat$shortname %in% scale_001, 0.001, 0.1)
  )
)

# Offset: -273.15 (Kelvin to degrees Celsius) for temperature layers only,
# 0 otherwise. "gsl" and "gdgfgd10" were removed from this list: they are
# day-count variables (both are already in scale_1), and shifting them by
# -273.15 produced the negative values visible in the variable summary.
# NOTE(review): any cached bdd_climat_ok.rds built with the previous list
# still carries the old offset and must be regenerated.
offset_273 <- c("bio1", "bio5", "bio6", "bio8", "bio9", "bio10", "bio11", "gst")
conv_df_climat$offset <- ifelse(conv_df_climat$shortname %in% offset_273, -273.15, 0)

# Not present in the explanatory PDF, so no conversion: scale forced to 1.
pas_pdf <- c("ai", "swb", "clt_max", "clt_mean", "clt_min", "clt_range")
# Diagnostic: short names that appear in none of the lists above.
verif <- c(unit_100, scale_1, scale_01, scale_001, offset_273)
pas_pdf_2 <- setdiff(conv_df_climat$shortname, verif)
conv_df_climat[conv_df_climat$shortname %in% pas_pdf, "scale"] <- 1

#bdd_climat_ok=bdd_climat[,c("ID","gps_x","gps_y")]

#for ( i in conv_df_climat$shortname){
  #if (i %in% names(bdd_climat)){
  #unitee= conv_df_climat[conv_df_climat$shortname ==i,"unit"]
  #echelle = conv_df_climat[conv_df_climat$shortname ==i,"scale"]
  #decalage = conv_df_climat[conv_df_climat$shortname ==i,"offset"]
  #bdd_climat_ok[[i]] = ((bdd_climat[[i]] / unitee)* echelle) + decalage
  #}else {
    #cat("Attention ",i, "n'exite pas dans la bdd_climat","\n")
  #}
#}


# chemin_fichier <- "C:/Users/diall/OneDrive/Bureau/M2_MODE/stage_abdou_m2/datas/bdd_climat_ok.rds"
# saveRDS(bdd_climat_ok, chemin_fichier)
# bdd_climat_ok <- readRDS(chemin_fichier)
# End of conversion

#df_fusion <- subset(bdd_climat_ok, select = -c(ID,gps_x, gps_y))
#bdd <- cbind(bdd, df_fusion) # all = TRUE pour garder toutes les lignes

4.3 List of variables

Variable description

      ID                gps_x             gps_y             ai        
 Length:7378        Min.   :-5.0496   Min.   :41.44   Min.   :0.5901  
 Class :character   1st Qu.:-0.3535   1st Qu.:46.17   1st Qu.:0.9428  
 Mode  :character   Median : 2.0985   Median :47.92   Median :1.0278  
                    Mean   : 1.9577   Mean   :47.26   Mean   :1.0878  
                    3rd Qu.: 4.1679   3rd Qu.:48.76   3rd Qu.:1.1373  
                    Max.   : 9.5213   Max.   :50.98   Max.   :3.4735  
                                                      NA's   :7       
     bio10           bio11             bio12            bio13       
 Min.   : 8.35   Min.   :-10.150   Min.   : 563.7   Min.   : 61.80  
 1st Qu.:17.55   1st Qu.:  3.550   1st Qu.: 738.5   1st Qu.: 78.20  
 Median :18.45   Median :  4.250   Median : 816.2   Median : 88.30  
 Mean   :18.61   Mean   :  4.613   Mean   : 847.3   Mean   : 93.17  
 3rd Qu.:19.55   3rd Qu.:  5.850   3rd Qu.: 898.8   3rd Qu.:102.70  
 Max.   :24.75   Max.   : 10.950   Max.   :2158.3   Max.   :239.90  
                                                                    
     bio14            bio15           bio16           bio17      
 Min.   :  7.00   Min.   : 8.10   Min.   :173.8   Min.   : 40.5  
 1st Qu.: 45.83   1st Qu.:12.50   1st Qu.:218.1   1st Qu.:149.3  
 Median : 49.60   Median :16.10   Median :246.7   Median :160.4  
 Mean   : 49.88   Mean   :18.64   Mean   :259.8   Mean   :164.9  
 3rd Qu.: 53.90   3rd Qu.:22.10   3rd Qu.:288.9   3rd Qu.:175.4  
 Max.   :154.50   Max.   :50.90   Max.   :645.4   Max.   :490.0  
                                                                 
     bio18           bio19            bio1            bio2       
 Min.   : 50.8   Min.   :137.4   Min.   :-1.05   Min.   : 1.600  
 1st Qu.:155.3   1st Qu.:184.2   1st Qu.:10.65   1st Qu.: 7.400  
 Median :177.6   Median :208.8   Median :11.25   Median : 7.600  
 Mean   :181.6   Mean   :220.5   Mean   :11.45   Mean   : 7.619  
 3rd Qu.:203.0   3rd Qu.:241.8   3rd Qu.:11.95   3rd Qu.: 8.000  
 Max.   :559.0   Max.   :574.1   Max.   :17.05   Max.   :11.300  
                                                                 
      bio3             bio4            bio5            bio6        
 Min.   :0.1100   Min.   :2.736   Min.   :13.85   Min.   :-15.350  
 1st Qu.:0.3220   1st Qu.:5.193   1st Qu.:22.55   1st Qu.:  0.250  
 Median :0.3410   Median :5.475   Median :23.55   Median :  1.150  
 Mean   :0.3383   Mean   :5.452   Mean   :23.68   Mean   :  1.186  
 3rd Qu.:0.3550   3rd Qu.:5.825   3rd Qu.:24.75   3rd Qu.:  2.450  
 Max.   :0.3990   Max.   :7.416   Max.   :31.25   Max.   :  9.350  
                                                                   
      bio7            bio8             bio9          clt_max        clt_mean   
 Min.   : 9.50   Min.   :-8.950   Min.   :-8.55   Min.   :3101   Min.   :2381  
 1st Qu.:21.50   1st Qu.: 7.350   1st Qu.: 6.65   1st Qu.:4824   1st Qu.:3962  
 Median :22.40   Median : 8.350   Median : 7.45   Median :5110   Median :4227  
 Mean   :22.49   Mean   : 9.151   Mean   :12.39   Mean   :5043   Mean   :4106  
 3rd Qu.:24.10   3rd Qu.: 9.850   3rd Qu.:18.75   3rd Qu.:5447   3rd Qu.:4374  
 Max.   :30.20   Max.   :18.150   Max.   :24.75   Max.   :6682   Max.   :4942  
                                                                               
    clt_min       clt_range       cmi_max          cmi_mean      
 Min.   : 688   Min.   : 890   Min.   : 18.60   Min.   :-58.800  
 1st Qu.:2936   1st Qu.:1746   1st Qu.: 45.80   1st Qu.:-15.000  
 Median :3289   Median :1996   Median : 53.20   Median : -9.300  
 Mean   :3079   Mean   :1964   Mean   : 58.57   Mean   : -7.522  
 3rd Qu.:3412   3rd Qu.:2148   3rd Qu.: 66.50   3rd Qu.: -2.700  
 Max.   :3999   Max.   :3432   Max.   :192.30   Max.   :103.600  
                               NA's   :3        NA's   :3        
    cmi_min          cmi_range          fcf              fgd       
 Min.   :-200.40   Min.   : 95.0   Min.   : 0.600   Min.   : 2.30  
 1st Qu.: -91.00   1st Qu.:118.8   1st Qu.: 4.300   1st Qu.:19.70  
 Median : -78.00   Median :136.2   Median : 5.600   Median :22.80  
 Mean   : -82.68   Mean   :141.2   Mean   : 6.191   Mean   :20.86  
 3rd Qu.: -68.00   3rd Qu.:157.6   3rd Qu.: 7.300   3rd Qu.:23.80  
 Max.   :  55.50   Max.   :310.5   Max.   :19.900   Max.   :26.90  
 NA's   :3         NA's   :3       NA's   :5541     NA's   :6577   
      gdd0          gdd10             gdd5           gddlgd0     
 Min.   :1006   Min.   :   5.2   Min.   : 314.6   Min.   : 0.30  
 1st Qu.:3903   1st Qu.: 989.8   1st Qu.:2182.8   1st Qu.:31.00  
 Median :4132   Median :1109.0   Median :2368.4   Median :32.55  
 Mean   :4195   Mean   :1189.2   Mean   :2452.8   Mean   :31.06  
 3rd Qu.:4388   3rd Qu.:1321.6   3rd Qu.:2591.7   3rd Qu.:34.10  
 Max.   :6253   Max.   :2607.0   Max.   :4427.6   Max.   :36.50  
                NA's   :2                         NA's   :7260   
    gddlgd10        gddlgd5         gdgfgd0          gdgfgd10     
 Min.   : 0.30   Min.   : 0.10   Min.   : 3.100   Min.   :-269.9  
 1st Qu.:29.70   1st Qu.:33.00   1st Qu.: 5.100   1st Qu.:-263.2  
 Median :30.10   Median :33.60   Median : 6.800   Median :-262.6  
 Mean   :30.27   Mean   :33.12   Mean   : 7.269   Mean   :-262.9  
 3rd Qu.:30.90   3rd Qu.:34.00   3rd Qu.: 9.725   3rd Qu.:-262.1  
 Max.   :36.40   Max.   :36.40   Max.   :12.500   Max.   :-253.6  
 NA's   :16      NA's   :2824    NA's   :7260     NA's   :16      
    gdgfgd5            gsl              gsp              gst       
 Min.   : 2.300   Min.   :-267.1   Min.   : 264.5   Min.   : 5.35  
 1st Qu.: 5.400   1st Qu.:-236.7   1st Qu.: 738.1   1st Qu.:10.75  
 Median : 5.800   Median :-236.7   Median : 813.7   Median :11.35  
 Mean   : 5.846   Mean   :-237.2   Mean   : 835.0   Mean   :11.52  
 3rd Qu.: 6.100   3rd Qu.:-236.7   3rd Qu.: 893.4   3rd Qu.:11.95  
 Max.   :15.600   Max.   :-236.7   Max.   :1924.0   Max.   :16.75  
 NA's   :2824                                                      
    hurs_max       hurs_mean        hurs_min       hurs_range    
 Min.   :582.0   Min.   :553.3   Min.   :488.0   Min.   : 17.00  
 1st Qu.:665.5   1st Qu.:615.2   1st Qu.:574.9   1st Qu.: 77.20  
 Median :680.1   Median :627.7   Median :585.3   Median : 93.80  
 Mean   :676.1   Mean   :623.9   Mean   :586.3   Mean   : 89.83  
 3rd Qu.:693.8   3rd Qu.:636.7   3rd Qu.:599.7   3rd Qu.:107.70  
 Max.   :733.3   Max.   :672.1   Max.   :652.4   Max.   :135.90  
                                                                 
      kg0             kg1             kg2             kg3        
 Min.   :0.900   Min.   :0.900   Min.   :0.900   Min.   :0.6000  
 1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:0.7000  
 Median :1.000   Median :1.000   Median :1.000   Median :0.7000  
 Mean   :1.017   Mean   :1.018   Mean   :1.026   Mean   :0.7396  
 3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:0.7000  
 Max.   :3.000   Max.   :3.000   Max.   :3.000   Max.   :2.1000  
                                                                 
      kg4             kg5             lgd             ngd0      
 Min.   :0.600   Min.   :0.300   Min.   : 0.10   Min.   :16.80  
 1st Qu.:1.200   1st Qu.:0.900   1st Qu.:21.50   1st Qu.:36.50  
 Median :1.200   Median :1.000   Median :22.80   Median :36.50  
 Mean   :1.127   Mean   :1.591   Mean   :23.72   Mean   :36.33  
 3rd Qu.:1.300   3rd Qu.:2.300   3rd Qu.:25.50   3rd Qu.:36.50  
 Max.   :2.100   Max.   :3.000   Max.   :36.50   Max.   :36.50  
                                 NA's   :6577                   
     ngd10            ngd5            npp         pet_penman_max
 Min.   : 2.50   Min.   :10.50   Min.   : 574.7   Min.   :1101  
 1st Qu.:18.80   1st Qu.:27.70   1st Qu.:1129.3   1st Qu.:1317  
 Median :19.80   Median :29.00   Median :1215.0   Median :1378  
 Mean   :20.19   Mean   :31.13   Mean   :1223.8   Mean   :1410  
 3rd Qu.:20.90   3rd Qu.:36.50   3rd Qu.:1302.0   3rd Qu.:1485  
 Max.   :36.50   Max.   :36.50   Max.   :1807.9   Max.   :2124  
 NA's   :2                                        NA's   :3     
 pet_penman_mean  pet_penman_min  pet_penman_range    rsds_max   
 Min.   : 478.9   Min.   : 28.3   Min.   : 776.6   Min.   :1850  
 1st Qu.: 707.7   1st Qu.:209.8   1st Qu.:1079.2   1st Qu.:1980  
 Median : 742.0   Median :250.7   Median :1156.3   Median :2078  
 Mean   : 755.0   Mean   :245.6   Mean   :1164.6   Mean   :2092  
 3rd Qu.: 805.2   3rd Qu.:276.3   3rd Qu.:1223.5   3rd Qu.:2140  
 Max.   :1174.5   Max.   :580.5   Max.   :1674.2   Max.   :2702  
 NA's   :3        NA's   :3       NA's   :3                      
   rsds_mean       rsds_min       rsds_range        scd         
 Min.   :1052   Min.   :154.8   Min.   :1492   Min.   : 0.0000  
 1st Qu.:1144   1st Qu.:276.6   1st Qu.:1692   1st Qu.: 0.0000  
 Median :1212   Median :326.2   Median :1735   Median : 0.0000  
 Mean   :1234   Mean   :349.6   Mean   :1743   Mean   : 0.2815  
 3rd Qu.:1287   3rd Qu.:384.3   3rd Qu.:1762   3rd Qu.: 0.0000  
 Max.   :1776   Max.   :891.7   Max.   :2188   Max.   :29.9000  
                                                                
  sfcWind_max     sfcWind_mean    sfcWind_min    sfcWind_range  
 Min.   : 67.1   Min.   : 59.8   Min.   : 55.8   Min.   : 11.3  
 1st Qu.:367.7   1st Qu.:325.8   1st Qu.:275.1   1st Qu.: 90.5  
 Median :440.4   Median :387.6   Median :322.6   Median :114.4  
 Mean   :425.5   Mean   :371.3   Mean   :312.7   Mean   :112.8  
 3rd Qu.:490.3   3rd Qu.:425.5   3rd Qu.:356.8   3rd Qu.:135.2  
 Max.   :788.9   Max.   :717.4   Max.   :665.4   Max.   :294.0  
 NA's   :3       NA's   :3       NA's   :3       NA's   :3      
      swb              swe           vpd_max          vpd_mean    
 Min.   :-722.0   Min.   : 28.3   Min.   : 476.1   Min.   :254.0  
 1st Qu.:-216.0   1st Qu.:150.2   1st Qu.: 846.6   1st Qu.:506.9  
 Median :-152.0   Median :224.2   Median : 935.7   Median :539.0  
 Mean   :-166.4   Mean   :245.6   Mean   : 944.4   Mean   :557.1  
 3rd Qu.:-107.0   3rd Qu.:371.6   3rd Qu.:1021.0   3rd Qu.:589.4  
 Max.   : 230.0   Max.   :551.7   Max.   :1619.6   Max.   :863.8  
 NA's   :4        NA's   :7260                                    
    vpd_min        vpd_range     
 Min.   :118.2   Min.   : 195.0  
 1st Qu.:248.7   1st Qu.: 565.9  
 Median :267.8   Median : 672.1  
 Mean   :280.9   Mean   : 663.5  
 3rd Qu.:303.0   3rd Qu.: 745.9  
 Max.   :508.4   Max.   :1224.4  
                                 

4.4 Temperature

  • Average annual air temperature (°C) = bio1
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   7.75   10.65   11.25   11.35   11.65   15.85 

4.5 Precipitation

  • Annual precipitation (kg/m²) = bio12
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  598.8   729.2   813.8   823.9   886.7  1642.3 

5 Soil data extraction

5.1 The source database (openlandmap)

Code
# Read the "pedo" sheet of the Excel workbook.
chemin_fichier_excel <- "C:/Users/diall/Downloads/datas/ODMAP.xlsx"
pedo <- read.xlsx(chemin_fichier_excel, sheet = "pedo")

# Visually "merge" repeated cells: within each column, every value that has
# already appeared higher up is replaced by an empty string.
pedo[] <- lapply(
  pedo,
  function(valeurs) ifelse(duplicated(valeurs), "", valeurs)
)

# Render the table with kableExtra. column_spec() is called on every column
# without any styling argument.
kableExtra::kable(pedo) %>%
  kableExtra::kable_styling() %>%
  kableExtra::column_spec(1:ncol(pedo))
Source DB.name Categories Variables Units Formats Periods Resolution References X10
Fourcade et al., 2022 OpenLandMap  soil data pH .tif 1950 - 2017 250 m https://zenodo.org/records/2525664 NA
Carbone content g/kg https://zenodo.org/records/2525553
BDD / BDAT Data gouv Sand NA 90 m https://doi.org/10.57745/N4E4NE
Clay
Silt
Salako et al., 2023 USGS-NASA spatial data Elevation NA .shp 2010 1 km https://www.usgs.gov/centers/eros/science/usgs-eros-archive-digital-elevation-global-multi-resolution-terrain-elevation
Rutgers et al., 2018 JRC Capacité d'échange de cations (CEC) cmol·kg −1 2009/2012 https://www.sciencedirect.com/science/article/pii/S0016706119304768 2 à 20 cm
Rutgers et al., 2019 Carbonates de calcium (CaCO 3 ) g·kg −1 3 à 20 cm
Rutgers et al., 2020 Rapport C:N 4 à 20 cm
Rutgers et al., 2021 Azote (N) 5 à 20 cm
Rutgers et al., 2022 Phosphore (P) mg·kg −1 6 à 20 cm
Rutgers et al., 2023 Potassium (K) 7 à 20 cm
Rutgers et al., 2016  pH dans H 2 O https://esdac.jrc.ec.europa.eu/search 0 à 20 cm
Bulk density kg / m-cube https://zenodo.org/records/2525665
Rutgers et al., 2017  pH dans une solution de CaCl2
pH dans H 2 O moins pH dans Cacl 2
  • Average values between surface (0 cm) and 30 cm depth

5.2 Changing the resolution

  • Long compilation time in R

  • GDAL module with the resampleAlg = bilinear method

  • Resolution = 0.0083 = 30 arc-second ~ 1km

5.3 Soil organic carbon (g/kg)

5.4 pH

Extracted values

Measured values & extracted values

  • Clean pH column
Code
# Build the pH comparison table: measured pH (ph_eau) against the extracted
# 0-30 cm value (ph_0_a_30).
df_comp <- bdd[, c("ID", "ID_Site", "ph_eau", "ph_0_a_30")]
df_comp <- df_comp[complete.cases(df_comp$ph_eau), ]
df_comp <- df_comp[complete.cases(df_comp$ph_0_a_30), ]
# Drop entries containing anything other than digits and dots.
df_comp <- df_comp[!grepl("[^0-9.]", df_comp$ph_eau), ]
# as.character() first, so that a factor column is converted from its labels
# rather than from its internal level codes.
df_comp$ph_eau <- as.numeric(as.character(df_comp$ph_eau))
df_comp$ph_0_a_30 <- as.numeric(df_comp$ph_0_a_30)

# Remove values that became NA during conversion (e.g. empty strings) and the
# aberrant value 44140. The previous `== "NA"` comparisons never matched a
# numeric NA, and an NA row index adds all-NA rows instead of dropping them.
# NOTE(review): 44140 looks like an Excel date serial stored in the pH column.
# TODO confirm.
df_comp <- df_comp[!is.na(df_comp$ph_eau) & df_comp$ph_eau != 44140, ]
df_comp <- df_comp[!is.na(df_comp$ph_0_a_30), ]
df_comp <- droplevels(df_comp)
Code
# Flag every row whose ID_Site already appeared earlier in df_comp.
dupliquees <- duplicated(df_comp$ID_Site)
ID_Site_dupliques <- df_comp$ID_Site[dupliquees]
#length(ID_Site_dupliques)

# Diagnostic only. NOTE(review): this selects rows whose ID_Site AND ph_eau
# each occurred earlier, not necessarily together on the same earlier row.
lignes_dupliquees <- df_comp[dupliquees & duplicated(df_comp$ph_eau), ]

lignes_unique <- unique(lignes_dupliquees$ID_Site)
#length(lignes_unique)

# nrow(df_comp) - length(ID_Site_dupliques) + length(lignes_unique)


# Keep the first row of each site only.
df_comp <- droplevels(df_comp[!dupliquees, ])

# correlation <- cor.test(df_comp$ph_eau, df_comp$ph_0_a_30,method = "pearson")
#resultat_test <- t.test(df_comp$ph_eau, df_comp$ph_0_a_30)

df_comp$ph_eau <- as.numeric(df_comp$ph_eau)
df_comp$ph_0_a_30 <- as.numeric(df_comp$ph_0_a_30)
  • Method ?

  • Depth ?

  • Measured values (CR = 54.8%)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  4.170   6.690   7.680   7.352   8.150   8.900 
  • Extracted values
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  5.200   6.300   6.800   6.711   7.100   7.900 

5.5 Sand

Extracted values (g/kg, 0 - 30 cm)

Measured values & extracted values

  • Clean sand column
Code
# Build the sand comparison table: measured sand (sand) against the extracted
# 0-30 cm value (sable.0_30).
df_comp <- bdd[, c("ID", "ID_Site", "sand", "sable.0_30")]
df_comp <- df_comp[complete.cases(df_comp$sand), ]
df_comp <- df_comp[complete.cases(df_comp$sable.0_30), ]
# Drop entries containing anything other than digits and dots.
df_comp <- df_comp[!grepl("[^0-9.]", df_comp$sand), ]
# as.character() first, so that a factor column is converted from its labels
# rather than from its internal level codes.
df_comp$sand <- as.numeric(as.character(df_comp$sand))
df_comp$sable.0_30 <- as.numeric(df_comp$sable.0_30)
# colSums(is.na(df_comp))

# Remove values that are NA/NaN after conversion. The previous `== "NA"`
# comparison never matched a numeric NA, and an NA row index adds all-NA rows
# instead of dropping them. is.na() is TRUE for NaN as well.
df_comp <- df_comp[!is.na(df_comp$sand), ]
df_comp <- df_comp[!is.na(df_comp$sable.0_30), ]
df_comp <- droplevels(df_comp)
Code
# -   Deleting duplicate measured values

# Flag every row whose ID_Site already appeared earlier in df_comp.
dupliquees <- duplicated(df_comp$ID_Site)
ID_Site_dupliques <- df_comp$ID_Site[dupliquees]
#length(ID_Site_dupliques)

# Diagnostic only. NOTE(review): this selects rows whose ID_Site AND sand
# value each occurred earlier, not necessarily together on the same row.
lignes_dupliquees <- df_comp[dupliquees & duplicated(df_comp$sand), ]

lignes_unique <- unique(lignes_dupliquees$ID_Site)
#length(lignes_unique)
# nrow(df_comp) - length(ID_Site_dupliques) + length(lignes_unique)

# Keep the first row of each site only.
df_comp <- droplevels(df_comp[!dupliquees, ])
df_comp$sand <- as.numeric(df_comp$sand)
df_comp$sable.0_30 <- as.numeric(df_comp$sable.0_30)

# summary(df_comp$sand)
# explo_num(nom_col = "sand", titre = "Sand extracted values",df = df_comp)
# Remove rows whose measured sand is >= 83 (presumably treated as outliers).
id_ligne <- df_comp[which(df_comp$sand >= 83), "ID"]
df_comp <- droplevels(df_comp[!(df_comp$ID %in% id_ligne), ])


# 
# summary(df_comp$sable.0_30)
# explo_num(nom_col = "sable.0_30", titre = "Sand extracted values",df = df_comp)
# Remove rows whose extracted sand is >= 60 (presumably treated as outliers).
id_ligne <- df_comp[which(df_comp$sable.0_30 >= 60), "ID"]
df_comp <- droplevels(df_comp[!(df_comp$ID %in% id_ligne), ])
  • Method ?

  • Depth ?

  • Measured values (CR = 28.2%)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.15   19.60   30.34   33.84   46.48   81.80 
  • Extracted values
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   8.90   22.50   33.00   33.64   44.00   56.80 

5.6 Silt

Extracted values (g/kg, 0 - 30 cm)

Measured values & extracted values

  • Clean silt column
Code
# Build the silt comparison table: measured silt (silt) against the extracted
# 0-30 cm value (limon.0_30).
df_comp <- bdd[, c("ID", "ID_Site", "silt", "limon.0_30")]
df_comp <- df_comp[complete.cases(df_comp$silt), ]
df_comp <- df_comp[complete.cases(df_comp$limon.0_30), ]
# Drop entries containing anything other than digits and dots.
df_comp <- df_comp[!grepl("[^0-9.]", df_comp$silt), ]
# as.character() first, so that a factor column is converted from its labels
# rather than from its internal level codes.
df_comp$silt <- as.numeric(as.character(df_comp$silt))
df_comp$limon.0_30 <- as.numeric(df_comp$limon.0_30)
# colSums(is.na(df_comp))


# Remove values that are NA/NaN after conversion. The previous `== "NA"`
# comparison never matched a numeric NA, and an NA row index adds all-NA rows
# instead of dropping them. is.na() is TRUE for NaN as well.
df_comp <- df_comp[!is.na(df_comp$silt), ]
df_comp <- df_comp[!is.na(df_comp$limon.0_30), ]
df_comp <- droplevels(df_comp)
Code
# -   Deleting duplicate measured values

# Flag every row whose ID_Site already appeared earlier in df_comp.
dupliquees <- duplicated(df_comp$ID_Site)
ID_Site_dupliques <- df_comp$ID_Site[dupliquees]
#length(ID_Site_dupliques)

# Diagnostic only. NOTE(review): this selects rows whose ID_Site AND silt
# value each occurred earlier, not necessarily together on the same row.
lignes_dupliquees <- df_comp[dupliquees & duplicated(df_comp$silt), ]

lignes_unique <- unique(lignes_dupliquees$ID_Site)
#length(lignes_unique)
# nrow(df_comp) - length(ID_Site_dupliques) + length(lignes_unique)


# Keep the first row of each site only.
df_comp <- droplevels(df_comp[!dupliquees, ])
df_comp$silt <- as.numeric(df_comp$silt)
df_comp$limon.0_30 <- as.numeric(df_comp$limon.0_30)




# summary(df_comp$silt)
# explo_num(nom_col = "silt", titre = "Silt",df = df_comp)
# Remove rows whose measured silt is <= 7.3 (presumably treated as outliers).
id_ligne <- df_comp[which(df_comp$silt <= 7.3), "ID"]
df_comp <- droplevels(df_comp[!(df_comp$ID %in% id_ligne), ])



# summary(df_comp$limon.0_30)
# explo_num(nom_col = "limon.0_30", titre = "limon.0_30",df = df_comp)
# Remove rows whose extracted silt is >= 80 (presumably treated as outliers).
id_ligne <- df_comp[which(df_comp$limon.0_30 >= 80), "ID"]
df_comp <- droplevels(df_comp[!(df_comp$ID %in% id_ligne), ])
  • Method ?

  • Depth ?

  • Measured values (CR = 28.2%)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  9.102  34.900  46.550  46.859  60.375  81.200 
  • Extracted values
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  24.40   37.02   43.90   46.05   54.17   71.50 

5.7 Clay

Extracted values (g/kg, 0 - 30 cm)

Measured values & extracted values

  • Clean clay column

Code
# Build the clay comparison table: measured clay (clay) against the extracted
# 0-30 cm value (argile.0_30).
df_comp <- bdd[, c("ID", "ID_Site", "clay", "argile.0_30")]
df_comp <- df_comp[complete.cases(df_comp$clay), ]
df_comp <- df_comp[complete.cases(df_comp$argile.0_30), ]
# Drop entries containing anything other than digits and dots.
df_comp <- df_comp[!grepl("[^0-9.]", df_comp$clay), ]
# as.character() first, so that a factor column is converted from its labels
# rather than from its internal level codes.
df_comp$clay <- as.numeric(as.character(df_comp$clay))
df_comp$argile.0_30 <- as.numeric(df_comp$argile.0_30)
# colSums(is.na(df_comp))

# Remove values that are NA/NaN after conversion. The previous `== "NA"`
# comparison never matched a numeric NA, and an NA row index adds all-NA rows
# instead of dropping them. is.na() is TRUE for NaN as well.
df_comp <- df_comp[!is.na(df_comp$clay), ]
df_comp <- df_comp[!is.na(df_comp$argile.0_30), ]
df_comp <- droplevels(df_comp)
Code
# -   Deleting duplicate measured values

# Flag every row whose ID_Site already appeared earlier in df_comp.
dupliquees <- duplicated(df_comp$ID_Site)
ID_Site_dupliques <- df_comp$ID_Site[dupliquees]
#length(ID_Site_dupliques)

# Diagnostic only. NOTE(review): this selects rows whose ID_Site AND clay
# value each occurred earlier, not necessarily together on the same row.
lignes_dupliquees <- df_comp[dupliquees & duplicated(df_comp$clay), ]

lignes_unique <- unique(lignes_dupliquees$ID_Site)
#length(lignes_unique)
# nrow(df_comp) - length(ID_Site_dupliques) + length(lignes_unique)

# Keep the first row of each site only.
df_comp <- droplevels(df_comp[!dupliquees, ])
df_comp$clay <- as.numeric(df_comp$clay)
df_comp$argile.0_30 <- as.numeric(df_comp$argile.0_30)


# Divide the measured clay by 10 to express it in %.
df_comp$clay <- as.numeric(df_comp$clay) / 10
  • Method ?

  • Depth ?

  • Measured values (CR = 70.5%)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.002   2.598  20.100  21.221  34.500  66.400 
  • Extracted values
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.40   18.30   26.20   26.78   35.30   53.00 

5.8 Elevation

5.9 Phosphore (P, mg/kg)

5.10 Azote (N, g/kg)

5.11 Potassium (K, mg/kg)

5.12 C/N

5.13 Capacité d’échange de cations (CEC, cmol/kg)

5.14 Carbonates de calcium (CaCO3, g/kg)

6 Exploratory analysis

Data set reduction

Code
# Column groups kept for the exploratory analysis -----------------------------
id_col <- c("ID", "Programme", "Annee", "ID_Site", "Protocole")

vdt_col <- c("AB_tot", "BM_tot", "Richesse_tot")

land_cover_col <- c("clcm_lvl3")

topo_col <- c("elevation", "gps_x", "gps_y")

soil_col <- c("ph_0_a_30", "sable.0_30", "limon.0_30", "argile.0_30",
              "c_orga_0_a_30", "P", "N", "K", "CN", "CEC", "CaCO3")

# bio1 ... bio19 built in one vectorised call instead of growing a vector
# inside a loop
bio_col <- paste0("bio", 1:19)
climate_col <- c(bio_col, "cmi_mean", "gdd0", "gdd10", "hurs_mean", "pet_penman_mean")

bdd_explo <- bdd[, c(id_col, vdt_col, land_cover_col, topo_col, soil_col, climate_col)]
# str(bdd_explo)
bdd_explo$ID <- as.factor(bdd_explo$ID)


# Rename ----------------------------------------------------------------------

# Short names of the soil columns once renamed (same order as soil_col)
new_soil_col <- c("pH", "sand", "silt", "clay", "C", "P", "N", "K", "CN", "CEC", "CaCO3")
# bdd_explo <- rename(bdd_explo, !!setNames(soil_col, new_soil_col))
bdd_explo <- bdd_explo %>%
  dplyr::rename(
    pH = ph_0_a_30,
    sand = sable.0_30,
    silt = limon.0_30,
    clay = argile.0_30,
    C = c_orga_0_a_30,
    PET = pet_penman_mean
  )

# Same climate list as above, with pet_penman_mean under its new name PET
climate_col <- c(bio_col, "cmi_mean", "gdd0", "gdd10", "hurs_mean", "PET")


col_graph <- c(vdt_col, land_cover_col, topo_col, new_soil_col, climate_col)
# for (i in names(bdd_explo[,col_graph])){
#   par(mfrow=c(2,2))
#   plot(bdd_explo[[i]], main=i)
# }


# Land cover ------------------------------------------------------------------

# Merge the three forest classes into a single "Forest" level
forest_levels <- c("Broad-leaved forest", "Coniferous forest", "Mixed forest")
levels(bdd_explo$clcm_lvl3)[levels(bdd_explo$clcm_lvl3) %in% forest_levels] <- "Forest"

bdd_explo$clcm_lvl3 <- as.factor(bdd_explo$clcm_lvl3)

# Replace the level names by short codes. Labels are matched to levels by
# POSITION, so the number of levels must equal the number of codes.
# NOTE(review): the code order presumably follows the alphabetical order of
# the six remaining classes - confirm against levels(bdd_explo$clcm_lvl3).
cl_original <- levels(bdd_explo$clcm_lvl3)
new_cl <- c("f", "gua", "ng", "nial", "p", "v")
if (length(cl_original) != length(new_cl)) {
  stop("clcm_lvl3 has ", length(cl_original), " levels but ", length(new_cl),
       " short codes are defined", call. = FALSE)
}
bdd_explo$clcm_lvl3 <- factor(bdd_explo$clcm_lvl3, levels = cl_original, labels = new_cl)

Fork (Bifurcation)

6.1 Total abundance distributions


  • Transformation sqrt

lambda = 0.3




6.2 Total biomass distributions


  • Transformation sqrt

lambda = 0.4




6.3 Total taxonomic richness distributions


  • Transformation sqrt

lambda = 0.5




6.4 Standardization

  • Transformation sqrt des variables de l’abondance et de la biomasse

  • Transformation centrée reduite des prédicteurs

7 Nettoyages des variables pour les GLM, GAM et POLY

7.1 Test de correlation : intra catégories

  • Topographie

Colonne supprimée : gps_x


- Soil data


Colonne supprimée : sand


- Climat data


Colonnes supprimées : bio2, bio4, bio5, bio6, bio7, bio9, bio10, bio11, bio13, bio16, bio17, bio18, bio19, gdd0, gdd10, cmi_mean, PET

7.2 Test de correlation : inter catégories

Colonne supprimée : gps_y

{width=“1200”,aligne=“center”}

7.3 VIF

Suppression de la variable bio14

1 variables from the 18 input variables have collinearity problem: 
 
bio14 

After excluding the collinear variables, the linear correlation coefficients ranges between: 
min correlation ( bio8 ~ P ):  0.01122735 
max correlation ( hurs_mean ~ bio1 ):  -0.6791758 

---------- VIFs of the remained variables -------- 
   Variables      VIF
1  elevation 3.170519
2         pH 2.989771
3       silt 1.467205
4       clay 2.184657
5          C 1.973258
6          P 2.356855
7          N 2.587901
8          K 1.427705
9         CN 2.049177
10       CEC 3.967772
11     CaCO3 2.267881
12      bio1 6.451877
13      bio3 1.672321
14      bio8 1.473784
15     bio12 2.169044
16     bio15 3.603108
17 hurs_mean 5.361998

8 Selecting variables with regsubsets()

Code
# colSums(is.na(data_lm))
# Keep the rows that are complete on every column except BM_tot (BM_tot is
# allowed to stay NA). complete.cases() works on the data frame directly and
# avoids the matrix coercion that apply() performs on a data frame.
data_lm <- data_lm[
  complete.cases(data_lm[, setdiff(colnames(data_lm), "BM_tot"), drop = FALSE]),
]
# colSums(is.na(data_lm))
# dim(data_lm)

8.1 Selection for total abundance

Code
# names(data_lm)
# Dummy-coded land-cover columns to exclude from the candidate predictors.
# "clc3_f" is listed explicitly: the biomass selection reported in this
# document contains `clc3_f`, which shows the former list (with only the
# stale name "clc3_mf") did not remove it. "clc3_mf" is kept for tables
# that still use the old name; setdiff() ignores names that are absent.
clc3_col <- c("clc3_f", "clc3_mf", "clc3_gua", "clc3_ng", "clc3_nial", "clc3_p", "clc3_v")
# Identifiers, the other two responses and the raw land-cover column
supp <- c("ID", "Programme", "Annee", "ID_Site", "Protocole", "BM_tot", "Richesse_tot", "clcm_lvl3")
df_AB_tot <- data_lm[, setdiff(names(data_lm), supp)]
df_AB_tot <- df_AB_tot[, setdiff(names(df_AB_tot), clc3_col)]
# str(df_AB_tot)
# colSums(is.na(df_AB_tot))

# Exhaustive best-subset search, models of up to 17 terms
results_AB_tot <- regsubsets(AB_tot ~ ., data = df_AB_tot, method = "exhaustive", nvmax = 17)
# plot(results_AB_tot, scale = "r2", main='R² criteria')
# summary(results_AB_tot)

# Selection criteria per model size, rounded to 2 decimals
rsq_AB_tot <- round(summary(results_AB_tot)$rsq, 2)
adjr2_AB_tot <- round(summary(results_AB_tot)$adjr2, 2)
cp_AB_tot <- round(summary(results_AB_tot)$cp, 2)
bic_AB_tot <- round(summary(results_AB_tot)$bic, 2)
  • Selection by R² adj : stable from 12 variables

  • Selection by Cp : stable from 17 variables

  • Selection by BIC : stable from 9 variables

  • Les 17 meilleures variables sont : clc3gua, clc3ng, clc3nial, clc3p, elevation, silt, C, P, N, CN, CEC, CaCO3, bio1, bio3, bio12, bio15, hurs_mean

8.2 Selection for total biomass

Code
# names(data_lm)
# Identifiers, the other two responses and the raw land-cover column
supp <- c("ID", "Programme", "Annee", "ID_Site", "Protocole", "AB_tot", "Richesse_tot", "clcm_lvl3")
df_BM_tot <- data_lm[, setdiff(names(data_lm), supp)]
df_BM_tot <- df_BM_tot[, setdiff(names(df_BM_tot), clc3_col)]
# Rows with any missing value (BM_tot included) are removed here
df_BM_tot <- drop_na(df_BM_tot)
# str(df_BM_tot)
# colSums(is.na(df_BM_tot))

# Exhaustive best-subset search, models of up to 17 terms
results_BM_tot <- regsubsets(
  BM_tot ~ .,
  data = df_BM_tot,
  method = "exhaustive",
  nvmax = 17
)
# summary(results_BM_tot)

# Selection criteria per model size, rounded to 2 decimals
rsq_BM_tot <- round(summary(results_BM_tot)[["rsq"]], 2)
adjr2_BM_tot <- round(summary(results_BM_tot)[["adjr2"]], 2)
cp_BM_tot <- round(summary(results_BM_tot)[["cp"]], 2)
bic_BM_tot <- round(summary(results_BM_tot)[["bic"]], 2)
  • Suppression de 1534 lignes de NA de BM_tot (nrow = 1598)

  • Selection by R² adj : stable from 12 variables

  • Selection by Cp : stable from 12 variables

  • Selection by BIC : stable from 7 variables

  • Les 12 meilleures variables sont : clc3gua, clc3p, C, P, N, K, CN, bio8, bio12, bio15, hurs_mean, clc3_f

8.3 Selection for total taxonomic richness

Code
# names(data_lm)
# Identifiers, the other two responses and the raw land-cover column
supp <- c("ID", "Programme", "Annee", "ID_Site", "Protocole", "BM_tot", "AB_tot", "clcm_lvl3")
df_Richesse_tot <- data_lm[, setdiff(names(data_lm), supp)]
df_Richesse_tot <- df_Richesse_tot[, setdiff(names(df_Richesse_tot), clc3_col)]
# str(df_Richesse_tot)
# colSums(is.na(df_Richesse_tot))

# Exhaustive best-subset search, models of up to 17 terms
results_Richesse_tot <- regsubsets(
  Richesse_tot ~ .,
  data = df_Richesse_tot,
  method = "exhaustive",
  nvmax = 17
)
# summary(results_Richesse_tot)

# Selection criteria per model size, rounded to 2 decimals
rsq_Richesse_tot <- round(summary(results_Richesse_tot)[["rsq"]], 2)
adjr2_Richesse_tot <- round(summary(results_Richesse_tot)[["adjr2"]], 2)
cp_Richesse_tot <- round(summary(results_Richesse_tot)[["cp"]], 2)
bic_Richesse_tot <- round(summary(results_Richesse_tot)[["bic"]], 2)
  • Selection by R² adj : stable from 12 variables

  • Selection by Cp : stable from 15 variables

  • Selection by BIC : stable from 12 variables

  • Les 15 meilleures variables sont : clc3gua, clc3ng, clc3p, elevation, pH, P, N, K, CN, CEC, CaCO3, bio3, bio8, bio12, hurs_mean

9 Selection des variables pour RF, GBM et ANN

9.1 Importance of variables for total abundance

9.2 Importance of variables for total biomass

9.3 Importance of variables for total taxonomic richness

No variable from the 9 input variables has collinearity problem. 

The linear correlation coefficients ranges between: 
min correlation ( gps_y ~ gps_x ):  -0.004493436 
max correlation ( bio12 ~ N ):  0.5197976 

---------- VIFs of the remained variables -------- 
  Variables      VIF
1     CaCO3 1.426888
2     gps_x 2.156835
3         N 1.896999
4      bio3 1.686488
5     gps_y 1.930782
6      clay 1.635334
7      silt 1.288895
8         P 1.508710
9     bio12 2.005289

10 Relationship between variables

[Explanatory power of variables]

10.1 Abundance

  • Plots
- Abundance & CaCO3 

- Abundance & gps_x 

- Abundance & N 

- Abundance & bio3 

- Abundance & gps_y 

- Abundance & clay 

- Abundance & silt 

- Abundance & clcm_lvl3 

- Abundance & P 

- Abundance & bio12 

10.2 Biomass

  • Plots
- Biomass & CaCO3 

- Biomass & gps_x 

- Biomass & N 

- Biomass & bio3 

- Biomass & gps_y 

- Biomass & clay 

- Biomass & silt 

- Biomass & clcm_lvl3 

- Biomass & P 

- Biomass & bio12 

10.3 Richness

  • Plots
- Richness & CaCO3 

- Richness & gps_x 

- Richness & N 

- Richness & bio3 

- Richness & gps_y 

- Richness & clay 

- Richness & silt 

- Richness & clcm_lvl3 

- Richness & P 

- Richness & bio12 

11 Species

See species explorations

12 ACP

Plan

12.1 Abundance

Code
# Programmes grouped by region; any programme not listed falls into "autres"
idf <- c("OPVT_IDF", "TRAMBIOSOIL")
bzh <- c("Dephy Bio", "Dephy", "Breizh Sukr", "Sols de Bretagne", "Kerguéhennec")
bfc <- c("TIGA")
df_region <- data_deep
df_region$regions <- ifelse(df_region$Programme %in% idf, "idf",
                            ifelse(df_region$Programme %in% bzh, "bzh",
                                   ifelse(df_region$Programme %in% bfc, "bfc",
                                          "autres")))

# PCA table: response, predictors and the region label.
# (An earlier assignment built from data_deep without `regions` was
# overwritten before being used and has been removed.)
df_acp_AB_tot <- df_region[, c("AB_tot", Predictors_f, "regions")]
# # # colnames(df_acp_AB_tot)[colnames(df_acp_AB_tot) == "clcm_lvl3"] <- "clc3"
# dummy_vars <- model.matrix(~ clcm_lvl3 - 1, data = df_acp_AB_tot)
# df_acp_AB_tot <- cbind(df_acp_AB_tot, dummy_vars)
# df_acp_AB_tot <- df_acp_AB_tot[, -which(names(df_acp_AB_tot) == "clcm_lvl3")]

df_acp_AB_tot <- drop_na(df_acp_AB_tot)
df_acp_AB_tot <- droplevels(df_acp_AB_tot)

# AB_tot (column 1) is a supplementary quantitative variable; the two
# qualitative columns are supplementary too. They are located by name rather
# than by hard-coded position so the call survives a change in Predictors_f.
posi_quali_AB <- which(names(df_acp_AB_tot) %in% c("clcm_lvl3", "regions"))
acp_AB_tot <- PCA(df_acp_AB_tot, graph = FALSE, quanti.sup = 1, quali.sup = posi_quali_AB)

## Choice of the number of axes
# acp_AB_tot$eig
# Scree plot. Original note: "keep the first three axes"; the chunks that
# follow only use axes 1 and 2.
fviz_eig(acp_AB_tot, addlabels = TRUE)

Code
# Contributions of the active variables to the first two axes, rounded to
# 2 decimals for readability
contrib_axes <- round(acp_AB_tot$var$contrib[, 1:2], 2)
# Bar plot of the variable contributions to axis 1
fviz_contrib(acp_AB_tot, axes = 1, choice = "var")

Code
# Bar plot of the variable contributions to axis 2
fviz_contrib(acp_AB_tot, choice = "var", axes = 2)

Code
# fviz_contrib(acp_AB_tot, choice = "var", axes = 3)


# Variable plot on axes 1-2, variables coloured by their contribution;
# repel = TRUE avoids overlapping labels
fviz_pca_var(
  acp_AB_tot,
  axes = c(1, 2),
  repel = TRUE,
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)

Code
# Active variables are the rows of the contribution table. The former
# `-c(1:2)` indexing dropped the first active predictor and kept the
# supplementary columns, which were then always listed as unimportant.
variables_actives <- rownames(contrib_axes)

# Threshold: contribution (%) each variable would have if all active
# variables contributed equally
seuil <- 1 / length(variables_actives) * 100
lignes_superieures <- variables_actives[apply(contrib_axes, 1,
                                              function(x)
                                                any(x >= seuil))]

# Considering the first two axes, the least important predictors are:
setdiff(variables_actives, lignes_superieures)
[1] "silt"      "clcm_lvl3" "regions"  
Code
# Land-cover palette, each colour written as a 9-digit DECIMAL string
# "RRRGGGBBB" (three channels in 0-255), not as hexadecimal.
# (A named-colour vector assigned just before was overwritten immediately
# and has been removed.)
coul <- c("077255000", "255166255", "204242077", "255255168", "230230077", "230128000")

# Convert the decimal triplets to "#RRGGBB" strings in one vectorised call
colors <- rgb(
  as.numeric(substr(coul, 1, 3)),
  as.numeric(substr(coul, 4, 6)),
  as.numeric(substr(coul, 7, 9)),
  maxColorValue = 255
)


# Individuals on axes 1-2, filled by squared AB_tot.
# NOTE(review): the square presumably undoes the sqrt transformation applied
# to abundance - confirm. "viridus" is kept as written; it is not a palette
# name I can verify - TODO confirm the intended value.
fviz_pca_ind(
  axes = c(1, 2),
  acp_AB_tot,
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2,
  palette = "viridus",
  # addEllipses = TRUE,
  legend.title = "Abundance",
  fill.ind = df_acp_AB_tot$AB_tot^2
)

Code
# Individuals on axes 1-2, filled by land-cover class, with one ellipse per class
fviz_pca_ind(
  acp_AB_tot,
  axes = c(1, 2),
  fill.ind = df_acp_AB_tot$clcm_lvl3,
  palette = colors,
  legend.title = "Land use",
  addEllipses = TRUE,
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2
)

Code
# Biplot (individuals and variables) on axes 1-2, filled by land-cover class
fviz_pca_biplot(
  acp_AB_tot,
  axes = c(1, 2),
  fill.ind = df_acp_AB_tot$clcm_lvl3,
  palette = colors,
  legend.title = "Land use",
  addEllipses = TRUE,
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2
)

Code
# Individuals coloured by the supplementary qualitative variable in column 12
# of the PCA table
fviz_pca_ind(
  acp_AB_tot,
  geom.ind = "point",
  habillage = 12,
  palette = "jco",            # colour palette
  addEllipses = TRUE,         # draw one ellipse per group
  ellipse.type = "t",         # ellipse type
  legend.title = "~ Regions"
)

12.2 Biomass

Code
# PCA table for biomass: response followed by the predictors
df_acp_BM_tot <- data_deep[, c("BM_tot", Predictors_f)]
# # # colnames(df_acp_BM_tot)[colnames(df_acp_BM_tot) == "clcm_lvl3"] <- "clc3"
# dummy_vars <- model.matrix(~ clcm_lvl3 - 1, data = df_acp_BM_tot)
# df_acp_BM_tot <- cbind(df_acp_BM_tot, dummy_vars)
# df_acp_BM_tot <- df_acp_BM_tot[, -which(names(df_acp_BM_tot) == "clcm_lvl3")]

df_acp_BM_tot <- drop_na(df_acp_BM_tot)
df_acp_BM_tot <- droplevels(df_acp_BM_tot)

# BM_tot (column 1) is a supplementary quantitative variable; the land-cover
# factor is supplementary too and is located by name (as in the richness
# section) instead of the hard-coded position 9.
acp_BM_tot <- PCA(
  df_acp_BM_tot,
  graph = FALSE,
  quanti.sup = 1,
  quali.sup = which(names(df_acp_BM_tot) == "clcm_lvl3")
)

## Choice of the number of axes
# acp_BM_tot$eig
# Scree plot. Original note: "keep the first three axes"; the chunks that
# follow only use axes 1 and 2.
fviz_eig(acp_BM_tot, addlabels = TRUE)

Code
# Contributions of the active variables to the first two axes, rounded to
# 2 decimals for readability
contrib_axes <- round(acp_BM_tot$var$contrib[, 1:2], 2)
# Bar plot of the variable contributions to axis 1
fviz_contrib(acp_BM_tot, axes = 1, choice = "var")

Code
# Bar plot of the variable contributions to axis 2
fviz_contrib(acp_BM_tot, choice = "var", axes = 2)

Code
# fviz_contrib(acp_BM_tot, choice = "var", axes = 3)


# Variable plot on axes 1-2, variables coloured by their contribution;
# repel = TRUE avoids overlapping labels
fviz_pca_var(
  acp_BM_tot,
  axes = c(1, 2),
  repel = TRUE,
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)

Code
# Active variables are the rows of the contribution table. The former
# `-c(1:2)` indexing dropped the first active predictor and kept the
# supplementary land-cover column, which was then always listed as
# unimportant.
variables_actives <- rownames(contrib_axes)

# Threshold: contribution (%) each variable would have if all active
# variables contributed equally
seuil <- 1 / length(variables_actives) * 100
lignes_superieures <- variables_actives[apply(contrib_axes, 1,
                                              function(x)
                                                any(x >= seuil))]

# Considering the first two axes, the least important predictors are:
setdiff(variables_actives, lignes_superieures)
[1] "clcm_lvl3"
Code
# Land-cover palette, each colour written as a 9-digit DECIMAL string
# "RRRGGGBBB" (three channels in 0-255), not as hexadecimal.
# (A named-colour vector assigned just before was overwritten immediately
# and has been removed.)
coul <- c("077255000", "255166255", "204242077", "255255168", "230230077", "230128000")

# Convert the decimal triplets to "#RRGGBB" strings in one vectorised call
colors <- rgb(
  as.numeric(substr(coul, 1, 3)),
  as.numeric(substr(coul, 4, 6)),
  as.numeric(substr(coul, 7, 9)),
  maxColorValue = 255
)


# Individuals on axes 1-2, filled by squared BM_tot.
# NOTE(review): the square presumably undoes the sqrt transformation applied
# to biomass - confirm. "viridus" is kept as written; it is not a palette
# name I can verify - TODO confirm the intended value.
fviz_pca_ind(
  axes = c(1, 2),
  acp_BM_tot,
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2,
  palette = "viridus",
  # addEllipses = TRUE,
  legend.title = "Biomass",
  fill.ind = df_acp_BM_tot$BM_tot^2
)

Code
# Individuals on axes 1-2, filled by land-cover class, with one ellipse per class
fviz_pca_ind(
  acp_BM_tot,
  axes = c(1, 2),
  fill.ind = df_acp_BM_tot$clcm_lvl3,
  palette = colors,
  legend.title = "Land use",
  addEllipses = TRUE,
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2
)

Code
# Biplot (individuals and variables) on axes 1-2, filled by land-cover class
fviz_pca_biplot(
  acp_BM_tot,
  axes = c(1, 2),
  fill.ind = df_acp_BM_tot$clcm_lvl3,
  palette = colors,
  legend.title = "Land use",
  addEllipses = TRUE,
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2
)

12.3 Richness

Code
# PCA table for richness: response followed by the predictors
df_acp_Richesse_tot <- data_deep[, c("Richesse_tot", Predictors_f)]
# # # colnames(df_acp_Richesse_tot)[colnames(df_acp_Richesse_tot) == "clcm_lvl3"] <- "clc3"
# dummy_vars <- model.matrix(~ clcm_lvl3 - 1, data = df_acp_Richesse_tot)
# df_acp_Richesse_tot <- cbind(df_acp_Richesse_tot, dummy_vars)
# df_acp_Richesse_tot <- df_acp_Richesse_tot[, -which(names(df_acp_Richesse_tot) == "clcm_lvl3")]

df_acp_Richesse_tot <- droplevels(drop_na(df_acp_Richesse_tot))

# Position of the land-cover factor, declared as supplementary qualitative
# variable; Richesse_tot (column 1) is supplementary quantitative
posi_co <- which(names(df_acp_Richesse_tot) == "clcm_lvl3")

acp_Richesse_tot <- PCA(
  df_acp_Richesse_tot,
  quanti.sup = 1,
  quali.sup = posi_co,
  graph = FALSE
)

## Choice of the number of axes
# acp_Richesse_tot$eig
# Scree plot. Original note: "keep the first three axes"
fviz_eig(acp_Richesse_tot, addlabels = TRUE)

Code
# Contributions of the active variables to the first two axes, rounded to
# 2 decimals for readability
contrib_axes <- round(acp_Richesse_tot$var$contrib[, 1:2], 2)
# Bar plot of the variable contributions to axis 1
fviz_contrib(acp_Richesse_tot, axes = 1, choice = "var")

Code
# Bar plot of the variable contributions to axis 2
fviz_contrib(acp_Richesse_tot, choice = "var", axes = 2)

Code
# fviz_contrib(acp_Richesse_tot, choice = "var", axes = 3)


# Variable plot on axes 1-2, variables coloured by their contribution;
# repel = TRUE avoids overlapping labels
fviz_pca_var(
  acp_Richesse_tot,
  axes = c(1, 2),
  repel = TRUE,
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)

Code
# Active variables are the rows of the contribution table. The former code
# built its logical mask from `-c(1:2)` but applied it to the names obtained
# with `-c(1, posi_co)`, so mask and names were misaligned.
variables_actives <- rownames(contrib_axes)

# Threshold: contribution (%) each variable would have if all active
# variables contributed equally
seuil <- 1 / length(variables_actives) * 100
lignes_superieures <- variables_actives[apply(contrib_axes, 1,
                                              function(x)
                                                any(x >= seuil))]

# Considering the first two axes, the least important predictors are:
setdiff(variables_actives, lignes_superieures)
[1] "clay" "silt"
Code
# Land-cover palette, each colour written as a 9-digit DECIMAL string
# "RRRGGGBBB" (three channels in 0-255), not as hexadecimal.
# (A named-colour vector assigned just before was overwritten immediately
# and has been removed.)
coul <- c("077255000", "255166255", "204242077", "255255168", "230230077", "230128000")

# Convert the decimal triplets to "#RRGGBB" strings in one vectorised call
colors <- rgb(
  as.numeric(substr(coul, 1, 3)),
  as.numeric(substr(coul, 4, 6)),
  as.numeric(substr(coul, 7, 9)),
  maxColorValue = 255
)


# Individuals on axes 1-2, filled by Richesse_tot.
# NOTE(review): "viridus" is kept as written; it is not a palette name I can
# verify - TODO confirm the intended value.
fviz_pca_ind(
  axes = c(1, 2),
  acp_Richesse_tot,
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2,
  palette = "viridus",
  # addEllipses = TRUE,
  legend.title = "Richness",
  fill.ind = df_acp_Richesse_tot$Richesse_tot
)

Code
# Individuals on axes 1-2, filled by land-cover class, with one ellipse per class
fviz_pca_ind(
  acp_Richesse_tot,
  axes = c(1, 2),
  fill.ind = df_acp_Richesse_tot$clcm_lvl3,
  palette = colors,
  legend.title = "Land use",
  addEllipses = TRUE,
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2
)

Code
# Biplot (individuals and variables) on axes 1-2, filled by land-cover class
fviz_pca_biplot(
  acp_Richesse_tot,
  axes = c(1, 2),
  fill.ind = df_acp_Richesse_tot$clcm_lvl3,
  palette = colors,
  legend.title = "Land use",
  addEllipses = TRUE,
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2
)

13 Modeling

13.1 Data preparation

Code
# AB_tot --------------------------------------------------------------------------
# Modelling table: response, predictors, and the land-cover factor replaced
# by one 0/1 column per level.
df_mod_AB_tot <- data_deep[, c("AB_tot", Predictors_f)]
# # # colnames(df_mod_AB_tot)[colnames(df_mod_AB_tot) == "clcm_lvl3"] <- "clc3"
# NOTE(review): model.matrix() drops rows with a missing clcm_lvl3 under the
# default na.action, in which case cbind() below would fail on a row-count
# mismatch - confirm clcm_lvl3 has no NA at this point.
dummy_vars <- model.matrix(~ clcm_lvl3 - 1, data = df_mod_AB_tot)
df_mod_AB_tot <- cbind(df_mod_AB_tot, dummy_vars)
df_mod_AB_tot <- df_mod_AB_tot[, names(df_mod_AB_tot) != "clcm_lvl3"]

df_mod_AB_tot <- drop_na(df_mod_AB_tot)
df_mod_AB_tot <- droplevels(df_mod_AB_tot)


# Partition: each row is drawn independently into group 1 (train, p = 0.8)
# or group 2 (test, p = 0.2), so the split is only approximately 80/20.
set.seed(3500)
ind <- sample(2, nrow(df_mod_AB_tot), replace = TRUE, prob = c(.8, .2))
AB_tot_train <- df_mod_AB_tot[ind == 1, ]
AB_tot_test <- df_mod_AB_tot[ind == 2, ]

write.csv2(x = AB_tot_train, file = "datas/AB_tot_train.csv", row.names = FALSE)
write.csv2(x = AB_tot_test, file = "datas/AB_tot_test.csv", row.names = FALSE)

# AB_tot_train = read.csv2("datas/AB_tot_train.csv")
# AB_tot_test = read.csv2("datas/AB_tot_test.csv")
# df_mod_AB_tot = rbind(AB_tot_train,AB_tot_test)

AB_tot_train <- as.data.frame(AB_tot_train)
AB_tot_test <- as.data.frame(AB_tot_test)

# Density-scaled histogram with a density curve for a table holding the
# values in column `y`. Shared by the train and test panels.
# after_stat(density) replaces the deprecated ..density.. notation, and alpha
# is set to 1 because the former value 2 lies outside the valid 0-1 range.
plot_response_density <- function(df, title) {
  ggplot(df, aes(x = y)) +
    geom_histogram(aes(y = after_stat(density)), fill = "#69b3a2", color = "#e9ecef", bins = 30, alpha = 1) +
    geom_density(fill = "black", alpha = 0.2) +
    theme_gray() +
    labs(title = title, x = "Value", y = "Density") +
    theme(plot.title = element_text(hjust = 0.5))
}

df <- data.frame(y = AB_tot_train[, "AB_tot"])
abundance_dist_train <- plot_response_density(df, "Abundance: Train")
# ggsave("Results/abundance_dist_train.png", plot = abundance_dist_train, dpi = 300,width = 3,height = 2)

df <- data.frame(y = AB_tot_test[, "AB_tot"])
abundance_dist_test <- plot_response_density(df, "Abundance: Test")
# ggsave("Results/abundance_dist_test.png", plot = abundance_dist_test, dpi = 300,width = 3,height = 2)

# Distribution of the response in train and test: is it homogeneous?
abundance_dist_train_and_test <- ggarrange(abundance_dist_train, abundance_dist_test,
                          labels = c('(a)', '(b)'),
                          common.legend = TRUE,
                          legend = 'right'
)


ggsave("Results/abundance_dist_train_and_test.png", plot = abundance_dist_train_and_test, dpi = 300, height = 2, width = 4)

Abundance

  • Data partition (3138, 16):

    • train data (80 %) = 2512, 16

    • test data (20 %) = 626, 16

Biomasse

  • Data partition (1598, 16):

    • train data (80 %) = 1263, 16

    • test data (20 %) = 335, 16

Richness

  • Data partition (3138, 16):

    • train data (80 %) = 2512, 16

    • test data (20 %) = 626, 16

13.2 GLM

Code
# Fit a GLM of `var_rep` on every other column of `df_app` and evaluate it
# on `df_valid`.
#
# Arguments:
#   var_rep   name (character) of the response column
#   df_app    training data frame (response + predictors)
#   df_valid  validation data frame with the same columns
#   family    family passed to glm() (default 'gaussian')
#
# Returns a list: RMSE, R_adj_train, R_adj_test, MAE (all rounded to 2
# decimals), model (the glm fit), predit (validation predictions) and
# df (a one-row summary data frame).
#
# Relies on calcule_R2(), defined elsewhere in this file, called as
# calcule_R2(observed, predicted).
GLM <- function(var_rep, df_app, df_valid, family = 'gaussian') {

  # Predictors = all training columns except the response, wherever the
  # response sits (formerly "all but column 1", which duplicated the response
  # among the predictors when it was not the first column)
  var_predicteurs <- setdiff(names(df_app), var_rep)

  df_app <- df_app[, c(var_rep, var_predicteurs), drop = FALSE]
  df_valid <- df_valid[, c(var_rep, var_predicteurs), drop = FALSE]

  formula <- stats::reformulate(".", response = var_rep)

  # Train the model on the training set
  modelglm <- glm(formula, family = family, data = df_app)

  # Predict on the validation set. type = "response" keeps predictions on the
  # scale of the observations for non-identity links; for the gaussian
  # default it equals the link-scale prediction. drop = FALSE keeps a data
  # frame even with a single predictor.
  pred.GLM <- predict(modelglm,
                      newdata = as.data.frame(df_valid[, var_predicteurs, drop = FALSE]),
                      type = "response")

  # RMSE on the validation set
  rmse <- sqrt(mean((df_valid[, var_rep] - pred.GLM)^2, na.rm = TRUE))

  # Adjusted R² on the training set (p = number of predictors)
  R_adj_train <- calcule_R2(df_app[, var_rep],
                            predict(modelglm, newdata = df_app, type = "response"))
  n_train <- nrow(df_app)
  p_train <- ncol(df_app) - 1
  r_adj_train <- 1 - ((1 - R_adj_train) * (n_train - 1) / (n_train - p_train - 1))

  # Adjusted R² on the validation set
  R_adj_test <- calcule_R2(df_valid[, var_rep], pred.GLM)
  n_test <- nrow(df_valid)
  p_test <- ncol(df_valid) - 1
  r_adj_test <- 1 - ((1 - R_adj_test) * (n_test - 1) / (n_test - p_test - 1))

  # Mean absolute error on the validation set
  MAE <- mean(abs(pred.GLM - df_valid[, var_rep]), na.rm = TRUE)

  # Round results
  rmse <- round(rmse, 2)
  r_adj_train <- round(r_adj_train, 2)
  r_adj_test <- round(r_adj_test, 2)
  MAE <- round(MAE, 2)

  # output
  results_df <- data.frame(Algorithms = "GLM",
                           Response_variables = var_rep,
                           R2_adjusted_train = r_adj_train,
                           R2_adjusted_test = r_adj_test,
                           RMSE = rmse,
                           MAE = MAE)

  results <- list(RMSE = rmse, R_adj_train = r_adj_train, R_adj_test = r_adj_test, MAE = MAE, model = modelglm, predit = pred.GLM, df = results_df)
  return(results)
}
  • Gaussian distribution

13.3 GAM

Code
# Fit a GAM of `var_rep` on a fixed set of smooth terms plus the land-cover
# dummy columns, and evaluate it on `df_valid`.
#
# Arguments:
#   var_rep      name (character) of the response column in df_app
#   df_app       training data frame; must contain the response, the nine
#                smoothed predictors and the six clcm_lvl3* dummy columns
#   df_valid     validation data frame with the same columns
#   family       family passed to gam() (default 'gaussian')
#   method       smoothing-parameter estimation method (default "REML")
#   interaction  currently unused; kept so existing calls keep working
#
# Returns a list: RMSE, R_adj_train, R_adj_test, MAE (all rounded to 2
# decimals), model (the gam fit), predit (validation predictions) and
# df (a one-row summary data frame).
#
# Relies on calcule_R2(), defined elsewhere in this file, called as
# calcule_R2(observed, predicted).
GAM <- function(var_rep, df_app, df_valid, family = 'gaussian', method = "REML", interaction = FALSE) {

  if (!var_rep %in% names(df_app)) {
    stop("Response column '", var_rep, "' not found in df_app", call. = FALSE)
  }

  # Columns handed to predict(): everything except the response
  var_predicteurs <- setdiff(names(df_app), var_rep)

  # One formula for every response instead of three copy-pasted branches
  # (which left the model undefined for any other response name).
  # NOTE(review): all six land-cover dummies enter together with an
  # intercept - confirm this is intended.
  smooth_vars <- c("CaCO3", "gps_x", "N", "bio3", "gps_y", "clay", "silt", "P", "bio12")
  dummy_terms <- paste0("clcm_lvl3", c("f", "gua", "ng", "nial", "p", "v"))
  gam_formula <- stats::reformulate(
    c(paste0("s(", smooth_vars, ")"), dummy_terms),
    response = var_rep
  )

  modelgam <- gam(gam_formula, family = family, method = method, data = df_app)

  # Predict on the validation set, on the response scale (identical to the
  # link scale for the gaussian/identity default)
  pred.GAM <- predict(modelgam,
                      newdata = as.data.frame(df_valid[, var_predicteurs, drop = FALSE]),
                      type = "response")

  # RMSE on the validation set
  rmse <- sqrt(mean((df_valid[, var_rep] - pred.GAM)^2, na.rm = TRUE))

  # Adjusted R² on the training set (p = number of predictor columns)
  R_adj_train <- calcule_R2(df_app[, var_rep],
                            predict(modelgam, newdata = df_app, type = "response"))
  n_train <- nrow(df_app)
  p_train <- ncol(df_app) - 1
  r_adj_train <- 1 - ((1 - R_adj_train) * (n_train - 1) / (n_train - p_train - 1))

  # Adjusted R² on the validation set
  R_adj_test <- calcule_R2(df_valid[, var_rep], pred.GAM)
  n_test <- nrow(df_valid)
  p_test <- ncol(df_valid) - 1
  r_adj_test <- 1 - ((1 - R_adj_test) * (n_test - 1) / (n_test - p_test - 1))

  # Mean absolute error (na.rm = TRUE, consistent with the RMSE above)
  MAE <- mean(abs(pred.GAM - df_valid[, var_rep]), na.rm = TRUE)

  # Round results
  rmse <- round(rmse, 2)
  r_adj_train <- round(r_adj_train, 2)
  r_adj_test <- round(r_adj_test, 2)
  MAE <- round(MAE, 2)

  # output
  results_df <- data.frame(Algorithms = "GAM",
                           Response_variables = var_rep,
                           R2_adjusted_train = r_adj_train,
                           R2_adjusted_test = r_adj_test,
                           RMSE = rmse,
                           MAE = MAE)

  results <- list(RMSE = rmse, R_adj_train = r_adj_train, R_adj_test = r_adj_test, MAE = MAE, model = modelgam, predit = pred.GAM, df = results_df)

  return(results)
}
  • Family = gaussian

  • Link function = identity

  • Method = REML

  • Tuning

13.4 RF

  • Default model
  • RF model tuning by grid

  • ntree = \(100,300,500,700,900,1000,1300,1500,1700,2000\)

  • mtry = \(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24\)

  • maxnodes = \(10 , 20, 30, 40, 50, 60, 70, 80, 90, 100\)

Total number of models = \(ntree * mtry * maxnode = 960\)

  • Validation of models on test data
Code
# Fit a random forest of `var_rep` on every other column of `df_app` and
# evaluate it on `df_valid`.
#
# Arguments:
#   var_rep                 name (character) of the response column
#   df_app                  training data frame
#   df_valid                validation data frame
#   mtry, ntree, maxnodes   passed to randomForest::randomForest()
#
# The random seed is fixed to 1863 on every call.
#
# Returns a list: RMSE, R_adj_train, R_adj_test, MAE (all rounded to 2
# decimals), model (the forest), predit (validation predictions) and
# df (a one-row summary data frame).
#
# Relies on calcule_R2(), defined elsewhere in this file, called as
# calcule_R2(observed, predicted).
ForetAlea <- function(var_rep, df_app, df_valid, mtry, ntree, maxnodes) {

  # Adjusted R² from a raw R², the number of rows and the number of predictors
  ajuster_r2 <- function(r2, n, p) {
    1 - ((1 - r2) * (n - 1) / (n - p - 1))
  }

  set.seed(1863)
  col_posi <- which(names(df_app) == var_rep)
  ForeVDT <- randomForest::randomForest(
    df_app[-col_posi],
    df_app[[col_posi]],
    mtry = mtry,
    ntree = ntree,
    maxnodes = maxnodes
  )

  # Predictions for the validation rows (response column removed)
  col_posi <- which(names(df_valid) == var_rep)
  pred.RF <- predict(ForeVDT, newdata = df_valid[, -col_posi])

  # Root mean squared error on the validation set
  ecarts <- df_valid[, col_posi] - pred.RF
  rmse <- sqrt(mean(ecarts^2))

  # Adjusted R² on the training set
  R_adj_train <- calcule_R2(df_app[, var_rep], predict(ForeVDT, newdata = df_app))
  r_adj_train <- ajuster_r2(R_adj_train, nrow(df_app), ncol(df_app) - 1)

  # Adjusted R² on the validation set
  R_adj_test <- calcule_R2(df_valid[, col_posi], pred.RF)
  r_adj_test <- ajuster_r2(R_adj_test, nrow(df_valid), ncol(df_valid) - 1)

  # Mean absolute error on the validation set
  MAE <- mean(abs(pred.RF - df_valid[, col_posi]))

  # Round everything to 2 decimals
  rmse <- round(rmse, 2)
  r_adj_train <- round(r_adj_train, 2)
  r_adj_test <- round(r_adj_test, 2)
  MAE <- round(MAE, 2)

  # One-row summary
  results_df <- data.frame(
    Algorithms = "RF",
    Response_variables = var_rep,
    R2_adjusted_train = r_adj_train,
    R2_adjusted_test = r_adj_test,
    RMSE = rmse,
    MAE = MAE
  )

  list(
    RMSE = rmse,
    R_adj_train = r_adj_train,
    R_adj_test = r_adj_test,
    MAE = MAE,
    model = ForeVDT,
    predit = pred.RF,
    df = results_df
  )
}



# # For AB_tot  ------------------------------------------------------------------

# Grid-search results saved earlier, sorted by increasing `mae`
AB_tot_RF_tuning <- read.csv2("results_tuning/AB_tot_RF_tuning.csv")


AB_tot_RF_tuning <- as.data.frame(AB_tot_RF_tuning)
AB_tot_RF_tuning <- AB_tot_RF_tuning %>% arrange(mae)
# head(AB_tot_RF_tuning)

# Row with the lowest mae.
# NOTE(review): the GBM tuning chunk assigns the same name AB_tot_best_param;
# whichever chunk runs last wins.
AB_tot_best_param <- AB_tot_RF_tuning[1, ]

# plot(seq(1:nrow(AB_tot_RF_tuning)), AB_tot_RF_tuning$r_squared)



# R² of each grid row against its rank; seq_len() stays correct even when
# the table is empty, unlike seq(1:nrow(...))
df <- data.frame(x = seq_len(nrow(AB_tot_RF_tuning)), y = AB_tot_RF_tuning$r_squared)
RF_tuning <- ggplot(df, aes(x = x, y = y)) +
  geom_point() +
  labs(x = "Index", y = "R Squared", title = "Abundance: R Squared over Index") +
  theme_minimal()


# ggsave("results_tuning/RF_tuning.png", plot = RF_tuning, dpi = 300)

13.5 GBM

  • Default model

  • GBM model tuning by grid

  • n.trees = \(1000, 1500, 1700, 2000, 3000\)

  • shrinkage = \(0.01, 0.02, 0.05, 0.001, 0.002, 0.005\)

  • interaction.depth = \(3, 5, 6, 8, 10\)

  • n.minobsinnode = \(2, 5, 10, 30, 50, 70\)

Total number of models = \(n.trees * shrinkage * interaction.depth * n.minobsinnode = 900\)

  • Validation of models on test data
Code
# Fit a gradient boosting model of `var_rep` on every other column of
# `df_app` and evaluate it on `df_valid`.
#
# Arguments:
#   var_rep        name (character) of the response column
#   df_app         training data frame
#   df_valid       validation data frame
#   distribution   passed to gbm() (default 'gaussian')
#   n.trees, shrinkage, interaction.depth, n.minobsinnode
#                  passed to gbm()
#
# Returns a list: RMSE, R_adj_train, R_adj_test, MAE (all rounded to 2
# decimals), model (the gbm fit), predit (validation predictions) and
# df (a one-row summary data frame).
#
# Relies on calcule_R2(), defined elsewhere in this file, called as
# calcule_R2(observed, predicted).
GBM <- function(var_rep, df_app, df_valid,distribution = 'gaussian',n.trees ,shrinkage,interaction.depth,n.minobsinnode){

  formula <- substitute(var_rep ~ ., list(var_rep = as.name(var_rep)))

  Gradboost <- gbm(formula, data = df_app,
    distribution = distribution,
    n.trees = n.trees,
    shrinkage = shrinkage,
    interaction.depth = interaction.depth,
    n.minobsinnode = n.minobsinnode)

  # Predict on the validation set. n.trees is passed explicitly so that
  # predict() does not have to guess the number of trees (and print a
  # message about it); all fitted trees are used, as before.
  col_posi <- which(names(df_valid) == var_rep)
  prev.GBM <- predict(Gradboost,
                      newdata = as.data.frame(df_valid[, -col_posi]),
                      n.trees = n.trees)

  # RMSE on the validation set
  rmse <- sqrt(mean((df_valid[, var_rep] - prev.GBM)^2))

  # Adjusted R² on the training set (p = number of predictor columns)
  R_adj_train <- calcule_R2(df_app[, var_rep],
                            predict(Gradboost, newdata = df_app, n.trees = n.trees))
  n_train <- nrow(df_app)
  p_train <- ncol(df_app) - 1
  r_adj_train <- 1 - ((1 - R_adj_train) * (n_train - 1) / (n_train - p_train - 1))

  # Adjusted R² on the validation set
  R_adj_test <- calcule_R2(df_valid[, col_posi], prev.GBM)
  n_test <- nrow(df_valid)
  p_test <- ncol(df_valid) - 1
  r_adj_test <- 1 - ((1 - R_adj_test) * (n_test - 1) / (n_test - p_test - 1))

  # Mean absolute error on the validation set
  MAE <- mean(abs(prev.GBM - df_valid[, col_posi]))

  # Round results
  rmse <- round(rmse, 2)
  r_adj_train <- round(r_adj_train, 2)
  r_adj_test <- round(r_adj_test, 2)
  MAE <- round(MAE, 2)

  # output
  results_df <- data.frame(Algorithms = "GBM",
                           Response_variables = var_rep,
                           R2_adjusted_train = r_adj_train,
                           R2_adjusted_test = r_adj_test,
                           RMSE = rmse,
                           MAE = MAE)

  results <- list(RMSE = rmse, R_adj_train = r_adj_train, R_adj_test = r_adj_test, MAE = MAE, model = Gradboost, predit = prev.GBM, df = results_df)

  return(results)
}


# For AB_tot  ------------------------------------------------------------------
# Grid-search results saved earlier, sorted by increasing `mae`
AB_tot_GBM_tuning <- read.csv2("results_tuning/AB_tot_GBM_tuning.csv")


AB_tot_GBM_tuning <- as.data.frame(AB_tot_GBM_tuning)
AB_tot_GBM_tuning <- AB_tot_GBM_tuning %>% arrange(mae)
# head(AB_tot_GBM_tuning)
# Row with the lowest mae.
# NOTE(review): the RF tuning chunk assigns the same name AB_tot_best_param;
# whichever chunk runs last wins.
AB_tot_best_param <- AB_tot_GBM_tuning[1, ]


# R² of each grid row against its rank; seq_len() stays correct even when
# the table is empty, unlike seq(1:nrow(...))
df <- data.frame(x = seq_len(nrow(AB_tot_GBM_tuning)), y = AB_tot_GBM_tuning$r_squared)
GBM_tuning <- ggplot(df, aes(x = x, y = y)) +
  geom_point() +
  labs(x = "Index", y = "R Squared", title = "Abundance: R Squared over Index") +
  theme_minimal()


# ggsave("results_tuning/GBM_tuning.png", plot = GBM_tuning, dpi = 300)

13.6 ANN

  • Default model
Model: "sequential"
________________________________________________________________________________
 Layer (type)                       Output Shape                    Param #     
================================================================================
 dense_1 (Dense)                    (None, 1)                       16          
 dense (Dense)                      (None, 1)                       2           
================================================================================
Total params: 18 (72.00 Byte)
Trainable params: 18 (72.00 Byte)
Non-trainable params: 0 (0.00 Byte)
________________________________________________________________________________

  • Tuning

runs <- tuning_run("Experiment.R", flags = list(dense_units1 = c(32, 64), dense_units2 = c(16, 32), dense_units3 = c(8, 16), dense_units4 = c(4, 8), dropout1 = c(0.4, 0.5), dropout2 = c(0.3, 0.4), dropout3 = c(0.2, 0.3), dropout4 = c(0.1, 0.2), batch_size = c(32, 64)))

  • hidden = c(32,32,16,8)
Model: "sequential_1"
________________________________________________________________________________
 Layer (type)                       Output Shape                    Param #     
================================================================================
 dense_6 (Dense)                    (None, 32)                      512         
 dropout_3 (Dropout)                (None, 32)                      0           
 dense_5 (Dense)                    (None, 32)                      1056        
 dropout_2 (Dropout)                (None, 32)                      0           
 dense_4 (Dense)                    (None, 16)                      528         
 dropout_1 (Dropout)                (None, 16)                      0           
 dense_3 (Dense)                    (None, 8)                       136         
 dropout (Dropout)                  (None, 8)                       0           
 dense_2 (Dense)                    (None, 1)                       9           
================================================================================
Total params: 2241 (8.75 KB)
Trainable params: 2241 (8.75 KB)
Non-trainable params: 0 (0.00 Byte)
________________________________________________________________________________

13.7 Compilation pour chaque algorithme

  • GLM
  • GAM
  • RF
  • GBM
  • ANN AB_tot
Epoch 1/100
32/32 - 3s - loss: 215.1726 - mae: 13.0471 - val_loss: 141.2137 - val_mae: 10.1002 - 3s/epoch - 107ms/step
Epoch 2/100
32/32 - 0s - loss: 143.0973 - mae: 10.1008 - val_loss: 66.8006 - val_mae: 6.2173 - 380ms/epoch - 12ms/step
Epoch 3/100
32/32 - 0s - loss: 78.8611 - mae: 7.0799 - val_loss: 40.0958 - val_mae: 4.8694 - 233ms/epoch - 7ms/step
Epoch 4/100
32/32 - 0s - loss: 66.9187 - mae: 6.4698 - val_loss: 37.7008 - val_mae: 4.7452 - 236ms/epoch - 7ms/step
Epoch 5/100
32/32 - 0s - loss: 62.8379 - mae: 6.2799 - val_loss: 36.3094 - val_mae: 4.6730 - 256ms/epoch - 8ms/step
Epoch 6/100
32/32 - 0s - loss: 61.7096 - mae: 6.2026 - val_loss: 34.7736 - val_mae: 4.6083 - 238ms/epoch - 7ms/step
Epoch 7/100
32/32 - 0s - loss: 60.4005 - mae: 6.1854 - val_loss: 33.3736 - val_mae: 4.4927 - 245ms/epoch - 8ms/step
Epoch 8/100
32/32 - 0s - loss: 58.9367 - mae: 6.0642 - val_loss: 33.4570 - val_mae: 4.5767 - 246ms/epoch - 8ms/step
Epoch 9/100
32/32 - 0s - loss: 58.6489 - mae: 6.0756 - val_loss: 32.9920 - val_mae: 4.4720 - 252ms/epoch - 8ms/step
Epoch 10/100
32/32 - 0s - loss: 53.9013 - mae: 5.8193 - val_loss: 32.5835 - val_mae: 4.5045 - 259ms/epoch - 8ms/step
Epoch 11/100
32/32 - 0s - loss: 53.0623 - mae: 5.7403 - val_loss: 31.9318 - val_mae: 4.4359 - 276ms/epoch - 9ms/step
Epoch 12/100
32/32 - 0s - loss: 52.9333 - mae: 5.7916 - val_loss: 32.1980 - val_mae: 4.4338 - 233ms/epoch - 7ms/step
Epoch 13/100
32/32 - 0s - loss: 51.6050 - mae: 5.7385 - val_loss: 32.4512 - val_mae: 4.4428 - 210ms/epoch - 7ms/step
Epoch 14/100
32/32 - 0s - loss: 51.0942 - mae: 5.6059 - val_loss: 32.7069 - val_mae: 4.5153 - 223ms/epoch - 7ms/step
Epoch 15/100
32/32 - 0s - loss: 50.3328 - mae: 5.5566 - val_loss: 31.5101 - val_mae: 4.4049 - 237ms/epoch - 7ms/step
Epoch 16/100
32/32 - 0s - loss: 51.6115 - mae: 5.6886 - val_loss: 31.7535 - val_mae: 4.4042 - 210ms/epoch - 7ms/step
Epoch 17/100
32/32 - 0s - loss: 49.8412 - mae: 5.5324 - val_loss: 31.2384 - val_mae: 4.3406 - 233ms/epoch - 7ms/step
Epoch 18/100
32/32 - 0s - loss: 49.6909 - mae: 5.6192 - val_loss: 30.0819 - val_mae: 4.2882 - 216ms/epoch - 7ms/step
Epoch 19/100
32/32 - 0s - loss: 51.5608 - mae: 5.6653 - val_loss: 30.9665 - val_mae: 4.3493 - 236ms/epoch - 7ms/step
Epoch 20/100
32/32 - 0s - loss: 48.4568 - mae: 5.5310 - val_loss: 30.5728 - val_mae: 4.3325 - 243ms/epoch - 8ms/step
Epoch 21/100
32/32 - 0s - loss: 49.6089 - mae: 5.5850 - val_loss: 31.4880 - val_mae: 4.3553 - 258ms/epoch - 8ms/step
Epoch 22/100
32/32 - 0s - loss: 47.1728 - mae: 5.4437 - val_loss: 30.7230 - val_mae: 4.3258 - 230ms/epoch - 7ms/step
Epoch 23/100
32/32 - 0s - loss: 48.4276 - mae: 5.4683 - val_loss: 30.2005 - val_mae: 4.2932 - 243ms/epoch - 8ms/step
Epoch 24/100
32/32 - 0s - loss: 47.8558 - mae: 5.5257 - val_loss: 30.0394 - val_mae: 4.2772 - 236ms/epoch - 7ms/step
Epoch 25/100
32/32 - 0s - loss: 45.9213 - mae: 5.3785 - val_loss: 30.2334 - val_mae: 4.2892 - 231ms/epoch - 7ms/step
Epoch 26/100
32/32 - 0s - loss: 46.9614 - mae: 5.4406 - val_loss: 29.8767 - val_mae: 4.2949 - 212ms/epoch - 7ms/step
Epoch 27/100
32/32 - 0s - loss: 45.7691 - mae: 5.3501 - val_loss: 29.5017 - val_mae: 4.2556 - 193ms/epoch - 6ms/step
Epoch 28/100
32/32 - 0s - loss: 45.7549 - mae: 5.3687 - val_loss: 28.8141 - val_mae: 4.1678 - 187ms/epoch - 6ms/step
Epoch 29/100
32/32 - 0s - loss: 45.4432 - mae: 5.2967 - val_loss: 28.8720 - val_mae: 4.1491 - 147ms/epoch - 5ms/step
Epoch 30/100
32/32 - 0s - loss: 46.1207 - mae: 5.4040 - val_loss: 29.6095 - val_mae: 4.2263 - 178ms/epoch - 6ms/step
Epoch 31/100
32/32 - 0s - loss: 44.9022 - mae: 5.3325 - val_loss: 30.1833 - val_mae: 4.2915 - 211ms/epoch - 7ms/step
Epoch 32/100
32/32 - 0s - loss: 45.7768 - mae: 5.3676 - val_loss: 29.6317 - val_mae: 4.2516 - 203ms/epoch - 6ms/step
Epoch 33/100
32/32 - 0s - loss: 45.5058 - mae: 5.3723 - val_loss: 30.1291 - val_mae: 4.3000 - 232ms/epoch - 7ms/step
Epoch 34/100
32/32 - 0s - loss: 45.3344 - mae: 5.3464 - val_loss: 30.0016 - val_mae: 4.2757 - 238ms/epoch - 7ms/step
Epoch 35/100
32/32 - 0s - loss: 44.6659 - mae: 5.3322 - val_loss: 29.7761 - val_mae: 4.2610 - 202ms/epoch - 6ms/step
Epoch 36/100
32/32 - 0s - loss: 45.7712 - mae: 5.3391 - val_loss: 29.5085 - val_mae: 4.2387 - 175ms/epoch - 5ms/step
Epoch 37/100
32/32 - 0s - loss: 44.4487 - mae: 5.2988 - val_loss: 29.7953 - val_mae: 4.2788 - 206ms/epoch - 6ms/step
Epoch 38/100
32/32 - 0s - loss: 43.4370 - mae: 5.2896 - val_loss: 30.2030 - val_mae: 4.3169 - 200ms/epoch - 6ms/step
Epoch 39/100
32/32 - 0s - loss: 43.2072 - mae: 5.2063 - val_loss: 31.3170 - val_mae: 4.4383 - 204ms/epoch - 6ms/step
Epoch 40/100
32/32 - 0s - loss: 44.2836 - mae: 5.2634 - val_loss: 29.8058 - val_mae: 4.2984 - 202ms/epoch - 6ms/step
Epoch 41/100
32/32 - 0s - loss: 43.4449 - mae: 5.2453 - val_loss: 28.8314 - val_mae: 4.2160 - 209ms/epoch - 7ms/step
Epoch 42/100
32/32 - 0s - loss: 43.2672 - mae: 5.2302 - val_loss: 29.2791 - val_mae: 4.2580 - 214ms/epoch - 7ms/step
Epoch 43/100
32/32 - 0s - loss: 42.4249 - mae: 5.1922 - val_loss: 29.8537 - val_mae: 4.2816 - 242ms/epoch - 8ms/step
Epoch 44/100
32/32 - 0s - loss: 43.3541 - mae: 5.2521 - val_loss: 30.8890 - val_mae: 4.3711 - 229ms/epoch - 7ms/step
Epoch 45/100
32/32 - 0s - loss: 42.7845 - mae: 5.2402 - val_loss: 31.0093 - val_mae: 4.4009 - 191ms/epoch - 6ms/step
Epoch 46/100
32/32 - 0s - loss: 42.3924 - mae: 5.2005 - val_loss: 30.7056 - val_mae: 4.3883 - 237ms/epoch - 7ms/step
Epoch 47/100
32/32 - 0s - loss: 42.4449 - mae: 5.1819 - val_loss: 30.1273 - val_mae: 4.3227 - 219ms/epoch - 7ms/step
Epoch 48/100
32/32 - 0s - loss: 41.5070 - mae: 5.1188 - val_loss: 30.9232 - val_mae: 4.4120 - 230ms/epoch - 7ms/step
Epoch 49/100
32/32 - 0s - loss: 41.7975 - mae: 5.1534 - val_loss: 31.1358 - val_mae: 4.4031 - 238ms/epoch - 7ms/step
Epoch 50/100
32/32 - 0s - loss: 42.2860 - mae: 5.1252 - val_loss: 31.0588 - val_mae: 4.4150 - 220ms/epoch - 7ms/step
Epoch 51/100
32/32 - 0s - loss: 42.7550 - mae: 5.2004 - val_loss: 30.8915 - val_mae: 4.4062 - 235ms/epoch - 7ms/step
Epoch 52/100
32/32 - 0s - loss: 41.4220 - mae: 5.1107 - val_loss: 30.8155 - val_mae: 4.3931 - 226ms/epoch - 7ms/step
Epoch 53/100
32/32 - 0s - loss: 41.6546 - mae: 5.1091 - val_loss: 31.1292 - val_mae: 4.3967 - 199ms/epoch - 6ms/step
Epoch 54/100
32/32 - 0s - loss: 42.4103 - mae: 5.1706 - val_loss: 30.5571 - val_mae: 4.3696 - 221ms/epoch - 7ms/step
Epoch 55/100
32/32 - 0s - loss: 41.1014 - mae: 5.0932 - val_loss: 31.9261 - val_mae: 4.4756 - 238ms/epoch - 7ms/step
Epoch 56/100
32/32 - 0s - loss: 42.9222 - mae: 5.2341 - val_loss: 30.7990 - val_mae: 4.3919 - 199ms/epoch - 6ms/step
Epoch 57/100
32/32 - 0s - loss: 40.8370 - mae: 5.0632 - val_loss: 30.8905 - val_mae: 4.3915 - 232ms/epoch - 7ms/step
Epoch 58/100
32/32 - 0s - loss: 41.4631 - mae: 5.1398 - val_loss: 30.9696 - val_mae: 4.4194 - 239ms/epoch - 7ms/step
Epoch 59/100
32/32 - 0s - loss: 41.1574 - mae: 5.0961 - val_loss: 31.2480 - val_mae: 4.4817 - 253ms/epoch - 8ms/step
Epoch 60/100
32/32 - 0s - loss: 40.2723 - mae: 4.9768 - val_loss: 31.5974 - val_mae: 4.4981 - 216ms/epoch - 7ms/step
Epoch 61/100
32/32 - 0s - loss: 41.4815 - mae: 5.0881 - val_loss: 31.6667 - val_mae: 4.4768 - 259ms/epoch - 8ms/step
Epoch 62/100
32/32 - 0s - loss: 41.1304 - mae: 5.0874 - val_loss: 30.8398 - val_mae: 4.3759 - 251ms/epoch - 8ms/step
Epoch 63/100
32/32 - 0s - loss: 41.7279 - mae: 5.1080 - val_loss: 30.4656 - val_mae: 4.3816 - 231ms/epoch - 7ms/step
Epoch 64/100
32/32 - 0s - loss: 40.4147 - mae: 5.0199 - val_loss: 30.8918 - val_mae: 4.4311 - 217ms/epoch - 7ms/step
Epoch 65/100
32/32 - 0s - loss: 40.1705 - mae: 5.0564 - val_loss: 30.9870 - val_mae: 4.4390 - 217ms/epoch - 7ms/step
Epoch 66/100
32/32 - 0s - loss: 40.0894 - mae: 5.0484 - val_loss: 30.7740 - val_mae: 4.3656 - 244ms/epoch - 8ms/step
Epoch 67/100
32/32 - 0s - loss: 39.7137 - mae: 4.9948 - val_loss: 31.1577 - val_mae: 4.4238 - 242ms/epoch - 8ms/step
Epoch 68/100
32/32 - 0s - loss: 40.0248 - mae: 5.0556 - val_loss: 30.7502 - val_mae: 4.3894 - 248ms/epoch - 8ms/step
Epoch 69/100
32/32 - 0s - loss: 41.0365 - mae: 5.0466 - val_loss: 31.0563 - val_mae: 4.4197 - 252ms/epoch - 8ms/step
Epoch 70/100
32/32 - 0s - loss: 40.5714 - mae: 5.0420 - val_loss: 31.2108 - val_mae: 4.4100 - 243ms/epoch - 8ms/step
Epoch 71/100
32/32 - 0s - loss: 40.0002 - mae: 5.0082 - val_loss: 30.5633 - val_mae: 4.3726 - 218ms/epoch - 7ms/step
Epoch 72/100
32/32 - 0s - loss: 38.8842 - mae: 4.9541 - val_loss: 32.1985 - val_mae: 4.5160 - 213ms/epoch - 7ms/step
Epoch 73/100
32/32 - 0s - loss: 39.3398 - mae: 5.0051 - val_loss: 31.5951 - val_mae: 4.4442 - 220ms/epoch - 7ms/step
Epoch 74/100
32/32 - 0s - loss: 39.1166 - mae: 4.9406 - val_loss: 31.9833 - val_mae: 4.5124 - 234ms/epoch - 7ms/step
Epoch 75/100
32/32 - 0s - loss: 39.8963 - mae: 5.0039 - val_loss: 31.1303 - val_mae: 4.4056 - 242ms/epoch - 8ms/step
Epoch 76/100
32/32 - 0s - loss: 38.8646 - mae: 4.9568 - val_loss: 30.3233 - val_mae: 4.3525 - 235ms/epoch - 7ms/step
Epoch 77/100
32/32 - 0s - loss: 39.2301 - mae: 4.9172 - val_loss: 30.9704 - val_mae: 4.4076 - 240ms/epoch - 7ms/step
Epoch 78/100
32/32 - 0s - loss: 39.2851 - mae: 4.9879 - val_loss: 30.7540 - val_mae: 4.3940 - 302ms/epoch - 9ms/step
Epoch 79/100
32/32 - 0s - loss: 39.3340 - mae: 4.9950 - val_loss: 30.7799 - val_mae: 4.4079 - 226ms/epoch - 7ms/step
Epoch 80/100
32/32 - 0s - loss: 37.9570 - mae: 4.8884 - val_loss: 30.0968 - val_mae: 4.3420 - 226ms/epoch - 7ms/step
Epoch 81/100
32/32 - 0s - loss: 37.5607 - mae: 4.8691 - val_loss: 30.4126 - val_mae: 4.3595 - 226ms/epoch - 7ms/step
Epoch 82/100
32/32 - 0s - loss: 38.7499 - mae: 4.9098 - val_loss: 30.4902 - val_mae: 4.3533 - 265ms/epoch - 8ms/step
Epoch 83/100
32/32 - 0s - loss: 39.6327 - mae: 4.9866 - val_loss: 29.9598 - val_mae: 4.3239 - 250ms/epoch - 8ms/step
Epoch 84/100
32/32 - 0s - loss: 39.4468 - mae: 4.9728 - val_loss: 30.8083 - val_mae: 4.3825 - 189ms/epoch - 6ms/step
Epoch 85/100
32/32 - 0s - loss: 39.1057 - mae: 4.9468 - val_loss: 30.8017 - val_mae: 4.4042 - 209ms/epoch - 7ms/step
Epoch 86/100
32/32 - 0s - loss: 39.2065 - mae: 4.9262 - val_loss: 30.5369 - val_mae: 4.3927 - 219ms/epoch - 7ms/step
Epoch 87/100
32/32 - 0s - loss: 38.9504 - mae: 4.8964 - val_loss: 30.8347 - val_mae: 4.3888 - 244ms/epoch - 8ms/step
Epoch 88/100
32/32 - 0s - loss: 38.3907 - mae: 4.9219 - val_loss: 30.3676 - val_mae: 4.3496 - 235ms/epoch - 7ms/step
Epoch 89/100
32/32 - 0s - loss: 38.2753 - mae: 4.8934 - val_loss: 30.5148 - val_mae: 4.3597 - 253ms/epoch - 8ms/step
Epoch 90/100
32/32 - 0s - loss: 37.8201 - mae: 4.8695 - val_loss: 29.9937 - val_mae: 4.3315 - 242ms/epoch - 8ms/step
Epoch 91/100
32/32 - 0s - loss: 38.4713 - mae: 4.9478 - val_loss: 30.1669 - val_mae: 4.3567 - 224ms/epoch - 7ms/step
Epoch 92/100
32/32 - 0s - loss: 39.6206 - mae: 5.0026 - val_loss: 29.9851 - val_mae: 4.3456 - 226ms/epoch - 7ms/step
Epoch 93/100
32/32 - 0s - loss: 38.5403 - mae: 4.8890 - val_loss: 30.3206 - val_mae: 4.3425 - 216ms/epoch - 7ms/step
Epoch 94/100
32/32 - 0s - loss: 39.6380 - mae: 5.0080 - val_loss: 30.3783 - val_mae: 4.3685 - 198ms/epoch - 6ms/step
Epoch 95/100
32/32 - 0s - loss: 37.4539 - mae: 4.8843 - val_loss: 30.3588 - val_mae: 4.3644 - 211ms/epoch - 7ms/step
Epoch 96/100
32/32 - 0s - loss: 37.9033 - mae: 4.8887 - val_loss: 30.2323 - val_mae: 4.3388 - 202ms/epoch - 6ms/step
Epoch 97/100
32/32 - 0s - loss: 38.4699 - mae: 4.9070 - val_loss: 30.0065 - val_mae: 4.3255 - 215ms/epoch - 7ms/step
Epoch 98/100
32/32 - 0s - loss: 38.8779 - mae: 4.9464 - val_loss: 29.8595 - val_mae: 4.3151 - 190ms/epoch - 6ms/step
Epoch 99/100
32/32 - 0s - loss: 38.9402 - mae: 4.9859 - val_loss: 29.7076 - val_mae: 4.3196 - 191ms/epoch - 6ms/step
Epoch 100/100
32/32 - 0s - loss: 36.5633 - mae: 4.7878 - val_loss: 30.0559 - val_mae: 4.3548 - 187ms/epoch - 6ms/step
20/20 - 0s - 198ms/epoch - 10ms/step
79/79 - 0s - 157ms/epoch - 2ms/step
  • ANN BM_tot
Epoch 1/100
32/32 - 2s - loss: 81.6266 - mae: 8.2504 - val_loss: 78.3562 - val_mae: 8.0363 - 2s/epoch - 48ms/step
Epoch 2/100
32/32 - 0s - loss: 51.9499 - mae: 6.1402 - val_loss: 36.3471 - val_mae: 5.2082 - 188ms/epoch - 6ms/step
Epoch 3/100
32/32 - 0s - loss: 28.3407 - mae: 4.2775 - val_loss: 21.2572 - val_mae: 3.7658 - 197ms/epoch - 6ms/step
Epoch 4/100
32/32 - 0s - loss: 22.2959 - mae: 3.7891 - val_loss: 17.3968 - val_mae: 3.3440 - 177ms/epoch - 6ms/step
Epoch 5/100
32/32 - 0s - loss: 23.9555 - mae: 3.8709 - val_loss: 16.8166 - val_mae: 3.3023 - 188ms/epoch - 6ms/step
Epoch 6/100
32/32 - 0s - loss: 21.1700 - mae: 3.6441 - val_loss: 16.4041 - val_mae: 3.2519 - 184ms/epoch - 6ms/step
Epoch 7/100
32/32 - 0s - loss: 21.7206 - mae: 3.7309 - val_loss: 16.5823 - val_mae: 3.2849 - 179ms/epoch - 6ms/step
Epoch 8/100
32/32 - 0s - loss: 19.6479 - mae: 3.5002 - val_loss: 16.8660 - val_mae: 3.2936 - 201ms/epoch - 6ms/step
Epoch 9/100
32/32 - 0s - loss: 18.9336 - mae: 3.4266 - val_loss: 16.0103 - val_mae: 3.2140 - 211ms/epoch - 7ms/step
Epoch 10/100
32/32 - 0s - loss: 18.8976 - mae: 3.4775 - val_loss: 15.0379 - val_mae: 3.0786 - 194ms/epoch - 6ms/step
Epoch 11/100
32/32 - 0s - loss: 17.7019 - mae: 3.3541 - val_loss: 14.0396 - val_mae: 2.9392 - 196ms/epoch - 6ms/step
Epoch 12/100
32/32 - 0s - loss: 18.3591 - mae: 3.4051 - val_loss: 14.4101 - val_mae: 3.0277 - 238ms/epoch - 7ms/step
Epoch 13/100
32/32 - 0s - loss: 17.5847 - mae: 3.3066 - val_loss: 13.3050 - val_mae: 2.8660 - 214ms/epoch - 7ms/step
Epoch 14/100
32/32 - 0s - loss: 17.9780 - mae: 3.3959 - val_loss: 13.7091 - val_mae: 2.9276 - 199ms/epoch - 6ms/step
Epoch 15/100
32/32 - 0s - loss: 17.8234 - mae: 3.3525 - val_loss: 14.4317 - val_mae: 3.0253 - 205ms/epoch - 6ms/step
Epoch 16/100
32/32 - 0s - loss: 17.3360 - mae: 3.2889 - val_loss: 14.6465 - val_mae: 3.0544 - 228ms/epoch - 7ms/step
Epoch 17/100
32/32 - 0s - loss: 16.1939 - mae: 3.2004 - val_loss: 12.7869 - val_mae: 2.8148 - 205ms/epoch - 6ms/step
Epoch 18/100
32/32 - 0s - loss: 16.2776 - mae: 3.2040 - val_loss: 12.1926 - val_mae: 2.7116 - 205ms/epoch - 6ms/step
Epoch 19/100
32/32 - 0s - loss: 16.6221 - mae: 3.2592 - val_loss: 12.0468 - val_mae: 2.6976 - 179ms/epoch - 6ms/step
Epoch 20/100
32/32 - 0s - loss: 15.9223 - mae: 3.1341 - val_loss: 11.9129 - val_mae: 2.6847 - 190ms/epoch - 6ms/step
Epoch 21/100
32/32 - 0s - loss: 16.4204 - mae: 3.2034 - val_loss: 12.7992 - val_mae: 2.8036 - 201ms/epoch - 6ms/step
Epoch 22/100
32/32 - 0s - loss: 16.0684 - mae: 3.1696 - val_loss: 12.2061 - val_mae: 2.7234 - 236ms/epoch - 7ms/step
Epoch 23/100
32/32 - 0s - loss: 15.1724 - mae: 3.0890 - val_loss: 11.7469 - val_mae: 2.6643 - 201ms/epoch - 6ms/step
Epoch 24/100
32/32 - 0s - loss: 15.5443 - mae: 3.1295 - val_loss: 13.6265 - val_mae: 2.9169 - 188ms/epoch - 6ms/step
Epoch 25/100
32/32 - 0s - loss: 15.7534 - mae: 3.1491 - val_loss: 12.2022 - val_mae: 2.7223 - 201ms/epoch - 6ms/step
Epoch 26/100
32/32 - 0s - loss: 15.7055 - mae: 3.1651 - val_loss: 11.8770 - val_mae: 2.6740 - 204ms/epoch - 6ms/step
Epoch 27/100
32/32 - 0s - loss: 15.2678 - mae: 3.0819 - val_loss: 12.2640 - val_mae: 2.7221 - 213ms/epoch - 7ms/step
Epoch 28/100
32/32 - 0s - loss: 14.4876 - mae: 3.0276 - val_loss: 11.9986 - val_mae: 2.6907 - 238ms/epoch - 7ms/step
Epoch 29/100
32/32 - 0s - loss: 14.4475 - mae: 3.0545 - val_loss: 12.2534 - val_mae: 2.7211 - 244ms/epoch - 8ms/step
Epoch 30/100
32/32 - 0s - loss: 14.6075 - mae: 3.0333 - val_loss: 12.9448 - val_mae: 2.8045 - 210ms/epoch - 7ms/step
Epoch 31/100
32/32 - 0s - loss: 14.6739 - mae: 3.0043 - val_loss: 12.6390 - val_mae: 2.7720 - 189ms/epoch - 6ms/step
Epoch 32/100
32/32 - 1s - loss: 15.5978 - mae: 3.1399 - val_loss: 14.3014 - val_mae: 2.9854 - 561ms/epoch - 18ms/step
Epoch 33/100
32/32 - 0s - loss: 13.1764 - mae: 2.8860 - val_loss: 11.7269 - val_mae: 2.6587 - 210ms/epoch - 7ms/step
Epoch 34/100
32/32 - 0s - loss: 13.9699 - mae: 2.9490 - val_loss: 11.8929 - val_mae: 2.6723 - 203ms/epoch - 6ms/step
Epoch 35/100
32/32 - 0s - loss: 14.4477 - mae: 3.0273 - val_loss: 12.6297 - val_mae: 2.7594 - 211ms/epoch - 7ms/step
Epoch 36/100
32/32 - 0s - loss: 14.7321 - mae: 3.0715 - val_loss: 13.7170 - val_mae: 2.8922 - 194ms/epoch - 6ms/step
Epoch 37/100
32/32 - 0s - loss: 13.8366 - mae: 2.9480 - val_loss: 12.7299 - val_mae: 2.7795 - 216ms/epoch - 7ms/step
Epoch 38/100
32/32 - 0s - loss: 14.5456 - mae: 3.0233 - val_loss: 12.8752 - val_mae: 2.7860 - 231ms/epoch - 7ms/step
Epoch 39/100
32/32 - 0s - loss: 13.9998 - mae: 2.9251 - val_loss: 12.3420 - val_mae: 2.7231 - 205ms/epoch - 6ms/step
Epoch 40/100
32/32 - 0s - loss: 13.3205 - mae: 2.8964 - val_loss: 13.4314 - val_mae: 2.8469 - 196ms/epoch - 6ms/step
Epoch 41/100
32/32 - 0s - loss: 13.8462 - mae: 2.8877 - val_loss: 12.4072 - val_mae: 2.7312 - 257ms/epoch - 8ms/step
Epoch 42/100
32/32 - 0s - loss: 13.4584 - mae: 2.9104 - val_loss: 12.0798 - val_mae: 2.6915 - 239ms/epoch - 7ms/step
Epoch 43/100
32/32 - 0s - loss: 13.3521 - mae: 2.8961 - val_loss: 12.0926 - val_mae: 2.6930 - 187ms/epoch - 6ms/step
Epoch 44/100
32/32 - 0s - loss: 12.7645 - mae: 2.8327 - val_loss: 12.6495 - val_mae: 2.7584 - 222ms/epoch - 7ms/step
Epoch 45/100
32/32 - 0s - loss: 12.7313 - mae: 2.8294 - val_loss: 13.4889 - val_mae: 2.8523 - 251ms/epoch - 8ms/step
Epoch 46/100
32/32 - 0s - loss: 13.2231 - mae: 2.8967 - val_loss: 13.4070 - val_mae: 2.8445 - 189ms/epoch - 6ms/step
Epoch 47/100
32/32 - 0s - loss: 13.3540 - mae: 2.8777 - val_loss: 12.8957 - val_mae: 2.7900 - 208ms/epoch - 6ms/step
Epoch 48/100
32/32 - 0s - loss: 12.3487 - mae: 2.8046 - val_loss: 12.6028 - val_mae: 2.7579 - 174ms/epoch - 5ms/step
Epoch 49/100
32/32 - 0s - loss: 13.1562 - mae: 2.8888 - val_loss: 12.9914 - val_mae: 2.8001 - 209ms/epoch - 7ms/step
Epoch 50/100
32/32 - 0s - loss: 13.2719 - mae: 2.8669 - val_loss: 12.4616 - val_mae: 2.7335 - 234ms/epoch - 7ms/step
Epoch 51/100
32/32 - 0s - loss: 12.8100 - mae: 2.8233 - val_loss: 12.7660 - val_mae: 2.7665 - 244ms/epoch - 8ms/step
Epoch 52/100
32/32 - 0s - loss: 12.4456 - mae: 2.7870 - val_loss: 12.2471 - val_mae: 2.7209 - 251ms/epoch - 8ms/step
Epoch 53/100
32/32 - 0s - loss: 13.4275 - mae: 2.8866 - val_loss: 13.0582 - val_mae: 2.7995 - 215ms/epoch - 7ms/step
Epoch 54/100
32/32 - 0s - loss: 13.4271 - mae: 2.8893 - val_loss: 13.1633 - val_mae: 2.8143 - 140ms/epoch - 4ms/step
Epoch 55/100
32/32 - 0s - loss: 12.9660 - mae: 2.8590 - val_loss: 12.9222 - val_mae: 2.7865 - 115ms/epoch - 4ms/step
Epoch 56/100
32/32 - 0s - loss: 12.8788 - mae: 2.8356 - val_loss: 12.5242 - val_mae: 2.7446 - 80ms/epoch - 3ms/step
Epoch 57/100
32/32 - 0s - loss: 12.4738 - mae: 2.8026 - val_loss: 13.1900 - val_mae: 2.8130 - 93ms/epoch - 3ms/step
Epoch 58/100
32/32 - 0s - loss: 12.3428 - mae: 2.7425 - val_loss: 13.1748 - val_mae: 2.8076 - 84ms/epoch - 3ms/step
Epoch 59/100
32/32 - 0s - loss: 12.9565 - mae: 2.8429 - val_loss: 12.8777 - val_mae: 2.7749 - 85ms/epoch - 3ms/step
Epoch 60/100
32/32 - 0s - loss: 12.5912 - mae: 2.8107 - val_loss: 12.9614 - val_mae: 2.7786 - 80ms/epoch - 3ms/step
Epoch 61/100
32/32 - 0s - loss: 12.8329 - mae: 2.8021 - val_loss: 13.1770 - val_mae: 2.8083 - 84ms/epoch - 3ms/step
Epoch 62/100
32/32 - 0s - loss: 12.3188 - mae: 2.7786 - val_loss: 13.5985 - val_mae: 2.8700 - 145ms/epoch - 5ms/step
Epoch 63/100
32/32 - 0s - loss: 11.6901 - mae: 2.7232 - val_loss: 12.6803 - val_mae: 2.7574 - 378ms/epoch - 12ms/step
Epoch 64/100
32/32 - 0s - loss: 12.4464 - mae: 2.7897 - val_loss: 12.8501 - val_mae: 2.7658 - 111ms/epoch - 3ms/step
Epoch 65/100
32/32 - 0s - loss: 11.8023 - mae: 2.7335 - val_loss: 12.0624 - val_mae: 2.6837 - 113ms/epoch - 4ms/step
Epoch 66/100
32/32 - 0s - loss: 12.2778 - mae: 2.7855 - val_loss: 12.6095 - val_mae: 2.7555 - 95ms/epoch - 3ms/step
Epoch 67/100
32/32 - 0s - loss: 12.3124 - mae: 2.7104 - val_loss: 12.6765 - val_mae: 2.7628 - 85ms/epoch - 3ms/step
Epoch 68/100
32/32 - 0s - loss: 12.4958 - mae: 2.8061 - val_loss: 12.5117 - val_mae: 2.7577 - 92ms/epoch - 3ms/step
Epoch 69/100
32/32 - 0s - loss: 12.4546 - mae: 2.7810 - val_loss: 13.1641 - val_mae: 2.8304 - 89ms/epoch - 3ms/step
Epoch 70/100
32/32 - 0s - loss: 12.3068 - mae: 2.7930 - val_loss: 12.8956 - val_mae: 2.7896 - 89ms/epoch - 3ms/step
Epoch 71/100
32/32 - 0s - loss: 12.4366 - mae: 2.7941 - val_loss: 13.0510 - val_mae: 2.8080 - 80ms/epoch - 3ms/step
Epoch 72/100
32/32 - 0s - loss: 12.0619 - mae: 2.7754 - val_loss: 12.8825 - val_mae: 2.7894 - 94ms/epoch - 3ms/step
Epoch 73/100
32/32 - 0s - loss: 12.4645 - mae: 2.7900 - val_loss: 13.0108 - val_mae: 2.8049 - 90ms/epoch - 3ms/step
Epoch 74/100
32/32 - 0s - loss: 12.6047 - mae: 2.8102 - val_loss: 13.9098 - val_mae: 2.9115 - 85ms/epoch - 3ms/step
Epoch 75/100
32/32 - 0s - loss: 11.2525 - mae: 2.6790 - val_loss: 13.7848 - val_mae: 2.9053 - 84ms/epoch - 3ms/step
Epoch 76/100
32/32 - 0s - loss: 11.9718 - mae: 2.7240 - val_loss: 13.6821 - val_mae: 2.8774 - 88ms/epoch - 3ms/step
Epoch 77/100
32/32 - 0s - loss: 11.4246 - mae: 2.6991 - val_loss: 14.0456 - val_mae: 2.9258 - 83ms/epoch - 3ms/step
Epoch 78/100
32/32 - 0s - loss: 11.6100 - mae: 2.6977 - val_loss: 14.0896 - val_mae: 2.9625 - 80ms/epoch - 3ms/step
Epoch 79/100
32/32 - 0s - loss: 11.1699 - mae: 2.6328 - val_loss: 14.1464 - val_mae: 2.9826 - 78ms/epoch - 2ms/step
Epoch 80/100
32/32 - 0s - loss: 11.3158 - mae: 2.6845 - val_loss: 13.1808 - val_mae: 2.8500 - 97ms/epoch - 3ms/step
Epoch 81/100
32/32 - 0s - loss: 11.4033 - mae: 2.6713 - val_loss: 13.5560 - val_mae: 2.8878 - 137ms/epoch - 4ms/step
Epoch 82/100
32/32 - 0s - loss: 12.2137 - mae: 2.7559 - val_loss: 12.9125 - val_mae: 2.8100 - 175ms/epoch - 5ms/step
Epoch 83/100
32/32 - 0s - loss: 12.0887 - mae: 2.7364 - val_loss: 13.0570 - val_mae: 2.8638 - 214ms/epoch - 7ms/step
Epoch 84/100
32/32 - 0s - loss: 12.0275 - mae: 2.7586 - val_loss: 12.4771 - val_mae: 2.7902 - 140ms/epoch - 4ms/step
Epoch 85/100
32/32 - 0s - loss: 12.0213 - mae: 2.7079 - val_loss: 12.8920 - val_mae: 2.8397 - 172ms/epoch - 5ms/step
Epoch 86/100
32/32 - 0s - loss: 12.4336 - mae: 2.7827 - val_loss: 13.0850 - val_mae: 2.8801 - 148ms/epoch - 5ms/step
Epoch 87/100
32/32 - 0s - loss: 11.5935 - mae: 2.7116 - val_loss: 13.3507 - val_mae: 2.9282 - 152ms/epoch - 5ms/step
Epoch 88/100
32/32 - 0s - loss: 11.6292 - mae: 2.7050 - val_loss: 13.2072 - val_mae: 2.8913 - 167ms/epoch - 5ms/step
Epoch 89/100
32/32 - 0s - loss: 11.3517 - mae: 2.6749 - val_loss: 12.5993 - val_mae: 2.8022 - 161ms/epoch - 5ms/step
Epoch 90/100
32/32 - 0s - loss: 11.5584 - mae: 2.6921 - val_loss: 13.0462 - val_mae: 2.8632 - 156ms/epoch - 5ms/step
Epoch 91/100
32/32 - 0s - loss: 11.1935 - mae: 2.6850 - val_loss: 12.8817 - val_mae: 2.8331 - 125ms/epoch - 4ms/step
Epoch 92/100
32/32 - 0s - loss: 11.5518 - mae: 2.7021 - val_loss: 12.6166 - val_mae: 2.8040 - 159ms/epoch - 5ms/step
Epoch 93/100
32/32 - 0s - loss: 11.3923 - mae: 2.6967 - val_loss: 13.3256 - val_mae: 2.9013 - 129ms/epoch - 4ms/step
Epoch 94/100
32/32 - 0s - loss: 10.7877 - mae: 2.6128 - val_loss: 12.3754 - val_mae: 2.7699 - 145ms/epoch - 5ms/step
Epoch 95/100
32/32 - 0s - loss: 11.8212 - mae: 2.7137 - val_loss: 13.2236 - val_mae: 2.8826 - 208ms/epoch - 6ms/step
Epoch 96/100
32/32 - 0s - loss: 11.0667 - mae: 2.6339 - val_loss: 13.0411 - val_mae: 2.8609 - 144ms/epoch - 4ms/step
Epoch 97/100
32/32 - 0s - loss: 11.5749 - mae: 2.6974 - val_loss: 12.7628 - val_mae: 2.8442 - 129ms/epoch - 4ms/step
Epoch 98/100
32/32 - 0s - loss: 11.3292 - mae: 2.6738 - val_loss: 13.1355 - val_mae: 2.9034 - 133ms/epoch - 4ms/step
Epoch 99/100
32/32 - 0s - loss: 11.4278 - mae: 2.6469 - val_loss: 13.3411 - val_mae: 2.9327 - 175ms/epoch - 5ms/step
Epoch 100/100
32/32 - 0s - loss: 10.8318 - mae: 2.6110 - val_loss: 12.6956 - val_mae: 2.8530 - 155ms/epoch - 5ms/step
11/11 - 0s - 131ms/epoch - 12ms/step
40/40 - 0s - 100ms/epoch - 3ms/step
  • ANN Richesse_tot
Epoch 1/100
32/32 - 2s - loss: 24.0812 - mae: 4.0758 - val_loss: 23.0456 - val_mae: 4.1482 - 2s/epoch - 49ms/step
Epoch 2/100
32/32 - 0s - loss: 15.6435 - mae: 3.1421 - val_loss: 16.9762 - val_mae: 3.5002 - 176ms/epoch - 6ms/step
Epoch 3/100
32/32 - 0s - loss: 14.0804 - mae: 2.9340 - val_loss: 14.8705 - val_mae: 3.2402 - 218ms/epoch - 7ms/step
Epoch 4/100
32/32 - 0s - loss: 12.8260 - mae: 2.7834 - val_loss: 12.5410 - val_mae: 2.9441 - 211ms/epoch - 7ms/step
Epoch 5/100
32/32 - 0s - loss: 12.0399 - mae: 2.6819 - val_loss: 11.5286 - val_mae: 2.8062 - 209ms/epoch - 7ms/step
Epoch 6/100
32/32 - 0s - loss: 10.8889 - mae: 2.5517 - val_loss: 11.3366 - val_mae: 2.7779 - 213ms/epoch - 7ms/step
Epoch 7/100
32/32 - 0s - loss: 10.8611 - mae: 2.5506 - val_loss: 11.5298 - val_mae: 2.7991 - 205ms/epoch - 6ms/step
Epoch 8/100
32/32 - 0s - loss: 9.7645 - mae: 2.4107 - val_loss: 12.5028 - val_mae: 2.9112 - 209ms/epoch - 7ms/step
Epoch 9/100
32/32 - 0s - loss: 10.0555 - mae: 2.4587 - val_loss: 11.1215 - val_mae: 2.7467 - 203ms/epoch - 6ms/step
Epoch 10/100
32/32 - 0s - loss: 9.7301 - mae: 2.3773 - val_loss: 10.3311 - val_mae: 2.6344 - 196ms/epoch - 6ms/step
Epoch 11/100
32/32 - 0s - loss: 9.6926 - mae: 2.3715 - val_loss: 10.1919 - val_mae: 2.6186 - 216ms/epoch - 7ms/step
Epoch 12/100
32/32 - 0s - loss: 9.4085 - mae: 2.3586 - val_loss: 10.8014 - val_mae: 2.6975 - 248ms/epoch - 8ms/step
Epoch 13/100
32/32 - 0s - loss: 8.9972 - mae: 2.2884 - val_loss: 9.0286 - val_mae: 2.4653 - 223ms/epoch - 7ms/step
Epoch 14/100
32/32 - 0s - loss: 8.6581 - mae: 2.2443 - val_loss: 9.6882 - val_mae: 2.5566 - 216ms/epoch - 7ms/step
Epoch 15/100
32/32 - 0s - loss: 8.1523 - mae: 2.1928 - val_loss: 9.1110 - val_mae: 2.4752 - 195ms/epoch - 6ms/step
Epoch 16/100
32/32 - 0s - loss: 8.7452 - mae: 2.2320 - val_loss: 9.5688 - val_mae: 2.5336 - 223ms/epoch - 7ms/step
Epoch 17/100
32/32 - 1s - loss: 8.1337 - mae: 2.1737 - val_loss: 8.1897 - val_mae: 2.3363 - 805ms/epoch - 25ms/step
Epoch 18/100
32/32 - 0s - loss: 8.2538 - mae: 2.1895 - val_loss: 9.2937 - val_mae: 2.4965 - 186ms/epoch - 6ms/step
Epoch 19/100
32/32 - 0s - loss: 8.1863 - mae: 2.1569 - val_loss: 8.8285 - val_mae: 2.4264 - 186ms/epoch - 6ms/step
Epoch 20/100
32/32 - 0s - loss: 7.5938 - mae: 2.0997 - val_loss: 9.2280 - val_mae: 2.4831 - 190ms/epoch - 6ms/step
Epoch 21/100
32/32 - 0s - loss: 7.7870 - mae: 2.1099 - val_loss: 8.8380 - val_mae: 2.4296 - 187ms/epoch - 6ms/step
Epoch 22/100
32/32 - 0s - loss: 7.4680 - mae: 2.0673 - val_loss: 8.6858 - val_mae: 2.4056 - 191ms/epoch - 6ms/step
Epoch 23/100
32/32 - 0s - loss: 7.5729 - mae: 2.0797 - val_loss: 8.3652 - val_mae: 2.3555 - 206ms/epoch - 6ms/step
Epoch 24/100
32/32 - 0s - loss: 7.7115 - mae: 2.0928 - val_loss: 8.4220 - val_mae: 2.3646 - 189ms/epoch - 6ms/step
Epoch 25/100
32/32 - 0s - loss: 7.5596 - mae: 2.0693 - val_loss: 8.3930 - val_mae: 2.3583 - 211ms/epoch - 7ms/step
Epoch 26/100
32/32 - 0s - loss: 7.4542 - mae: 2.0617 - val_loss: 8.3276 - val_mae: 2.3519 - 203ms/epoch - 6ms/step
Epoch 27/100
32/32 - 0s - loss: 7.2517 - mae: 2.0373 - val_loss: 8.1807 - val_mae: 2.3293 - 203ms/epoch - 6ms/step
Epoch 28/100
32/32 - 0s - loss: 7.3700 - mae: 2.0615 - val_loss: 8.5733 - val_mae: 2.3831 - 226ms/epoch - 7ms/step
Epoch 29/100
32/32 - 0s - loss: 7.1206 - mae: 1.9992 - val_loss: 7.9702 - val_mae: 2.2926 - 213ms/epoch - 7ms/step
Epoch 30/100
32/32 - 0s - loss: 7.4454 - mae: 2.0610 - val_loss: 8.2981 - val_mae: 2.3375 - 247ms/epoch - 8ms/step
Epoch 31/100
32/32 - 0s - loss: 6.7350 - mae: 1.9841 - val_loss: 8.0342 - val_mae: 2.2984 - 198ms/epoch - 6ms/step
Epoch 32/100
32/32 - 0s - loss: 7.1839 - mae: 2.0205 - val_loss: 8.5238 - val_mae: 2.3675 - 199ms/epoch - 6ms/step
Epoch 33/100
32/32 - 0s - loss: 7.1005 - mae: 2.0258 - val_loss: 8.1048 - val_mae: 2.3101 - 227ms/epoch - 7ms/step
Epoch 34/100
32/32 - 0s - loss: 6.7976 - mae: 1.9782 - val_loss: 7.8701 - val_mae: 2.2775 - 226ms/epoch - 7ms/step
Epoch 35/100
32/32 - 0s - loss: 6.4859 - mae: 1.9472 - val_loss: 8.0670 - val_mae: 2.3022 - 213ms/epoch - 7ms/step
Epoch 36/100
32/32 - 0s - loss: 6.4179 - mae: 1.9405 - val_loss: 8.2548 - val_mae: 2.3241 - 223ms/epoch - 7ms/step
Epoch 37/100
32/32 - 0s - loss: 6.8224 - mae: 1.9668 - val_loss: 8.1560 - val_mae: 2.3131 - 219ms/epoch - 7ms/step
Epoch 38/100
32/32 - 0s - loss: 6.5838 - mae: 1.9714 - val_loss: 8.0844 - val_mae: 2.3041 - 215ms/epoch - 7ms/step
Epoch 39/100
32/32 - 0s - loss: 6.4934 - mae: 1.9367 - val_loss: 8.2735 - val_mae: 2.3264 - 230ms/epoch - 7ms/step
Epoch 40/100
32/32 - 0s - loss: 6.3121 - mae: 1.9137 - val_loss: 8.1514 - val_mae: 2.3112 - 221ms/epoch - 7ms/step
Epoch 41/100
32/32 - 0s - loss: 6.2110 - mae: 1.8839 - val_loss: 8.0381 - val_mae: 2.3010 - 211ms/epoch - 7ms/step
Epoch 42/100
32/32 - 0s - loss: 6.5557 - mae: 1.9269 - val_loss: 7.9672 - val_mae: 2.2883 - 192ms/epoch - 6ms/step
Epoch 43/100
32/32 - 0s - loss: 6.4419 - mae: 1.9299 - val_loss: 7.8716 - val_mae: 2.2751 - 204ms/epoch - 6ms/step
Epoch 44/100
32/32 - 0s - loss: 6.2871 - mae: 1.9210 - val_loss: 8.0648 - val_mae: 2.3013 - 236ms/epoch - 7ms/step
Epoch 45/100
32/32 - 0s - loss: 6.0850 - mae: 1.8864 - val_loss: 7.4487 - val_mae: 2.2210 - 264ms/epoch - 8ms/step
Epoch 46/100
32/32 - 0s - loss: 6.3986 - mae: 1.9345 - val_loss: 7.8566 - val_mae: 2.2759 - 214ms/epoch - 7ms/step
Epoch 47/100
32/32 - 0s - loss: 6.5063 - mae: 1.9333 - val_loss: 7.7698 - val_mae: 2.2659 - 245ms/epoch - 8ms/step
Epoch 48/100
32/32 - 0s - loss: 6.0746 - mae: 1.8667 - val_loss: 7.6655 - val_mae: 2.2506 - 484ms/epoch - 15ms/step
Epoch 49/100
32/32 - 0s - loss: 6.1522 - mae: 1.8756 - val_loss: 8.2434 - val_mae: 2.3290 - 185ms/epoch - 6ms/step
Epoch 50/100
32/32 - 0s - loss: 6.1274 - mae: 1.8972 - val_loss: 8.1070 - val_mae: 2.3111 - 230ms/epoch - 7ms/step
Epoch 51/100
32/32 - 0s - loss: 5.9911 - mae: 1.8493 - val_loss: 7.3648 - val_mae: 2.2115 - 236ms/epoch - 7ms/step
Epoch 52/100
32/32 - 0s - loss: 5.8550 - mae: 1.8439 - val_loss: 7.5960 - val_mae: 2.2421 - 210ms/epoch - 7ms/step
Epoch 53/100
32/32 - 0s - loss: 6.3441 - mae: 1.9145 - val_loss: 7.3212 - val_mae: 2.2051 - 248ms/epoch - 8ms/step
Epoch 54/100
32/32 - 0s - loss: 5.9961 - mae: 1.8597 - val_loss: 7.5258 - val_mae: 2.2350 - 256ms/epoch - 8ms/step
Epoch 55/100
32/32 - 0s - loss: 5.6644 - mae: 1.8427 - val_loss: 7.8735 - val_mae: 2.2803 - 223ms/epoch - 7ms/step
Epoch 56/100
32/32 - 0s - loss: 5.9661 - mae: 1.8695 - val_loss: 7.6570 - val_mae: 2.2500 - 250ms/epoch - 8ms/step
Epoch 57/100
32/32 - 0s - loss: 6.1226 - mae: 1.8953 - val_loss: 7.4133 - val_mae: 2.2162 - 252ms/epoch - 8ms/step
Epoch 58/100
32/32 - 0s - loss: 6.0099 - mae: 1.8670 - val_loss: 7.5367 - val_mae: 2.2306 - 229ms/epoch - 7ms/step
Epoch 59/100
32/32 - 0s - loss: 6.0603 - mae: 1.8806 - val_loss: 7.5274 - val_mae: 2.2296 - 249ms/epoch - 8ms/step
Epoch 60/100
32/32 - 0s - loss: 5.8119 - mae: 1.8382 - val_loss: 7.6538 - val_mae: 2.2495 - 212ms/epoch - 7ms/step
Epoch 61/100
32/32 - 0s - loss: 6.0433 - mae: 1.8634 - val_loss: 7.6774 - val_mae: 2.2542 - 261ms/epoch - 8ms/step
Epoch 62/100
32/32 - 0s - loss: 5.7730 - mae: 1.8557 - val_loss: 7.5130 - val_mae: 2.2324 - 217ms/epoch - 7ms/step
Epoch 63/100
32/32 - 0s - loss: 5.7356 - mae: 1.8109 - val_loss: 7.7387 - val_mae: 2.2617 - 220ms/epoch - 7ms/step
Epoch 64/100
32/32 - 0s - loss: 5.6967 - mae: 1.8276 - val_loss: 7.4701 - val_mae: 2.2244 - 215ms/epoch - 7ms/step
Epoch 65/100
32/32 - 0s - loss: 5.7976 - mae: 1.8193 - val_loss: 7.5693 - val_mae: 2.2383 - 196ms/epoch - 6ms/step
Epoch 66/100
32/32 - 0s - loss: 5.8409 - mae: 1.8238 - val_loss: 7.4409 - val_mae: 2.2190 - 216ms/epoch - 7ms/step
Epoch 67/100
32/32 - 0s - loss: 6.0276 - mae: 1.8517 - val_loss: 7.7214 - val_mae: 2.2621 - 231ms/epoch - 7ms/step
Epoch 68/100
32/32 - 0s - loss: 5.7232 - mae: 1.8397 - val_loss: 7.7158 - val_mae: 2.2602 - 230ms/epoch - 7ms/step
Epoch 69/100
32/32 - 0s - loss: 5.8727 - mae: 1.8505 - val_loss: 7.6199 - val_mae: 2.2457 - 142ms/epoch - 4ms/step
Epoch 70/100
32/32 - 0s - loss: 5.6879 - mae: 1.8299 - val_loss: 7.4637 - val_mae: 2.2246 - 108ms/epoch - 3ms/step
Epoch 71/100
32/32 - 0s - loss: 5.6623 - mae: 1.8035 - val_loss: 7.3107 - val_mae: 2.2063 - 106ms/epoch - 3ms/step
Epoch 72/100
32/32 - 0s - loss: 5.7409 - mae: 1.8167 - val_loss: 7.2618 - val_mae: 2.1988 - 87ms/epoch - 3ms/step
Epoch 73/100
32/32 - 0s - loss: 5.6045 - mae: 1.7970 - val_loss: 7.3465 - val_mae: 2.2115 - 87ms/epoch - 3ms/step
Epoch 74/100
32/32 - 0s - loss: 5.5147 - mae: 1.7782 - val_loss: 7.3143 - val_mae: 2.2082 - 85ms/epoch - 3ms/step
Epoch 75/100
32/32 - 0s - loss: 5.5734 - mae: 1.8010 - val_loss: 7.4132 - val_mae: 2.2226 - 87ms/epoch - 3ms/step
Epoch 76/100
32/32 - 0s - loss: 5.5431 - mae: 1.7842 - val_loss: 7.2795 - val_mae: 2.2047 - 83ms/epoch - 3ms/step
Epoch 77/100
32/32 - 0s - loss: 5.5326 - mae: 1.8029 - val_loss: 6.9106 - val_mae: 2.1480 - 86ms/epoch - 3ms/step
Epoch 78/100
32/32 - 0s - loss: 5.4276 - mae: 1.7757 - val_loss: 7.5642 - val_mae: 2.2432 - 84ms/epoch - 3ms/step
Epoch 79/100
32/32 - 0s - loss: 5.4612 - mae: 1.7795 - val_loss: 6.9083 - val_mae: 2.1496 - 85ms/epoch - 3ms/step
Epoch 80/100
32/32 - 0s - loss: 5.4077 - mae: 1.7723 - val_loss: 7.2697 - val_mae: 2.1991 - 119ms/epoch - 4ms/step
Epoch 81/100
32/32 - 0s - loss: 5.5779 - mae: 1.8158 - val_loss: 7.4986 - val_mae: 2.2332 - 122ms/epoch - 4ms/step
Epoch 82/100
32/32 - 0s - loss: 5.4352 - mae: 1.7774 - val_loss: 7.2731 - val_mae: 2.2040 - 108ms/epoch - 3ms/step
Epoch 83/100
32/32 - 0s - loss: 5.3438 - mae: 1.7664 - val_loss: 6.8550 - val_mae: 2.1428 - 121ms/epoch - 4ms/step
Epoch 84/100
32/32 - 0s - loss: 5.5127 - mae: 1.8102 - val_loss: 7.3062 - val_mae: 2.2083 - 115ms/epoch - 4ms/step
Epoch 85/100
32/32 - 0s - loss: 5.2917 - mae: 1.7592 - val_loss: 7.1921 - val_mae: 2.1938 - 88ms/epoch - 3ms/step
Epoch 86/100
32/32 - 0s - loss: 5.1789 - mae: 1.7349 - val_loss: 6.9451 - val_mae: 2.1577 - 82ms/epoch - 3ms/step
Epoch 87/100
32/32 - 0s - loss: 5.6231 - mae: 1.8007 - val_loss: 7.1926 - val_mae: 2.1940 - 88ms/epoch - 3ms/step
Epoch 88/100
32/32 - 0s - loss: 5.4081 - mae: 1.7830 - val_loss: 7.4285 - val_mae: 2.2260 - 123ms/epoch - 4ms/step
Epoch 89/100
32/32 - 1s - loss: 5.3894 - mae: 1.7871 - val_loss: 7.2829 - val_mae: 2.2091 - 957ms/epoch - 30ms/step
Epoch 90/100
32/32 - 0s - loss: 5.2149 - mae: 1.7566 - val_loss: 7.4382 - val_mae: 2.2286 - 94ms/epoch - 3ms/step
Epoch 91/100
32/32 - 0s - loss: 5.4113 - mae: 1.7718 - val_loss: 6.9568 - val_mae: 2.1632 - 95ms/epoch - 3ms/step
Epoch 92/100
32/32 - 0s - loss: 5.3297 - mae: 1.7523 - val_loss: 7.0870 - val_mae: 2.1822 - 117ms/epoch - 4ms/step
Epoch 93/100
32/32 - 0s - loss: 5.4947 - mae: 1.7818 - val_loss: 6.9830 - val_mae: 2.1676 - 119ms/epoch - 4ms/step
Epoch 94/100
32/32 - 0s - loss: 5.4375 - mae: 1.7846 - val_loss: 7.1312 - val_mae: 2.1894 - 134ms/epoch - 4ms/step
Epoch 95/100
32/32 - 0s - loss: 5.3218 - mae: 1.7554 - val_loss: 7.0426 - val_mae: 2.1712 - 119ms/epoch - 4ms/step
Epoch 96/100
32/32 - 0s - loss: 5.5519 - mae: 1.7837 - val_loss: 7.0240 - val_mae: 2.1695 - 86ms/epoch - 3ms/step
Epoch 97/100
32/32 - 0s - loss: 5.2904 - mae: 1.7547 - val_loss: 7.0180 - val_mae: 2.1718 - 83ms/epoch - 3ms/step
Epoch 98/100
32/32 - 0s - loss: 5.0500 - mae: 1.7176 - val_loss: 7.1596 - val_mae: 2.1900 - 91ms/epoch - 3ms/step
Epoch 99/100
32/32 - 0s - loss: 5.2775 - mae: 1.7391 - val_loss: 7.0752 - val_mae: 2.1799 - 93ms/epoch - 3ms/step
Epoch 100/100
32/32 - 0s - loss: 5.1417 - mae: 1.7148 - val_loss: 6.7634 - val_mae: 2.1368 - 115ms/epoch - 4ms/step
20/20 - 0s - 87ms/epoch - 4ms/step
79/79 - 0s - 77ms/epoch - 970us/step

14 Results: Case 1 -> repeated data

14.1 Prediction of total abundance

The best algorithm for total abundance is: RF

14.2 Prediction of total biomass

The best algorithm for total Biomass is: RF

14.3 Prediction of total taxonomic richness

The best algorithm for total Richness is: RF

14.4 Best algo: RF

- Summary: Results Case 1 -> repeated data

14.5 Transformations effect

  • Data without transformations

  • Data transformed with sqrt()

14.6 Comparison of data partitions

Abundance partition

  • Comparison with kenStone method

CaCO3 partition

  • Comparison with kenStone method

Kolmogorov-Smirnov test

  • sample method

    Asymptotic two-sample Kolmogorov-Smirnov test

data:  AB_tot_train$AB_tot and AB_tot_test$AB_tot
D = 0.037142, p-value = 0.4939
alternative hypothesis: two-sided
  • kenStone method

    Asymptotic two-sample Kolmogorov-Smirnov test

data:  AB_tot_train$AB_tot and AB_tot_test$AB_tot
D = 0.12272, p-value = 5.569e-07
alternative hypothesis: two-sided

14.7 Polynomial model results

  • Comparison with the best algorithms

14.8 Improved prediction of extreme values

  • best 1

  • best 2

14.9 Results EcoBioSoil

14.10 Comparison between EcoBioSoil and LandWorm

15 Réduction et effets des variables

15.1 Abundance

Plan

Importance of predictors

Predictor effects

Predictor interactions

Predictor interactions: Land use

Plan

15.2 Biomass

Plan

Importance of predictors

Predictor effects

Predictor interactions

Predictor interactions: Land use

Plan

15.3 Richness

Plan

Importance of predictors

Predictor effects

Predictor interactions

Predictor interactions: Land use

Plan

16 Results: Case 2 -> non-repeated data

16.1 Comparison

  • Zero duplicates in Programme, ID_Site and GPS (see Script.R )

  • Comparison with the best algorithms (best 2)

17 Results: Case 3 -> original data

17.1 Inputs


Predicteurs = c(“gps_x”, “gps_y”, “clay”, “fine_sand”, “coarse_sand”, “fine_silt”, “coarse_silt”, “ph_eau”, “om”, “n_tot”)


OS = c(“Arable land”, “Artificial, non-agricultural vegetated areas”, “Forests”, “Pastures”, “Permanent crops”)

17.2 Results with measured data (BDD1.9)

  • Comparison with the best algorithms

18 Discussion

18.1 Comparison

Authors Abundance Biomass Richness Comments
EcoBioSoil 0.34/30 0.17/9 0.54/1.79 R²/RMSE on test data
Salako et al., 2023 0.84 NA 0.57 R² of RF max
Phillips et al., 2019 0.64/134 0.65/55 0.74/1.17 R²/RMSE on training data
Rutgers et al., 2016 0.25 NA 0.24 NA
  • Fig. S3 of Phillips et al., 2019

Question

  • Retransformation des données vdt
  • Overfitting

To do next

    1. Rédaction, protocol ODMAP;
    2. Models avec LandWorm
    3. Réduction des variables
    4. Effets des variables
    5. Prédiction et cartographie

Plan

18.2 Additional information




See species explorations

All the material from my internship, including scripts and datasets, is available on my GitHub.

Thank you for your attention