Summary SDMs

Author

Bioshifts

Code
rm(list=ls())
gc()
          used (Mb) gc trigger (Mb) max used (Mb)
Ncells  586609 31.4    1345571 71.9   660908 35.3
Vcells 1064787  8.2    8388608 64.0  1800310 13.8
Code
library(dplyr)
library(tidyr)
library(ggplot2)
library(knitr)
library(pbapply)
library(data.table)

1 sources

Code
try(source(here::here("R/settings.R")))
Error in eval(ei, envir) : object 'realm' not found

2 Load data

Code
maxnet_dir <- "/media/seagate/boliveira/SDMs/MaxNet"

#################################
# sp list Mar
SDMsSpListMar <- list.files(here::here(maxnet_dir,"Mar"))
# sp list Ter
SDMsSpListTer <- list.files(here::here(maxnet_dir,"Ter"))

SDMsSpList <- data.frame(species = unique(c(SDMsSpListMar,SDMsSpListTer)))
SDMsSpList$Mar <- ifelse(SDMsSpList$species %in% SDMsSpListMar, 1, 0)
SDMsSpList$Ter <- ifelse(SDMsSpList$species %in% SDMsSpListTer, 1, 0)

#################################
# Get taxonomy
FullList <- read.csv(here::here("Data/splist.csv"))
FullList <- FullList %>% filter(v1 == 1)

MyList <- FullList %>% filter(scientificName %in% gsub("_"," ",SDMsSpList$species))
MyList <- MyList %>% filter(!duplicated(scientificName))

#################################
# Load bioshifts
biov1 <- read.table(here::here("Data/Shifts2018_checkedtaxo.txt"),header = T)
## Use LAT ELE shifts
biov1$Type[which(!is.na(biov1$Azimuth))] <- "LAT" # All obs type == HOR contain Azimuth value
biov1 <- biov1[which(biov1$Type=="ELE" | biov1$Type=="LAT"),]
## Use temporal period from the environmental data
biov1_Mar <- biov1 %>% filter(START >= get_temporal_range_env_data("Mar")[1] + n_yr_bioclimatic)
biov1_Ter <- biov1 %>% filter(START >= get_temporal_range_env_data("Ter")[1] + n_yr_bioclimatic)

#################################
# Load SDM data
## Info used for fitting the SDMs
### Marines
sp_Mar <- SDMsSpList$species[which(SDMsSpList$Mar==1)]
if(length(sp_Mar)>0){
    N_occ_Mar <- pblapply(1:length(sp_Mar), function(i) {
        file_i <- here::here(maxnet_dir,"Mar",sp_Mar[i],paste0(sp_Mar[i],"_SDM_info.csv"))
        if(file.exists(file_i)){
            read.csv(file_i)
        }
    })
    N_occ_Mar <- rbindlist(N_occ_Mar)
}

### Terrestrials
sp_Ter <- SDMsSpList$species[which(SDMsSpList$Ter==1)]
if(length(sp_Ter)>0){
    N_occ_Ter <- pblapply(1:length(sp_Ter), function(i) {
        file_i <- here::here(maxnet_dir,"Ter",sp_Ter[i],paste0(sp_Ter[i],"_SDM_info.csv"))
        if(file.exists(file_i)){
            read.csv(file_i)
        }
    })
    N_occ_Ter <- rbindlist(N_occ_Ter)
}

### Merge all
N_occ <- data.frame()
if(length(sp_Mar)>0){
    N_occ <- rbind(N_occ,
                   N_occ_Mar)
}
if(length(sp_Ter)>0){
    N_occ <- rbind(N_occ,
                   N_occ_Ter)
}


#################################
# Load SDM data
if(length(sp_Mar)>0){
    SDM_Mar <- pblapply(1:length(sp_Mar), function(i) {
        file_i <- here::here(maxnet_dir,"Mar",sp_Mar[i],paste0(sp_Mar[i],"_SDM_CV.csv"))
        if(file.exists(file_i)){
            tmp <- read.csv(file_i)
            tmp <- tmp %>% gather("metric","value",-Mod)
            data.frame(Realm = "Mar", sps = sp_Mar[i], tmp)
        }
    })
    SDM_Mar <- rbindlist(SDM_Mar)
}

### Terrestrials
if(length(sp_Ter)>0){
    SDM_Ter <- pblapply(1:length(sp_Ter), function(i) {
        file_i <- here::here(maxnet_dir,"Ter",sp_Ter[i],paste0(sp_Ter[i],"_SDM_CV.csv"))
        if(file.exists(file_i)){
            tmp <- read.csv(file_i)
            tmp <- tmp %>% gather("metric","value",-Mod)
            data.frame(Realm = "Mar", sps = sp_Ter[i], tmp)
        }
    })
    SDM_Ter <- rbindlist(SDM_Ter)
}

### Merge all
SDM_info <- data.frame()
if(length(sp_Mar)>0){
    SDM_info <- rbind(SDM_info,
                       SDM_Mar)
}
if(length(sp_Ter)>0){
    SDM_info <- rbind(SDM_info,
                       SDM_Ter)
}

Out of the 13 in bioshifts v1, we fitted SDMs for 3 species (382 marine and 0 terrestrials). We cannot fit SDMs for all species in v1 due to limitation in the time-span of the available environmental data. This means, we selected marine species with shifts occurring after 1983 and terrestrial species with shifts occurring after 1981.

3 What taxa?

Code
ggplot(MyList, aes(x=class))+
    ggtitle("N species by Class and Phylum")+
    geom_bar()+
    theme_classic()+
    coord_flip()+
    facet_wrap(.~phylum, scales = "free_y", ncol = 3)

4 N of occurrences

Code
ggplot(data = N_occ, aes(N_occ))+
    geom_histogram(bins = 100)+
    theme_classic()

Code
summary(N_occ$N_occ)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   15.0   201.8   600.5  2047.4  2134.5 23664.0 

5 N time periods

Code
tmp <- data.frame(table(N_occ$N_time_periods))

ggplot(tmp, aes(x = Var1, y = Freq))+
    geom_col()+
    theme_classic()+
    geom_text(aes(label = Freq), vjust = -0.5)+
    xlab("N time periods")

6 N study areas

Code
tmp <- data.frame(table(N_occ$N_study_areas))

ggplot(tmp, aes(x = Var1, y = Freq))+
    geom_col()+
    theme_classic()+
    geom_text(aes(label = Freq), vjust = -0.5)+
    xlab("N study areas")

7 Shift duration

Code
periods <- lapply(N_occ$Time_periods, function(x){
    tmp <- as.numeric(strsplit(x,"-")[[1]])
    data.frame(start = tmp[1],
               end = tmp[2],
               duration = tmp[2]-tmp[1])
})
Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion
Code
periods <- do.call(rbind,periods)

tmp <- data.frame(table(periods$duration))

ggplot(tmp, aes(x = Var1, y = Freq))+
    geom_col()+
    theme_classic()+
    geom_text(aes(label = Freq), vjust = -0.5)+
    xlab("Shift duration (years)")

8 Shift periods

Code
tmp <- periods[,c("start","end")]
tmp <- melt(tmp)
Warning in melt(tmp): The melt generic in data.table has been passed a
data.frame and will attempt to redirect to the relevant reshape2 method;
please note that reshape2 is deprecated, and this redirection is now
deprecated as well. To continue using melt methods from reshape2 while both
libraries are attached, e.g. melt.list, you can prepend the namespace like
reshape2::melt(tmp). In the next version, this warning will become an error.
No id variables; using all as measure variables
Code
ggplot(data = tmp, aes(x = value, fill = variable, color = variable))+
    geom_density(aes(y=2.5 * ..count..), alpha = .3)+
    geom_histogram(bins = 100, alpha = .7)+
    theme_classic()+
    xlab("Shift period (years)")+
    ylab("Freq")
Warning: Removed 8 rows containing non-finite values (stat_density).
Warning: Removed 8 rows containing non-finite values (stat_bin).

9 SDM performace

Code
tmp <- SDM_info[-which(SDM_info$metric %in% c("thresh","thresh_method")),]
tmp$value <- as.numeric(tmp$value)

ggplot(data = tmp, aes(x = metric, y = value, fill = Mod))+
    geom_jitter(alpha = .3, position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.8), aes(color = Mod))+
    geom_boxplot(alpha = .5)+
    theme_classic()+
    xlab("Shift period (years)")+
    ylab("Freq")+
    facet_wrap(.~metric, scales = "free", ncol = 4)

Code
tmp2 <- tmp %>% spread(Mod, value)
tmp2 <- tmp2[which(tmp2$metric=="AUC"),]
tmp2$sps <- factor(tmp2$sps, levels = tmp2$sps[order(tmp2$CV)])

ggplot(tmp2) +
  geom_segment( aes(x=sps, xend=sps, y=CV, yend=Train)) +
  geom_point( aes(x=sps, y=CV), color=rgb(0.2,0.7,0.1,0.5), size=1 ) + #green
  geom_point( aes(x=sps, y=Train), color=rgb(0.7,0.2,0.1,0.5), size=1 ) + # red
  coord_flip()+
  theme(
      axis.text=element_text(size=6),
    legend.position = "none",
  ) +
  xlab("") +
  ylab("AUC")