Team: “Extintos”

Datos GBIF

Cariniana pyriformis

# Biblioteca rgbif
library(rgbif)
gbif_cp <- occ_search(scientificName = "Cariniana pyriformis",
                  limit = 5000, return = "data",
                  hasCoordinate = TRUE)

data_cp <- gbif_cp$data
data_cp

Limpieza

Propuesta: App BioCol
La App sólo utiliza datos para Colombia (inicialmente)
Filtra coordenadas ausentes.
Filtra puntos de observación duplicados.
Para la base de datos final que estará disponible para el usuario, sólo se muestran las variables de interés.

library(tidyverse)
library(Hmisc)
data_cp %>% 
  filter(country == "Colombia") %>% 
  filter(!duplicated(decimalLatitude, decimalLongitude)) %>% 
  dplyr::select(scientificName, decimalLatitude, decimalLongitude, kingdom, phylum,
         family, genus, genericName, stateProvince, year, month, day,
         eventDate, class, country, occurrenceRemarks, habitat) %>% 
  mutate(habitat = capitalize(tolower(habitat))) ->
  data_cp_clean
data_cp_clean

Tremarctos ornatus

gbif_to <- occ_search(scientificName = "Tremarctos ornatus",
                  limit = 5000, return = "data",
                  hasCoordinate = TRUE)

data_to <- gbif_to$data
data_to

Limpieza

data_to %>% 
  filter(country == "Colombia") %>% 
  filter(!duplicated(decimalLatitude, decimalLongitude)) %>% 
  dplyr::select(scientificName, decimalLatitude, decimalLongitude, kingdom, phylum,
         family, genus, genericName, stateProvince, year, month, day,
         eventDate, class, country, occurrenceRemarks, habitat) %>% 
  mutate(habitat = capitalize(tolower(habitat))) ->
  data_to_clean
data_to_clean

Visualizaciones

Ejemplos con Tremarctos ornatus.

Mapas

Mapa Colombia (1)

Mapa de Colombia: obtenido con la biblioteca raster.

library(raster)
# colombia0 <- getData(name = "GADM", country = "COL", level = 1, download = TRUE)
colombia <- read_rds("../data/gadm36_COL_1_sp.rds")
data_col <- fortify(colombia, region = "NAME_1")
data_col %>% 
  ggplot(aes(long, lat, group = group)) +
  geom_polygon(fill = "white") + 
  coord_equal() + 
  geom_path(color = "black") +
  theme_void()

Mapa Colombia (2)

library("ggmap")
mapa <- c(left = -85, bottom = -5, right = -66, top = 13)
get_stamenmap(mapa, zoom = 5,
              source = "stamen", maptype = "terrain",
              force = TRUE) %>% ggmap() +
  labs(x = "Longitud", y = "Latitud") +
  scale_size_manual(values = c(0.1, 0.1)) +
  geom_point(data = data_to_clean, aes(x = decimalLongitude, y = decimalLatitude),
             shape = 19)

Mapa Colombia (3)

library("ggmap")
get_stamenmap(mapa, zoom = 5,
              source = "stamen", maptype = "terrain",
              force = TRUE) %>% ggmap() +
  geom_polygon(data = data_col, aes(x = long, y = lat, group = group),
               alpha = 0.001) + 
  geom_path(color = "black",
            data = data_col, aes(x = long, y = lat, group = group)) +
  labs(x = "Longitud", y = "Latitud") +
  scale_size_manual(values = c(0.1, 0.1)) +
  geom_point(data = data_to_clean, aes(x = decimalLongitude, y = decimalLatitude),
             shape = 19)

Conteos anuales

data_to_clean %>% 
  group_by(year) %>% 
  count() %>% 
  ggplot(aes(x = year, y = n)) +
  geom_line() +
  theme_light()

Conteos mensuales

data_to_clean %>% 
  group_by(month) %>% 
  count() %>% 
  ggplot(aes(x = month, y = n)) +
  geom_line() +
  theme_light() +
  scale_x_continuous(breaks = seq(1, 12, 1))

Conteo por departamento

data_to_clean %>% 
  group_by(stateProvince) %>% 
  count() %>% 
  filter(!is.na(stateProvince)) %>% 
  ggplot(aes(x = reorder(stateProvince, n), y = n)) +
  geom_col(color = "black") +
  coord_flip() +
  theme_light() +
  labs(x = "Departamento", y = "Total") +
  scale_y_continuous(breaks = seq(1, 61, 10))

Zona de vida

data_to_clean %>% 
  group_by(habitat) %>% 
  count() %>% 
  filter(!is.na(habitat)) %>% 
  ggplot(aes(x = reorder(habitat, n), y = n)) +
  geom_col(color = "black") +
  coord_flip() +
  theme_light() +
  labs(x = "Zona de vida", y = "Total") +
  scale_y_continuous(breaks = seq(1, 61, 10))

Nubes de palabras

Descripción de observación

# Tokenización de texto
library(tidytext)
source("../functions/tokenizar.R")

# Función cleanText() 
texto <- data_to_clean %>% 
  filter(!is.na(occurrenceRemarks)) %>% 
  mutate(textTokenize = map(.x = occurrenceRemarks,
                            .f = cleanText))
# Texto tidy (ordenado)
texto_tidy <- texto %>%
  unnest(cols = textTokenize) %>% 
  rename(token = textTokenize)

# Frecuencia de palabras
freq_texto <- texto_tidy %>% 
  group_by(token) %>% 
  summarise(frequence = n())

# nube de palabras
library(wordcloud)
wordcloudCustom <- function(data){
  wordcloud(words = data$token, freq = data$frequence,
            max.words = 400, random.order = FALSE, rot.per = 0.35,
            colors = jcolors(palette = "pal9"))
}

library(jcolors)
wordcloudCustom(data = freq_texto)

Hábitat

# Función cleanText() 
texto2 <- data_to_clean %>% 
  filter(!is.na(habitat)) %>% 
  mutate(textTokenize = map(.x = habitat,
                            .f = cleanText))
# Texto tidy (ordenado)
texto_tidy2 <- texto2 %>%
  unnest(cols = textTokenize) %>% 
  rename(token = textTokenize)

# Frecuencia de palabras
freq_texto2 <- texto_tidy2 %>% 
  group_by(token) %>% 
  summarise(frequence = n())

wordcloudCustom(data = freq_texto2)

Ambientales

Imágenes CHELSA + NASA

# Cargando bibliotecas
library(tidyverse)
library(raster)
library(biomod2)

# Lista de archivos tif
lista_tif <- list.files("../data/chelsa_climate/", pattern = ".tif$", full.names = TRUE)

# Ordenando la lista
# Esto lo hago pensando en gráficos que están más adelante
lista_tif <- c(lista_tif[1], lista_tif[12:19], lista_tif[2:11])


# Importando imágenes individuales
# Se alamacenan en una lista de nombre chelsa
chelsa <- list()
for (i in 1:length(lista_tif)) {
  chelsa[[i]] = raster(lista_tif[i])
}

# Stack de raster
predictoras <- stack(chelsa[[1]], chelsa[[2]], chelsa[[3]], chelsa[[4]],
                     chelsa[[5]], chelsa[[6]], chelsa[[7]], chelsa[[8]],
                     chelsa[[9]], chelsa[[10]], chelsa[[11]], chelsa[[12]],
                     chelsa[[13]], chelsa[[14]], chelsa[[15]], chelsa[[16]],
                     chelsa[[17]], chelsa[[18]], chelsa[[19]])
plot(predictoras)

Pseudo-Ausencias

library(dismo)

# Coordenadas de las ocurrencias
xmin <- min(data_to_clean$decimalLongitude)
xmax <- max(data_to_clean$decimalLongitude)
ymin <- min(data_to_clean$decimalLatitude)
ymax <- max(data_to_clean$decimalLatitude)

# Formando la "caja" de muestreo
bb <- matrix(c(xmin, xmin, xmax, xmax, xmin, ymin, ymax, ymax, ymin, ymin),
             ncol=2)

# Transformando la matriz en SpatialPolygons
bgExt <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(bb)), 1)))

# Extendiendo 3 grados la zona de muestreo desde los puntos
bgExt <- rgeos::gBuffer(bgExt, width = 3)


# Corte de imágnes chelsa 
envsBgCrop <- raster::crop(predictoras, bgExt)
envsBgMsk <- raster::mask(envsBgCrop, bgExt)

# Muestreo aleatorio de pseudo-ausencias
set.seed(123)
bg.xy <- dismo::randomPoints(envsBgMsk, 1000)

# Matriz de coordenadas a data frame --> nueva variable "target"
bg.xy <- as.data.frame(bg.xy) %>% 
  rename(decimalLongitude = x, decimalLatitude = y) %>% 
  mutate(target = 0)
bg.xy

Unión de presencias y pseudo-ausencias

# Base de datos con "1" y "0"
data_to_clean %>% 
  mutate(target = 1) %>% 
  bind_rows(bg.xy) %>% 
  dplyr::select(decimalLatitude, decimalLongitude, target) ->
  data_to_final
data_to_final

Unión de presencias/ausencias con variables ambientales (raster-tif):

library(biomod2)
# Datos para base de datos de modelos
rta <- data_to_final$target
coord_rta = data_to_final[c('decimalLongitude','decimalLatitude')]
nombre_rta = "Tremarcto_ornatus"

# Formato de base de datos para modelo
datos_modelos <- BIOMOD_FormatingData(
  resp.var = rta,
  expl.var = predictoras,
  resp.xy = coord_rta,
  resp.name = nombre_rta,
  
)

## 
## -=-=-=-=-=-=-=-=-=-=-=-= Tremarcto_ornatus Data Formating -=-=-=-=-=-=-=-=-=-=-=-=
## 
##  Response variable name was converted into Tremarcto.ornatus
## > No pseudo absences selection !
##       ! No data has been set aside for modeling evaluation
##          ! Some NAs have been automatically removed from your data
## -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

Extracción de base de datos para modelos y visualización:

# Extracción de base de datos
df_Tremarcto_ornatus <- datos_modelos@coord %>% 
  bind_cols(datos_modelos@data.env.var) %>% 
  bind_cols(as.data.frame(datos_modelos@data.species))

# Cambiando nombres
names(df_Tremarcto_ornatus) <- c("long", "lat", paste0("Bio", 1:19), "target")
df_Tremarcto_ornatus

Exportando data final

save(df_Tremarcto_ornatus, file = "../data/data_Tremarcto_ornatus.Rdata",
     compress = "xz")

Visualización data final

Curvas de respuesta logística

modelo1 <- glm(target ~Bio1 + Bio2,
               data = df_Tremarcto_ornatus, family = "binomial")
library(ggiraphExtra)
ggPredict(modelo1, jitter = FALSE, colorn = 10, se = F) +
  theme_bw() +
  labs(x = "Bio1", y = "Probabilidad", color = "Bio2")

Distribuciones

df_Tremarcto_ornatus %>% 
  dplyr::select(-c(long, lat)) %>% 
  gather(key = "key", value = "value", -target) %>% 
  ggplot(aes(x = value, fill = as.factor(target))) +
  facet_wrap(~key, scales = "free") +
  geom_density(alpha = 0.8, ncol = 4) +
  theme_light() +
  labs(fill = "") +
  scale_fill_jcolors(palette = "pal7")

Correlaciones

library(corrplot)
df_Tremarcto_ornatus %>% 
  dplyr::select(-c(long, lat, target)) %>%
  cor(method = "spearman") %>% 
  corrplot(method = "pie",
           diag = FALSE,
           addgrid.col = "black",
           type = "lower",
           tl.srt = 20,
           tl.col = "black",
           col = jcolors(palette = "pal4"),
           order = "hclust")

Componentes principales

library(FactoMineR)
data_acp <- df_Tremarcto_ornatus %>% 
  dplyr::select(-c(long, lat, target))
acp_oso <- PCA(X = data_acp,
               scale.unit = TRUE, 
               ncp = ncol(data_acp),
               graph = FALSE)
summary(acp_oso)

## 
## Call:
## PCA(X = data_acp, scale.unit = TRUE, ncp = ncol(data_acp), graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance              11.705   3.512   2.133   1.024   0.417   0.167   0.024
## % of var.             61.606  18.486  11.224   5.390   2.193   0.877   0.124
## Cumulative % of var.  61.606  80.092  91.316  96.706  98.899  99.776  99.900
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.014   0.002   0.002   0.001   0.000   0.000   0.000
## % of var.              0.076   0.011   0.009   0.003   0.000   0.000   0.000
## Cumulative % of var.  99.976  99.988  99.997  99.999 100.000 100.000 100.000
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19
## Variance               0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000
## 
## Individuals (the 10 first)
##           Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr
## 1     |  4.090 | -3.163  0.039  0.598 |  0.873  0.010  0.046 |  0.471  0.005
## 2     |  3.689 | -2.966  0.034  0.646 | -0.553  0.004  0.022 |  0.889  0.017
## 3     |  3.762 | -3.031  0.036  0.649 | -1.171  0.018  0.097 |  0.969  0.020
## 4     |  3.689 | -2.966  0.034  0.646 | -0.553  0.004  0.022 |  0.889  0.017
## 5     |  3.703 | -2.961  0.034  0.640 | -0.096  0.000  0.001 |  0.869  0.016
## 6     |  3.762 | -3.031  0.036  0.649 | -1.171  0.018  0.097 |  0.969  0.020
## 7     |  3.689 | -2.966  0.034  0.646 | -0.553  0.004  0.022 |  0.889  0.017
## 8     |  3.825 | -3.263  0.041  0.728 | -0.538  0.004  0.020 |  0.611  0.008
## 9     |  3.847 | -3.203  0.040  0.693 |  1.112  0.016  0.084 | -0.978  0.020
## 10    |  1.168 | -0.288  0.000  0.061 |  0.604  0.005  0.267 |  0.446  0.004
##         cos2  
## 1      0.013 |
## 2      0.058 |
## 3      0.066 |
## 4      0.058 |
## 5      0.055 |
## 6      0.066 |
## 7      0.058 |
## 8      0.026 |
## 9      0.065 |
## 10     0.145 |
## 
## Variables (the 10 first)
##          Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr   cos2  
## Bio1  |  0.871  6.486  0.759 |  0.362  3.739  0.131 |  0.225  2.369  0.051 |
## Bio2  | -0.940  7.555  0.884 | -0.036  0.037  0.001 |  0.317  4.699  0.100 |
## Bio3  | -0.940  7.555  0.884 | -0.036  0.037  0.001 |  0.317  4.699  0.100 |
## Bio4  |  0.446  1.700  0.199 |  0.508  7.350  0.258 |  0.509 12.161  0.259 |
## Bio5  |  0.832  5.915  0.692 |  0.402  4.597  0.161 |  0.289  3.928  0.084 |
## Bio6  |  0.900  6.922  0.810 |  0.335  3.187  0.112 |  0.177  1.475  0.031 |
## Bio7  | -0.797  5.426  0.635 |  0.199  1.132  0.040 |  0.517 12.552  0.268 |
## Bio8  | -0.940  7.555  0.884 | -0.036  0.037  0.001 |  0.317  4.699  0.100 |
## Bio9  | -0.940  7.555  0.884 | -0.036  0.037  0.001 |  0.317  4.699  0.100 |
## Bio10 |  0.867  6.428  0.752 |  0.375  4.008  0.141 |  0.236  2.619  0.056 |

CP1 vs CP2

# Añadiendo datos a la base de datos inicial para gráficos ACP
df_Tremarcto_ornatus$cp1 <- acp_oso$ind$coord[, 1]
df_Tremarcto_ornatus$cp2 <- acp_oso$ind$coord[, 2]
df_Tremarcto_ornatus$cp3 <- acp_oso$ind$coord[, 3]

# Variables e Individuos
library(ggpubr)
library(factoextra)
ggarrange(
  fviz_pca_var(X = acp_oso, axes = c(1, 2), select.var = list(cos2 = 0.5)) +
  ggtitle(""),
  
  df_Tremarcto_ornatus %>% 
  ggplot(data =., aes(x = cp1, y = cp2, color = factor(target))) +
  geom_point(size = 2, alpha = 0.7) +
  geom_vline(xintercept = 0, lty = 2, size = 0.3, color = "darkred") +
  geom_hline(yintercept = 0, lty = 2, size = 0.3, color = "darkred") +
  labs(title = "CP1 vs CP2", color = "G. interrupta") +
  scale_color_jcolors(palette = "pal7") +
  theme_light() +
  theme(legend.position = "top"),
  
  ncol = 2
)

CP1, CP2, CP3

library(plotly)
plot_ly(df_Tremarcto_ornatus,
        x = ~ cp1,
        y = ~ cp2,
        z = ~ cp3,
        color = ~factor(target),
        colors = c("blue", "red")) %>% 
  add_markers()

Modelos

Partición de datos

# Data para modelos
data_modelo1 <- df_Tremarcto_ornatus %>% 
  dplyr::select(-c(cp1:cp3))

# Cambiando 0 por "No" y 1 por "Si"
data_modelo2 <- data_modelo1 %>% 
  mutate(target2 = factor(target, labels  = c("No", "Si")))
data_modelo3 <- as.data.frame(data_modelo2)

# Partición de datos
library(caret)
set.seed(123)
idx <- createDataPartition(y = data_modelo3$target2, times = 1, p = 0.70, list = FALSE)
dataTrain <- data_modelo3[idx, ]
dataTesti <- data_modelo3[-idx, ]

Exportando predictoras

# Creando base de datos con predictoras
dataPredictoras <- as.data.frame(coordinates(envsBgMsk))
dataPredictoras$Bio1 <- as.vector(values(envsBgMsk[[1]]))
dataPredictoras$Bio2 <- as.vector(values(envsBgMsk[[2]]))
dataPredictoras$Bio3 <- as.vector(values(envsBgMsk[[3]]))
dataPredictoras$Bio4 <- as.vector(values(envsBgMsk[[4]]))
dataPredictoras$Bio5 <- as.vector(values(envsBgMsk[[5]]))
dataPredictoras$Bio6 <- as.vector(values(envsBgMsk[[6]]))
dataPredictoras$Bio7 <- as.vector(values(envsBgMsk[[7]]))
dataPredictoras$Bio8 <- as.vector(values(envsBgMsk[[8]]))
dataPredictoras$Bio9 <- as.vector(values(envsBgMsk[[9]]))
dataPredictoras$Bio10 <- as.vector(values(envsBgMsk[[10]]))
dataPredictoras$Bio11 <- as.vector(values(envsBgMsk[[11]]))
dataPredictoras$Bio12 <- as.vector(values(envsBgMsk[[12]]))
dataPredictoras$Bio13 <- as.vector(values(envsBgMsk[[13]]))
dataPredictoras$Bio14 <- as.vector(values(envsBgMsk[[14]]))
dataPredictoras$Bio15 <- as.vector(values(envsBgMsk[[15]]))
dataPredictoras$Bio16 <- as.vector(values(envsBgMsk[[16]]))
dataPredictoras$Bio17 <- as.vector(values(envsBgMsk[[17]]))
dataPredictoras$Bio18 <- as.vector(values(envsBgMsk[[18]]))
dataPredictoras$Bio19 <- as.vector(values(envsBgMsk[[19]]))

# Liberando memoria RAM
gc()

# Cambiando nombres
names(dataPredictoras) <- c("long", "lat", paste0("Bio", 1:19))

# Eliminando NAs para predicciones
dataPredictoras <- na.omit(dataPredictoras)

dataPredictoras

save(dataPredictoras, file = "../data/chelsa.Rdata", compress = "xz")

Modelo Random Forest

# Bibliotecas
library(caret)
library(recipes)
library(xgboost)
library(doParallel)

# PARALELIZACIÓN DE PROCESO
#===============================================================================
cl <- makePSOCKcluster(5)
registerDoParallel(cl)

# HIPERPARÁMETROS, NÚMERO DE REPETICIONES Y SEMILLAS PARA CADA REPETICIÓN
#===============================================================================
particiones  <- 5
repeticiones <- 3

# Hiperparámetros
hiperparametros <- expand.grid(mtry = c(3, 5, 10),
                               min.node.size = c(2, 5, 15, 30),
                               splitrule = "gini")

set.seed(1992)
seeds <- vector(mode = "list", length = (particiones * repeticiones) + 1)
for (i in 1:(particiones * repeticiones)) {
  seeds[[i]] <- sample.int(1000, nrow(hiperparametros))
}
seeds[[(particiones * repeticiones) + 1]] <- sample.int(1000, 1)

# DEFINICIÓN DEL ENTRENAMIENTO
#===============================================================================
control_train <- trainControl(method = "repeatedcv", number = particiones,
                              repeats = repeticiones, seeds = seeds,
                              returnResamp = "final", verboseIter = FALSE,
                              allowParallel = TRUE, classProbs = TRUE)

# AJUSTE DEL MODELO
# ==============================================================================
set.seed(1992)
modelo_rf1 <- train(target2 ~ .,
                    data = dataTrain %>% select(-target),
                    method = "ranger",
                    tuneGrid = hiperparametros,
                    metric = "Accuracy",
                    trControl = control_train,
                    num.trees = 500)
## When you are done:
stopCluster(cl)

modelo_rf1

## Random Forest 
## 
## 1540 samples
##   21 predictor
##    2 classes: 'No', 'Si' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold, repeated 3 times) 
## Summary of sample sizes: 1232, 1232, 1232, 1232, 1232, 1232, ... 
## Resampling results across tuning parameters:
## 
##   mtry  min.node.size  Accuracy   Kappa    
##    3     2             0.9471861  0.8935220
##    3     5             0.9474026  0.8939345
##    3    15             0.9465368  0.8922954
##    3    30             0.9461039  0.8913963
##    5     2             0.9465368  0.8922425
##    5     5             0.9480519  0.8952916
##    5    15             0.9452381  0.8896421
##    5    30             0.9476190  0.8944472
##   10     2             0.9456710  0.8904601
##   10     5             0.9445887  0.8882700
##   10    15             0.9452381  0.8895611
##   10    30             0.9452381  0.8895868
## 
## Tuning parameter 'splitrule' was held constant at a value of gini
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were mtry = 5, splitrule = gini
##  and min.node.size = 5.

Matriz de Confusión

# Predicciones en test
predict_rf1 <- predict(object = modelo_rf1, newdata = dataTesti)

# Matriz de confusión
confusionMatrix(data = predict_rf1, reference = dataTesti$target2,
                positive = "Si", dnn = c("Predicho", "Real"))

## Confusion Matrix and Statistics
## 
##         Real
## Predicho  No  Si
##       No 289  13
##       Si  11 346
##                                           
##                Accuracy : 0.9636          
##                  95% CI : (0.9463, 0.9765)
##     No Information Rate : 0.5448          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9266          
##                                           
##  Mcnemar's Test P-Value : 0.8383          
##                                           
##             Sensitivity : 0.9638          
##             Specificity : 0.9633          
##          Pos Pred Value : 0.9692          
##          Neg Pred Value : 0.9570          
##              Prevalence : 0.5448          
##          Detection Rate : 0.5250          
##    Detection Prevalence : 0.5417          
##       Balanced Accuracy : 0.9636          
##                                           
##        'Positive' Class : Si              
##

Exportando modelo

saveRDS(object = modelo_rf1, file = "../models/rf.rds", compress = "xz")

Predicciones

Predicción con Random Forest

load("../data/chelsa.Rdata")
modRF1 <- readRDS(file = "../models/rf.rds")
predicciones <- predict(object = modRF1,
                        newdata = dataPredictoras,
                        type = "prob")

Probabilidades predichas:

dataPredicciones <- data.frame(
  long = dataPredictoras$long,
  lat = dataPredictoras$lat,
  prob = predicciones$Si
)
dataPredicciones

Mapa de predicción 2

library(rnaturalearth)
countries <- ne_countries(scale=110)

ggplot() + 
  geom_polygon(data = countries, aes(x = long, y = lat, group = group),
               color = "black", lwd = .25, fill = "white", alpha = 0.2) +
  scale_y_continuous(limits = c(-10, 13)) +
  scale_x_continuous(limits = c(-85, -60)) +
  geom_tile(data = dataPredicciones,
              aes(fill = prob, x = long, y = lat), alpha = 0.8) +
  scale_fill_gradient2("Probability",
                       low = "#ffffbf", mid = "#1a9641", high = "#d7191c",
                       midpoint = 0.5) +
  theme_void() +
  labs(fill = "Probabilidad")

Recursos de información

https://access.earthdata.nasa.gov/datasets/C1579029605-SCIOPS?action=index&controller=datasets&dataset_page_num=3240&dataset_page_size=10&keyword=%2A
Instituto Humboldt: http://www.humboldt.org.co/es/boletines-y-comunicados/item/1087-biodiversidad-colombiana-numero-tener-en-cuenta
National Geographic Society. (2019). Conservation. National Geographic. https://www.nationalgeographic.org/encyclopedia/conservation/?utm_source=BibblioRCM_Row
Rutledge, K. et al., 2011. Endangered species. National Geographic. https://www.nationalgeographic.org/encyclopedia/endangered-species/
https://rspatial.org/
https://besjournals.onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00172.x
https://onlinelibrary.wiley.com/doi/10.1111/aec.12658
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6441032/
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6812352/
https://www.car.gov.co/uploads/files/5bd8b7fe8125c.pdf
https://core.ac.uk/download/pdf/11058436.pdf
https://www.car.gov.co/uploads/files/5c4649693d7f8.pdf
https://repository.javeriana.edu.co/bitstream/handle/10554/34046/TESIS%20JESSICA%20ARIAS.pdf?sequence=1&isAllowed=y

Modelamiento de Distribución de Especies: Marco Metodológico

Space Apps Challenge - NASA

Paola, Mauricio, David, Cristian, Edimer

4/10/2020

Team: “Extintos”

Datos GBIF

Cariniana pyriformis

Limpieza

Tremarctos ornatus

Limpieza

Visualizaciones

Mapas

Mapa Colombia (1)

Mapa Colombia (2)

Mapa Colombia (3)

Conteos anuales

Conteos mensuales

Conteo por departamento

Zona de vida

Nubes de palabras

Descripción de observación

Hábitat

Ambientales

Imágenes CHELSA + NASA

Pseudo-Ausencias

Unión de presencias y pseudo-ausencias

Exportando data final

Visualización data final

Curvas de respuesta logística

Distribuciones

Correlaciones

Componentes principales

CP1 vs CP2

CP1, CP2, CP3

Modelos

Partición de datos

Exportando predictoras

Modelo Random Forest

Matriz de Confusión

Exportando modelo

Predicciones

Predicción con Random Forest

Mapa de predicción 2

Recursos de información