Team: “Extintos”

Datos GBIF

Cariniana pyriformis

# Biblioteca rgbif
library(rgbif)
gbif_cp <- occ_search(scientificName = "Cariniana pyriformis",
                  limit = 5000, return = "data",
                  hasCoordinate = TRUE)

data_cp <- gbif_cp$data
data_cp

Limpieza

  • Propuesta: App BioCol
  • La App sólo utiliza datos para Colombia (inicialmente)
  • Filtra coordenadas ausentes.
  • Filtra puntos de observación duplicados.
  • Para la base de datos final que estará disponible para el usuario, sólo se muestran las variables de interés.
library(tidyverse)
library(Hmisc)
data_cp %>% 
  filter(country == "Colombia") %>% 
  filter(!duplicated(decimalLatitude, decimalLongitude)) %>% 
  dplyr::select(scientificName, decimalLatitude, decimalLongitude, kingdom, phylum,
         family, genus, genericName, stateProvince, year, month, day,
         eventDate, class, country, occurrenceRemarks, habitat) %>% 
  mutate(habitat = capitalize(tolower(habitat))) ->
  data_cp_clean
data_cp_clean

Tremarctos ornatus

gbif_to <- occ_search(scientificName = "Tremarctos ornatus",
                  limit = 5000, return = "data",
                  hasCoordinate = TRUE)

data_to <- gbif_to$data
data_to

Limpieza

data_to %>% 
  filter(country == "Colombia") %>% 
  filter(!duplicated(decimalLatitude, decimalLongitude)) %>% 
  dplyr::select(scientificName, decimalLatitude, decimalLongitude, kingdom, phylum,
         family, genus, genericName, stateProvince, year, month, day,
         eventDate, class, country, occurrenceRemarks, habitat) %>% 
  mutate(habitat = capitalize(tolower(habitat))) ->
  data_to_clean
data_to_clean

Visualizaciones

Mapas

Mapa Colombia (1)

  • Mapa de Colombia: obtenido con la biblioteca raster.
library(raster)
# colombia0 <- getData(name = "GADM", country = "COL", level = 1, download = TRUE)
colombia <- read_rds("../data/gadm36_COL_1_sp.rds")
data_col <- fortify(colombia, region = "NAME_1")
data_col %>% 
  ggplot(aes(long, lat, group = group)) +
  geom_polygon(fill = "white") + 
  coord_equal() + 
  geom_path(color = "black") +
  theme_void() 

Mapa Colombia (2)

library("ggmap")
mapa <- c(left = -85, bottom = -5, right = -66, top = 13)
get_stamenmap(mapa, zoom = 5,
              source = "stamen", maptype = "terrain",
              force = TRUE) %>% ggmap() +
  labs(x = "Longitud", y = "Latitud") +
  scale_size_manual(values = c(0.1, 0.1)) +
  geom_point(data = data_to_clean, aes(x = decimalLongitude, y = decimalLatitude),
             shape = 19)

Mapa Colombia (3)

library("ggmap")
get_stamenmap(mapa, zoom = 5,
              source = "stamen", maptype = "terrain",
              force = TRUE) %>% ggmap() +
  geom_polygon(data = data_col, aes(x = long, y = lat, group = group),
               alpha = 0.001) + 
  geom_path(color = "black",
            data = data_col, aes(x = long, y = lat, group = group)) +
  labs(x = "Longitud", y = "Latitud") +
  scale_size_manual(values = c(0.1, 0.1)) +
  geom_point(data = data_to_clean, aes(x = decimalLongitude, y = decimalLatitude),
             shape = 19)

Conteos anuales

data_to_clean %>% 
  group_by(year) %>% 
  count() %>% 
  ggplot(aes(x = year, y = n)) +
  geom_line() +
  theme_light()

Conteos mensuales

data_to_clean %>% 
  group_by(month) %>% 
  count() %>% 
  ggplot(aes(x = month, y = n)) +
  geom_line() +
  theme_light() +
  scale_x_continuous(breaks = seq(1, 12, 1))

Conteo por departamento

data_to_clean %>% 
  group_by(stateProvince) %>% 
  count() %>% 
  filter(!is.na(stateProvince)) %>% 
  ggplot(aes(x = reorder(stateProvince, n), y = n)) +
  geom_col(color = "black") +
  coord_flip() +
  theme_light() +
  labs(x = "Departamento", y = "Total") +
  scale_y_continuous(breaks = seq(1, 61, 10))

Zona de vida

data_to_clean %>% 
  group_by(habitat) %>% 
  count() %>% 
  filter(!is.na(habitat)) %>% 
  ggplot(aes(x = reorder(habitat, n), y = n)) +
  geom_col(color = "black") +
  coord_flip() +
  theme_light() +
  labs(x = "Zona de vida", y = "Total") +
  scale_y_continuous(breaks = seq(1, 61, 10))

Nubes de palabras

Descripción de observación

# Tokenización de texto
library(tidytext)
source("../functions/tokenizar.R")

# Función cleanText() 
texto <- data_to_clean %>% 
  filter(!is.na(occurrenceRemarks)) %>% 
  mutate(textTokenize = map(.x = occurrenceRemarks,
                            .f = cleanText))
# Texto tidy (ordenado)
texto_tidy <- texto %>%
  unnest(cols = textTokenize) %>% 
  rename(token = textTokenize)

# Frecuencia de palabras
freq_texto <- texto_tidy %>% 
  group_by(token) %>% 
  summarise(frequence = n())

# nube de palabras
library(wordcloud)
wordcloudCustom <- function(data){
  wordcloud(words = data$token, freq = data$frequence,
            max.words = 400, random.order = FALSE, rot.per = 0.35,
            colors = jcolors(palette = "pal9"))
}

library(jcolors)
wordcloudCustom(data = freq_texto)

Hábitat

# Función cleanText() 
texto2 <- data_to_clean %>% 
  filter(!is.na(habitat)) %>% 
  mutate(textTokenize = map(.x = habitat,
                            .f = cleanText))
# Texto tidy (ordenado)
texto_tidy2 <- texto2 %>%
  unnest(cols = textTokenize) %>% 
  rename(token = textTokenize)

# Frecuencia de palabras
freq_texto2 <- texto_tidy2 %>% 
  group_by(token) %>% 
  summarise(frequence = n())

wordcloudCustom(data = freq_texto2)

Ambientales

Imágenes CHELSA + NASA

# Cargando bibliotecas
library(tidyverse)
library(raster)
library(biomod2)

# Lista de archivos tif
lista_tif <- list.files("../data/chelsa_climate/", pattern = ".tif$", full.names = TRUE)

# Ordenando la lista
# Esto lo hago pensando en gráficos que están más adelante
lista_tif <- c(lista_tif[1], lista_tif[12:19], lista_tif[2:11])


# Importando imágenes individuales
# Se alamacenan en una lista de nombre chelsa
chelsa <- list()
for (i in 1:length(lista_tif)) {
  chelsa[[i]] = raster(lista_tif[i])
}

# Stack de raster
predictoras <- stack(chelsa[[1]], chelsa[[2]], chelsa[[3]], chelsa[[4]],
                     chelsa[[5]], chelsa[[6]], chelsa[[7]], chelsa[[8]],
                     chelsa[[9]], chelsa[[10]], chelsa[[11]], chelsa[[12]],
                     chelsa[[13]], chelsa[[14]], chelsa[[15]], chelsa[[16]],
                     chelsa[[17]], chelsa[[18]], chelsa[[19]])
plot(predictoras)

Pseudo-Ausencias

library(dismo)

# Coordenadas de las ocurrencias
xmin <- min(data_to_clean$decimalLongitude)
xmax <- max(data_to_clean$decimalLongitude)
ymin <- min(data_to_clean$decimalLatitude)
ymax <- max(data_to_clean$decimalLatitude)

# Formando la "caja" de muestreo
bb <- matrix(c(xmin, xmin, xmax, xmax, xmin, ymin, ymax, ymax, ymin, ymin),
             ncol=2)

# Transformando la matriz en SpatialPolygons
bgExt <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(bb)), 1)))

# Extendiendo 3 grados la zona de muestreo desde los puntos
bgExt <- rgeos::gBuffer(bgExt, width = 3)


# Corte de imágnes chelsa 
envsBgCrop <- raster::crop(predictoras, bgExt)
envsBgMsk <- raster::mask(envsBgCrop, bgExt)

# Muestreo aleatorio de pseudo-ausencias
set.seed(123)
bg.xy <- dismo::randomPoints(envsBgMsk, 1000)

# Matriz de coordenadas a data frame --> nueva variable "target"
bg.xy <- as.data.frame(bg.xy) %>% 
  rename(decimalLongitude = x, decimalLatitude = y) %>% 
  mutate(target = 0)
bg.xy

Unión de presencias y pseudo-ausencias

# Base de datos con "1" y "0"
data_to_clean %>% 
  mutate(target = 1) %>% 
  bind_rows(bg.xy) %>% 
  dplyr::select(decimalLatitude, decimalLongitude, target) ->
  data_to_final
data_to_final
  • Unión de presencias/ausencias con variables ambientales (raster-tif):
library(biomod2)
# Datos para base de datos de modelos
rta <- data_to_final$target
coord_rta = data_to_final[c('decimalLongitude','decimalLatitude')]
nombre_rta = "Tremarcto_ornatus"

# Formato de base de datos para modelo
datos_modelos <- BIOMOD_FormatingData(
  resp.var = rta,
  expl.var = predictoras,
  resp.xy = coord_rta,
  resp.name = nombre_rta,
  
)
## 
## -=-=-=-=-=-=-=-=-=-=-=-= Tremarcto_ornatus Data Formating -=-=-=-=-=-=-=-=-=-=-=-=
## 
##  Response variable name was converted into Tremarcto.ornatus
## > No pseudo absences selection !
##       ! No data has been set aside for modeling evaluation
##          ! Some NAs have been automatically removed from your data
## -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  • Extracción de base de datos para modelos y visualización:
# Extracción de base de datos
df_Tremarcto_ornatus <- datos_modelos@coord %>% 
  bind_cols(datos_modelos@data.env.var) %>% 
  bind_cols(as.data.frame(datos_modelos@data.species))

# Cambiando nombres
names(df_Tremarcto_ornatus) <- c("long", "lat", paste0("Bio", 1:19), "target")
df_Tremarcto_ornatus

Exportando data final

save(df_Tremarcto_ornatus, file = "../data/data_Tremarcto_ornatus.Rdata",
     compress = "xz")

Visualización data final

Curvas de respuesta logística

modelo1 <- glm(target ~Bio1 + Bio2,
               data = df_Tremarcto_ornatus, family = "binomial")
library(ggiraphExtra)
ggPredict(modelo1, jitter = FALSE, colorn = 10, se = F) +
  theme_bw() +
  labs(x = "Bio1", y = "Probabilidad", color = "Bio2")

Distribuciones

df_Tremarcto_ornatus %>% 
  dplyr::select(-c(long, lat)) %>% 
  gather(key = "key", value = "value", -target) %>% 
  ggplot(aes(x = value, fill = as.factor(target))) +
  facet_wrap(~key, scales = "free") +
  geom_density(alpha = 0.8, ncol = 4) +
  theme_light() +
  labs(fill = "") +
  scale_fill_jcolors(palette = "pal7")

Correlaciones

library(corrplot)
df_Tremarcto_ornatus %>% 
  dplyr::select(-c(long, lat, target)) %>%
  cor(method = "spearman") %>% 
  corrplot(method = "pie",
           diag = FALSE,
           addgrid.col = "black",
           type = "lower",
           tl.srt = 20,
           tl.col = "black",
           col = jcolors(palette = "pal4"),
           order = "hclust")

Componentes principales

library(FactoMineR)
data_acp <- df_Tremarcto_ornatus %>% 
  dplyr::select(-c(long, lat, target))
acp_oso <- PCA(X = data_acp,
               scale.unit = TRUE, 
               ncp = ncol(data_acp),
               graph = FALSE)
summary(acp_oso)
## 
## Call:
## PCA(X = data_acp, scale.unit = TRUE, ncp = ncol(data_acp), graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance              11.705   3.512   2.133   1.024   0.417   0.167   0.024
## % of var.             61.606  18.486  11.224   5.390   2.193   0.877   0.124
## Cumulative % of var.  61.606  80.092  91.316  96.706  98.899  99.776  99.900
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.014   0.002   0.002   0.001   0.000   0.000   0.000
## % of var.              0.076   0.011   0.009   0.003   0.000   0.000   0.000
## Cumulative % of var.  99.976  99.988  99.997  99.999 100.000 100.000 100.000
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19
## Variance               0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000
## 
## Individuals (the 10 first)
##           Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr
## 1     |  4.090 | -3.163  0.039  0.598 |  0.873  0.010  0.046 |  0.471  0.005
## 2     |  3.689 | -2.966  0.034  0.646 | -0.553  0.004  0.022 |  0.889  0.017
## 3     |  3.762 | -3.031  0.036  0.649 | -1.171  0.018  0.097 |  0.969  0.020
## 4     |  3.689 | -2.966  0.034  0.646 | -0.553  0.004  0.022 |  0.889  0.017
## 5     |  3.703 | -2.961  0.034  0.640 | -0.096  0.000  0.001 |  0.869  0.016
## 6     |  3.762 | -3.031  0.036  0.649 | -1.171  0.018  0.097 |  0.969  0.020
## 7     |  3.689 | -2.966  0.034  0.646 | -0.553  0.004  0.022 |  0.889  0.017
## 8     |  3.825 | -3.263  0.041  0.728 | -0.538  0.004  0.020 |  0.611  0.008
## 9     |  3.847 | -3.203  0.040  0.693 |  1.112  0.016  0.084 | -0.978  0.020
## 10    |  1.168 | -0.288  0.000  0.061 |  0.604  0.005  0.267 |  0.446  0.004
##         cos2  
## 1      0.013 |
## 2      0.058 |
## 3      0.066 |
## 4      0.058 |
## 5      0.055 |
## 6      0.066 |
## 7      0.058 |
## 8      0.026 |
## 9      0.065 |
## 10     0.145 |
## 
## Variables (the 10 first)
##          Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr   cos2  
## Bio1  |  0.871  6.486  0.759 |  0.362  3.739  0.131 |  0.225  2.369  0.051 |
## Bio2  | -0.940  7.555  0.884 | -0.036  0.037  0.001 |  0.317  4.699  0.100 |
## Bio3  | -0.940  7.555  0.884 | -0.036  0.037  0.001 |  0.317  4.699  0.100 |
## Bio4  |  0.446  1.700  0.199 |  0.508  7.350  0.258 |  0.509 12.161  0.259 |
## Bio5  |  0.832  5.915  0.692 |  0.402  4.597  0.161 |  0.289  3.928  0.084 |
## Bio6  |  0.900  6.922  0.810 |  0.335  3.187  0.112 |  0.177  1.475  0.031 |
## Bio7  | -0.797  5.426  0.635 |  0.199  1.132  0.040 |  0.517 12.552  0.268 |
## Bio8  | -0.940  7.555  0.884 | -0.036  0.037  0.001 |  0.317  4.699  0.100 |
## Bio9  | -0.940  7.555  0.884 | -0.036  0.037  0.001 |  0.317  4.699  0.100 |
## Bio10 |  0.867  6.428  0.752 |  0.375  4.008  0.141 |  0.236  2.619  0.056 |

CP1 vs CP2

# Añadiendo datos a la base de datos inicial para gráficos ACP
df_Tremarcto_ornatus$cp1 <- acp_oso$ind$coord[, 1]
df_Tremarcto_ornatus$cp2 <- acp_oso$ind$coord[, 2]
df_Tremarcto_ornatus$cp3 <- acp_oso$ind$coord[, 3]

# Variables e Individuos
library(ggpubr)
library(factoextra)
ggarrange(
  fviz_pca_var(X = acp_oso, axes = c(1, 2), select.var = list(cos2 = 0.5)) +
  ggtitle(""),
  
  df_Tremarcto_ornatus %>% 
  ggplot(data =., aes(x = cp1, y = cp2, color = factor(target))) +
  geom_point(size = 2, alpha = 0.7) +
  geom_vline(xintercept = 0, lty = 2, size = 0.3, color = "darkred") +
  geom_hline(yintercept = 0, lty = 2, size = 0.3, color = "darkred") +
  labs(title = "CP1 vs CP2", color = "G. interrupta") +
  scale_color_jcolors(palette = "pal7") +
  theme_light() +
  theme(legend.position = "top"),
  
  ncol = 2
)

CP1, CP2, CP3

library(plotly)
plot_ly(df_Tremarcto_ornatus,
        x = ~ cp1,
        y = ~ cp2,
        z = ~ cp3,
        color = ~factor(target),
        colors = c("blue", "red")) %>% 
  add_markers()

Modelos

Partición de datos

# Data para modelos
data_modelo1 <- df_Tremarcto_ornatus %>% 
  dplyr::select(-c(cp1:cp3))

# Cambiando 0 por "No" y 1 por "Si"
data_modelo2 <- data_modelo1 %>% 
  mutate(target2 = factor(target, labels  = c("No", "Si")))
data_modelo3 <- as.data.frame(data_modelo2)

# Partición de datos
library(caret)
set.seed(123)
idx <- createDataPartition(y = data_modelo3$target2, times = 1, p = 0.70, list = FALSE)
dataTrain <- data_modelo3[idx, ]
dataTesti <- data_modelo3[-idx, ]

Exportando predictoras

# Creando base de datos con predictoras
dataPredictoras <- as.data.frame(coordinates(envsBgMsk))
dataPredictoras$Bio1 <- as.vector(values(envsBgMsk[[1]]))
dataPredictoras$Bio2 <- as.vector(values(envsBgMsk[[2]]))
dataPredictoras$Bio3 <- as.vector(values(envsBgMsk[[3]]))
dataPredictoras$Bio4 <- as.vector(values(envsBgMsk[[4]]))
dataPredictoras$Bio5 <- as.vector(values(envsBgMsk[[5]]))
dataPredictoras$Bio6 <- as.vector(values(envsBgMsk[[6]]))
dataPredictoras$Bio7 <- as.vector(values(envsBgMsk[[7]]))
dataPredictoras$Bio8 <- as.vector(values(envsBgMsk[[8]]))
dataPredictoras$Bio9 <- as.vector(values(envsBgMsk[[9]]))
dataPredictoras$Bio10 <- as.vector(values(envsBgMsk[[10]]))
dataPredictoras$Bio11 <- as.vector(values(envsBgMsk[[11]]))
dataPredictoras$Bio12 <- as.vector(values(envsBgMsk[[12]]))
dataPredictoras$Bio13 <- as.vector(values(envsBgMsk[[13]]))
dataPredictoras$Bio14 <- as.vector(values(envsBgMsk[[14]]))
dataPredictoras$Bio15 <- as.vector(values(envsBgMsk[[15]]))
dataPredictoras$Bio16 <- as.vector(values(envsBgMsk[[16]]))
dataPredictoras$Bio17 <- as.vector(values(envsBgMsk[[17]]))
dataPredictoras$Bio18 <- as.vector(values(envsBgMsk[[18]]))
dataPredictoras$Bio19 <- as.vector(values(envsBgMsk[[19]]))

# Liberando memoria RAM
gc()

# Cambiando nombres
names(dataPredictoras) <- c("long", "lat", paste0("Bio", 1:19))

# Eliminando NAs para predicciones
dataPredictoras <- na.omit(dataPredictoras)

dataPredictoras

save(dataPredictoras, file = "../data/chelsa.Rdata", compress = "xz")

Modelo Random Forest

# Bibliotecas
library(caret)
library(recipes)
library(xgboost)
library(doParallel)

# PARALELIZACIÓN DE PROCESO
#===============================================================================
cl <- makePSOCKcluster(5)
registerDoParallel(cl)

# HIPERPARÁMETROS, NÚMERO DE REPETICIONES Y SEMILLAS PARA CADA REPETICIÓN
#===============================================================================
particiones  <- 5
repeticiones <- 3

# Hiperparámetros
hiperparametros <- expand.grid(mtry = c(3, 5, 10),
                               min.node.size = c(2, 5, 15, 30),
                               splitrule = "gini")

set.seed(1992)
seeds <- vector(mode = "list", length = (particiones * repeticiones) + 1)
for (i in 1:(particiones * repeticiones)) {
  seeds[[i]] <- sample.int(1000, nrow(hiperparametros))
}
seeds[[(particiones * repeticiones) + 1]] <- sample.int(1000, 1)

# DEFINICIÓN DEL ENTRENAMIENTO
#===============================================================================
control_train <- trainControl(method = "repeatedcv", number = particiones,
                              repeats = repeticiones, seeds = seeds,
                              returnResamp = "final", verboseIter = FALSE,
                              allowParallel = TRUE, classProbs = TRUE)

# AJUSTE DEL MODELO
# ==============================================================================
set.seed(1992)
modelo_rf1 <- train(target2 ~ .,
                    data = dataTrain %>% select(-target),
                    method = "ranger",
                    tuneGrid = hiperparametros,
                    metric = "Accuracy",
                    trControl = control_train,
                    num.trees = 500)
## When you are done:
stopCluster(cl)

modelo_rf1
## Random Forest 
## 
## 1540 samples
##   21 predictor
##    2 classes: 'No', 'Si' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold, repeated 3 times) 
## Summary of sample sizes: 1232, 1232, 1232, 1232, 1232, 1232, ... 
## Resampling results across tuning parameters:
## 
##   mtry  min.node.size  Accuracy   Kappa    
##    3     2             0.9471861  0.8935220
##    3     5             0.9474026  0.8939345
##    3    15             0.9465368  0.8922954
##    3    30             0.9461039  0.8913963
##    5     2             0.9465368  0.8922425
##    5     5             0.9480519  0.8952916
##    5    15             0.9452381  0.8896421
##    5    30             0.9476190  0.8944472
##   10     2             0.9456710  0.8904601
##   10     5             0.9445887  0.8882700
##   10    15             0.9452381  0.8895611
##   10    30             0.9452381  0.8895868
## 
## Tuning parameter 'splitrule' was held constant at a value of gini
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were mtry = 5, splitrule = gini
##  and min.node.size = 5.

Matriz de Confusión

# Predicciones en test
predict_rf1 <- predict(object = modelo_rf1, newdata = dataTesti)

# Matriz de confusión
confusionMatrix(data = predict_rf1, reference = dataTesti$target2,
                positive = "Si", dnn = c("Predicho", "Real"))
## Confusion Matrix and Statistics
## 
##         Real
## Predicho  No  Si
##       No 289  13
##       Si  11 346
##                                           
##                Accuracy : 0.9636          
##                  95% CI : (0.9463, 0.9765)
##     No Information Rate : 0.5448          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9266          
##                                           
##  Mcnemar's Test P-Value : 0.8383          
##                                           
##             Sensitivity : 0.9638          
##             Specificity : 0.9633          
##          Pos Pred Value : 0.9692          
##          Neg Pred Value : 0.9570          
##              Prevalence : 0.5448          
##          Detection Rate : 0.5250          
##    Detection Prevalence : 0.5417          
##       Balanced Accuracy : 0.9636          
##                                           
##        'Positive' Class : Si              
## 

Exportando modelo

saveRDS(object = modelo_rf1, file = "../models/rf.rds", compress = "xz")

Predicciones

Predicción con Random Forest

load("../data/chelsa.Rdata")
modRF1 <- readRDS(file = "../models/rf.rds")
predicciones <- predict(object = modRF1,
                        newdata = dataPredictoras,
                        type = "prob")
  • Probabilidades predichas:
dataPredicciones <- data.frame(
  long = dataPredictoras$long,
  lat = dataPredictoras$lat,
  prob = predicciones$Si
)
dataPredicciones

Mapa de predicción 2

library(rnaturalearth)
countries <- ne_countries(scale=110)

ggplot() + 
  geom_polygon(data = countries, aes(x = long, y = lat, group = group),
               color = "black", lwd = .25, fill = "white", alpha = 0.2) +
  scale_y_continuous(limits = c(-10, 13)) +
  scale_x_continuous(limits = c(-85, -60)) +
  geom_tile(data = dataPredicciones,
              aes(fill = prob, x = long, y = lat), alpha = 0.8) +
  scale_fill_gradient2("Probability",
                       low = "#ffffbf", mid = "#1a9641", high = "#d7191c",
                       midpoint = 0.5) +
  theme_void() +
  labs(fill = "Probabilidad")

Recursos de información