library(raster)
library(BIEN)
library(MuMIn)
library(dismo)
library(tidyverse)
library(sf)
Bioclim <- getData(name = "worldclim", var = "bio", res=0.5, lon=-71, lat = -40)
Luego grafico la primeras 5 variables para asegurarme que esto esté buen descargadas
plot(Bioclim[[1:5]])
N_dombeyi <- BIEN_occurrence_species(species = "Nothofagus dombeyi", only.new.world = FALSE, native.status = T, observation.type = T, natives.only = T)
El raster de temperatura media:
Temp_Media <- Bioclim[[1]] %>%
as("SpatialPixelsDataFrame") %>%
as.data.frame()
y el de puntos:
N_dombeyi_SF <- st_as_sf(N_dombeyi, coords = 10:9)
Ahora puedo ver los puntos:
ggplot(data = N_dombeyi_SF) +geom_raster(data = Temp_Media, aes(x = x, y = y, fill = bio1_43)) + scale_fill_gradient2(name = "temperatura", midpoint = 112, high = scales::muted("red"), low = scales::muted("blue")) + geom_sf(color = "black") + theme_bw() + ylab("latitud") + xlab("longitud")
Cond <- raster::extract(Bioclim, y = N_dombeyi[,10:9])
Cond <- Cond[complete.cases(Cond),] %>% as.data.frame()
Le agregamos el valor 1 para demarcar presencias
Cond$Pres <- 1
Y luego generamos las pseudoausencias:
set.seed(2019)
PseudoAbs <- dismo::randomPoints(mask = Bioclim, n = 10000, p =N_dombeyi[,10:9])
Luego extraemos los datos de las pseudoauscencias
PseudoAbs <- raster::extract(Bioclim, y = PseudoAbs)
PseudoAbs <- PseudoAbs[complete.cases(PseudoAbs),] %>% as.data.frame()
PseudoAbs$Pres <- 0
Ahora unimos las Condiciones de presencias con las de pseudoausencias y empezamos a modelar:
Cond <- bind_rows(Cond, PseudoAbs)
Primero hacemos el modelo general:
Fit1 <- glm(Pres ~., data = Cond, family = binomial)
Y luego hacemos la selección de modelos
Primero veamos las variables que tenemos:
colnames(Cond)
## [1] "bio1_43" "bio2_43" "bio3_43" "bio4_43" "bio5_43" "bio6_43"
## [7] "bio7_43" "bio8_43" "bio9_43" "bio10_43" "bio11_43" "bio12_43"
## [13] "bio13_43" "bio14_43" "bio15_43" "bio16_43" "bio17_43" "bio18_43"
## [19] "bio19_43" "Pres"
Ahora vemos las correlaciones entre ellas, menos la variable 20 que es la resupuesta
smat <- abs(cor(Cond[, -20])) <= 0.7
smat[!lower.tri(smat)] <- NA
bio1_43 | bio2_43 | bio3_43 | bio4_43 | bio5_43 | bio6_43 | bio7_43 | bio8_43 | bio9_43 | bio10_43 | bio11_43 | bio12_43 | bio13_43 | bio14_43 | bio15_43 | bio16_43 | bio17_43 | bio18_43 | bio19_43 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bio1_43 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
bio2_43 | TRUE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
bio3_43 | TRUE | TRUE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
bio4_43 | TRUE | FALSE | TRUE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
bio5_43 | FALSE | FALSE | TRUE | FALSE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
bio6_43 | FALSE | TRUE | TRUE | TRUE | TRUE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
bio7_43 | TRUE | FALSE | TRUE | FALSE | FALSE | TRUE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
bio8_43 | FALSE | TRUE | TRUE | TRUE | FALSE | TRUE | TRUE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
bio9_43 | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
bio10_43 | FALSE | TRUE | TRUE | FALSE | FALSE | FALSE | FALSE | FALSE | TRUE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
bio11_43 | FALSE | TRUE | TRUE | TRUE | FALSE | FALSE | TRUE | FALSE | TRUE | FALSE | NA | NA | NA | NA | NA | NA | NA | NA | NA |
bio12_43 | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | NA | NA | NA | NA | NA | NA | NA | NA |
bio13_43 | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | FALSE | NA | NA | NA | NA | NA | NA | NA |
bio14_43 | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | FALSE | FALSE | NA | NA | NA | NA | NA | NA |
bio15_43 | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | NA | NA | NA | NA | NA |
bio16_43 | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | FALSE | FALSE | FALSE | TRUE | NA | NA | NA | NA |
bio17_43 | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | FALSE | FALSE | FALSE | TRUE | FALSE | NA | NA | NA |
bio18_43 | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | FALSE | TRUE | FALSE | TRUE | TRUE | FALSE | NA | NA |
bio19_43 | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | TRUE | FALSE | FALSE | FALSE | TRUE | FALSE | FALSE | TRUE | NA |
Y luego usamos eso para subsetear los modelos y seleccionarlos usando la función dredge de MuMIn
options(na.action = "na.fail")
Selected <- dredge(Fit1, subset = smat)
Para ver los mejores modelo podemos ver esto:
BestModels <- subset(Selected, delta < 2)
(Intercept) | bio1_43 | bio10_43 | bio11_43 | bio12_43 | bio13_43 | bio14_43 | bio15_43 | bio16_43 | bio17_43 | bio18_43 | bio19_43 | bio2_43 | bio3_43 | bio4_43 | bio5_43 | bio6_43 | bio7_43 | bio8_43 | bio9_43 | df | logLik | AICc | delta | weight | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
274962 | -6.054606 | -0.0460438 | NA | NA | NA | 0.0295404 | NA | NA | NA | NA | -0.0056531 | NA | NA | -0.1626493 | 0.0022270 | NA | NA | NA | NA | 0.0095209 | 7 | -537.2654 | 1088.542 | 0.000000 | 0.7278569 |
275026 | -5.862813 | -0.0461978 | NA | NA | NA | 0.0295062 | NA | 0.0014521 | NA | NA | -0.0055771 | NA | NA | -0.1678202 | 0.0022306 | NA | NA | NA | NA | 0.0094192 | 8 | -537.2476 | 1090.509 | 1.967553 | 0.2721431 |
bestmodel <- get.models(Selected, delta < 2)[[1]]
Podemos ver sus parametros
summary(bestmodel)
##
## Call:
## glm(formula = Pres ~ bio1_43 + bio13_43 + bio18_43 + bio3_43 +
## bio4_43 + bio9_43 + 1, family = binomial, data = Cond)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2217 -0.1079 -0.0554 -0.0349 3.6679
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.0546057 2.2666880 -2.671 0.00756 **
## bio1_43 -0.0460438 0.0047462 -9.701 < 2e-16 ***
## bio13_43 0.0295404 0.0020491 14.416 < 2e-16 ***
## bio18_43 -0.0056531 0.0012551 -4.504 6.67e-06 ***
## bio3_43 -0.1626493 0.0402573 -4.040 5.34e-05 ***
## bio4_43 0.0022270 0.0002653 8.395 < 2e-16 ***
## bio9_43 0.0095209 0.0044710 2.129 0.03322 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1801.4 on 10178 degrees of freedom
## Residual deviance: 1074.5 on 10172 degrees of freedom
## AIC: 1088.5
##
## Number of Fisher Scoring iterations: 9
y podemos ver su proyección en el mapa:
MapLM <- predict(Bioclim, bestmodel, type = "response")
plot(MapLM)
y podemos ver las respuestas para cada variable
Cond_Scaled <- Cond
Cond_Scaled[,-20] <- scale(Cond_Scaled[,-20])
VarImp <- glm(formula = Pres ~ bio1_43 + bio13_43 + bio18_43 + bio3_43 + bio4_43 + bio9_43 + 1, family = binomial, data = Cond_Scaled)
summary(bestmodel)
##
## Call:
## glm(formula = Pres ~ bio1_43 + bio13_43 + bio18_43 + bio3_43 +
## bio4_43 + bio9_43 + 1, family = binomial, data = Cond)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2217 -0.1079 -0.0554 -0.0349 3.6679
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.0546057 2.2666880 -2.671 0.00756 **
## bio1_43 -0.0460438 0.0047462 -9.701 < 2e-16 ***
## bio13_43 0.0295404 0.0020491 14.416 < 2e-16 ***
## bio18_43 -0.0056531 0.0012551 -4.504 6.67e-06 ***
## bio3_43 -0.1626493 0.0402573 -4.040 5.34e-05 ***
## bio4_43 0.0022270 0.0002653 8.395 < 2e-16 ***
## bio9_43 0.0095209 0.0044710 2.129 0.03322 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1801.4 on 10178 degrees of freedom
## Residual deviance: 1074.5 on 10172 degrees of freedom
## AIC: 1088.5
##
## Number of Fisher Scoring iterations: 9
summary(VarImp)
##
## Call:
## glm(formula = Pres ~ bio1_43 + bio13_43 + bio18_43 + bio3_43 +
## bio4_43 + bio9_43 + 1, family = binomial, data = Cond_Scaled)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2217 -0.1079 -0.0554 -0.0349 3.6679
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.1439 0.2387 -25.737 < 2e-16 ***
## bio1_43 -2.2079 0.2276 -9.701 < 2e-16 ***
## bio13_43 2.5542 0.1772 14.416 < 2e-16 ***
## bio18_43 -0.9647 0.2142 -4.504 6.67e-06 ***
## bio3_43 -0.5432 0.1344 -4.040 5.34e-05 ***
## bio4_43 2.2224 0.2647 8.395 < 2e-16 ***
## bio9_43 0.4067 0.1910 2.129 0.0332 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1801.4 on 10178 degrees of freedom
## Residual deviance: 1074.5 on 10172 degrees of freedom
## AIC: 1088.5
##
## Number of Fisher Scoring iterations: 9
NewData <- Cond %>% dplyr::select(Pres, bio1_43, bio13_43, bio18_43, bio3_43, bio4_43, bio9_43)
NewData <- NewData %>% mutate(bio1_43 = mean(bio1_43), bio13_43 = mean(bio13_43), bio18_43 = mean(bio18_43), bio3_43 = mean(bio3_43), bio4_43 = mean(bio4_43), bio9_43 = mean(bio9_43))
NewData$bio1_43 <- seq(from = min(Cond$bio1_43),to = max(Cond$bio1_43), length.out = nrow(NewData))
NewData$Prob <- predict(bestmodel, NewData, type = "response")
NewData$se <- predict(bestmodel, NewData, type = "response", se.fit = TRUE)$se.fit
ggplot(NewData, aes(x = bio1_43, y = Prob)) + geom_ribbon(aes(ymax = Prob + se, ymin = Prob -se, fill = "red"), alpha = 0.5) + geom_line() + theme_classic() + xlab("Temp x 10")
NewData <- Cond %>% dplyr::select(Pres, bio1_43, bio13_43, bio18_43, bio3_43, bio4_43, bio9_43)
NewData <- NewData %>% mutate(bio1_43 = mean(bio1_43), bio13_43 = mean(bio13_43), bio18_43 = mean(bio18_43), bio3_43 = mean(bio3_43), bio4_43 = mean(bio4_43), bio9_43 = mean(bio9_43))
NewData$bio9_43 <- seq(from = min(Cond$bio9_43),to = max(Cond$bio9_43), length.out = nrow(NewData))
NewData$Prob <- predict(bestmodel, NewData, type = "response")
NewData$se <- predict(bestmodel, NewData, type = "response", se.fit = TRUE)$se.fit
ggplot(NewData, aes(x = bio9_43, y = Prob)) + geom_ribbon(aes(ymax = Prob + se, ymin = Prob -se, fill = "red"), alpha = 0.5) + geom_line() + theme_classic() + xlab("Variable 9")