23 Julio 2019 - Sacar algunos generos del analisis desde la tabla inicial: Phylomedusa Flectonotus Caecilia Rheobates Trachyicefalus
-Intentar por familia. Aldemar sube la tabla con familia al github. Hecho!
## rgdal: version: 1.4-3, (SVN revision 828)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/diego.lizcano/Documents/R/win-library/3.5/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/diego.lizcano/Documents/R/win-library/3.5/rgdal/proj
## Linking to sp version: 1.3-1
## Loading required package: lattice
## Loading required package: latticeExtra
## Loading required package: RColorBrewer
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
##
## Attaching package: 'corrgram'
## The following object is masked from 'package:latticeExtra':
##
## panel.ellipse
## The following object is masked from 'package:lattice':
##
## panel.fill
## corrplot 0.84 loaded
## Loading required package: Matrix
##
## Attaching package: 'nlme'
## The following object is masked from 'package:lme4':
##
## lmList
## The following object is masked from 'package:raster':
##
## getData
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:latticeExtra':
##
## layer
## #refugeeswelcome
## This is mgcv 1.8-27. For overview type 'help("mgcv-package")'.
# Path to working directory
path <- "C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/data"
# shp <- "C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/Data2"
shp <- "C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/shp"
bio <- "C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/Capas2"
# BD4<-readOGR(dsn=shp, layer="Bd_generos1")
BD6<-readOGR(dsn=shp, layer="Bd5")## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\diego.lizcano\Documents\GitHub\Bd_N_Santander\shp", layer: "Bd5"
## with 150 features
## It has 8 fields
## Integer64 fields read as strings: Diagnostic
BD6<-BD6[!(BD6@data$Familia)=="Ranidae",]
BD6<-BD6[!(BD6@data$Familia)=="Caeciliidae",]
# BD6 <- read.csv("C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/data/Bd6.csv", header = T, sep = ";")
# define geo coords and put to SpatialPointDataFrames
crs.geo <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0") # geographical, datum WGS84
# make factor numeric
# BD6$X <- as.numeric(BD6$X)
# BD6$Y <- as.numeric(BD6$Y)
# add coord
# coordinates(BD6) <- ~X+Y # c(BD5$X, BD5$Y)
# proj4string(BD6) <- crs.geo # define projection system of our data
BD <- st_as_sf(BD6) # Convert foreign object to an sf object
pos <- grep(" ", BD$Species)
keep <- substr(pos, 1, pos-1)
########################
# BD$genus <-
###cargando variables ambientales
files <- list.files(path=bio, pattern='tif', full.names=TRUE )# View(files)
# Agrupar las variables ambientales en un objeto stack
predictors <- stack(files)
### clac slope aspect
slope_aspect <- terrain(predictors$DEM30, opt = c("slope", "aspect"), unit = "degrees")
predictors <- addLayer(predictors, slope_aspect)
deptos <-raster::getData ('GADM', country='COL', level=1)
# names(predictors)
# plot(predictors$Bio1)
# plot(BD1, pch=18, add=TRUE, col="blue", main="Batrachochytrium dendrobatidis")
mapview(BD, zcol = "Diagnostic", legend = TRUE, map.types = c("Esri.WorldShadedRelief", "Esri.WorldImagery"), color = "grey40")# plot(BD["Diagnostic"], add=T)
# levelplot(predictors)
### correlation
jnk=layerStats(predictors, 'pearson', na.rm=T)
corr_matrix=jnk$'pearson correlation coefficient'
# corrgram(corr_matrix, order=NULL, lower.panel=panel.shade,
# upper.panel=NULL, text.panel=panel.txt,
# main="Car")
##### correlation plot
# corrplot(corr_matrix, order = "FPC",type = "lower", tl.cex = 0.8) # tl.pos = "lower",
####### function to add significance
# mat : is a matrix of data
# ... : further arguments to pass to the native R cor.test function
cor.mtest <- function(mat, ...) {
mat <- as.matrix(mat)
n <- ncol(mat)
p.mat<- matrix(NA, n, n)
diag(p.mat) <- 0
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(mat[, i], mat[, j], ...)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
}
}
colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
p.mat
}
# matrix of the p-value of the correlation
# use the function
p.mat <- cor.mtest(predictors)
# head(p.mat[, 1:5])
##### correlation plot
# Leave blank on no significant coefficient
corrplot(corr_matrix, p.mat = p.mat, sig.level = 0.05, order="FPC", type = "lower", tl.cex = 0.8) # tl.pos = "lower",###Using non correlated variables
#Extraer la informacion de variables ambientales por ptos. con presencia de BD
# # head(BD)
# BD <- BD1[,-1] # quito la primera columna
# head(BD)
# names(BD)[1]<-paste("x")
# names(BD)[2]<-paste("y") # re nombro encabezados de columnas
# # head(BD)
#
# presvals <- (BD)
positive <- BD %>% dplyr::filter (Diagnostic==1) # %>% as.data.frame()
negative <- BD %>% dplyr::filter (Diagnostic==0) # %>% as.data.frame()
###################################
##### standarizing! ###############
###################################
presvals <- as.data.frame(extract(
scale(predictors2, center=TRUE, scale=TRUE), as(positive, "Spatial")))
ausvals <- as.data.frame(extract(scale(predictors2, center=TRUE, scale=TRUE),
as(negative, "Spatial")))
###################################
##### NO standarizing! ############
###################################
real_presvals <- as.data.frame(extract(predictors2,
as(positive, "Spatial")))
#
real_ausvals <- as.data.frame(extract(predictors2,
as(negative, "Spatial")))
###### Family adding
presvals$family <- positive$Familia
presvals$pa <- rep(1,88)
ausvals$family <- negative$Familia
ausvals$pa <- rep(0,60)
###### Family adding
real_presvals$family <- positive$Familia
real_presvals$pa <- rep(1,88)
real_ausvals$family <- negative$Familia
real_ausvals$pa <- rep(0,60)
########### Full data
fulldat <- rbind(presvals, ausvals) #####
real_fulldat <- rbind(real_presvals, real_ausvals) #####
#
set.seed(2000)
# backg <- randomPoints(predictors, n=500, extf = 1.25)
# plot(predictors2$DEM30)
# plot(BD["Diagnostic"], add=T)
# plot(BD, pch=18, add=TRUE, col="blue", main="Batrachochytrium dendrobatidis")
#
#generando datos de prueba y entrenamiento con los datos de presencia y pseudo-ausencia con particion 80-20%
indexes_training = sample(1:nrow(fulldat), size=0.70*nrow(fulldat))
indexes_testing = sample(1:nrow(fulldat), size=0.30*nrow(fulldat))
###Presencias
#identifica el 80% de los datos como datos training
training = as.data.frame(fulldat[indexes_training,])
dim(training)## [1] 103 15
#identifica el 20% de los datos como datos testing
testing = as.data.frame(fulldat[-indexes_training,])
dim(testing)## [1] 45 15
# species
# testing$species <- positive$Species[-indexes_pres]
# training$species <- positive$Species[indexes_pres]
# View(testing)
###ausencia
#identifica el 20% de los datos como datos prueba/test
# backg_test = as.data.frame(ausvals[indexes_aus_test,])
# dim(backg_test)
#identifica el 80% de los datos como datos entrenamiento/train
# backg_train = as.data.frame(ausvals[-indexes_aus_test,])
# dim(backg_train)
# species
# View(testing)
# backg_train$species <- negative$Species[-indexes_aus_test]
# backg_test$species <- negative$Species[indexes_aus_test]
# mapeando los datos de entrenamiento y prueba de presencias y ausvals
# r = raster(predictors2, 1)
# plot(!is.na(r), col=c('white', 'light grey'), legend=FALSE, main="datos de entrenamiento y prueba ")
# points(backg_train, pch='-', cex=0.5, col='red')
# points(backg_test, pch='-', cex=0.5, col='black')
# points(testing, pch= '+', col='green')
# points(training, pch='+', col='blue')
#
trainpres <- as.data.frame(testing)# data.frame( extract(predictors2, testing) )
# trainbackg <- as.data.frame(backg_train)# data.frame( extract(predictors2, backg_train) )
# train <- rbind(trainpres, trainbackg)
# pb_train <- c(rep(1, nrow(trainpres)), rep(0, nrow(trainbackg)))
# envtrain <- data.frame( cbind(pa=pb_train, train) )
#### extract from stack
testpres <- as.data.frame(training)# data.frame( extract(predictors2, training) )
# testbackg <- as.data.frame(backg_test)# data.frame( extract(predictors2, backg_test) )
# head(envtrain)
#Ajuste del modelo GLM y predicc??n
# GLM binomial
glm1 <- glm(pa ~ Bio13 + Bio14 + Bio15 +
Bio16 + Bio18 + Bio19 + Bio2 +
Bio3 + Bio4 + Bio7 +
DEM30 + slope + aspect,
family = binomial(link = "logit"), data=training)
summary(glm1)##
## Call:
## glm(formula = pa ~ Bio13 + Bio14 + Bio15 + Bio16 + Bio18 + Bio19 +
## Bio2 + Bio3 + Bio4 + Bio7 + DEM30 + slope + aspect, family = binomial(link = "logit"),
## data = training)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2486 -0.9095 0.4127 0.8735 1.9157
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.91825 0.68413 1.342 0.17953
## Bio13 -2.77026 1.07658 -2.573 0.01008 *
## Bio14 0.19130 0.43490 0.440 0.66003
## Bio15 0.48622 0.42425 1.146 0.25176
## Bio16 2.03790 1.28082 1.591 0.11159
## Bio18 0.35357 0.56255 0.629 0.52967
## Bio19 0.55392 0.34382 1.611 0.10716
## Bio2 -6.05458 13.56544 -0.446 0.65536
## Bio3 4.10654 7.61584 0.539 0.58974
## Bio4 2.09498 1.26191 1.660 0.09688 .
## Bio7 4.51304 14.29230 0.316 0.75218
## DEM30 -2.11768 0.72802 -2.909 0.00363 **
## slope -0.08845 0.33506 -0.264 0.79179
## aspect 0.58791 0.28965 2.030 0.04238 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 142.00 on 102 degrees of freedom
## Residual deviance: 107.38 on 89 degrees of freedom
## AIC: 135.38
##
## Number of Fisher Scoring iterations: 5
## (Intercept) Bio13 Bio14 Bio15 Bio16 Bio18
## 0.91824726 -2.77026494 0.19129749 0.48622467 2.03789785 0.35356895
## Bio19 Bio2 Bio3 Bio4 Bio7 DEM30
## 0.55392133 -6.05458464 4.10654324 2.09498293 4.51303779 -2.11768479
## slope aspect
## -0.08845001 0.58790570
##
## Attaching package: 'MASS'
## The following objects are masked from 'package:raster':
##
## area, select
## Start: AIC=135.38
## pa ~ Bio13 + Bio14 + Bio15 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 +
## Bio4 + Bio7 + DEM30 + slope + aspect
##
## Df Deviance AIC
## - slope 1 107.45 133.45
## - Bio7 1 107.48 133.48
## - Bio14 1 107.57 133.57
## - Bio2 1 107.58 133.58
## - Bio3 1 107.67 133.67
## - Bio18 1 107.77 133.76
## - Bio15 1 108.72 134.72
## <none> 107.38 135.38
## - Bio19 1 110.09 136.09
## - Bio16 1 110.31 136.31
## - Bio4 1 110.39 136.39
## - aspect 1 111.86 137.86
## - Bio13 1 114.97 140.97
## - DEM30 1 119.11 145.11
##
## Step: AIC=133.45
## pa ~ Bio13 + Bio14 + Bio15 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 +
## Bio4 + Bio7 + DEM30 + aspect
##
## Df Deviance AIC
## - Bio7 1 107.56 131.56
## - Bio14 1 107.60 131.60
## - Bio2 1 107.66 131.66
## - Bio3 1 107.76 131.76
## - Bio18 1 107.82 131.82
## - Bio15 1 108.73 132.74
## <none> 107.45 133.45
## - Bio19 1 110.36 134.36
## - Bio16 1 110.47 134.47
## - Bio4 1 110.71 134.71
## - aspect 1 111.91 135.91
## - Bio13 1 115.05 139.05
## - DEM30 1 119.18 143.18
##
## Step: AIC=131.56
## pa ~ Bio13 + Bio14 + Bio15 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 +
## Bio4 + DEM30 + aspect
##
## Df Deviance AIC
## - Bio14 1 107.71 129.71
## - Bio18 1 107.83 129.83
## - Bio15 1 108.87 130.87
## <none> 107.56 131.56
## - Bio3 1 110.31 132.31
## - Bio16 1 110.66 132.66
## - Bio19 1 110.69 132.69
## - Bio4 1 111.97 133.97
## - aspect 1 112.05 134.05
## - Bio13 1 115.06 137.06
## - Bio2 1 117.31 139.31
## - DEM30 1 119.20 141.20
##
## Step: AIC=129.71
## pa ~ Bio13 + Bio15 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 + Bio4 +
## DEM30 + aspect
##
## Df Deviance AIC
## - Bio18 1 107.96 127.96
## - Bio15 1 109.07 129.07
## <none> 107.71 129.71
## - Bio19 1 110.87 130.87
## - Bio3 1 110.99 130.99
## - Bio16 1 111.25 131.25
## - aspect 1 112.25 132.25
## - Bio4 1 113.04 133.04
## - Bio13 1 115.07 135.07
## - Bio2 1 117.61 137.61
## - DEM30 1 119.21 139.21
##
## Step: AIC=127.96
## pa ~ Bio13 + Bio15 + Bio16 + Bio19 + Bio2 + Bio3 + Bio4 + DEM30 +
## aspect
##
## Df Deviance AIC
## - Bio15 1 109.09 127.09
## <none> 107.96 127.96
## - Bio19 1 110.87 128.87
## - Bio3 1 111.34 129.34
## - aspect 1 112.33 130.33
## - Bio4 1 114.02 132.02
## - Bio16 1 114.17 132.17
## - Bio13 1 115.36 133.36
## - Bio2 1 117.71 135.71
## - DEM30 1 119.68 137.68
##
## Step: AIC=127.09
## pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio3 + Bio4 + DEM30 + aspect
##
## Df Deviance AIC
## <none> 109.09 127.09
## - Bio3 1 112.06 128.06
## - Bio19 1 112.42 128.42
## - aspect 1 112.91 128.91
## - Bio16 1 114.93 130.93
## - Bio4 1 114.93 130.93
## - Bio13 1 115.75 131.75
## - Bio2 1 118.25 134.25
## - DEM30 1 120.09 136.09
##########################################
## now only covars from Final Model glm1
##########################################
glm2 <- glm(pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio4 + DEM30,
family = binomial(link = "logit"), data=training)
glm3 <- glm(pa ~ Bio13 + Bio19 + Bio2 + DEM30,
family = binomial(link = "logit"), data=training)
glm4 <- glm(pa ~ Bio13 + Bio2 + DEM30,
family = binomial(link = "logit"), data=training)
glm5 <- glm(pa ~ Bio13 + Bio19 + DEM30, # only precipitation
family = binomial(link = "logit"), data=training)
glm6 <- glm(pa ~ Bio13 + Bio19 + family, # only precipitation
family = binomial(link = "logit"), data=training)
AICc(glm2, glm3, glm4, glm5)# AICc should be used instead AIC when sample size is small in comparison to the number of estimated parameters (Burnham & Anderson 2002 recommend its use when n / K < 40). 105/6 = 17.5
summary(glm2)##
## Call:
## glm(formula = pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio4 + DEM30,
## family = binomial(link = "logit"), data = training)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7602 -0.9354 0.4546 0.8944 1.9607
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7478 0.3377 2.214 0.02680 *
## Bio13 -2.1045 0.9558 -2.202 0.02767 *
## Bio16 1.9912 0.9720 2.049 0.04050 *
## Bio19 0.4598 0.3052 1.507 0.13185
## Bio2 -1.1588 0.4686 -2.473 0.01341 *
## Bio4 0.9664 0.6291 1.536 0.12451
## DEM30 -1.4359 0.4969 -2.890 0.00385 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 142.00 on 102 degrees of freedom
## Residual deviance: 114.85 on 96 degrees of freedom
## AIC: 128.85
##
## Number of Fisher Scoring iterations: 4
#### multiple group effects with multiple group effect terms factor species.
# #### Random-effects
# mod_lmer1 <- glmer(pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio4 + DEM30 + (1|genus), data=training, #fulldat,
# family = binomial(link = "logit"))
# #### fixed-effects
# mod_lmer2 <- glmer(pa ~ I(Bio13^2) : I(Bio16^2) + I(Bio19^2) + I(Bio2^2) + (1|genus), data=training, #fulldat,
# family = binomial(link = "logit"))
#
# mod_lmer3 <- glmer(pa ~ Bio16 + DEM30 + (Bio2|genus), data=training, #fulldat,
# family = binomial(link = "logit"))
#
# mod_lmer4 <- glmer(pa ~ DEM30 + Bio2 + (Bio16|genus), data=training, #fulldat,
# family = binomial(link = "logit"))
#
# mod_lmer5 <- glmer(pa ~ Bio16 : Bio2 * (1|genus), data=training, #fulldat,
# family = binomial(link = "logit"))
mod_lmer1 <- glmer(pa ~ Bio13 + Bio2 + DEM30 + (1|family), data=training, #fulldat,
family = binomial(link = "logit"))
mod_lmer2 <- glmer(pa ~ Bio14 + Bio4 + DEM30 + (1|family), data=training, #fulldat,
family = binomial(link = "logit"))
mod_lmer3 <- glmer(pa ~ Bio19 + Bio2 + Bio4 + (1|family), data=training, #fulldat,
family = binomial(link = "logit"))
mod_lmer4 <- glmer(pa ~ Bio2 + Bio14 + DEM30 + (1|family), data=training, #fulldat,
family = binomial(link = "logit"))
mod_lmer5 <- glmer(pa ~ Bio4 + Bio13 + DEM30 + (1|family), data=training, #fulldat,
family = binomial(link = "logit"))
mod_lmer6 <- glmer(pa ~ DEM30 + Bio2 + Bio14 : Bio13 + (1|family), data=training, #fulldat,
family = binomial(link = "logit"))
mod_lmer7 <- glmer(pa ~ DEM30 + Bio2 + Bio14 + Bio13 + Bio14 : Bio13 + (1|family), data=training, #fulldat,
family = binomial(link = "logit"))
# mod_lmer3 <- glmer(pa ~ Bio16 + DEM30 + (Bio2|family), data=training, #fulldat,
# family = binomial(link = "logit"))
#
# mod_lmer4 <- glmer(pa ~ DEM30 + Bio2 + (Bio16|family), data=training, #fulldat,
# family = binomial(link = "logit"))
#
# mod_lmer5 <- glmer(pa ~ Bio16 : Bio2 * (1|family), data=training, #fulldat,
# >>>>>>> 5d9913e35d0aedac423cdfd46f27ec3dcb4c31ba
# family = binomial(link = "logit"))
summary(mod_lmer1)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: pa ~ Bio13 + Bio2 + DEM30 + (1 | family)
## Data: training
##
## AIC BIC logLik deviance df.resid
## 128.5 141.7 -59.2 118.5 98
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1214 -0.7653 0.3409 0.7274 2.4305
##
## Random effects:
## Groups Name Variance Std.Dev.
## family (Intercept) 1.293 1.137
## Number of obs: 103, groups: family, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.76108 0.55141 1.380 0.16752
## Bio13 -0.01878 0.23215 -0.081 0.93553
## Bio2 -1.16925 0.43311 -2.700 0.00694 **
## DEM30 -1.27497 0.48302 -2.640 0.00830 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Bio13 Bio2
## Bio13 -0.039
## Bio2 0.025 0.162
## DEM30 0.015 0.309 0.766
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: pa ~ Bio14 + Bio4 + DEM30 + (1 | family)
## Data: training
##
## AIC BIC logLik deviance df.resid
## 135.9 149.1 -63.0 125.9 98
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5065 -0.7577 0.3710 0.8304 1.6825
##
## Random effects:
## Groups Name Variance Std.Dev.
## family (Intercept) 1.063 1.031
## Number of obs: 103, groups: family, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0511 0.5541 1.897 0.0578 .
## Bio14 0.1695 0.2569 0.660 0.5093
## Bio4 0.4531 0.4499 1.007 0.3139
## DEM30 -0.2471 0.3144 -0.786 0.4318
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Bio14 Bio4
## Bio14 0.123
## Bio4 0.336 0.126
## DEM30 0.096 0.483 0.225
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: pa ~ Bio19 + Bio2 + Bio4 + (1 | family)
## Data: training
##
## AIC BIC logLik deviance df.resid
## 135.1 148.3 -62.5 125.1 98
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2975 -0.7393 0.3709 0.7104 1.5748
##
## Random effects:
## Groups Name Variance Std.Dev.
## family (Intercept) 1.261 1.123
## Number of obs: 103, groups: family, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0709 0.5855 1.829 0.0674 .
## Bio19 0.3914 0.3035 1.290 0.1972
## Bio2 -0.4278 0.3040 -1.407 0.1593
## Bio4 0.9639 0.5834 1.652 0.0985 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Bio19 Bio2
## Bio19 0.083
## Bio2 -0.013 -0.323
## Bio4 0.308 0.622 -0.186
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: pa ~ Bio2 + Bio14 + DEM30 + (1 | family)
## Data: training
##
## AIC BIC logLik deviance df.resid
## 128.4 141.6 -59.2 118.4 98
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0613 -0.7470 0.3322 0.7104 2.4328
##
## Random effects:
## Groups Name Variance Std.Dev.
## family (Intercept) 1.34 1.158
## Number of obs: 103, groups: family, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.77053 0.55890 1.379 0.16800
## Bio2 -1.15610 0.42901 -2.695 0.00704 **
## Bio14 0.07206 0.26349 0.273 0.78449
## DEM30 -1.21811 0.48937 -2.489 0.01281 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Bio2 Bio14
## Bio2 0.037
## Bio14 0.075 0.059
## DEM30 0.052 0.739 0.332
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: pa ~ Bio4 + Bio13 + DEM30 + (1 | family)
## Data: training
##
## AIC BIC logLik deviance df.resid
## 135.7 148.8 -62.8 125.7 98
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2818 -0.8040 0.3789 0.7784 1.7711
##
## Random effects:
## Groups Name Variance Std.Dev.
## family (Intercept) 1.014 1.007
## Number of obs: 103, groups: family, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0461 0.5437 1.924 0.0544 .
## Bio4 0.5490 0.4772 1.150 0.2500
## Bio13 0.1948 0.2350 0.829 0.4072
## DEM30 -0.2346 0.3087 -0.760 0.4474
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Bio4 Bio13
## Bio4 0.330
## Bio13 0.090 0.346
## DEM30 0.078 0.310 0.436
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: pa ~ DEM30 + Bio2 + Bio14:Bio13 + (1 | family)
## Data: training
##
## AIC BIC logLik deviance df.resid
## 125.8 139.0 -57.9 115.8 98
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7672 -0.6769 0.2354 0.6769 2.5162
##
## Random effects:
## Groups Name Variance Std.Dev.
## family (Intercept) 1.784 1.336
## Number of obs: 103, groups: family, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6296 0.6189 1.017 0.30907
## DEM30 -1.3733 0.4671 -2.940 0.00328 **
## Bio2 -1.2759 0.4382 -2.912 0.00360 **
## Bio14:Bio13 0.3456 0.2384 1.450 0.14718
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) DEM30 Bio2
## DEM30 0.045
## Bio2 0.046 0.758
## Bio14:Bio13 -0.112 -0.264 -0.257
################ plot using sjPlot
# plot_model(mod_lmer1, vline.color = "gray",
# sort.est = TRUE)
plot_model(mod_lmer6, type = "re", sort.est = TRUE) #plots conditionals of random effects## Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.
## $DEM30
##
## $Bio2
##
## $Bio14
##
## $Bio13
# cero es mean(min(real_fulldat$DEM30)) /ELEVATION.sd + ELEVATION.mean
# min min(real_fulldat$DEM30)
# max max(real_fulldat$DEM30)
plot_model(mod_lmer6, type = "diag", #fixed effects
show.ci = TRUE, sort.est = TRUE)## $family
dat <-ggpredict(mod_lmer6, c( "DEM30", "Bio2","family") )
ggplot(dat, aes(x = x, y = predicted, colour = group)) +
stat_smooth( se = FALSE, fullrange = TRUE) +
facet_wrap(~facet)## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
DEM30 <- seq( from=min(fulldat$DEM30), to= max(fulldat$DEM30), length.out=148)
Bio2 <- seq( from=min(fulldat$Bio2), to=max(fulldat$Bio2), length.out=148)
Bio14 <- seq( from=min(fulldat$Bio14), to=max(fulldat$Bio14), length.out=148)
Bio13 <- seq( from=min(fulldat$Bio13), to=max(fulldat$Bio13), length.out=148)
new_mat <- as.data.frame(cbind(DEM30, Bio2, Bio13, Bio14))
###### prediction
# plot and predict Occu
Predicted <-predict(mod_lmer6, re.form=NA,newdata=new_mat)
predichos <- cbind(Predicted,new_mat)
# #predict elevation
# ELEVATION.mean <- mean(real_fulldat$DEM30)
# ELEVATION.sd <- sd(real_fulldat$DEM30)
# predichos$ELEVATION2 <- (real_fulldat$DEM30)*ELEVATION.sd + ELEVATION.mean
#
# plot(predichos$ELEVATION2 ~Predicted,type="l",col="blue",
# xlab="Elevación",
# ylab="Probabilidad")
# lines(lower~ELEVATION2, pred_p,type="l",col=gray(0.5))
# lines(upper~ELEVATION2, pred_p,type="l",col=gray(0.5))
# nuevodato <- new_data(mod_lmer6, c("family", "Bio2", "Bio14", "DEM30", "Bio13"))
# dat2 <- ggpredict(mod_lmer6, terms = c("Bio2", "DEM30", "genus"))
# ggplot(dat2, aes(x = x, y = predicted, colour = group)) +
# stat_smooth( fullrange = TRUE) +
# facet_wrap(~facet)
# Probabilities of fixed effects depending on grouping level (random intercept)
# plot_model(mod_lmer1, type = "ri.pc",
# show.se = TRUE)
# sjp.lmer(mod_lmer1, type = "fe",
# show.ci = TRUE, sort.est = TRUE)
# sjp.lmer(mod_lmer1, type = "slope")
## evaluacion
ge2 <- evaluate(testing[testing$pa==1,], testing[testing$pa==0,], glm2)
ge2## class : ModelEvaluation
## n presences : 32
## n absences : 13
## AUC : 0.65625
## cor : 0.2256837
## max TPR+TNR at : 0.5621106
ge_lmer <- evaluate(testing[testing$pa==1,], testing[testing$pa==0,], mod_lmer6, allow.new.levels = TRUE) ####### change by genus
ge_lmer## class : ModelEvaluation
## n presences : 32
## n absences : 13
## AUC : 0.6442308
## cor : 0.2341882
## max TPR+TNR at : 0.9254064
ge3 <- evaluate(testing[testing$pa==1,], testing[testing$pa==0,], glm3, allow.new.levels = TRUE) ####### change by genus
ge3## class : ModelEvaluation
## n presences : 32
## n absences : 13
## AUC : 0.6490385
## cor : 0.2705193
## max TPR+TNR at : 0.1874548
par(mfrow=c(1,3))
plot(ge2, 'ROC', main="GLM")
plot(ge_lmer, 'ROC', main="GLMM")
plot(ge3, 'ROC', main="GLMM")## Area under the curve: 0.8518
# Area under the curve:
# An AUROC of > 70 represents a good fit. this is not an overoptimistic estimation since we have computed it on cross-validation aproach. Aditionally and in the case of spatial data should make use of spatial CV.
##mapeo del modelo GLMM 4
var_orig <- scale(predictors2, center=TRUE, scale=TRUE)
##mapeo del modelo GLM2
pg3 <- predict(scale(predictors2, center=TRUE, scale=TRUE), glm2, ext=predictors2)
par(mfrow=c(1,2))
plot(pg3, main='GLM2/Binomial, BD')
plot(BD6, add=TRUE, col='grey50')
plot(deptos, add=TRUE, border='white')
tr <- threshold(ge2, 'prevalence')
plot(pg3>tr, main='presence/absence')
plot(BD6, add=TRUE, col='grey50')
plot(deptos, add=TRUE, border='white')# points(testing, pch='+')
# dev.off()
pg4 <- raster::predict(scale(predictors2, center=TRUE, scale=TRUE), mod_lmer6, re.form=~0, type = "response")
plot(pg4)
list_ras <- list()
for(i in 1:length(unique(BD$Familia))){
list_ras[[i]] <- raster::predict(scale(predictors2, center=TRUE, scale=TRUE), mod_lmer6, const=(data.frame(family=unique(BD$Familia)[i])),
type = "response", allow.new.levels=T)
}
family_maps <- stack(list_ras)
names(family_maps) <- unique(BD$Familia)
# library(ggiraphExtra)
for(i in 1:length(unique(BD$Familia))){
par(mfrow=c(1,2))
plot(family_maps[[i]],
main=paste('Pred. prob. BD for', names(family_maps[[i]])))
plot(BD6, add=TRUE, col='grey50')
plot(deptos, add=TRUE, border='white')
tr <- threshold(ge_lmer, 'prevalence')
plot(family_maps[[i]]>tr,
main=paste('BD pres/abs for', names(family_maps[[i]])))
plot(BD6, add=TRUE, col='grey50')
plot(deptos, add=TRUE, border='white')
# points(testing, pch='+')
# dev.off()
}##mapeo del modelo lmer1
###########################
###### no es posible... falta genero
############################
# pg_lmer <- predict(scale(predictors2, center=TRUE, scale=TRUE), mod_lmer1, ext=predictors2)
# par(mfrow=c(1,2))
# plot(pg_lmer, main='GLM/binomial, BD')
# plot(BD4, add=TRUE, border='dark grey')
# tr <- threshold(ge_lmer, 'spec_sens')
# plot(pg>tr, main='presence/backgence')
# plot(BD4, add=TRUE, border='dark grey')
# points(testing, pch='+')
# dev.off()BIO1 = Annual Mean Temperature BIO2 = Mean Diurnal Range (Mean of monthly (max temp - min temp)) BIO3 = Isothermality (BIO2/BIO7) (* 100) BIO4 = Temperature Seasonality (standard deviation *100) BIO5 = Max Temperature of Warmest Month BIO6 = Min Temperature of Coldest Month BIO7 = Temperature Annual Range (BIO5-BIO6) BIO8 = Mean Temperature of Wettest Quarter BIO9 = Mean Temperature of Driest Quarter BIO10 = Mean Temperature of Warmest Quarter BIO11 = Mean Temperature of Coldest Quarter BIO12 = Annual Precipitation BIO13 = Precipitation of Wettest Month BIO14 = Precipitation of Driest Month BIO15 = Precipitation Seasonality (Coefficient of Variation) BIO16 = Precipitation of Wettest Quarter BIO17 = Precipitation of Driest Quarter BIO18 = Precipitation of Warmest Quarter BIO19 = Precipitation of Coldest Quarter