Reunion con Aldemar

23 Julio 2019 - Sacar algunos generos del analisis desde la tabla inicial: Phylomedusa Flectonotus Caecilia Rheobates Trachyicefalus

-Intentar por familia. Aldemar sube la tabla con familia al github. Hecho!

BD

Read packages

knitr::opts_chunk$set(echo = TRUE)

library(sp)
library(rgdal)

## rgdal: version: 1.4-3, (SVN revision 828)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/diego.lizcano/Documents/R/win-library/3.5/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/diego.lizcano/Documents/R/win-library/3.5/rgdal/proj
##  Linking to sp version: 1.3-1

library(raster)
library(rgdal)
library(spatial.tools)
library(dismo)
library(rasterVis)

## Loading required package: lattice

## Loading required package: latticeExtra

## Loading required package: RColorBrewer

library(xtable)
library(sf)

## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3

library(mapview)
library(corrgram)

## 
## Attaching package: 'corrgram'

## The following object is masked from 'package:latticeExtra':
## 
##     panel.ellipse

## The following object is masked from 'package:lattice':
## 
##     panel.fill

library (corrplot) # correlations

## corrplot 0.84 loaded

library(lme4)

## Loading required package: Matrix

library(nlme)

## 
## Attaching package: 'nlme'

## The following object is masked from 'package:lme4':
## 
##     lmList

## The following object is masked from 'package:raster':
## 
##     getData

library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following object is masked from 'package:latticeExtra':
## 
##     layer

library(sjPlot)

## #refugeeswelcome

library(mgcv) # GAM

## This is mgcv 1.8-27. For overview type 'help("mgcv-package")'.

library(MuMIn) # model average
library(mgcv) # visual GAM
library(ggeffects) # to predict models

Read Data

# Path to working directory
path <- "C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/data"
# shp <- "C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/Data2"
shp <- "C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/shp"
bio <- "C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/Capas2"

# BD4<-readOGR(dsn=shp, layer="Bd_generos1")

BD6<-readOGR(dsn=shp, layer="Bd5")

## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\diego.lizcano\Documents\GitHub\Bd_N_Santander\shp", layer: "Bd5"
## with 150 features
## It has 8 fields
## Integer64 fields read as strings:  Diagnostic

BD6<-BD6[!(BD6@data$Familia)=="Ranidae",]
BD6<-BD6[!(BD6@data$Familia)=="Caeciliidae",]

# BD6 <- read.csv("C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/data/Bd6.csv", header = T, sep = ";")

# define geo coords and put to SpatialPointDataFrames
crs.geo <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")  # geographical, datum WGS84

# make factor numeric
# BD6$X <- as.numeric(BD6$X)
# BD6$Y <- as.numeric(BD6$Y)
# add coord
# coordinates(BD6) <- ~X+Y # c(BD5$X, BD5$Y)
# proj4string(BD6) <- crs.geo  # define projection system of our data

BD <- st_as_sf(BD6) # Convert foreign object to an sf object


pos <-  grep(" ", BD$Species)
keep <-  substr(pos, 1, pos-1)

########################
# BD$genus <- 

###cargando variables ambientales
files <- list.files(path=bio, pattern='tif', full.names=TRUE )

View files

# View(files)
# Agrupar las variables ambientales en un objeto stack
predictors <- stack(files)
### clac  slope aspect
slope_aspect <- terrain(predictors$DEM30, opt = c("slope", "aspect"), unit = "degrees")

predictors <- addLayer(predictors, slope_aspect)

deptos <-raster::getData ('GADM', country='COL', level=1)

# names(predictors)
# plot(predictors$Bio1)
# plot(BD1, pch=18, add=TRUE, col="blue", main="Batrachochytrium dendrobatidis")

mapview(BD, zcol = "Diagnostic", legend = TRUE, map.types = c("Esri.WorldShadedRelief", "Esri.WorldImagery"), color = "grey40")

levelplot(predictors, main="all predictors")

# plot(BD["Diagnostic"], add=T)
# levelplot(predictors)

### correlation 
jnk=layerStats(predictors, 'pearson', na.rm=T)
corr_matrix=jnk$'pearson correlation coefficient'


# corrgram(corr_matrix, order=NULL, lower.panel=panel.shade,
#  upper.panel=NULL, text.panel=panel.txt,
#  main="Car")

##### correlation plot
# corrplot(corr_matrix, order = "FPC",type = "lower",  tl.cex = 0.8) # tl.pos = "lower",

####### function to add significance 
# mat : is a matrix of data
# ... : further arguments to pass to the native R cor.test function
cor.mtest <- function(mat, ...) {
    mat <- as.matrix(mat)
    n <- ncol(mat)
    p.mat<- matrix(NA, n, n)
    diag(p.mat) <- 0
    for (i in 1:(n - 1)) {
        for (j in (i + 1):n) {
            tmp <- cor.test(mat[, i], mat[, j], ...)
            p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
        }
    }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}

# matrix of the p-value of the correlation
# use the function
p.mat <- cor.mtest(predictors)
# head(p.mat[, 1:5])

##### correlation plot 
# Leave blank on no significant coefficient
corrplot(corr_matrix, p.mat = p.mat, sig.level = 0.05,  order="FPC", type = "lower",  tl.cex = 0.8) # tl.pos = "lower",

Remove higly correlated layers

## remove correlated layers
predictors2 <- dropLayer(predictors, c(1,2,3,4,9,15,16,18,19))

### correlation 
jnk=layerStats(predictors2, 'pearson', na.rm=T)
corr_matrix=jnk$'pearson correlation coefficient'


# matrix of the p-value of the correlation
p.mat <- cor.mtest(predictors2)

corrplot(corr_matrix, p.mat = p.mat, sig.level = 0.05,  order="FPC", type = "lower",  tl.cex = 0.8) # tl.pos = "lower",

print("predictores no correlacionados")

## [1] "predictores no correlacionados"

names(predictors2)

##  [1] "Bio13"  "Bio14"  "Bio15"  "Bio16"  "Bio18"  "Bio19"  "Bio2"  
##  [8] "Bio3"   "Bio4"   "Bio7"   "DEM30"  "slope"  "aspect"

Building GLM models

###Using non correlated variables

#Extraer la informacion de variables ambientales por ptos. con presencia de BD

# # head(BD)
# BD <- BD1[,-1] # quito la primera columna
# head(BD)
# names(BD)[1]<-paste("x")
# names(BD)[2]<-paste("y") # re nombro encabezados de  columnas
# # head(BD)
# 
# presvals <- (BD)
positive <- BD %>% dplyr::filter (Diagnostic==1) # %>% as.data.frame()
negative <- BD %>% dplyr::filter (Diagnostic==0) # %>% as.data.frame()


###################################  
##### standarizing! ############### 
###################################

presvals <- as.data.frame(extract(
    scale(predictors2, center=TRUE, scale=TRUE), as(positive, "Spatial")))

ausvals <- as.data.frame(extract(scale(predictors2, center=TRUE, scale=TRUE),
                   as(negative, "Spatial")))

###################################  
##### NO standarizing! ############
###################################

real_presvals <- as.data.frame(extract(predictors2, 
                     as(positive, "Spatial")))
# 
real_ausvals <- as.data.frame(extract(predictors2, 
                   as(negative, "Spatial")))


###### Family adding 
presvals$family <- positive$Familia
presvals$pa <-  rep(1,88)
ausvals$family  <- negative$Familia
ausvals$pa <-  rep(0,60)

###### Family adding 
real_presvals$family <- positive$Familia
real_presvals$pa <-  rep(1,88)
real_ausvals$family  <- negative$Familia
real_ausvals$pa <-  rep(0,60)
                 
########### Full data
fulldat <- rbind(presvals, ausvals) #####

real_fulldat <- rbind(real_presvals, real_ausvals) #####

# 
set.seed(2000)
# backg <- randomPoints(predictors, n=500, extf = 1.25)


# plot(predictors2$DEM30)
# plot(BD["Diagnostic"], add=T)
# plot(BD, pch=18, add=TRUE, col="blue", main="Batrachochytrium dendrobatidis")
# 

#generando datos de prueba y entrenamiento con los datos de presencia y pseudo-ausencia con particion 80-20%
indexes_training = sample(1:nrow(fulldat), size=0.70*nrow(fulldat))
indexes_testing = sample(1:nrow(fulldat), size=0.30*nrow(fulldat))

###Presencias
#identifica el 80% de los datos como datos training
training = as.data.frame(fulldat[indexes_training,])
dim(training)

## [1] 103  15

#identifica el 20% de los datos como datos testing
testing = as.data.frame(fulldat[-indexes_training,])
dim(testing)

## [1] 45 15

# species
# testing$species <- positive$Species[-indexes_pres]
# training$species <- positive$Species[indexes_pres]
# View(testing)


###ausencia
#identifica el 20% de los datos como datos prueba/test
# backg_test = as.data.frame(ausvals[indexes_aus_test,])
# dim(backg_test)
#identifica el 80% de los datos como datos entrenamiento/train
# backg_train = as.data.frame(ausvals[-indexes_aus_test,])
# dim(backg_train)

# species
# View(testing)
# backg_train$species <- negative$Species[-indexes_aus_test]
# backg_test$species <- negative$Species[indexes_aus_test]


# mapeando los datos de entrenamiento y prueba de presencias y ausvals
# r = raster(predictors2, 1)
# plot(!is.na(r), col=c('white', 'light grey'), legend=FALSE, main="datos de entrenamiento y prueba ")
# points(backg_train, pch='-', cex=0.5, col='red')
# points(backg_test, pch='-', cex=0.5, col='black')
# points(testing, pch= '+', col='green')
# points(training, pch='+', col='blue')
# 

trainpres <- as.data.frame(testing)# data.frame( extract(predictors2, testing) )
# trainbackg <- as.data.frame(backg_train)# data.frame( extract(predictors2, backg_train) )
# train <- rbind(trainpres, trainbackg)
# pb_train <- c(rep(1, nrow(trainpres)), rep(0, nrow(trainbackg)))
# envtrain <- data.frame( cbind(pa=pb_train, train) )


#### extract from stack
testpres <- as.data.frame(training)# data.frame( extract(predictors2, training) )
# testbackg <- as.data.frame(backg_test)# data.frame( extract(predictors2, backg_test) )

# head(envtrain)


#Ajuste del modelo GLM y predicc??n 
# GLM binomial 
glm1 <- glm(pa ~  Bio13 + Bio14 + Bio15 +
            Bio16 +  Bio18 + Bio19 + Bio2 +  
            Bio3 + Bio4 +  Bio7 + 
            DEM30 + slope + aspect,
           family = binomial(link = "logit"), data=training)
summary(glm1)

## 
## Call:
## glm(formula = pa ~ Bio13 + Bio14 + Bio15 + Bio16 + Bio18 + Bio19 + 
##     Bio2 + Bio3 + Bio4 + Bio7 + DEM30 + slope + aspect, family = binomial(link = "logit"), 
##     data = training)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2486  -0.9095   0.4127   0.8735   1.9157  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.91825    0.68413   1.342  0.17953   
## Bio13       -2.77026    1.07658  -2.573  0.01008 * 
## Bio14        0.19130    0.43490   0.440  0.66003   
## Bio15        0.48622    0.42425   1.146  0.25176   
## Bio16        2.03790    1.28082   1.591  0.11159   
## Bio18        0.35357    0.56255   0.629  0.52967   
## Bio19        0.55392    0.34382   1.611  0.10716   
## Bio2        -6.05458   13.56544  -0.446  0.65536   
## Bio3         4.10654    7.61584   0.539  0.58974   
## Bio4         2.09498    1.26191   1.660  0.09688 . 
## Bio7         4.51304   14.29230   0.316  0.75218   
## DEM30       -2.11768    0.72802  -2.909  0.00363 **
## slope       -0.08845    0.33506  -0.264  0.79179   
## aspect       0.58791    0.28965   2.030  0.04238 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 142.00  on 102  degrees of freedom
## Residual deviance: 107.38  on  89  degrees of freedom
## AIC: 135.38
## 
## Number of Fisher Scoring iterations: 5

coef(glm1)

## (Intercept)       Bio13       Bio14       Bio15       Bio16       Bio18 
##  0.91824726 -2.77026494  0.19129749  0.48622467  2.03789785  0.35356895 
##       Bio19        Bio2        Bio3        Bio4        Bio7       DEM30 
##  0.55392133 -6.05458464  4.10654324  2.09498293  4.51303779 -2.11768479 
##       slope      aspect 
## -0.08845001  0.58790570

# Stepwise Regression
library(MASS)

## 
## Attaching package: 'MASS'

## The following objects are masked from 'package:raster':
## 
##     area, select

step <- stepAIC(glm1, direction="backward", k = 2)

## Start:  AIC=135.38
## pa ~ Bio13 + Bio14 + Bio15 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 + 
##     Bio4 + Bio7 + DEM30 + slope + aspect
## 
##          Df Deviance    AIC
## - slope   1   107.45 133.45
## - Bio7    1   107.48 133.48
## - Bio14   1   107.57 133.57
## - Bio2    1   107.58 133.58
## - Bio3    1   107.67 133.67
## - Bio18   1   107.77 133.76
## - Bio15   1   108.72 134.72
## <none>        107.38 135.38
## - Bio19   1   110.09 136.09
## - Bio16   1   110.31 136.31
## - Bio4    1   110.39 136.39
## - aspect  1   111.86 137.86
## - Bio13   1   114.97 140.97
## - DEM30   1   119.11 145.11
## 
## Step:  AIC=133.45
## pa ~ Bio13 + Bio14 + Bio15 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 + 
##     Bio4 + Bio7 + DEM30 + aspect
## 
##          Df Deviance    AIC
## - Bio7    1   107.56 131.56
## - Bio14   1   107.60 131.60
## - Bio2    1   107.66 131.66
## - Bio3    1   107.76 131.76
## - Bio18   1   107.82 131.82
## - Bio15   1   108.73 132.74
## <none>        107.45 133.45
## - Bio19   1   110.36 134.36
## - Bio16   1   110.47 134.47
## - Bio4    1   110.71 134.71
## - aspect  1   111.91 135.91
## - Bio13   1   115.05 139.05
## - DEM30   1   119.18 143.18
## 
## Step:  AIC=131.56
## pa ~ Bio13 + Bio14 + Bio15 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 + 
##     Bio4 + DEM30 + aspect
## 
##          Df Deviance    AIC
## - Bio14   1   107.71 129.71
## - Bio18   1   107.83 129.83
## - Bio15   1   108.87 130.87
## <none>        107.56 131.56
## - Bio3    1   110.31 132.31
## - Bio16   1   110.66 132.66
## - Bio19   1   110.69 132.69
## - Bio4    1   111.97 133.97
## - aspect  1   112.05 134.05
## - Bio13   1   115.06 137.06
## - Bio2    1   117.31 139.31
## - DEM30   1   119.20 141.20
## 
## Step:  AIC=129.71
## pa ~ Bio13 + Bio15 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 + Bio4 + 
##     DEM30 + aspect
## 
##          Df Deviance    AIC
## - Bio18   1   107.96 127.96
## - Bio15   1   109.07 129.07
## <none>        107.71 129.71
## - Bio19   1   110.87 130.87
## - Bio3    1   110.99 130.99
## - Bio16   1   111.25 131.25
## - aspect  1   112.25 132.25
## - Bio4    1   113.04 133.04
## - Bio13   1   115.07 135.07
## - Bio2    1   117.61 137.61
## - DEM30   1   119.21 139.21
## 
## Step:  AIC=127.96
## pa ~ Bio13 + Bio15 + Bio16 + Bio19 + Bio2 + Bio3 + Bio4 + DEM30 + 
##     aspect
## 
##          Df Deviance    AIC
## - Bio15   1   109.09 127.09
## <none>        107.96 127.96
## - Bio19   1   110.87 128.87
## - Bio3    1   111.34 129.34
## - aspect  1   112.33 130.33
## - Bio4    1   114.02 132.02
## - Bio16   1   114.17 132.17
## - Bio13   1   115.36 133.36
## - Bio2    1   117.71 135.71
## - DEM30   1   119.68 137.68
## 
## Step:  AIC=127.09
## pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio3 + Bio4 + DEM30 + aspect
## 
##          Df Deviance    AIC
## <none>        109.09 127.09
## - Bio3    1   112.06 128.06
## - Bio19   1   112.42 128.42
## - aspect  1   112.91 128.91
## - Bio16   1   114.93 130.93
## - Bio4    1   114.93 130.93
## - Bio13   1   115.75 131.75
## - Bio2    1   118.25 134.25
## - DEM30   1   120.09 136.09

step$anova # display results

Now only significant explanatory covars (layers) from Final Model glm1

##########################################
## now only covars from Final Model glm1
##########################################

glm2 <- glm(pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio4 + DEM30,
            family = binomial(link = "logit"), data=training)

glm3 <- glm(pa ~ Bio13 + Bio19 + Bio2 + DEM30,
            family = binomial(link = "logit"), data=training)

glm4 <- glm(pa ~ Bio13 + Bio2 +  DEM30,
            family = binomial(link = "logit"), data=training)

glm5 <- glm(pa ~ Bio13 + Bio19 + DEM30, # only precipitation
            family = binomial(link = "logit"), data=training)

glm6 <- glm(pa ~ Bio13 + Bio19 + family, # only precipitation
            family = binomial(link = "logit"), data=training)

AICc(glm2, glm3, glm4, glm5)

# AICc should be used instead AIC when sample size is small in comparison to the number of estimated parameters (Burnham & Anderson 2002 recommend its use when n / K < 40). 105/6 = 17.5
summary(glm2)

## 
## Call:
## glm(formula = pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio4 + DEM30, 
##     family = binomial(link = "logit"), data = training)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7602  -0.9354   0.4546   0.8944   1.9607  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   0.7478     0.3377   2.214  0.02680 * 
## Bio13        -2.1045     0.9558  -2.202  0.02767 * 
## Bio16         1.9912     0.9720   2.049  0.04050 * 
## Bio19         0.4598     0.3052   1.507  0.13185   
## Bio2         -1.1588     0.4686  -2.473  0.01341 * 
## Bio4          0.9664     0.6291   1.536  0.12451   
## DEM30        -1.4359     0.4969  -2.890  0.00385 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 142.00  on 102  degrees of freedom
## Residual deviance: 114.85  on  96  degrees of freedom
## AIC: 128.85
## 
## Number of Fisher Scoring iterations: 4

#### multiple group effects with multiple group effect terms factor species.
# #### Random-effects  
# mod_lmer1 <- glmer(pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio4 + DEM30 + (1|genus), data=training, #fulldat,
#                    family = binomial(link = "logit"))
# #### fixed-effects  
# mod_lmer2 <- glmer(pa ~  I(Bio13^2) : I(Bio16^2) + I(Bio19^2) + I(Bio2^2) + (1|genus), data=training, #fulldat,
#                    family = binomial(link = "logit"))
# 
# mod_lmer3 <- glmer(pa ~ Bio16 +  DEM30 + (Bio2|genus), data=training, #fulldat,
#                    family = binomial(link = "logit"))
# 
# mod_lmer4 <- glmer(pa ~ DEM30 + Bio2 + (Bio16|genus), data=training, #fulldat,
#                    family = binomial(link = "logit"))
# 
# mod_lmer5 <- glmer(pa ~ Bio16 : Bio2 * (1|genus), data=training, #fulldat,
#                    family = binomial(link = "logit"))


mod_lmer1 <- glmer(pa ~  Bio13 + Bio2 + DEM30 + (1|family), data=training, #fulldat,
                   family = binomial(link = "logit"))

mod_lmer2 <- glmer(pa ~   Bio14 +  Bio4 + DEM30 + (1|family), data=training, #fulldat,
                   family = binomial(link = "logit"))


mod_lmer3 <- glmer(pa ~  Bio19 + Bio2  + Bio4 + (1|family), data=training, #fulldat,
                   family = binomial(link = "logit"))

mod_lmer4 <- glmer(pa ~  Bio2 + Bio14 + DEM30 + (1|family), data=training, #fulldat,
                   family = binomial(link = "logit"))

mod_lmer5 <- glmer(pa ~  Bio4 + Bio13 + DEM30 + (1|family), data=training, #fulldat,
              family = binomial(link = "logit"))

mod_lmer6 <- glmer(pa ~  DEM30 + Bio2 + Bio14 : Bio13 + (1|family), data=training, #fulldat,
                   family = binomial(link = "logit"))

mod_lmer7 <- glmer(pa ~  DEM30 + Bio2 + Bio14 + Bio13 + Bio14 : Bio13 + (1|family), data=training, #fulldat,
                   family = binomial(link = "logit"))

# mod_lmer3 <- glmer(pa ~ Bio16 +  DEM30 + (Bio2|family), data=training, #fulldat,
#                    family = binomial(link = "logit"))
# 
# mod_lmer4 <- glmer(pa ~ DEM30 + Bio2 + (Bio16|family), data=training, #fulldat,
#                    family = binomial(link = "logit"))
# 
# mod_lmer5 <- glmer(pa ~ Bio16 : Bio2 * (1|family), data=training, #fulldat,
# >>>>>>> 5d9913e35d0aedac423cdfd46f27ec3dcb4c31ba
#                    family = binomial(link = "logit"))




summary(mod_lmer1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: pa ~ Bio13 + Bio2 + DEM30 + (1 | family)
##    Data: training
## 
##      AIC      BIC   logLik deviance df.resid 
##    128.5    141.7    -59.2    118.5       98 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1214 -0.7653  0.3409  0.7274  2.4305 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  family (Intercept) 1.293    1.137   
## Number of obs: 103, groups:  family, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.76108    0.55141   1.380  0.16752   
## Bio13       -0.01878    0.23215  -0.081  0.93553   
## Bio2        -1.16925    0.43311  -2.700  0.00694 **
## DEM30       -1.27497    0.48302  -2.640  0.00830 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr) Bio13  Bio2  
## Bio13 -0.039              
## Bio2   0.025  0.162       
## DEM30  0.015  0.309  0.766

summary(mod_lmer2)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: pa ~ Bio14 + Bio4 + DEM30 + (1 | family)
##    Data: training
## 
##      AIC      BIC   logLik deviance df.resid 
##    135.9    149.1    -63.0    125.9       98 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5065 -0.7577  0.3710  0.8304  1.6825 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  family (Intercept) 1.063    1.031   
## Number of obs: 103, groups:  family, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   1.0511     0.5541   1.897   0.0578 .
## Bio14         0.1695     0.2569   0.660   0.5093  
## Bio4          0.4531     0.4499   1.007   0.3139  
## DEM30        -0.2471     0.3144  -0.786   0.4318  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr) Bio14 Bio4 
## Bio14 0.123             
## Bio4  0.336  0.126      
## DEM30 0.096  0.483 0.225

summary(mod_lmer3)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: pa ~ Bio19 + Bio2 + Bio4 + (1 | family)
##    Data: training
## 
##      AIC      BIC   logLik deviance df.resid 
##    135.1    148.3    -62.5    125.1       98 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2975 -0.7393  0.3709  0.7104  1.5748 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  family (Intercept) 1.261    1.123   
## Number of obs: 103, groups:  family, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   1.0709     0.5855   1.829   0.0674 .
## Bio19         0.3914     0.3035   1.290   0.1972  
## Bio2         -0.4278     0.3040  -1.407   0.1593  
## Bio4          0.9639     0.5834   1.652   0.0985 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr) Bio19  Bio2  
## Bio19  0.083              
## Bio2  -0.013 -0.323       
## Bio4   0.308  0.622 -0.186

summary(mod_lmer4)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: pa ~ Bio2 + Bio14 + DEM30 + (1 | family)
##    Data: training
## 
##      AIC      BIC   logLik deviance df.resid 
##    128.4    141.6    -59.2    118.4       98 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0613 -0.7470  0.3322  0.7104  2.4328 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  family (Intercept) 1.34     1.158   
## Number of obs: 103, groups:  family, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.77053    0.55890   1.379  0.16800   
## Bio2        -1.15610    0.42901  -2.695  0.00704 **
## Bio14        0.07206    0.26349   0.273  0.78449   
## DEM30       -1.21811    0.48937  -2.489  0.01281 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr) Bio2  Bio14
## Bio2  0.037             
## Bio14 0.075  0.059      
## DEM30 0.052  0.739 0.332

summary(mod_lmer5)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: pa ~ Bio4 + Bio13 + DEM30 + (1 | family)
##    Data: training
## 
##      AIC      BIC   logLik deviance df.resid 
##    135.7    148.8    -62.8    125.7       98 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2818 -0.8040  0.3789  0.7784  1.7711 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  family (Intercept) 1.014    1.007   
## Number of obs: 103, groups:  family, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   1.0461     0.5437   1.924   0.0544 .
## Bio4          0.5490     0.4772   1.150   0.2500  
## Bio13         0.1948     0.2350   0.829   0.4072  
## DEM30        -0.2346     0.3087  -0.760   0.4474  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr) Bio4  Bio13
## Bio4  0.330             
## Bio13 0.090  0.346      
## DEM30 0.078  0.310 0.436

summary(mod_lmer6)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: pa ~ DEM30 + Bio2 + Bio14:Bio13 + (1 | family)
##    Data: training
## 
##      AIC      BIC   logLik deviance df.resid 
##    125.8    139.0    -57.9    115.8       98 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7672 -0.6769  0.2354  0.6769  2.5162 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  family (Intercept) 1.784    1.336   
## Number of obs: 103, groups:  family, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   0.6296     0.6189   1.017  0.30907   
## DEM30        -1.3733     0.4671  -2.940  0.00328 **
## Bio2         -1.2759     0.4382  -2.912  0.00360 **
## Bio14:Bio13   0.3456     0.2384   1.450  0.14718   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DEM30  Bio2  
## DEM30        0.045              
## Bio2         0.046  0.758       
## Bio14:Bio13 -0.112 -0.264 -0.257

AICc(mod_lmer1, mod_lmer2, mod_lmer3, mod_lmer4,  mod_lmer5, mod_lmer6, mod_lmer7)#, mod_lmer6 )

Plot Models

################ plot using sjPlot
# plot_model(mod_lmer1, vline.color = "gray", 
#                     sort.est = TRUE)

plot_model(mod_lmer6, type = "re", sort.est = TRUE) #plots conditionals of random effects

## Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.

plot_model(mod_lmer6, type = "est", #fixed effects
         show.ci = TRUE, sort.est = TRUE)

plot_model(mod_lmer6, type = "eff", sort.est = TRUE) #### cambiar ejes

## $DEM30

## 
## $Bio2

## 
## $Bio14

## 
## $Bio13

# cero es mean(min(real_fulldat$DEM30)) /ELEVATION.sd + ELEVATION.mean
# min min(real_fulldat$DEM30)
# max max(real_fulldat$DEM30)

plot_model(mod_lmer6, type = "diag", #fixed effects
         show.ci = TRUE, sort.est = TRUE)

## $family

dat <-ggpredict(mod_lmer6, c( "DEM30", "Bio2","family") )
ggplot(dat, aes(x = x, y = predicted, colour = group)) +
    stat_smooth( se = FALSE, fullrange = TRUE) +
    facet_wrap(~facet)

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

DEM30 <- seq( from=min(fulldat$DEM30), to=  max(fulldat$DEM30), length.out=148)
Bio2 <- seq( from=min(fulldat$Bio2), to=max(fulldat$Bio2), length.out=148)
Bio14 <- seq( from=min(fulldat$Bio14), to=max(fulldat$Bio14), length.out=148)
Bio13 <- seq( from=min(fulldat$Bio13), to=max(fulldat$Bio13), length.out=148)
new_mat <- as.data.frame(cbind(DEM30, Bio2, Bio13, Bio14))

###### prediction

# plot and predict Occu
Predicted <-predict(mod_lmer6, re.form=NA,newdata=new_mat)
predichos <- cbind(Predicted,new_mat)

# #predict elevation
# ELEVATION.mean <- mean(real_fulldat$DEM30)
# ELEVATION.sd <- sd(real_fulldat$DEM30)
# predichos$ELEVATION2 <- (real_fulldat$DEM30)*ELEVATION.sd + ELEVATION.mean
# 
# plot(predichos$ELEVATION2 ~Predicted,type="l",col="blue",
#      xlab="Elevación",
#      ylab="Probabilidad")
# lines(lower~ELEVATION2, pred_p,type="l",col=gray(0.5))
# lines(upper~ELEVATION2, pred_p,type="l",col=gray(0.5))


# nuevodato <- new_data(mod_lmer6, c("family", "Bio2", "Bio14", "DEM30", "Bio13"))
# dat2 <- ggpredict(mod_lmer6, terms = c("Bio2", "DEM30", "genus"))
# ggplot(dat2, aes(x = x, y = predicted, colour = group)) +
#     stat_smooth( fullrange = TRUE) +
#     facet_wrap(~facet)


# Probabilities of fixed effects depending on grouping level (random intercept)
# plot_model(mod_lmer1, type = "ri.pc",
#           show.se = TRUE)

# sjp.lmer(mod_lmer1, type = "fe", 
#        show.ci = TRUE, sort.est = TRUE)
# sjp.lmer(mod_lmer1, type = "slope")

## evaluacion
ge2 <- evaluate(testing[testing$pa==1,], testing[testing$pa==0,],  glm2)
ge2

## class          : ModelEvaluation 
## n presences    : 32 
## n absences     : 13 
## AUC            : 0.65625 
## cor            : 0.2256837 
## max TPR+TNR at : 0.5621106

ge_lmer <- evaluate(testing[testing$pa==1,], testing[testing$pa==0,], mod_lmer6, allow.new.levels = TRUE) ####### change by genus
ge_lmer

## class          : ModelEvaluation 
## n presences    : 32 
## n absences     : 13 
## AUC            : 0.6442308 
## cor            : 0.2341882 
## max TPR+TNR at : 0.9254064

ge3 <- evaluate(testing[testing$pa==1,], testing[testing$pa==0,],  glm3, allow.new.levels = TRUE) ####### change by genus
ge3

## class          : ModelEvaluation 
## n presences    : 32 
## n absences     : 13 
## AUC            : 0.6490385 
## cor            : 0.2705193 
## max TPR+TNR at : 0.1874548

par(mfrow=c(1,3))
plot(ge2, 'ROC', main="GLM")
plot(ge_lmer, 'ROC', main="GLMM")
plot(ge3, 'ROC', main="GLMM")

# dev.off() 

####### 
pROC::auc(pROC::roc(training$pa, fitted(mod_lmer6)))

## Area under the curve: 0.8518

# Area under the curve: 
# An AUROC of > 70 represents a good fit. this is not an overoptimistic estimation since we have computed it on  cross-validation aproach. Aditionally and in the case of spatial data should make use of spatial CV.


##mapeo del modelo GLMM 4


var_orig <- scale(predictors2, center=TRUE, scale=TRUE)



##mapeo del modelo GLM2
pg3 <- predict(scale(predictors2, center=TRUE, scale=TRUE), glm2, ext=predictors2)
par(mfrow=c(1,2))
plot(pg3, main='GLM2/Binomial, BD')
plot(BD6, add=TRUE, col='grey50')
plot(deptos, add=TRUE, border='white')
tr <- threshold(ge2, 'prevalence')
plot(pg3>tr, main='presence/absence')
plot(BD6, add=TRUE, col='grey50')
plot(deptos, add=TRUE, border='white')

# points(testing, pch='+')
# dev.off()


pg4 <- raster::predict(scale(predictors2, center=TRUE, scale=TRUE), mod_lmer6, re.form=~0, type = "response") 
plot(pg4)


list_ras <- list()
for(i in 1:length(unique(BD$Familia))){
  list_ras[[i]] <- raster::predict(scale(predictors2, center=TRUE, scale=TRUE), mod_lmer6, const=(data.frame(family=unique(BD$Familia)[i])), 
                            type = "response", allow.new.levels=T) 

}

family_maps <- stack(list_ras)
names(family_maps) <- unique(BD$Familia)

# library(ggiraphExtra)


for(i in 1:length(unique(BD$Familia))){
  par(mfrow=c(1,2))
  plot(family_maps[[i]], 
       main=paste('Pred. prob. BD for', names(family_maps[[i]])))
  plot(BD6, add=TRUE, col='grey50')
  plot(deptos, add=TRUE, border='white')
  tr <- threshold(ge_lmer, 'prevalence')
  plot(family_maps[[i]]>tr, 
       main=paste('BD pres/abs for', names(family_maps[[i]])))
  plot(BD6, add=TRUE, col='grey50')
  plot(deptos, add=TRUE, border='white')
  # points(testing, pch='+')
  # dev.off()
}

##mapeo del modelo lmer1 
###########################
###### no es posible... falta genero
############################
# pg_lmer <- predict(scale(predictors2, center=TRUE, scale=TRUE), mod_lmer1, ext=predictors2)
# par(mfrow=c(1,2))
# plot(pg_lmer, main='GLM/binomial, BD')
# plot(BD4, add=TRUE, border='dark grey')
# tr <- threshold(ge_lmer, 'spec_sens')
# plot(pg>tr, main='presence/backgence')
# plot(BD4, add=TRUE, border='dark grey')
# points(testing, pch='+')

# dev.off()

Comentarios

BIO1 = Annual Mean Temperature BIO2 = Mean Diurnal Range (Mean of monthly (max temp - min temp)) BIO3 = Isothermality (BIO2/BIO7) (* 100) BIO4 = Temperature Seasonality (standard deviation *100) BIO5 = Max Temperature of Warmest Month BIO6 = Min Temperature of Coldest Month BIO7 = Temperature Annual Range (BIO5-BIO6) BIO8 = Mean Temperature of Wettest Quarter BIO9 = Mean Temperature of Driest Quarter BIO10 = Mean Temperature of Warmest Quarter BIO11 = Mean Temperature of Coldest Quarter BIO12 = Annual Precipitation BIO13 = Precipitation of Wettest Month BIO14 = Precipitation of Driest Month BIO15 = Precipitation Seasonality (Coefficient of Variation) BIO16 = Precipitation of Wettest Quarter BIO17 = Precipitation of Driest Quarter BIO18 = Precipitation of Warmest Quarter BIO19 = Precipitation of Coldest Quarter

BD in amphibians, Norte de Santander. A GLMM aproach

Acevedo A, Lizcano D, et.al.,

23 de Julio de 2019