Reunion con Aldemar

23 Julio 2019 - Sacar algunos generos del analisis desde la tabla inicial: Phylomedusa Flectonotus Caecilia Rheobates Trachyicefalus

-Intentar por familia. Aldemar sube la tabla con familia al github. Hecho!

BD

Read packages

knitr::opts_chunk$set(echo = TRUE)

library(sp)
library(rgdal)

## rgdal: version: 1.4-3, (SVN revision 828)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/diego.lizcano/Documents/R/win-library/3.5/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/diego.lizcano/Documents/R/win-library/3.5/rgdal/proj
##  Linking to sp version: 1.3-1

library(raster)
library(rgdal)
library(spatial.tools)
library(dismo)
library(rasterVis)

## Loading required package: lattice

## Loading required package: latticeExtra

## Loading required package: RColorBrewer

library(xtable)
library(sf)

## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3

library(mapview)
library(corrgram)

## 
## Attaching package: 'corrgram'

## The following object is masked from 'package:latticeExtra':
## 
##     panel.ellipse

## The following object is masked from 'package:lattice':
## 
##     panel.fill

library (corrplot) # correlations

## corrplot 0.84 loaded

library(lme4)

## Loading required package: Matrix

library(nlme)

## 
## Attaching package: 'nlme'

## The following object is masked from 'package:lme4':
## 
##     lmList

## The following object is masked from 'package:raster':
## 
##     getData

library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following object is masked from 'package:latticeExtra':
## 
##     layer

library(sjPlot)

library(mgcv) # GAM

## This is mgcv 1.8-27. For overview type 'help("mgcv-package")'.

library(MuMIn) # model average
library(mgcv) # visual GAM
library(ggeffects) # to predict models

Read Data

# Path to working directory
path <- "C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/data"
# shp <- "C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/Data2"
shp <- "C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/shp"
bio <- "C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/Capas2"

# BD4<-readOGR(dsn=shp, layer="Bd_generos1")

BD6<-readOGR(dsn=shp, layer="Bd5")

## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\diego.lizcano\Documents\GitHub\Bd_N_Santander\shp", layer: "Bd5"
## with 150 features
## It has 8 fields
## Integer64 fields read as strings:  Diagnostic

# BD6 <- read.csv("C:/Users/diego.lizcano/Documents/GitHub/Bd_N_Santander/data/Bd6.csv", header = T, sep = ";")

# define geo coords and put to SpatialPointDataFrames
crs.geo <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")  # geographical, datum WGS84

# make factor numeric
# BD6$X <- as.numeric(BD6$X)
# BD6$Y <- as.numeric(BD6$Y)
# add coord
# coordinates(BD6) <- ~X+Y # c(BD5$X, BD5$Y)
# proj4string(BD6) <- crs.geo  # define projection system of our data

BD <- st_as_sf(BD6) # Convert foreign object to an sf object

pos <-  grep(" ", BD$Species)
keep <-  substr(pos, 1, pos-1)

########################
# BD$genus <- 

###cargando variables ambientales
files <- list.files(path=bio, pattern='tif', full.names=TRUE )

View files

# View(files)
# Agrupar las variables ambientales en un objeto stack
predictors <- stack(files)
### clac  slope aspect
slope_aspect <- terrain(predictors$DEM30, opt = c("slope", "aspect"), unit = "degrees")

predictors <- addLayer(predictors, slope_aspect)

deptos <-raster::getData ('GADM', country='COL', level=1)

# names(predictors)
# plot(predictors$Bio1)
# plot(BD1, pch=18, add=TRUE, col="blue", main="Batrachochytrium dendrobatidis")

mapview(BD, zcol = "Diagnostic", legend = TRUE, map.types = c("Esri.WorldShadedRelief", "Esri.WorldImagery"), color = "grey40")

levelplot(predictors, main="all predictors")

# plot(BD["Diagnostic"], add=T)
# levelplot(predictors)

### correlation 
jnk=layerStats(predictors, 'pearson', na.rm=T)
corr_matrix=jnk$'pearson correlation coefficient'


# corrgram(corr_matrix, order=NULL, lower.panel=panel.shade,
#  upper.panel=NULL, text.panel=panel.txt,
#  main="Car")

##### correlation plot
# corrplot(corr_matrix, order = "FPC",type = "lower",  tl.cex = 0.8) # tl.pos = "lower",

####### function to add significance 
# mat : is a matrix of data
# ... : further arguments to pass to the native R cor.test function
cor.mtest <- function(mat, ...) {
    mat <- as.matrix(mat)
    n <- ncol(mat)
    p.mat<- matrix(NA, n, n)
    diag(p.mat) <- 0
    for (i in 1:(n - 1)) {
        for (j in (i + 1):n) {
            tmp <- cor.test(mat[, i], mat[, j], ...)
            p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
        }
    }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}

# matrix of the p-value of the correlation
# use the function
p.mat <- cor.mtest(predictors)
# head(p.mat[, 1:5])

##### correlation plot 
# Leave blank on no significant coefficient
corrplot(corr_matrix, p.mat = p.mat, sig.level = 0.05,  order="FPC", type = "lower",  tl.cex = 0.8) # tl.pos = "lower",

Remove higly correlated layers

## remove correlated layers
predictors2 <- dropLayer(predictors, c(1,2,3,4,9,15,16,18,19))

### correlation 
jnk=layerStats(predictors2, 'pearson', na.rm=T)
corr_matrix=jnk$'pearson correlation coefficient'


# matrix of the p-value of the correlation
p.mat <- cor.mtest(predictors2)

corrplot(corr_matrix, p.mat = p.mat, sig.level = 0.05,  order="FPC", type = "lower",  tl.cex = 0.8) # tl.pos = "lower",

print("predictores no correlacionados")

## [1] "predictores no correlacionados"

names(predictors2)

##  [1] "Bio13"  "Bio14"  "Bio15"  "Bio16"  "Bio18"  "Bio19"  "Bio2"  
##  [8] "Bio3"   "Bio4"   "Bio7"   "DEM30"  "slope"  "aspect"

Building GLM models

###Using non correlated variables

#Extraer la informacion de variables ambientales por ptos. con presencia de BD

# # head(BD)
# BD <- BD1[,-1] # quito la primera columna
# head(BD)
# names(BD)[1]<-paste("x")
# names(BD)[2]<-paste("y") # re nombro encabezados de  columnas
# # head(BD)
# 
# presvals <- (BD)
positive <- BD %>% dplyr::filter (Diagnostic==1) # %>% as.data.frame()
negative <- BD %>% dplyr::filter (Diagnostic==0) # %>% as.data.frame()


###################################  
##### standarizing! ############### 
###################################

presvals <- as.data.frame(extract(
    scale(predictors2, center=TRUE, scale=TRUE), as(positive, "Spatial")))

ausvals <- as.data.frame(extract(scale(predictors2, center=TRUE, scale=TRUE),
                   as(negative, "Spatial")))

###################################  
##### NO standarizing! ############
###################################

# presvals <- as.data.frame(extract(predictors2, 
#                     as(positive, "Spatial")))
# 
# ausvals <- as.data.frame(extract(predictors2, 
#                    as(negative, "Spatial")))


###### Family adding 
presvals$family <- positive$Familia
presvals$pa <-  rep(1,88)
ausvals$family  <- negative$Familia
ausvals$pa <-  rep(0,62)
                 
########### Full data
fulldat <- rbind(presvals, ausvals) #####


# 
set.seed(2000)
# backg <- randomPoints(predictors, n=500, extf = 1.25)


# plot(predictors2$DEM30)
# plot(BD["Diagnostic"], add=T)
# plot(BD, pch=18, add=TRUE, col="blue", main="Batrachochytrium dendrobatidis")
# 

#generando datos de prueba y entrenamiento con los datos de presencia y pseudo-ausencia con particion 80-20%
indexes_training = sample(1:nrow(fulldat), size=0.70*nrow(fulldat))
indexes_testing = sample(1:nrow(fulldat), size=0.30*nrow(fulldat))

###Presencias
#identifica el 80% de los datos como datos training
training = as.data.frame(fulldat[indexes_training,])
dim(training)

## [1] 105  15

#identifica el 20% de los datos como datos testing
testing = as.data.frame(fulldat[-indexes_training,])
dim(testing)

## [1] 45 15

# species
# testing$species <- positive$Species[-indexes_pres]
# training$species <- positive$Species[indexes_pres]
# View(testing)


###ausencia
#identifica el 20% de los datos como datos prueba/test
# backg_test = as.data.frame(ausvals[indexes_aus_test,])
# dim(backg_test)
#identifica el 80% de los datos como datos entrenamiento/train
# backg_train = as.data.frame(ausvals[-indexes_aus_test,])
# dim(backg_train)

# species
# View(testing)
# backg_train$species <- negative$Species[-indexes_aus_test]
# backg_test$species <- negative$Species[indexes_aus_test]


# mapeando los datos de entrenamiento y prueba de presencias y ausvals
# r = raster(predictors2, 1)
# plot(!is.na(r), col=c('white', 'light grey'), legend=FALSE, main="datos de entrenamiento y prueba ")
# points(backg_train, pch='-', cex=0.5, col='red')
# points(backg_test, pch='-', cex=0.5, col='black')
# points(testing, pch= '+', col='green')
# points(training, pch='+', col='blue')
# 

trainpres <- as.data.frame(testing)# data.frame( extract(predictors2, testing) )
# trainbackg <- as.data.frame(backg_train)# data.frame( extract(predictors2, backg_train) )
# train <- rbind(trainpres, trainbackg)
# pb_train <- c(rep(1, nrow(trainpres)), rep(0, nrow(trainbackg)))
# envtrain <- data.frame( cbind(pa=pb_train, train) )


#### extract from stack
testpres <- as.data.frame(training)# data.frame( extract(predictors2, training) )
# testbackg <- as.data.frame(backg_test)# data.frame( extract(predictors2, backg_test) )

# head(envtrain)


#Ajuste del modelo GLM y predicc??n 
# GLM binomial 
glm1 <- glm(pa ~  Bio13 + Bio14 + Bio15 +
            Bio16 +  Bio18 + Bio19 + Bio2 +  
            Bio3 + Bio4 +  Bio7 + 
            DEM30 + slope + aspect,
           family = binomial(link = "logit"), data=training)
summary(glm1)

## 
## Call:
## glm(formula = pa ~ Bio13 + Bio14 + Bio15 + Bio16 + Bio18 + Bio19 + 
##     Bio2 + Bio3 + Bio4 + Bio7 + DEM30 + slope + aspect, family = binomial(link = "logit"), 
##     data = training)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9859  -0.7970   0.4089   0.7890   1.8390  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   1.3247     0.7229   1.833  0.06687 . 
## Bio13        -2.6277     1.1428  -2.299  0.02148 * 
## Bio14         0.3997     0.4256   0.939  0.34763   
## Bio15         0.4988     0.4455   1.120  0.26287   
## Bio16         1.5125     1.3098   1.155  0.24820   
## Bio18         0.7039     0.5286   1.332  0.18298   
## Bio19         0.6918     0.3055   2.264  0.02355 * 
## Bio2        -13.4655    13.1114  -1.027  0.30442   
## Bio3          7.4036     7.4207   0.998  0.31843   
## Bio4          1.2590     1.1828   1.064  0.28711   
## Bio7         12.0277    13.7746   0.873  0.38257   
## DEM30        -2.2766     0.7076  -3.217  0.00129 **
## slope        -0.1587     0.3194  -0.497  0.61929   
## aspect        0.4633     0.3033   1.528  0.12659   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 144.41  on 104  degrees of freedom
## Residual deviance: 109.66  on  91  degrees of freedom
## AIC: 137.66
## 
## Number of Fisher Scoring iterations: 5

coef(glm1)

## (Intercept)       Bio13       Bio14       Bio15       Bio16       Bio18 
##   1.3246576  -2.6276997   0.3997465   0.4988384   1.5124701   0.7038615 
##       Bio19        Bio2        Bio3        Bio4        Bio7       DEM30 
##   0.6918303 -13.4655406   7.4035918   1.2590180  12.0276705  -2.2766347 
##       slope      aspect 
##  -0.1586991   0.4632679

# Stepwise Regression
library(MASS)

## 
## Attaching package: 'MASS'

## The following objects are masked from 'package:raster':
## 
##     area, select

step <- stepAIC(glm1, direction="backward", k = 2)

## Start:  AIC=137.66
## pa ~ Bio13 + Bio14 + Bio15 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 + 
##     Bio4 + Bio7 + DEM30 + slope + aspect
## 
##          Df Deviance    AIC
## - slope   1   109.91 135.91
## - Bio7    1   110.42 136.42
## - Bio14   1   110.57 136.57
## - Bio3    1   110.66 136.66
## - Bio2    1   110.72 136.72
## - Bio4    1   110.83 136.83
## - Bio15   1   110.95 136.95
## - Bio16   1   111.17 137.16
## - Bio18   1   111.40 137.40
## <none>        109.66 137.66
## - aspect  1   112.16 138.16
## - Bio19   1   115.20 141.20
## - Bio13   1   115.72 141.72
## - DEM30   1   124.47 150.47
## 
## Step:  AIC=135.91
## pa ~ Bio13 + Bio14 + Bio15 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 + 
##     Bio4 + Bio7 + DEM30 + aspect
## 
##          Df Deviance    AIC
## - Bio14   1   110.67 134.67
## - Bio7    1   110.78 134.78
## - Bio3    1   111.06 135.06
## - Bio2    1   111.09 135.09
## - Bio15   1   111.13 135.13
## - Bio4    1   111.33 135.32
## - Bio18   1   111.52 135.52
## - Bio16   1   111.72 135.72
## <none>        109.91 135.91
## - aspect  1   112.48 136.48
## - Bio19   1   115.71 139.71
## - Bio13   1   116.41 140.41
## - DEM30   1   124.54 148.54
## 
## Step:  AIC=134.67
## pa ~ Bio13 + Bio15 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 + Bio4 + 
##     Bio7 + DEM30 + aspect
## 
##          Df Deviance    AIC
## - Bio15   1   111.16 133.16
## - Bio7    1   111.48 133.48
## - Bio2    1   111.79 133.79
## - Bio3    1   111.81 133.81
## - Bio18   1   112.25 134.25
## - Bio16   1   112.61 134.61
## <none>        110.67 134.67
## - Bio4    1   112.72 134.72
## - aspect  1   113.18 135.18
## - Bio13   1   116.52 138.52
## - Bio19   1   116.55 138.55
## - DEM30   1   124.89 146.89
## 
## Step:  AIC=133.16
## pa ~ Bio13 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 + Bio4 + Bio7 + 
##     DEM30 + aspect
## 
##          Df Deviance    AIC
## - Bio7    1   112.07 132.07
## - Bio18   1   112.34 132.34
## - Bio2    1   112.39 132.39
## - Bio3    1   112.40 132.40
## <none>        111.16 133.16
## - Bio4    1   113.35 133.35
## - aspect  1   113.38 133.38
## - Bio16   1   113.56 133.56
## - Bio13   1   116.87 136.87
## - Bio19   1   116.92 136.92
## - DEM30   1   126.08 146.08
## 
## Step:  AIC=132.07
## pa ~ Bio13 + Bio16 + Bio18 + Bio19 + Bio2 + Bio3 + Bio4 + DEM30 + 
##     aspect
## 
##          Df Deviance    AIC
## - Bio18   1   112.79 130.79
## - Bio3    1   113.45 131.45
## <none>        112.07 132.07
## - Bio16   1   114.17 132.17
## - aspect  1   114.63 132.63
## - Bio4    1   116.02 134.01
## - Bio13   1   117.03 135.03
## - Bio19   1   118.63 136.63
## - DEM30   1   126.09 144.09
## - Bio2    1   126.51 144.51
## 
## Step:  AIC=130.79
## pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio3 + Bio4 + DEM30 + aspect
## 
##          Df Deviance    AIC
## - Bio3    1   114.06 130.06
## <none>        112.79 130.79
## - aspect  1   114.88 130.88
## - Bio4    1   117.20 133.20
## - Bio16   1   117.20 133.20
## - Bio13   1   118.12 134.12
## - Bio19   1   118.77 134.77
## - DEM30   1   126.23 142.23
## - Bio2    1   127.34 143.34
## 
## Step:  AIC=130.06
## pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio4 + DEM30 + aspect
## 
##          Df Deviance    AIC
## - aspect  1   115.59 129.59
## <none>        114.06 130.06
## - Bio4    1   118.19 132.19
## - Bio16   1   118.45 132.45
## - Bio19   1   118.82 132.82
## - Bio13   1   119.23 133.23
## - DEM30   1   126.45 140.46
## - Bio2    1   128.82 142.82
## 
## Step:  AIC=129.59
## pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio4 + DEM30
## 
##         Df Deviance    AIC
## <none>       115.59 129.59
## - Bio4   1   119.07 131.07
## - Bio16  1   119.69 131.69
## - Bio19  1   120.32 132.32
## - Bio13  1   120.53 132.53
## - DEM30  1   128.18 140.18
## - Bio2   1   130.28 142.28

step$anova # display results

Now only significant explanatory covars (layers) from Final Model glm1

##########################################
## now only covars from Final Model glm1
##########################################

glm2 <- glm(pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio4 + DEM30,
            family = binomial(link = "logit"), data=training)

glm3 <- glm(pa ~ Bio13 + Bio19 + Bio2 + DEM30,
            family = binomial(link = "logit"), data=training)

glm4 <- glm(pa ~ Bio13 + Bio2 +  DEM30,
            family = binomial(link = "logit"), data=training)

glm5 <- glm(pa ~ Bio13 + Bio19 + DEM30, # only precipitation
            family = binomial(link = "logit"), data=training)

glm6 <- glm(pa ~ Bio13 + Bio19 + family, # only precipitation
            family = binomial(link = "logit"), data=training)

AICc(glm2, glm3, glm4, glm5)

# AICc should be used instead AIC when sample size is small in comparison to the number of estimated parameters (Burnham & Anderson 2002 recommend its use when n / K < 40). 105/6 = 17.5
summary(glm2)

## 
## Call:
## glm(formula = pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio4 + DEM30, 
##     family = binomial(link = "logit"), data = training)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3859  -0.9372   0.4960   0.8673   1.9627  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.8295     0.3453   2.402 0.016284 *  
## Bio13        -2.1483     1.0090  -2.129 0.033245 *  
## Bio16         2.0441     1.0584   1.931 0.053437 .  
## Bio19         0.5698     0.2745   2.076 0.037939 *  
## Bio2         -1.6674     0.4871  -3.423 0.000619 ***
## Bio4          1.0658     0.5854   1.821 0.068646 .  
## DEM30        -1.6701     0.5330  -3.133 0.001730 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 144.41  on 104  degrees of freedom
## Residual deviance: 115.59  on  98  degrees of freedom
## AIC: 129.59
## 
## Number of Fisher Scoring iterations: 4

#### multiple group effects with multiple group effect terms factor species.
# #### Random-effects  
# mod_lmer1 <- glmer(pa ~ Bio13 + Bio16 + Bio19 + Bio2 + Bio4 + DEM30 + (1|genus), data=training, #fulldat,
#                    family = binomial(link = "logit"))
# #### fixed-effects  
# mod_lmer2 <- glmer(pa ~  I(Bio13^2) : I(Bio16^2) + I(Bio19^2) + I(Bio2^2) + (1|genus), data=training, #fulldat,
#                    family = binomial(link = "logit"))
# 
# mod_lmer3 <- glmer(pa ~ Bio16 +  DEM30 + (Bio2|genus), data=training, #fulldat,
#                    family = binomial(link = "logit"))
# 
# mod_lmer4 <- glmer(pa ~ DEM30 + Bio2 + (Bio16|genus), data=training, #fulldat,
#                    family = binomial(link = "logit"))
# 
# mod_lmer5 <- glmer(pa ~ Bio16 : Bio2 * (1|genus), data=training, #fulldat,
#                    family = binomial(link = "logit"))


mod_lmer1 <- glmer(pa ~  Bio13 + Bio2 + DEM30 + (1|family), data=training, #fulldat,
                   family = binomial(link = "logit"))

mod_lmer2 <- glmer(pa ~   Bio14 +  Bio4 + DEM30 + (1|family), data=training, #fulldat,
                   family = binomial(link = "logit"))


mod_lmer3 <- glmer(pa ~  Bio19 + Bio2  + Bio4 + (1|family), data=training, #fulldat,
                   family = binomial(link = "logit"))

mod_lmer4 <- glmer(pa ~  Bio2 + Bio14 + DEM30 + (1|family), data=training, #fulldat,
                   family = binomial(link = "logit"))

mod_lmer5 <- glmer(pa ~  Bio4 + Bio13 + DEM30 + (1|family), data=training, #fulldat,
              family = binomial(link = "logit"))
# mod_lmer3 <- glmer(pa ~ Bio16 +  DEM30 + (Bio2|family), data=training, #fulldat,
#                    family = binomial(link = "logit"))
# 
# mod_lmer4 <- glmer(pa ~ DEM30 + Bio2 + (Bio16|family), data=training, #fulldat,
#                    family = binomial(link = "logit"))
# 
# mod_lmer5 <- glmer(pa ~ Bio16 : Bio2 * (1|family), data=training, #fulldat,
# >>>>>>> 5d9913e35d0aedac423cdfd46f27ec3dcb4c31ba
#                    family = binomial(link = "logit"))

mod_lmer6 <- glmer(pa ~  Bio2 : Bio13 +  DEM30 + (1|family), data=training, #fulldat,
                   family = binomial(link = "logit"))


summary(mod_lmer1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: pa ~ Bio13 + Bio2 + DEM30 + (1 | family)
##    Data: training
## 
##      AIC      BIC   logLik deviance df.resid 
##    127.3    140.6    -58.7    117.3      100 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.7234 -0.6877  0.2566  0.6816  2.4521 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  family (Intercept) 1.89     1.375   
## Number of obs: 105, groups:  family, 10
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.49912    0.57267   0.872  0.38344   
## Bio13       -0.01819    0.25042  -0.073  0.94210   
## Bio2        -1.59123    0.48777  -3.262  0.00111 **
## DEM30       -1.56975    0.55529  -2.827  0.00470 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr) Bio13  Bio2  
## Bio13 -0.073              
## Bio2   0.025  0.148       
## DEM30  0.024  0.264  0.825

summary(mod_lmer2)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: pa ~ Bio14 + Bio4 + DEM30 + (1 | family)
##    Data: training
## 
##      AIC      BIC   logLik deviance df.resid 
##    138.9    152.2    -64.5    128.9      100 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3586 -0.7416  0.2829  0.8305  1.6860 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  family (Intercept) 1.905    1.38    
## Number of obs: 105, groups:  family, 10
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.94151    0.61630   1.528    0.127
## Bio14        0.24986    0.28024   0.892    0.373
## Bio4         0.77386    0.49902   1.551    0.121
## DEM30        0.01837    0.31798   0.058    0.954
## 
## Correlation of Fixed Effects:
##       (Intr) Bio14 Bio4 
## Bio14 0.076             
## Bio4  0.390  0.141      
## DEM30 0.138  0.477 0.320

summary(mod_lmer3)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: pa ~ Bio19 + Bio2 + Bio4 + (1 | family)
##    Data: training
## 
##      AIC      BIC   logLik deviance df.resid 
##    132.0    145.3    -61.0    122.0      100 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.1874 -0.7413  0.2351  0.7615  1.5238 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  family (Intercept) 1.627    1.276   
## Number of obs: 105, groups:  family, 10
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   1.1469     0.6110   1.877   0.0605 .
## Bio19         0.5186     0.2838   1.827   0.0677 .
## Bio2         -0.8151     0.3291  -2.477   0.0133 *
## Bio4          1.4781     0.6117   2.416   0.0157 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr) Bio19  Bio2  
## Bio19  0.166              
## Bio2  -0.168 -0.440       
## Bio4   0.441  0.544 -0.390

summary(mod_lmer4)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: pa ~ Bio2 + Bio14 + DEM30 + (1 | family)
##    Data: training
## 
##      AIC      BIC   logLik deviance df.resid 
##    127.1    140.4    -58.6    117.1      100 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.6185 -0.6670  0.2252  0.6955  2.5113 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  family (Intercept) 2.039    1.428   
## Number of obs: 105, groups:  family, 10
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   0.4995     0.5872   0.851  0.39494   
## Bio2         -1.5845     0.4866  -3.256  0.00113 **
## Bio14         0.1401     0.2939   0.477  0.63363   
## DEM30        -1.4888     0.5590  -2.663  0.00774 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr) Bio2   Bio14 
## Bio2   0.038              
## Bio14  0.012 -0.007       
## DEM30  0.050  0.795  0.252

AICc(mod_lmer1, mod_lmer2, mod_lmer3, mod_lmer4,  mod_lmer5, mod_lmer6)#, mod_lmer6 )

G A M Models

#### G A M   Models 
# #### 
# gam1 <-  gam (pa ~ s(Bio13) + s(Bio16) + s(Bio19) + 
#                 s(Bio2) + s(Bio4) + DEM30,
#               family = binomial(link = "logit"), data=training)
# 
# gam2 <-  gam (pa ~ te(Bio13, Bio16) + 
#                 s(Bio19) + te(Bio2, Bio4) ,
#               family = binomial(link = "logit"), method="REML",
#               select=TRUE, data=training)
# 
# gam3 <-  gam (pa ~ Bio13 + Bio16 + 
#                  Bio19 + Bio2 + Bio4 + s(DEM30),
#               family = binomial, data=training)
# 
# gam4 <-  gam (pa ~ s(DEM30) + te(Bio16, Bio2),
#               family = binomial, data=training)
# 
# 
# 
# AICc(gam1, gam2, gam3, gam4)
# 
# #vis.gam(gam1, view=c("Bio6", "Bio13"), type="response")  
# gam.ME <-  gamm(pa ~ s(Bio13) + s(Bio16) + s(Bio19) + 
#                 s(Bio2) + s(Bio4) + s(DEM30) , method="REML",
#               family = binomial(link = "logit"), data=training )#, random=list(genus=~1))  
# 


################ plot using sjPlot
# plot_model(mod_lmer1, vline.color = "gray", 
#                     sort.est = TRUE)

plot_model(mod_lmer4, type = "re", sort.est = TRUE) #plots conditionals of random effects

## Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.

plot_model(mod_lmer4, type = "est", #fixed effects
         show.ci = TRUE, sort.est = TRUE)

plot_model(mod_lmer4, type = "diag", #fixed effects
         show.ci = TRUE, sort.est = TRUE)

## $family

dat <-ggpredict(mod_lmer4, c( "DEM30", "Bio2","family") )
ggplot(dat, aes(x = x, y = predicted, colour = group)) +
    stat_smooth( se = FALSE, fullrange = TRUE) +
    facet_wrap(~facet)

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# dat2 <- ggpredict(mod_lmer6, terms = c("Bio2", "DEM30", "genus"))
# ggplot(dat2, aes(x = x, y = predicted, colour = group)) +
#     stat_smooth( fullrange = TRUE) +
#     facet_wrap(~facet)


# Probabilities of fixed effects depending on grouping level (random intercept)
# plot_model(mod_lmer1, type = "ri.pc",
#           show.se = TRUE)

# sjp.lmer(mod_lmer1, type = "fe", 
#        show.ci = TRUE, sort.est = TRUE)
# sjp.lmer(mod_lmer1, type = "slope")

## evaluacion
ge2 <- evaluate(testing[testing$pa==1,], testing[testing$pa==0,],  glm2)
ge2

## class          : ModelEvaluation 
## n presences    : 30 
## n absences     : 15 
## AUC            : 0.6911111 
## cor            : 0.2996497 
## max TPR+TNR at : 0.7853403

ge_lmer <- evaluate(testing[testing$pa==1,], testing[testing$pa==0,], mod_lmer4, allow.new.levels = TRUE) ####### change by genus
ge_lmer

## class          : ModelEvaluation 
## n presences    : 30 
## n absences     : 15 
## AUC            : 0.7022222 
## cor            : 0.3350928 
## max TPR+TNR at : -0.06868684

ge3 <- evaluate(testing[testing$pa==1,], testing[testing$pa==0,],  glm3, allow.new.levels = TRUE) ####### change by genus
ge3

## class          : ModelEvaluation 
## n presences    : 30 
## n absences     : 15 
## AUC            : 0.7133333 
## cor            : 0.3529656 
## max TPR+TNR at : -0.1701741

par(mfrow=c(1,3))
plot(ge2, 'ROC', main="GLM")
plot(ge_lmer, 'ROC', main="GLMM")
plot(ge3, 'ROC', main="GLMM")

# dev.off() 

####### 
pROC::auc(pROC::roc(training$pa, fitted(mod_lmer4)))

## Area under the curve: 0.865

# Area under the curve: 
# An AUROC of > 70 represents a good fit. this is not an overoptimistic estimation since we have computed it on  cross-validation aproach. Aditionally and in the case of spatial data should make use of spatial CV.


##mapeo del modelo GLMM 4


var_orig <- scale(predictors2, center=TRUE, scale=TRUE)



##mapeo del modelo GLM2
pg3 <- predict(scale(predictors2, center=TRUE, scale=TRUE), glm2, ext=predictors2)
par(mfrow=c(1,2))
plot(pg3, main='GLM2/Binomial, BD')
plot(BD6, add=TRUE, col='grey50')
plot(deptos, add=TRUE, border='white')
tr <- threshold(ge2, 'prevalence')
plot(pg3>tr, main='presence/absence')
plot(BD6, add=TRUE, col='grey50')
plot(deptos, add=TRUE, border='white')

# points(testing, pch='+')
# dev.off()


pg4 <- raster::predict(scale(predictors2, center=TRUE, scale=TRUE), mod_lmer4, re.form=~0, type = "response") 
plot(pg4)


list_ras <- list()
for(i in 1:length(unique(BD$Familia))){
  list_ras[[i]] <- raster::predict(scale(predictors2, center=TRUE, scale=TRUE), mod_lmer4, const=(data.frame(family=unique(BD$Familia)[i])), 
                            type = "response", allow.new.levels=T) 

}

family_maps <- stack(list_ras)
names(family_maps) <- unique(BD$Familia)

# library(ggiraphExtra)


for(i in 1:length(unique(BD$Familia))){
  par(mfrow=c(1,2))
  plot(family_maps[[i]], 
       main=paste('Pred. prob. BD for', names(family_maps[[i]])))
  plot(BD6, add=TRUE, col='grey50')
  plot(deptos, add=TRUE, border='white')
  tr <- threshold(ge_lmer, 'prevalence')
  plot(family_maps[[i]]>tr, 
       main=paste('BD pres/abs for', names(family_maps[[i]])))
  plot(BD6, add=TRUE, col='grey50')
  plot(deptos, add=TRUE, border='white')
  # points(testing, pch='+')
  # dev.off()
}

##mapeo del modelo lmer1 
###########################
###### no es posible... falta genero
############################
# pg_lmer <- predict(scale(predictors2, center=TRUE, scale=TRUE), mod_lmer1, ext=predictors2)
# par(mfrow=c(1,2))
# plot(pg_lmer, main='GLM/binomial, BD')
# plot(BD4, add=TRUE, border='dark grey')
# tr <- threshold(ge_lmer, 'spec_sens')
# plot(pg>tr, main='presence/backgence')
# plot(BD4, add=TRUE, border='dark grey')
# points(testing, pch='+')

# dev.off()

Comentarios

BIO1 = Annual Mean Temperature BIO2 = Mean Diurnal Range (Mean of monthly (max temp - min temp)) BIO3 = Isothermality (BIO2/BIO7) (* 100) BIO4 = Temperature Seasonality (standard deviation *100) BIO5 = Max Temperature of Warmest Month BIO6 = Min Temperature of Coldest Month BIO7 = Temperature Annual Range (BIO5-BIO6) BIO8 = Mean Temperature of Wettest Quarter BIO9 = Mean Temperature of Driest Quarter BIO10 = Mean Temperature of Warmest Quarter BIO11 = Mean Temperature of Coldest Quarter BIO12 = Annual Precipitation BIO13 = Precipitation of Wettest Month BIO14 = Precipitation of Driest Month BIO15 = Precipitation Seasonality (Coefficient of Variation) BIO16 = Precipitation of Wettest Quarter BIO17 = Precipitation of Driest Quarter BIO18 = Precipitation of Warmest Quarter BIO19 = Precipitation of Coldest Quarter

BD in amphibians, Norte de Santander. A GLMM aproach

Acevedo A, Lizcano D, et.al.,

23 de Julio de 2019