R Load Packages and Data

Load Packages

library (readxl) # read excel data
library (tidyverse) # Data handling
library (tmap) # nice maps
# library (unmarked) # occu analitics
# library (ggcorrplot) # corplot in ggplot
library (raster)
library (rasterVis)
library (knitr) #
library (kableExtra) # nice tables
library (sf) #sf
library (mapview)
library (stringr)
library (lubridate)
library (ggeffects)
library (caret)

Load Data

The original data set is a excel file manual vetted by Fabian and another file from automatic identification using Kaleidoscope

#Tabla Fabian
manual_vetting<- read_excel("C:/Users/diego.lizcano/Box Sync/CodigoR/Murcielagos/data/Datos_Cubarral_FH_v2.xlsx")
#View(Datos_Cubarral_FH)

source("C:/Users/diego.lizcano/Box Sync/CodigoR/Murcielagos/R/dg2dec.R")

# case of "degrees minutes seconds" (DMS) for latitude 
# with character for Northern (N) and Southern (S) Hemispheres
# use regular expression metacharacter | to indicate logical operation "OR"
manual_vetting$Latitud <- gsub(" ", "", manual_vetting$Latitud) # remove spaces
manual_vetting$Longitud <- gsub(" ", "", manual_vetting$Longitud) # remove spaces
manual_vetting$Longitud <- paste(manual_vetting$Longitud, "W", sep="")
manual_vetting$Latitud <- paste(manual_vetting$Latitud, "N", sep="")

manual_vetting$lat<-dg2dec(varb=manual_vetting$Latitud, Dg="°", Min="′N")
manual_vetting$lon<-dg2dec(varb=manual_vetting$Longitud, Dg="°", Min="'", Sec="''E|W")

Study Area

See Recordings in a Map

 # put coords
manual_vetting_sf <-  st_as_sf(manual_vetting, coords = c("lon", "lat"), crs = "+proj=longlat +datum=WGS84")

recordings_plot <- mapview(manual_vetting_sf, zcol = "Cobertura",
                     color = "black",
                    legend = FALSE , 
        map.types = c("Esri.WorldImagery", "OpenTopoMap"))

recordings_plot
recordings_sp <- as(manual_vetting_sf, "Spatial")
centroid <- c(mean(manual_vetting$lon), mean(manual_vetting$lat))


##################################
# set extent get SRTM
##################################

# clip_window <- extent(-75.61 , -75.46, 4.65, 4.85)
# srtm <- raster::getData('SRTM', lon=centroid[1], lat=centroid[2])
# 
# # plot(clip_window,  border = "blue") 
# # plot(cams_sp, add=TRUE)
# 
# # crop the  raster using the vector extent
# srtm_crop <- crop(srtm, clip_window)
# siteCovs$Elev <- extract(srtm_crop, cams_sp)
# 
# plot(srtm_crop) 
# plot(cams_sp, add=TRUE)

See the Species Acumulation Curve for Hours

manual_vetting$por_Hora <- hour(manual_vetting$Hora)
manual_vetting$Secuencia <- paste(manual_vetting$Salida, manual_vetting$Dia_salida, sep="-")

manual_vetting$Sec_Hora <- paste(manual_vetting$Secuencia, manual_vetting$por_Hora, sep="-")

library(vegan)


# select by uniques
site_species <- manual_vetting %>% dplyr::select(lon, lat, Pases, Fases_Busq, Fases_aproxi, Fases_Termi, ID, Fecha, Hora, Cobertura, Noche, Sec_Hora, por_Hora) %>%  distinct() # igual a site_dept_loc<-

site_species_c1 <- site_species %>% filter(Cobertura == unique(site_species$Cobertura)[1]) 

site_species_c2 <- site_species %>% filter(Cobertura == unique(site_species$Cobertura)[2]) 

site_species_c3 <- site_species %>% filter(Cobertura == unique(site_species$Cobertura)[3]) 
# site_species$por_Hora <- hour(site_species$Hora)


########  acum for C1 
# get species
sp_qy1 <- unique (site_species_c1$ID)
# get sites
site_qy1 <- unique (site_species_c1$Cobertura)

# mat of site by day

mat_qy1 <- site_species %>%
  group_by(Sec_Hora, ID) %>%
  summarize_at("Pases", sum) %>% 
  spread(ID, Pases, fill=0) %>% as.data.frame() 


sp_y1 <- specaccum(mat_qy1[2:dim(mat_qy1)[2]])
plot(sp_y1, ci.type="poly", col="blue", lwd=2, ci.lty=0, ci.col="lightblue", 
     main = "Bat Accumulation Curve by Hour", xlab="Hour", ylab="Species")

rarecurve(t(mat_qy1[2:dim(mat_qy1)[2]]))#, step = 2, sample = raremax, col = "blue", cex = 0.6)

Activity (pases) by covertura by hour (1-5)

Notice the logaritmic scale

pas_by_noche <-  manual_vetting %>%
  group_by(Cobertura, ID, Noche) %>%
  summarize_at("Pases", sum)

pas_by_hora <-  manual_vetting %>%
  group_by(Cobertura, ID, por_Hora) %>%
  summarize_at("Pases", sum)

sp_by_cover <- manual_vetting %>%
  count(Cobertura, ID) 

ggplot(pas_by_noche, aes(x=Cobertura, y=Pases)) +
  geom_boxplot() + # facet_grid(Noche ~ .) +
  scale_y_continuous(trans='log2') + labs(y = "Pases por noche")

test1<-lm(Pases ~ Cobertura, data=manual_vetting, family = "poisson")
anova(test1)
anova2 <- aov(manual_vetting$Pases ~ manual_vetting$Cobertura)
summary(anova2)
##                            Df Sum Sq Mean Sq F value   Pr(>F)    
## manual_vetting$Cobertura    2   2.56  1.2812   19.82 2.67e-09 ***
## Residuals                4890 316.07  0.0646                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(anova2)

En este caso el ANOVA ha resultado significativo por lo que tiene sentido realizar comparaciones uno a uno. Se muestra una comparacion múltiple con test de TukeyHSD y comparacion T multiple.

Sin embargo los residuos sugieren que los datos no son normales!. En efecto son Conteos por lo que tiene mas sentido hacer algo Poisson.

print("Pairwise t test")
## [1] "Pairwise t test"
pairwise.t.test(manual_vetting$Pases, manual_vetting$Cobertura, p.adj = "none")
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  manual_vetting$Pases and manual_vetting$Cobertura 
## 
##          Bosque  Pastizal
## Pastizal 4.4e-10 -       
## SSP      4.5e-07 0.36    
## 
## P value adjustment method: none
print("Tukey")
## [1] "Tukey"
TukeyHSD(anova2)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = manual_vetting$Pases ~ manual_vetting$Cobertura)
## 
## $`manual_vetting$Cobertura`
##                         diff         lwr         upr     p adj
## Pastizal-Bosque -0.066874476 -0.09194696 -0.04180200 0.0000000
## SSP-Bosque      -0.059225258 -0.08670261 -0.03174790 0.0000014
## SSP-Pastizal     0.007649219 -0.01188275  0.02718118 0.6288329
plot(TukeyHSD(anova2))

La comparacion Pastizal-Bosque es diferente. Silvopastoril-Bosque tambien es diferente (el intervalo de confianza no cruza el cero), mientras que Silvopastoril-Pastizal no es diferente…. sugiere que el silvopastoril se comporta mas como pastizal que como bosque…. alrevez de lo que pensabamos!

Hay diferencias si tenemos en cuentas las especies?

Anova de dos vias

anova_2vias <- aov(formula = Pases  ~ Cobertura * ID, data = manual_vetting)
summary(anova_2vias)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## Cobertura       2   2.56  1.2812  20.290 1.68e-09 ***
## ID             58   6.39  0.1101   1.744 0.000434 ***
## Cobertura:ID   30   6.47  0.2156   3.414 1.02e-09 ***
## Residuals    4802 303.21  0.0631                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow = c(1,2))
plot(anova_2vias)

Si hay diferencias entre especies y coberturas y estas interactuan. Pero de nuevo no hay normalidad en los datos de acuerdo a los residuos.

probemos un GLM contando (Poisson) los pases por hora por cobertura

to_remove <- which(is.na(pas_by_hora$Pases)) # marca Nas
pas_by_hora$Pases[to_remove] <- 0 # remplaza NA por cero

glm1 <- glm(formula = Pases  ~ factor(por_Hora) + Cobertura , data = pas_by_hora, family = "poisson")


glm2 <- glm(formula = Pases  ~ factor(por_Hora) * Cobertura , data = pas_by_hora, family = "poisson")

anova (glm1, glm2)
AIC (glm1, glm2)
summary (glm2)
## 
## Call:
## glm(formula = Pases ~ factor(por_Hora) * Cobertura, family = "poisson", 
##     data = pas_by_hora)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -9.217  -4.702  -2.515  -0.286  24.171  
## 
## Coefficients: (4 not defined because of singularities)
##                                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)                           3.08453    0.08083  38.162  < 2e-16
## factor(por_Hora)1                    -1.44230    0.19695  -7.323 2.42e-13
## factor(por_Hora)2                    -2.57370    0.45446  -5.663 1.49e-08
## factor(por_Hora)3                    -3.08453    0.71171  -4.334 1.46e-05
## factor(por_Hora)4                    -2.39138    0.41617  -5.746 9.13e-09
## factor(por_Hora)5                    -1.10618    0.13104  -8.442  < 2e-16
## factor(por_Hora)6                    -1.13862    0.20554  -5.540 3.03e-08
## factor(por_Hora)17                   -1.98592    0.41617  -4.772 1.83e-06
## factor(por_Hora)18                    0.11415    0.10303   1.108 0.267899
## factor(por_Hora)19                   -0.85605    0.14805  -5.782 7.37e-09
## factor(por_Hora)20                   -1.13862    0.14429  -7.891 2.99e-15
## factor(por_Hora)21                   -0.59962    0.15231  -3.937 8.26e-05
## factor(por_Hora)22                   -2.12902    0.28889  -7.370 1.71e-13
## factor(por_Hora)23                   -2.29607    0.31216  -7.355 1.90e-13
## CoberturaPastizal                    -0.12846    0.11097  -1.158 0.247002
## CoberturaSSP                         -1.18741    0.17758  -6.687 2.28e-11
## factor(por_Hora)1:CoberturaPastizal   0.83327    0.23080   3.610 0.000306
## factor(por_Hora)2:CoberturaPastizal   1.94142    0.46830   4.146 3.39e-05
## factor(por_Hora)3:CoberturaPastizal   2.94387    0.71993   4.089 4.33e-05
## factor(por_Hora)4:CoberturaPastizal   1.26790    0.43472   2.917 0.003539
## factor(por_Hora)5:CoberturaPastizal   1.19464    0.16048   7.444 9.75e-14
## factor(por_Hora)6:CoberturaPastizal        NA         NA      NA       NA
## factor(por_Hora)17:CoberturaPastizal       NA         NA      NA       NA
## factor(por_Hora)18:CoberturaPastizal  0.66746    0.13285   5.024 5.05e-07
## factor(por_Hora)19:CoberturaPastizal  0.76788    0.17744   4.328 1.51e-05
## factor(por_Hora)20:CoberturaPastizal  1.30612    0.17493   7.467 8.22e-14
## factor(por_Hora)21:CoberturaPastizal  0.81743    0.18211   4.489 7.17e-06
## factor(por_Hora)22:CoberturaPastizal  2.15863    0.30706   7.030 2.07e-12
## factor(por_Hora)23:CoberturaPastizal  1.39229    0.33526   4.153 3.28e-05
## factor(por_Hora)1:CoberturaSSP        1.23686    0.30018   4.120 3.78e-05
## factor(por_Hora)2:CoberturaSSP        2.65066    0.50923   5.205 1.94e-07
## factor(por_Hora)3:CoberturaSSP        2.57370    0.77074   3.339 0.000840
## factor(por_Hora)4:CoberturaSSP        2.06288    0.48976   4.212 2.53e-05
## factor(por_Hora)5:CoberturaSSP        2.28868    0.21224  10.784  < 2e-16
## factor(por_Hora)6:CoberturaSSP             NA         NA      NA       NA
## factor(por_Hora)17:CoberturaSSP            NA         NA      NA       NA
## factor(por_Hora)18:CoberturaSSP       1.84595    0.19241   9.594  < 2e-16
## factor(por_Hora)19:CoberturaSSP       1.32605    0.23394   5.668 1.44e-08
## factor(por_Hora)20:CoberturaSSP       0.93609    0.25735   3.637 0.000275
## factor(por_Hora)21:CoberturaSSP       0.26648    0.26731   0.997 0.318825
## factor(por_Hora)22:CoberturaSSP       1.25355    0.38530   3.253 0.001140
## factor(por_Hora)23:CoberturaSSP       1.28625    0.42575   3.021 0.002518
##                                         
## (Intercept)                          ***
## factor(por_Hora)1                    ***
## factor(por_Hora)2                    ***
## factor(por_Hora)3                    ***
## factor(por_Hora)4                    ***
## factor(por_Hora)5                    ***
## factor(por_Hora)6                    ***
## factor(por_Hora)17                   ***
## factor(por_Hora)18                      
## factor(por_Hora)19                   ***
## factor(por_Hora)20                   ***
## factor(por_Hora)21                   ***
## factor(por_Hora)22                   ***
## factor(por_Hora)23                   ***
## CoberturaPastizal                       
## CoberturaSSP                         ***
## factor(por_Hora)1:CoberturaPastizal  ***
## factor(por_Hora)2:CoberturaPastizal  ***
## factor(por_Hora)3:CoberturaPastizal  ***
## factor(por_Hora)4:CoberturaPastizal  ** 
## factor(por_Hora)5:CoberturaPastizal  ***
## factor(por_Hora)6:CoberturaPastizal     
## factor(por_Hora)17:CoberturaPastizal    
## factor(por_Hora)18:CoberturaPastizal ***
## factor(por_Hora)19:CoberturaPastizal ***
## factor(por_Hora)20:CoberturaPastizal ***
## factor(por_Hora)21:CoberturaPastizal ***
## factor(por_Hora)22:CoberturaPastizal ***
## factor(por_Hora)23:CoberturaPastizal ***
## factor(por_Hora)1:CoberturaSSP       ***
## factor(por_Hora)2:CoberturaSSP       ***
## factor(por_Hora)3:CoberturaSSP       ***
## factor(por_Hora)4:CoberturaSSP       ***
## factor(por_Hora)5:CoberturaSSP       ***
## factor(por_Hora)6:CoberturaSSP          
## factor(por_Hora)17:CoberturaSSP         
## factor(por_Hora)18:CoberturaSSP      ***
## factor(por_Hora)19:CoberturaSSP      ***
## factor(por_Hora)20:CoberturaSSP      ***
## factor(por_Hora)21:CoberturaSSP         
## factor(por_Hora)22:CoberturaSSP      ** 
## factor(por_Hora)23:CoberturaSSP      ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 14417  on 341  degrees of freedom
## Residual deviance: 11474  on 304  degrees of freedom
## AIC: 12643
## 
## Number of Fisher Scoring iterations: 6
dat <- ggpredict(glm2, terms = c("por_Hora ", "Cobertura" ))
plot(dat, connect.lines = TRUE)

#####################################################
## Make your reciever-operater curve
#####################################################

 library(pROC)
#

# 
m.roc <- multiclass.roc(pas_by_hora$Pases, predict(glm2, backtransform = TRUE))
auc(m.roc)
## Multi-class area under the curve: 0.8394
plot(m.roc[[1]], m.roc[[2]])

roc(pas_by_hora$Pases, predict(glm2, backtransform = TRUE), plot=TRUE, smooth=TRUE)

## 
## Call:
## roc.default(response = pas_by_hora$Pases, predictor = predict(glm2,     backtransform = TRUE), smooth = TRUE, plot = TRUE)
## 
## Data: predict(glm2, backtransform = TRUE) in 128 controls (pas_by_hora$Pases 1) < 58 cases (pas_by_hora$Pases 2).
## Smoothing: binormal 
## Area under the curve: 0.4988
g1<-roc(pas_by_hora$Pases, predict(glm2, backtransform = TRUE))
ggroc(g1)

Estimating Model Accuracy Using Bootstrap… not ready yet!

# Bootstrap resampling involves taking random samples from the dataset (with re-selection) against which to evaluate the model. In aggregate, the results provide an indication of the variance of the models performance. Typically, large number of resampling iterations are performed (thousands or tends of thousands).

# define training control paquete caret
# define training control
train_control <- trainControl(method="cv", number=10)
# fix the parameters of the algorithm
grid <- expand.grid(.fL=c(0), .usekernel=c(FALSE))
# train the model
# model <- train(Pases ~ factor(por_Hora) + Cobertura, data=pas_by_hora, trControl=train_control, method="nb", tuneGrid=grid)

algorithmList <- c('glm')
control <- trainControl(method="repeatedcv", number=10, repeats=3, 
savePredictions=TRUE, classProbs=TRUE)

my_model <- train(as.data.frame(pas_by_hora)$Pases~., data=as.data.frame(pas_by_hora), trControl=control, method="nb")

models <- caretList(Pases ~ factor(por_Hora) + Cobertura, 
data=pas_by_hora, trControl=control, methodList=algorithmList)
results <- resamples(models)
summary(results)

Session Info

Details and pakages used

print(sessionInfo(), locale = FALSE)
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17134)
## 
## Matrix products: default
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] pROC_1.14.0         vegan_2.5-5         permute_0.9-5      
##  [4] caret_6.0-83        ggeffects_0.9.0     lubridate_1.7.4    
##  [7] mapview_2.7.0       sf_0.7-3            kableExtra_1.1.0   
## [10] knitr_1.23          rasterVis_0.45      latticeExtra_0.6-28
## [13] RColorBrewer_1.1-2  lattice_0.20-38     raster_2.8-19      
## [16] sp_1.3-1            tmap_2.2            forcats_0.4.0      
## [19] stringr_1.4.0       dplyr_0.8.1         purrr_0.3.2        
## [22] readr_1.3.1         tidyr_0.8.3         tibble_2.1.1       
## [25] ggplot2_3.1.1       tidyverse_1.2.1     readxl_1.3.1       
## 
## loaded via a namespace (and not attached):
##  [1] leafem_0.0.1       colorspace_1.4-1   class_7.3-15      
##  [4] leaflet_2.0.2      rgdal_1.4-3        sjlabelled_1.0.17 
##  [7] snakecase_0.10.0   satellite_1.0.1    base64enc_0.1-3   
## [10] dichromat_2.0-0    rstudioapi_0.10    hexbin_1.27.3     
## [13] prodlim_2018.04.18 xml2_1.2.0         codetools_0.2-16  
## [16] splines_3.5.3      sjmisc_2.7.9       jsonlite_1.6      
## [19] tmaptools_2.0-1    broom_0.5.2        cluster_2.0.7-1   
## [22] png_0.1-7          rgeos_0.4-2        shiny_1.3.2       
## [25] compiler_3.5.3     httr_1.4.0         backports_1.1.4   
## [28] assertthat_0.2.1   Matrix_1.2-17      lazyeval_0.2.2    
## [31] cli_1.1.0          later_0.8.0        htmltools_0.3.6   
## [34] tools_3.5.3        gtable_0.3.0       glue_1.3.1        
## [37] reshape2_1.4.3     Rcpp_1.0.1         cellranger_1.1.0  
## [40] nlme_3.1-140       iterators_1.0.10   crosstalk_1.0.0   
## [43] insight_0.2.0      timeDate_3043.102  lwgeom_0.1-7      
## [46] xfun_0.7           gower_0.2.0        rvest_0.3.4       
## [49] mime_0.6           XML_3.98-1.19      MASS_7.3-51.1     
## [52] zoo_1.8-5          scales_1.0.0       ipred_0.9-8       
## [55] hms_0.4.2          promises_1.0.1     parallel_3.5.3    
## [58] yaml_2.2.0         rpart_4.1-13       stringi_1.4.3     
## [61] leafpop_0.0.1      foreach_1.4.4      e1071_1.7-1       
## [64] lava_1.6.5         rlang_0.3.4        pkgconfig_2.0.2   
## [67] evaluate_0.14      labeling_0.3       recipes_0.1.5     
## [70] htmlwidgets_1.3    tidyselect_0.2.5   plyr_1.8.4        
## [73] magrittr_1.5       R6_2.4.0           generics_0.0.2    
## [76] DBI_1.0.0          mgcv_1.8-27        pillar_1.4.0      
## [79] haven_2.1.0        withr_2.1.2        units_0.6-2       
## [82] survival_2.43-3    nnet_7.3-12        modelr_0.1.4      
## [85] crayon_1.3.4       KernSmooth_2.23-15 rmarkdown_1.13    
## [88] grid_3.5.3         data.table_1.12.2  ModelMetrics_1.2.2
## [91] digest_0.6.19      classInt_0.3-3     webshot_0.5.1     
## [94] xtable_1.8-4       httpuv_1.5.1       stats4_3.5.3      
## [97] munsell_0.5.0      viridisLite_0.3.0

Cited Literature