knitr::opts_chunk$set(
    eval = TRUE,
    echo = TRUE,
    message = FALSE,
    warning = FALSE
)

Introduction

This document gives an introduction to the statistical analysis of insect data gathered by exclosure experiments.

The data was analyzed by an N-Mixture model, considering random effects

Fitting the multinomial-Poisson mixture model to data collected.

##Libraries used (Packages)
# library(ggplot2) # Used for data visualisation (i.e barcharts)
# library(reshape2) # Manipulation of data frame
# library(RColorBrewer) # Colours in bar charts
# library(knitr) # Complies this dynamic report

library (readxl) # read excel data
library (tidyverse) # Data handling
# library (plyr)
#library (hrbrthemes)
# library(dplyr)
# library(tidyr)
# library (viridis)

# library (unmarked) # occu analitics
# library (ggcorrplot) # corplot in ggplot
# library (raster)
# library (rasterVis)
# library (kableExtra) # nice tables

# library (stringr)
library (lubridate)
library (ggeffects) # test and plot glm
library (bayesplot)
library (ubms)
library (stocc)
library (mapview)
library (sf)
library(wiqid) #Quick and Dirty Estimates for Wildlife

Load data

# bat_exclusure <- read_excel("D:/BoxFiles/Box Sync/CodigoR/Unillanos-Bat2019/data/Datos_Bat_exclusion_29sep2021_tydy_fix_unmarked_no_na.xlsx", 
#     sheet = "Full_plagas")
# 
# luna <- read_excel("D:/BoxFiles/Box Sync/CodigoR/Unillanos-Bat2019/data/Datos_Bat_exclusion_12dic2019_tydy_fix_unmarked_no_na.xlsx", 
#     sheet = "luna")


bat_exclusure <- read_excel("D:/BoxFiles/Box Sync/CodigoR/Unillanos-Bat2019/data/datos_analisis_servicios2022.xlsx", 
    sheet = "vaca")

luna <-  read_excel("D:/BoxFiles/Box Sync/CodigoR/Unillanos-Bat2019/data/datos_analisis_servicios2022.xlsx", 
    sheet = "luna") # read_excel("D:/BoxFiles/Box Sync/CodigoR/Unillanos-Bat2019/data/Datos_Bat_exclusion_12dic2019_tydy_fix_unmarked_no_na.xlsx", sheet = "luna")

Fit the N-mixture model of Royle (2004) in unmarked and compare with package ubms

Multinomial-Poisson mixture model of Royle (2004) fited by multinomPois function in unmarked and stan_multinomPois in ubms.

Fiting the multinomial-Poisson mixture model is useful for data collected via survey methods such as removal or double observer.

Scales

det is in plogis scale and state in exp. So to get real values use plogis and log

bat_exclusure$site <- seq(1:dim(bat_exclusure)[1])
bat_exclusure$Lat <- as.numeric(bat_exclusure$Latitud)
bat_exclusure$Lon <- as.numeric(bat_exclusure$Longitud)


############## convert to sf
bat_exclusure_sf = st_as_sf(bat_exclusure, coords = c("Lon", "Lat"), crs = 4326, agr = "constant")

mapview(bat_exclusure_sf, zcol = "Tratamiento")#, map.types = c("Esri.WorldShadedRelief", "OpenStreetMap.DE"), color = "grey40")
# Organize data
CountMat <- as.matrix(bat_exclusure[,9:13])

umf <- unmarkedFrameMPois(y=CountMat, 
                          siteCovs=data.frame(bat_exclusure[,1:8]),
                          obsCovs=list(luna=standardize(as.matrix(luna[,8:12]),
                           center = TRUE, scale = TRUE)),
                          type = "removal"  )

summary(umf)
## unmarkedFrame Object
## 
## 88 sites
## Maximum number of observations per site: 5 
## Mean number of observations per site: 5 
## Sites with at least one detection: 88 
## 
## Tabulation of y observations:
##   0   1   2   3   4   5   6   7   8   9  10  11  12  16 
## 152 114  64  37  22  16   8   9  10   4   1   1   1   1 
## 
## Site-level covariates:
##        s                 dia                                    Sitio   
##  Min.   : 1.00   10/11/2019:28   ANDORRA 11ConvencionalEncierro    : 2  
##  1st Qu.:22.75   17/11/2019:32   ANDORRA 11ConvencionalSin_Encierro: 2  
##  Median :44.50   18/11/2019: 8   ANDORRA 12ConvencionalEncierro    : 2  
##  Mean   :44.50   23/11/2019:20   ANDORRA 12ConvencionalSin_Encierro: 2  
##  3rd Qu.:66.25                   ANDORRA 21ConvencionalEncierro    : 2  
##  Max.   :88.00                   ANDORRA 21SilvopastorilEncierro   : 2  
##                                  (Other)                           :76  
##      Latitud         Longitud         Finca             Sistema  
##  3.81637 : 2   -73.80739 : 2   ANDORRA 1 :20   Convencional :44  
##  3.8183  : 2   -73.82338 : 2   ANDORRA 2 :20   Silvopastoril:44  
##  3.83619 : 2   -73.83781 : 2   LA PRADERA:20                     
##  3.667923: 1   -73.796028: 1   PORVENIR  :12                     
##  3.668081: 1   -73.796334: 1   ROSANIA   :16                     
##  3.668588: 1   -73.796521: 1                                     
##  (Other) :79   (Other)   :79                                     
##        Tratamiento
##  Encierro    :44  
##  Sin_Encierro:44  
##                   
##                   
##                   
##                   
##                   
## 
## Observation-level covariates:
##       luna        
##  Min.   :-1.6371  
##  1st Qu.:-0.7237  
##  Median : 0.1493  
##  Mean   : 0.0000  
##  3rd Qu.: 1.0924  
##  Max.   : 1.2501
plot(umf)

# Fit a models, multinomPois order of formulas: detection, abundance
fm0 <- multinomPois(~1 ~ 1, umf, se = TRUE)
fm1 <- multinomPois(~luna ~ 1, umf, se = TRUE)
fm2 <- multinomPois(~luna + I(luna^2) ~ 1, umf, se = TRUE)
fm3 <- multinomPois(~luna + I(luna^2) ~ Sistema, umf, se = TRUE)
fm4 <- multinomPois(~luna + I(luna^2) ~ Tratamiento, umf, se = TRUE)
fm5 <- multinomPois(~luna + I(luna^2) ~ Sistema+Tratamiento, umf, se = TRUE)
fm6 <- multinomPois(~luna + I(luna^2) ~ Sistema*Tratamiento, umf, se = TRUE)


umf2 <- unmarkedFramePCount(y=CountMat, siteCovs=data.frame(bat_exclusure[,1:8]) , obsCovs=list(luna=standardize(as.matrix(luna[,8:12]), center = TRUE, scale = TRUE))
          )
#summary(umf2)

fm0_ <- pcount(~1 ~ 1, umf2, se = TRUE)
fm1_ <- pcount(~luna ~ 1, umf2, se = TRUE)
fm2_ <- pcount(~luna + I(luna^2) ~ 1, umf2, se = TRUE)
fm3_ <- pcount(~luna + I(luna^2) ~ Sistema, umf2, se = TRUE)
fm4_ <- pcount(~luna + I(luna^2) ~ Tratamiento, umf2, se = TRUE)
fm5_ <- pcount(~luna + I(luna^2) ~ Sistema+Tratamiento, umf2, se = TRUE)
fm6_ <- pcount(~luna + I(luna^2) ~ Sistema*Tratamiento, umf2, se = TRUE)
fm7_ <- pcount(~luna + I(luna^2) ~ Sistema:Tratamiento, umf2, se = TRUE)

fm6ZIP <- pcount(~luna + I(luna^2) ~ Sistema*Tratamiento, umf2, se = TRUE, mixture = "ZIP")

fm6NB <- pcount(~luna + I(luna^2) ~ Sistema*Tratamiento, umf2, se = TRUE, mixture = "NB")


fmList <- fitList("p(.) lamdba(.)"=fm0, 
                  "p(luna) lamdba(.)"=fm1,
                  "p(luna + I(luna^2)) lamdba(.)"=fm2,
                  "p(luna + I(luna^2)) lamdba(Sistema)"=fm3,
                  "p(luna + I(luna^2)) lamdba(Tratamiento)"=fm4,
                  "p(luna + I(luna^2)) lamdba(Sistema+Tratamiento)"=fm5,
                  "p(luna + I(luna^2)) lamdba(Sistema*Tratamiento)"=fm6
                  )


fmList2 <- fitList("p(.) lamdba(.)"=fm0_,
                  "p(luna) lamdba(.)"=fm1_,
                  "p(luna + I(luna^2)) lamdba(.)"=fm2_,
                  "p(luna + I(luna^2)) lamdba(Sistema)"=fm3_,
                  "p(luna + I(luna^2)) lamdba(Tratamiento)"=fm4_,
                  "p(luna + I(luna^2)) lamdba(Sistema+Tratamiento)"=fm5_,
                  "p(luna + I(luna^2)) lamdba(Sistema*Tratamiento)"=fm6_,
                  #"p(luna + I(luna^2)) lamdba(Sistema:Tratamiento)"=fm7_,
                  "p(luna + I(luna^2)) lamdba(Sistema*Tratamiento)ZIP"=fm6ZIP,
                  "p(luna + I(luna^2)) lamdba(Sistema*Tratamiento)NB"=fm6NB
                  )

# Model selection
modSel(fmList)
##                                                 nPars     AIC delta   AICwt
## p(luna + I(luna^2)) lamdba(Sistema*Tratamiento)     7 1874.31  0.00 8.3e-01
## p(luna + I(luna^2)) lamdba(Sistema+Tratamiento)     6 1877.64  3.33 1.6e-01
## p(luna + I(luna^2)) lamdba(Sistema)                 5 1881.88  7.58 1.9e-02
## p(luna + I(luna^2)) lamdba(Tratamiento)             5 1891.39 17.08 1.6e-04
## p(luna + I(luna^2)) lamdba(.)                       4 1895.41 21.10 2.2e-05
## p(luna) lamdba(.)                                   3 1895.85 21.55 1.7e-05
## p(.) lamdba(.)                                      2 1905.15 30.84 1.7e-07
##                                                 cumltvWt
## p(luna + I(luna^2)) lamdba(Sistema*Tratamiento)     0.83
## p(luna + I(luna^2)) lamdba(Sistema+Tratamiento)     0.98
## p(luna + I(luna^2)) lamdba(Sistema)                 1.00
## p(luna + I(luna^2)) lamdba(Tratamiento)             1.00
## p(luna + I(luna^2)) lamdba(.)                       1.00
## p(luna) lamdba(.)                                   1.00
## p(.) lamdba(.)                                      1.00
# Model selection
modSel(fmList2)
##                                                    nPars     AIC delta   AICwt
## p(luna + I(luna^2)) lamdba(Sistema*Tratamiento)NB      8 1838.67  0.00 1.0e+00
## p(luna + I(luna^2)) lamdba(Sistema*Tratamiento)        7 1877.59 38.92 3.5e-09
## p(luna + I(luna^2)) lamdba(Sistema*Tratamiento)ZIP     8 1879.60 40.92 1.3e-09
## p(luna + I(luna^2)) lamdba(Sistema+Tratamiento)        6 1880.31 41.64 9.1e-10
## p(luna + I(luna^2)) lamdba(Sistema)                    5 1884.01 45.34 1.4e-10
## p(luna + I(luna^2)) lamdba(Tratamiento)                5 1892.23 53.56 2.3e-12
## p(luna + I(luna^2)) lamdba(.)                          4 1895.99 57.32 3.6e-13
## p(luna) lamdba(.)                                      3 1896.58 57.90 2.7e-13
## p(.) lamdba(.)                                         2 1905.64 66.96 2.9e-15
##                                                    cumltvWt
## p(luna + I(luna^2)) lamdba(Sistema*Tratamiento)NB      1.00
## p(luna + I(luna^2)) lamdba(Sistema*Tratamiento)        1.00
## p(luna + I(luna^2)) lamdba(Sistema*Tratamiento)ZIP     1.00
## p(luna + I(luna^2)) lamdba(Sistema+Tratamiento)        1.00
## p(luna + I(luna^2)) lamdba(Sistema)                    1.00
## p(luna + I(luna^2)) lamdba(Tratamiento)                1.00
## p(luna + I(luna^2)) lamdba(.)                          1.00
## p(luna) lamdba(.)                                      1.00
## p(.) lamdba(.)                                         1.00
##############  abundance in real scale
cbind(rbind("Poisson" = exp(coef(fm6_)[1]), 
            "NegBin" = exp(coef(fm6NB)[1]), 
            "ZIP"= exp(coef(fm6ZIP)[1])), 
      rbind(plogis(coef(fm6_)[5:7]),
            plogis(coef(fm6NB)[5:7]), 
            plogis(coef(fm6ZIP)[5:7])))
##         lam(Int)     p(Int)   p(luna) p(I(luna^2))
## Poisson 57.45922 0.02466372 0.5374301    0.5218685
## NegBin  41.87571 0.03263646 0.5458921    0.5337166
## ZIP     57.46378 0.02465971 0.5374302    0.5218552
# Detection probability after 5 removal passes
rowSums(getP(fm6))
##  [1] 0.0002835027 0.0002835027 0.0002835027 0.0002835027 0.0002835027
##  [6] 0.0002835027 0.0002835027 0.0002835027 0.0002550039 0.0002550039
## [11] 0.0002550039 0.0002550039 0.0002550039 0.0002550039 0.0002164637
## [16] 0.0002164637 0.0002164637 0.0002164637 0.0002164637 0.0002164637
## [21] 0.0002164637 0.0002164637 0.0002164637 0.0002164637 0.0002068250
## [26] 0.0002068250 0.0002068250 0.0002068250 0.0002068250 0.0002068250
## [31] 0.0002068250 0.0002068250 0.0002068250 0.0002068250 0.0002164637
## [36] 0.0002164637 0.0002164637 0.0002164637 0.0002164637 0.0002164637
## [41] 0.0002164637 0.0002164637 0.0002164637 0.0002164637 0.0002835027
## [46] 0.0002835027 0.0002835027 0.0002835027 0.0002835027 0.0002835027
## [51] 0.0002835027 0.0002835027 0.0002860242 0.0002860242 0.0002860242
## [56] 0.0002860242 0.0002860242 0.0002860242 0.0002164637 0.0002164637
## [61] 0.0002164637 0.0002164637 0.0002164637 0.0002164637 0.0002164637
## [66] 0.0002164637 0.0002164637 0.0002164637 0.0002071859 0.0002071859
## [71] 0.0002071859 0.0002071859 0.0002071859 0.0002071859 0.0002071859
## [76] 0.0002071859 0.0002071859 0.0002071859 0.0002182084 0.0002182084
## [81] 0.0002182084 0.0002182084 0.0002182084 0.0002182084 0.0002182084
## [86] 0.0002182084 0.0002182084 0.0002182084
# Empirical Bayes estimation of random effects
fm6re <- ranef(fm6, K=80)
# plot(fm5re, subset=site %in% 1:25)
sum(bup(fm6re))         # Estimated population size
## [1] NaN
# parametric bootstrap
# Function returning three fit-statistics.
fitstats <- function(fm) {
    observed <- getY(fm@data)
    expected <- fitted(fm)
    resids <- residuals(fm)
    sse <- sum(resids^2)
    chisq <- sum((observed - expected)^2 / expected)
    freeTuke <- sum((sqrt(observed) - sqrt(expected))^2)
    out <- chisq# c(SSE=sse, Chisq=chisq, freemanTukey=freeTuke)
    return(out)
}

(bt <- parboot(fm6, fitstats, nsim=1000, report=100 ))
## t0 = 1153.46
## 
## Call: parboot(object = fm6, statistic = fitstats, nsim = 1000, report = 100)
## 
## Parametric Bootstrap Statistics:
##     t0 mean(t0 - t_B) StdDev(t0 - t_B) Pr(t_B > t0)
## 1 1153            719             28.8            0
## 
## t_B quantiles:
##       0% 2.5% 25% 50% 75% 97.5% 100%
## [1,] 347  377 415 433 453   492  542
## 
## t0 = Original statistic computed from data
## t_B = Vector of bootstrap samples
plot(bt)

library(AICcmodavg)
system.time(gof.P <- Nmix.gof.test(fm6NB, nsim=100)) # 65 min

##    user  system elapsed 
##    0.28    0.08  593.59
#######################################################
####goodness-of-fit tests (posterior predictive checks)
# (fit_top_gof <- gof(fm5_ubms, draws=100, quiet=TRUE))
# plot(fit_top_gof)

# Estimates of fixed effects
e <- coef(fm6)
exp(e[1])
## lambda(Int) 
##    33155.25
plogis(e[2:3])
##    lambda(SistemaSilvopastoril) lambda(TratamientoSin_Encierro) 
##                       0.6069263                       0.5022098
# Estimates of random effects
re <- ranef(fm6, K=80)
#ltheme <- canonical.theme(color = FALSE)
#lattice.options(default.theme = ltheme)
plot(re, subset=site %in% 1:25)

newdat1 <- cbind(luna=as.numeric(seq(min(standardize(as.matrix(luna[,8:12]))), max(standardize(as.matrix(luna[,8:12]))), length=100)), Sistema="Silvopastoril", Tratamiento="Encierro")

newdat2 <- cbind(luna=as.numeric(seq(min(standardize(as.matrix(luna[,8:12]))), max(standardize(as.matrix(luna[,8:12]))), length=100)), Sistema="Silvopastoril", Tratamiento="Sin_Encierro")

newdat3 <- cbind(luna=as.numeric(seq(min(standardize(as.matrix(luna[,8:12]))), max(standardize(as.matrix(luna[,8:12]))), length=100)), Sistema="Convencional", Tratamiento="Encierro")

newdat4 <- cbind(luna=as.numeric(seq(min(standardize(as.matrix(luna[,8:12]))), max(standardize(as.matrix(luna[,8:12]))), length=100)), Sistema="Convencional", Tratamiento="Sin_Encierro")

newdat <- as.data.frame(rbind(newdat1, newdat2, newdat3, newdat4))
#newdat$luna <- rep(as.numeric(seq(0,1, length=100)),4)

newdat$luna <- as.numeric(newdat$luna)

pred_lambda <-predict(fm6,type="state",newdata=newdat,appendData=TRUE)
pred_lambda$Predicted <- log(pred_lambda$Predicted)

pred_p <-predict(fm6,type="det",newdata=newdat,appendData=TRUE)
pred_p$Predicted <- plogis(pred_p$Predicted)
# pred_p$SE <- plogis(pred_p$SE)
# pred_p$lower  <- plogis(pred_p$lower )

ggplot(pred_lambda, aes(x=Sistema, y=Predicted)) +
  geom_boxplot(fill='#A4A4A4', color="black") + geom_jitter()

ggplot(pred_lambda, aes(x=Tratamiento, y=Predicted)) +
  geom_boxplot(fill='#A4A4A4', color="black") + geom_jitter()

ggplot(pred_p, aes(x=luna, y=Predicted)) +
  geom_line() 

# plot(Predicted~luna, pred_p,type="l",col="blue",
#        xlab="luna",
#        ylab="p",
#        ylim=c(0,0.1))
# lines(lower~luna, pred_p,type="l",col=gray(0.5))
# lines(upper~luna, pred_p,type="l",col=gray(0.5))

Random effects in ubms

As we have a group site covariate, we can fit a model with random intercepts by group by including + (1|group) in our parameter formula

# fm7_ubms <- stan_multinomPois(~luna + I(luna^2) ~ Sistema + (1|Tratamiento), umf, chains=3, iter=5000, cores=3)
# (fm7_ubms)


# fm8_ubms <- stan_multinomPois(~luna + I(luna^2) ~ Tratamiento + (1|Sistema), umf, chains=3, iter=5000, cores=3)
# fm8_ubms


fm1p_ubms <- stan_pcount(~1 ~ 1, umf2, chains=3, iter=100000, cores=3)

fm2p_ubms <- stan_pcount(~luna  ~ 1 , umf2, chains=3, iter=100000, cores=3)

fm3p_ubms <- stan_pcount(~luna + I(luna^2) ~ 1 , umf2, chains=3, iter=100000, cores=3)

fm4p_ubms <- stan_pcount(~luna + I(luna^3) ~ 1 , umf2, chains=3, iter=100000, cores=3)


fm1_ubms <- stan_pcount(~luna + I(luna^3)  ~ Tratamiento , umf2, chains=3, iter=100000, cores=3)

fm2_ubms <- stan_pcount(~luna + I(luna^3)  ~  Sistema+Tratamiento, umf2, chains=3, iter=100000, cores=3)

fm3_ubms <- stan_pcount(~luna + I(luna^3)  ~  Tratamiento*Sistema, umf2, chains=3, iter=100000, cores=3)

# random intercept in sistema
fm6_ubms <- stan_pcount(~luna + I(luna^3)  ~ Tratamiento + (1|Sistema), umf2, chains=3, iter=100000, cores=3)

# interaction and random intercept
# fm4_ubms <- stan_pcount(~luna + I(luna^3)  ~ Sistema * Tratamiento + (1|Sistema), umf2, chains=3, iter=10000, cores=3)

# correlated random intercept 
# fm8_ubms <- stan_pcount(~luna + I(luna^2) ~  Tratamiento * (1|Sistema), umf2, chains=3, iter=5000, cores=3)

# Correlated slopes and intercepts are not supported
# fm9_ubms <- stan_pcount(~luna + I(luna^2) ~  Tratamiento + (Tratamiento|Sistema), umf2, chains=3, iter=5000, cores=3) # Error



## 2.3.2 Compare the models
# First we combine the models into a fitList:

mods_p <- fitList("p(.)lamdba(.)"=fm1p_ubms,
                "p(m)lamdba(.)"=fm2p_ubms,
                "p(m^2)lamdba(.)"=fm3p_ubms,
                "p(m^3)lamdba(.)"=fm4p_ubms)#,
                #, fm8_ubms)

round(modSel(mods_p), 3)
mods_lambda <- fitList("p(m^3)lamdba(T)"=fm1_ubms,
                #"p(m^3)lamdba(T+S)"=fm2_ubms,
                #"p(m^3)lamdba(T*S)"=fm3_ubms,
                "p(m^3)lamdba(.)"=fm4p_ubms,
                "p(m^3)lamdba(T+(1|S))"=fm6_ubms)
# Then we generate a model selection table:
# round(modSel(mods), 3)

# Instead of AIC, models are compared using leave-one-out cross-validation (LOO) (Vehtari, Gelman, and Gabry 2017) via the loo package. Based on this cross-validation, the expected predictive accuracy (elpd) for each model is calculated. The model with the largest elpd (fit_global) performed best.

# The elpd_diff column shows the difference in elpd between a model and the top model if this difference is several times larger than the standard error of the difference (se_diff), we are confident the model with the larger elpd performed better. LOO model weights, analogous to AIC weights, are also calculated. 

round(modSel(mods_lambda), 3)
names(fm6_ubms) # see the names
## [1] "beta_state[(Intercept)]"                   
## [2] "beta_state[TratamientoSin_Encierro]"       
## [3] "b_state[(Intercept) Sistema:Convencional]" 
## [4] "b_state[(Intercept) Sistema:Silvopastoril]"
## [5] "sigma_state[sigma [1|Sistema]]"            
## [6] "beta_det[(Intercept)]"                     
## [7] "beta_det[luna]"                            
## [8] "beta_det[I(luna^3)]"
# see coefs
summary(fm6_ubms, "det")
summary(fm6_ubms, "state")
lambda_intercept <- extract(fm6_ubms, "beta_state[(Intercept)]")[[1]]
hist(lambda_intercept, freq=FALSE)
lines(density(lambda_intercept), col='red', lwd=2)

lambda2_intercept <-  extract(fm6_ubms,"beta_state[TratamientoSin_Encierro]")[[1]]
hist(lambda_intercept +lambda2_intercept, freq=FALSE)
lines(density(lambda_intercept + lambda2_intercept), col='red', lwd=2)

# check convergence
traceplot(fm6_ubms, pars=c("beta_state", "beta_det"))

plot_posteriors(fm6_ubms)#, pars=c("beta_state[SistemaSilvopastoril]"))

# # Note that the residuals are automatically binned, which is appropriate for a binomial response (Gelman and Hill 2007). If the model fits the data well, you would expect 95% of the binned residual points to fall within the shaded area. 
plot_residuals(fm6_ubms, submodel="state")

plot_residuals(fm6_ubms, submodel="det")

# 
# #plot residuals against covariate values using the plot_residuals function:
plot_residuals(fm6_ubms, submodel="state", covariate="Sistema")

plot_residuals(fm6_ubms, submodel="state", covariate="Tratamiento")

#### goodness-of-fit tests
fit_top_gof <- gof(fm6_ubms, draws=100, quiet=TRUE)
fit_top_gof
## N-mixture Chi-square 
## Point estimate = 1173.024
## Posterior predictive p = 0
plot(fit_top_gof)

# #You can use the posterior_predict function to simulate new datasets, which you can use to calculate your own fit statistic. The following command generates 100 simulated datasets.
# 
sim_y <- posterior_predict(fm6_ubms, "y", draws=1000)
# # dim(sim_y)
prop0 <- apply(sim_y, 1, function(x) mean(x==0, na.rm=TRUE))
# #and compare that to the proportion of zeros in the actual dataset.
actual_prop0 <- mean(getY(fm6_ubms) == 0, na.rm=TRUE)
# 
# #Compare
hist(prop0, col='gray', xlim=c(0,actual_prop0*1.5))
abline(v=actual_prop0, col='red', lwd=2)

# 

#####################
# Random effect
####################
# You can also generate summary statistics for each random term:
ran <- ranef(fm6_ubms, submodel="state", summary=TRUE)
#### real scale
exp(ran$Sistema$`(Intercept)`)
# 2.5 Model inference
# 2.5.1 Marginal covariate effects
# Based on the 95% uncertainty intervals
plot_effects(fm6_ubms, submodel="state")

#plot_effects(fm8_ubms, "state")
plot_effects(fm6_ubms, "det")

#plot_effects(fm8_ubms, "det")

##########
# predict
##########

#2.5.2 Predict parameter values
#As with unmarked, we can get the predicted psi or p for each site or observation using the predict function. For example, to get occupancy:

head(predict(fm6_ubms, submodel="state"))
nd <- data.frame(Tratamiento=c("Encierro", "Encierro", 
                               "Sin_Encierro", "Sin_Encierro"),                                          Sistema=c("Convencional", "Silvopastoril",                                                       "Convencional", "Silvopastoril"),
                 luna=c(0.75, 0.75, 0.75, 0.75))


pred <- predict(fm6_ubms, submodel="state", newdata=nd)

predicciones <- cbind(nd, pred)

barplot(predicciones$Predicted, names.arg =predicciones$Sistema ,
        ylim=c(0,25), col=c("#66C2A5", "#66C2A5", "white", "white"))
segments(x0 = c(0.7, 1.9, 3.1, 4.3),   # Draw multiple lines
         y0 = predicciones$`2.5%`,
         x1 = c(0.7, 1.9, 3.1, 4.3),
         y1 = predicciones$`97.5%`)

# 2.5.3 Simulate latent occupancy states
# One of the advantages of BUGS/JAGS is that you can directly model latent parameters, such as the true unobserved occupancy state of a site z. Using the posterior_predict function in ubms, you can generate an equivalent posterior distribution of z.
# zpost <- posterior_predict(fm6_ubms, "y", draws=100)
# dim(zpost)


# 2.2.5 Extracting individual parameters
(sum_tab <- summary(fm6_ubms, "state"))
# sum_tab$mean[1]

# Lambda_intercept <- extract(fm7_ubms, "beta_state[(Intercept)]")[[1]]
# hist(Lambda_intercept, freq=FALSE)
# lines(density(Lambda_intercept), col='red', lwd=2)

##########
# predict
##########

#((predict(fm6_ubms, submodel="state")))

nd1 <- data.frame(Tratamiento="Encierro", Sistema="Silvopastoril",luna=0.75)
nd2 <- data.frame(Tratamiento="Sin_Encierro", Sistema="Silvopastoril",luna=0.75)
nd3 <- data.frame(Tratamiento="Encierro", Sistema="Convencional",luna=0.75)
nd4 <- data.frame(Tratamiento="Sin_Encierro", Sistema="Convencional",luna=0.75)
t1 <- predict(fm6_ubms, submodel="state", newdata=nd1)
t2 <- predict(fm6_ubms, submodel="state", newdata=nd2)
t3 <- predict(fm6_ubms, submodel="state", newdata=nd3)
t4 <- predict(fm6_ubms, submodel="state", newdata=nd4)

comparison <- rbind(t1,t2,t3,t4)

plot(1:4, comparison[,1], xlab="", ylab="insects", pch=16, ylim=c(9,27), las=1)
segments(1:4,comparison[,3], 1:4, comparison[,4], col = "gray")

# posterior_predict(fm6_ubms, "z", draws=100)
#The output has one row per posterior draw, and one column per site. The posterior of z can be useful for post-hoc analyses. For example, suppose you wanted to test for a difference in mean occupancy probability between sites 1-50 and sites 51-100:

zpost <- posterior_predict(fm6_ubms, "z", draws=10000)
group1 <- rowMeans(zpost[,1:44], na.rm=TRUE)
group2 <- rowMeans(zpost[,45:88], na.rm=TRUE)

Excl_Conv <- rowMeans(zpost[,c(1:4,9:11,15:19,25:29,35:39)])
Excl_Silv <- rowMeans(zpost[,c(5:8,12:14,20:24,30:34,40:44)])
noExcl_Conv <- rowMeans(zpost[,c(45:48,53:55,59:63,69:73,79:83)])
noExcl_Silv <- rowMeans(zpost[,c(49:52,56:58,64:68,74:78,84:88)]) 

Dif_Silvo <- Excl_Silv - noExcl_Silv
Dif_Conve <- Excl_Conv - noExcl_Conv

# silvo
df_Silvo <- data.frame (
    Tratamiento=factor(rep(c("Exclosure", "No Exclosure"), each=length(Excl_Silv) )),
    Abundance=c(Excl_Silv,
                (noExcl_Silv) ) )

df_Conv <- data.frame (
    Tratamiento=factor(rep(c("Exclosure", "No Exclosure"), each=length(Excl_Conv) )),
    Abundance=c(Excl_Conv,
                (noExcl_Conv) ) )


df_Dif_Silvo <- data.frame(
    Tratamiento=factor(rep(c("Dif. Silvopastoral"), each=length(Dif_Silvo))),
    Abundance=c(Dif_Silvo))# ,
                #((out$sims.list$Ntratamiento[,2])))
    

df_Dif_Conv <- data.frame(
    Tratamiento=factor(rep(c("Dif. Conventional"), each=length(Dif_Conve))),
    Abundance=c(Dif_Conve))# ,
                #((out$sims.list$Ntratamiento[,2])))
    
library(plyr)

mu_Silv <- ddply(df_Silvo, "Tratamiento", summarise, grp.median=median(Abundance),                                          grp.mean=mean(Abundance))

mu_Conv <- ddply(df_Conv, "Tratamiento", summarise, grp.median=median(Abundance),                                          grp.mean=mean(Abundance))

mu_Dif_Silv <- ddply(df_Dif_Silvo, "Tratamiento", summarise, grp.median=median(Abundance),                                          grp.mean=mean(Abundance))

mu_Dif_Conv <- ddply(df_Dif_Conv, "Tratamiento", summarise, grp.median=median(Abundance),                                          grp.mean=mean(Abundance))


##########################
library(rphylopic)
bat_pic <- image_data("18bfd2fc-f184-4c3a-b511-796aafcc70f6", size=128)[[1]] # get actual 
cow_pic <- image_data("415714b4-859c-4d1c-9ce0-9e1081613df7", size=128)[[1]]
grasshoper_pic <- image_data("7b3f79f4-9d30-42c2-bb04-6b7d2a83da04", size=128)[[1]]
fly_pic <- image_data("67bb75ec-8906-4910-871c-2758415b139a", size=128)[[1]]

##########################################
## Silvo graph 
##########################################
# Add mean lines
silvo <- ggplot(df_Silvo, aes(x=Abundance, color=Tratamiento, fill=Tratamiento)) +
  # geom_density(alpha=0.3) +
  geom_histogram(alpha=0.3, bins = 50,  position="identity") +
  geom_density(alpha=0.6) +
  geom_vline(data=mu_Silv, aes(xintercept=grp.median, color=Tratamiento),
             linetype="dashed") + 
  # scale_x_break(c(40000,50000),  scales = 0.25) +
  #scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
  #scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
  labs(title="Posteriors of insect abundance Silvopastoral",x="Insect abundance per trap", y = "Frecuency") +
  theme(legend.position="top")

silvo +  # add_phylopic(cow_pic, 1, 9, 200, ysize = 300) +
  add_phylopic(bat_pic, 1, 21.25, 750, ysize = 60) 

###### diference
# Add mean lines
q2<-ggplot(df_Dif_Silvo, aes(x=Abundance,  fill=Tratamiento)) +
  geom_density( alpha=0.3) +    # geom_density(alpha=0.6) +
  geom_vline(data=mu_Dif_Silv, aes(xintercept=grp.median),
             linetype="dashed") + 
  #scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
  #scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
  labs(title="Insect removal by bats in Silvopastoral",x="Total Insect removal", y = "Density") +
  theme(legend.position="top")

q2 + annotate("text", x = mu_Dif_Silv$grp.median, y = 0.1, label = round(mu_Dif_Silv$grp.median, 3) ) + add_phylopic(fly_pic, 1, 6, 0.2, ysize = 0.5) 

##########################################
## Conventional graph 
##########################################
# Add mean lines
conventional <- ggplot(df_Conv, aes(x=Abundance, color=Tratamiento, fill=Tratamiento)) +
  # geom_density(alpha=0.3) +
  geom_histogram(alpha=0.3, bins = 50,  position="identity") +
  geom_density(alpha=0.6) +
  geom_vline(data=mu_Conv, aes(xintercept=grp.median, color=Tratamiento),
             linetype="dashed") + 
  # scale_x_break(c(40000,50000),  scales = 0.25) +
  #scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
  #scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
  labs(title="Posteriors of insect abundance Conventional",x="Insect abundance per trap", y = "Frecuency") +
  theme(legend.position="top")


conventional +  # add_phylopic(cow_pic, 1, 9, 200, ysize = 300) +
  add_phylopic(bat_pic, 1, 19, 700, ysize = 60) 

###### diference
# Add mean lines
q2<-ggplot(df_Dif_Conv, aes(x=Abundance,  fill=Tratamiento)) +
  geom_density( alpha=0.3) +    # geom_density(alpha=0.6) +
  geom_vline(data=mu_Dif_Conv, aes(xintercept=grp.median),
             linetype="dashed") + 
  #scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
  #scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
  labs(title="Insect removal by bats in Conventional",x="Total Insect removal", y = "Density") +
  theme(legend.position="top")

q2 + annotate("text", x = mu_Dif_Conv$grp.median, y = 0.1, label = round(mu_Dif_Conv$grp.median, 3) ) + add_phylopic(fly_pic, 1, 6, 0.2, ysize = 0.5) 

##########################################
## poisson test 1 overall
##########################################
count1 <- data.frame(count=as.vector(zpost[,1:44]), cond="Con Encierro")
count2 <- data.frame(count=as.vector(zpost[,45:88]), cond="Sin Encierro")
counts <- rbind(count1, count2)  
  
fit_poison <- glm(count ~ cond, data= counts, family= 'poisson')
print("overall")
## [1] "overall"
summary(fit_poison)
## 
## Call:
## glm(formula = count ~ cond, family = "poisson", data = counts)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.5684  -1.0243  -0.0740   0.8115   3.3828  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       3.1058955  0.0003190  9735.3   <2e-16 ***
## condSin Encierro -0.1445277  0.0004684  -308.6   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1627165  on 879999  degrees of freedom
## Residual deviance: 1531706  on 879998  degrees of freedom
## AIC: 5787520
## 
## Number of Fisher Scoring iterations: 4
# detailed test
plot_dat_Sil_con <- bat_exclusure  %>% filter(Sistema=="Silvopastoril" & Tratamiento =="Encierro")

plot_dat_Sil_sin <- bat_exclusure  %>% filter(Sistema=="Silvopastoril" & Tratamiento =="Sin_Encierro")

plot_dat_Conv_con <- bat_exclusure  %>% filter(Sistema=="Convencional" & Tratamiento =="Encierro")

plot_dat_Conv_sin <- bat_exclusure  %>% filter(Sistema=="Convencional" & Tratamiento =="Sin_Encierro")

##########################################
## poisson test Silvopastoril
##########################################
count_silvo_C <- data.frame(count=as.vector(zpost[,plot_dat_Sil_con$s]), cond="Con Encierro")
count_silvo_S <- data.frame(count=as.vector(zpost[,plot_dat_Sil_sin$s]), cond="Sin Encierro")

counts_Silvo <- rbind(count_silvo_C, count_silvo_S)  
  
fit_poison_Silvo <- glm(count ~ cond, data= counts_Silvo, family= 'poisson')
print("Silvopastoral")
## [1] "Silvopastoral"
summary(fit_poison_Silvo)
## 
## Call:
## glm(formula = count ~ cond, family = "poisson", data = counts_Silvo)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.3033  -0.9131  -0.0218   0.8247   2.9437  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       3.2373826  0.0004225  7662.9   <2e-16 ***
## condSin Encierro -0.1880979  0.0006276  -299.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 733435  on 439999  degrees of freedom
## Residual deviance: 643217  on 439998  degrees of freedom
## AIC: 2823545
## 
## Number of Fisher Scoring iterations: 4
##########################################
## poisson test Conventional
##########################################
count_conv_C <- data.frame(count=as.vector(zpost[,plot_dat_Conv_con$s]), cond="Con Encierro")
count_conv_S <- data.frame(count=as.vector(zpost[,plot_dat_Conv_sin$s]), cond="Sin Encierro")

counts_conv <- rbind(count_conv_C, count_conv_S)  
  
fit_poison_Conv <- glm(count ~ cond, data= counts_conv, family= 'poisson')
print("Conventional")
## [1] "Conventional"
summary(fit_poison_Conv)
## 
## Call:
## glm(formula = count ~ cond, family = "poisson", data = counts_conv)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.2314  -0.8784  -0.0438   0.7989   3.8513  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       2.9544642  0.0004867  6070.8   <2e-16 ***
## condSin Encierro -0.0894941  0.0007042  -127.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 638097  on 439999  degrees of freedom
## Residual deviance: 621929  on 439998  degrees of freedom
## AIC: 2697418
## 
## Number of Fisher Scoring iterations: 4
# So the mean of Cond 1 is estimated as exp(3.025457) = 20.60342, the mean of Cond 2 is exp(3.025457  -0.156641) = 17.61615 and the difference has p = <2e-16 for the null hypothesis of being zero.

##########################################





plot_dat <- rbind(data.frame(group="Con Encierro", occ=mean(group1),
                             lower=quantile(group1, 0.025, na.rm = TRUE),
                             upper=quantile(group1, 0.975, na.rm = TRUE)),
                  data.frame(group="Sin Encierro", occ=mean(group2),
                             lower=quantile(group2, 0.025, na.rm = TRUE),
                             upper=quantile(group2, 0.975, na.rm = TRUE)))




library(ggplot2)

ggplot(plot_dat, aes(x=group, y=occ)) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width=0.2) +
  geom_point(size=3) +
  ylim(10,25) +
  labs(x="Group", y="Insects + 95% UI") +
  theme_bw() +
  theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(),
        axis.text=element_text(size=12), axis.title=element_text(size=14))

So the mean of Con Encierro is estimated as exp (3.1058955) = 22.3292068, the mean of Sin Encierro is exp(3.1058955 + -0.1445277) = 19.3243864 and the difference has p = 0 for the null hypothesis of being zero.

Session Info

Details and packages used

print(sessionInfo(), locale = FALSE)
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 14393)
## 
## Matrix products: default
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rphylopic_0.3.0  plyr_1.8.7       AICcmodavg_2.3-1 wiqid_0.3.0     
##  [5] mcmcOutput_0.1.1 HDInterval_0.2.2 sf_1.0-0         mapview_2.11.0  
##  [9] stocc_1.31       ubms_1.1.0       unmarked_1.1.1   lattice_0.20-44 
## [13] bayesplot_1.9.0  ggeffects_1.1.1  lubridate_1.7.10 forcats_0.5.1   
## [17] stringr_1.4.0    dplyr_1.0.7      purrr_0.3.4      readr_1.4.0     
## [21] tidyr_1.1.4      tibble_3.1.7     ggplot2_3.3.6    tidyverse_1.3.1 
## [25] readxl_1.3.1    
## 
## loaded via a namespace (and not attached):
##   [1] uuid_0.1-4              backports_1.4.1         spam_2.7-0             
##   [4] VGAM_1.1-5              systemfonts_1.0.2       sp_1.4-5               
##   [7] splines_4.0.3           crosstalk_1.2.0         leaflet_2.0.4.1        
##  [10] gridBase_0.4-7          urltools_1.7.3          rstantools_2.2.0       
##  [13] inline_0.3.19           digest_0.6.29           leafpop_0.1.0          
##  [16] htmltools_0.5.1.1       viridis_0.6.1           leaflet.providers_1.9.0
##  [19] fansi_1.0.3             magrittr_2.0.3          modelr_0.1.8           
##  [22] RcppParallel_5.1.5      matrixStats_0.62.0      rARPACK_0.11-0         
##  [25] svglite_2.0.0           prettyunits_1.1.1       colorspace_2.0-3       
##  [28] rvest_1.0.2             haven_2.4.1             xfun_0.31              
##  [31] leafem_0.1.6            callr_3.7.0             crayon_1.5.1           
##  [34] jsonlite_1.8.0          lme4_1.1-29             brew_1.0-6             
##  [37] survival_3.2-11         glue_1.6.2              gtable_0.3.0           
##  [40] webshot_0.5.2           V8_4.1.0                pkgbuild_1.3.1         
##  [43] rstan_2.26.10           maps_3.4.0              scales_1.2.0           
##  [46] DBI_1.1.2               Rcpp_1.0.8.3            viridisLite_0.4.0      
##  [49] xtable_1.8-4            units_0.7-2             proxy_0.4-26           
##  [52] dotCall64_1.0-1         stats4_4.0.3            StanHeaders_2.26.10    
##  [55] truncnorm_1.0-8         htmlwidgets_1.5.4       httr_1.4.2             
##  [58] ellipsis_0.3.2          pkgconfig_2.0.3         loo_2.5.1              
##  [61] farver_2.1.0            sass_0.4.0              dbplyr_2.1.1           
##  [64] crul_1.2.0              utf8_1.2.2              tidyselect_1.1.1       
##  [67] labeling_0.4.2          rlang_1.0.2             munsell_0.5.0          
##  [70] cellranger_1.1.0        tools_4.0.3             cli_3.2.0              
##  [73] generics_0.1.2          broom_0.7.8             ggridges_0.5.3         
##  [76] evaluate_0.15           yaml_2.3.5              processx_3.5.3         
##  [79] knitr_1.39              fs_1.5.0                satellite_1.0.2        
##  [82] pbapply_1.5-0           nlme_3.1-152            xml2_1.3.3             
##  [85] compiler_4.0.3          rstudioapi_0.13         curl_4.3.2             
##  [88] png_0.1-7               e1071_1.7-7             reprex_2.0.0           
##  [91] bslib_0.2.5.1           stringi_1.7.6           highr_0.9              
##  [94] ps_1.6.0                RSpectra_0.16-1         fields_12.5            
##  [97] Matrix_1.3-4            classInt_0.4-3          nloptr_2.0.1           
## [100] vctrs_0.4.1             pillar_1.7.0            lifecycle_1.0.1        
## [103] triebeard_0.3.0         jquerylib_0.1.4         raster_3.5-15          
## [106] R6_2.5.1                KernSmooth_2.23-20      gridExtra_2.3          
## [109] codetools_0.2-18        boot_1.3-28             MASS_7.3-54            
## [112] assertthat_0.2.1        httpcode_0.3.0          withr_2.5.0            
## [115] parallel_4.0.3          hms_1.1.0               terra_1.5-21           
## [118] grid_4.0.3              coda_0.19-4             class_7.3-19           
## [121] minqa_1.2.4             rmarkdown_2.14          base64enc_0.1-3

Cited Literature