Introduction

This document gives an introduction to the statistical analysis of insect data gathered in exclosure experiments.

#Libraries used (Packages)
library(ggplot2) # Used for data visualisation (i.e barcharts)
library(reshape2) # Manipulation of data frame
library(RColorBrewer) # Colours in bar charts
library(knitr) # Complies this dynamic report
library(vegan) # For the ANOSIM test
library(nortest) # Anderson-Darling test
library(mobsim) # rank abundance

library (readxl) # read excel data
library (tidyverse) # Data handling
library (plyr)
library (hrbrthemes)
# library(dplyr)
# library(tidyr)
library (viridis)
library (fitdistrplus)
library (MASS)


library (tmap) # nice maps
# library (unmarked) # occu analitics
# library (ggcorrplot) # corplot in ggplot
library (raster)
library (rasterVis)
library (knitr) #
library (kableExtra) # nice tables
library (sf) #sf
library (mapview)
library (stringr)
library (lubridate)
library (ggeffects) # test and plot glm
library (lme4) # GLMM
#R script to print the version of R used
# version$version.string
library(brms)
library(bayesplot)

Load data

bat_exclusure <- read_excel("C:/Users/diego.lizcano/Box Sync/CodigoR/Unillanos-Bat2019/data/Datos_Bat_exclusion_12dic2019_tydy.xlsx")
# View(Datos_Bat_exclusion_12dic2019)

Basic plots

La linea punteada representa la media de los conteos.

bat_vaca <- bat_exclusure %>% filter(parasito_de=="vaca")
bat_pasto <- bat_exclusure %>% filter(parasito_de=="pasto")

bat_vaca$Tratamiento <- as.factor(bat_vaca$Tratamiento)
bat_vaca$Finca <- as.factor(bat_vaca$Finca)
bat_vaca$DIA <- as.factor(bat_vaca$DIA)

cdat_vaca2 <- ddply(bat_vaca, c("Sistema", "Tratamiento"), summarise, rating.mean=mean(insectos))

cdat_vaca1 <- ddply(bat_vaca, c("Tratamiento"), summarise, rating.mean=mean(insectos))


cdat_pasto2 <- ddply(bat_pasto, c("Sistema", "Tratamiento"), summarise, rating.mean=mean(insectos)) 


bat_exclusure %>% 
ggplot( aes(x=insectos, fill=parasito_de)) + 
    stat_count() + #geom_point() +
    facet_grid(Sistema~Tratamiento)

bat_vaca %>% 
ggplot( aes(x=insectos)) + 
    stat_count() + #geom_point() +
    facet_grid(Tratamiento ~ Sistema) +
    geom_vline(data=cdat_vaca2, aes(xintercept=rating.mean),
               linetype="dashed", size=1) + ggtitle("Parasitos de Vaca")

bat_vaca %>% 
ggplot( aes(x=insectos, fill=Tratamiento)) + 
geom_histogram(alpha=.5, position="identity") + 
  xlim(0, 25) +
  geom_vline(data=cdat_vaca2, aes(xintercept=rating.mean),
               linetype="dashed") +
  facet_grid(Sistema ~ .) + ggtitle("Parasitos de Vaca")

bat_pasto %>% 
ggplot( aes(x=insectos, fill=Tratamiento)) + 
geom_histogram(alpha=.5, position="identity") + 
  xlim(0, 25) +
  geom_vline(data=cdat_pasto2, aes(xintercept=rating.mean),
               linetype="dashed") +
  facet_grid(Sistema ~ .) + ggtitle("Parasitos de Pasto")

Pareciera que la magnitud de la diferencia es mayor en convencional que en silvopastoril.

Testing Data Distributions

Testing fot bat_vaca. Insectos de vaca

# estimate paramters
library(MASS)
fit_g <- fitdist(bat_vaca$insectos, "geom")
fit_p <- fitdist(bat_vaca$insectos, "pois")
fit_nb <- fitdist(bat_vaca$insectos, "nbinom")

plot(fit_g) # Geometric

plot(fit_p) # Poisson

plot(fit_nb) # Negative Binomial

# par(mfrow=c(2,2))
plot.legend <- c("geometric", "Poisson", "negative binomial")
denscomp(list(fit_g, fit_p, fit_nb), legendtext = plot.legend)

cdfcomp (list(fit_g, fit_p, fit_nb), legendtext = plot.legend)

qqcomp  (list(fit_g, fit_p, fit_nb), legendtext = plot.legend)

ppcomp  (list(fit_g, fit_p, fit_nb), legendtext = plot.legend)

Pareciara que los datos se ajustan un poco mas a una negative binomial que a Poisson o Geometrica. Sin embargo por facilidad modelamos los conteos de insectos como Poisson zero inflado. Dada la gran cantidad de ceros.

Testing diferences between two Poisson distributions.

Potrero in Convencional Vs Encierro in Convencional for vaca insects

las observaciones 84 y 94 son problematicas.

Best model is: fit_zinb1

print (best)
## Output of model 'fit_zinb0':
## 
## Computed from 4000 by 206 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo   -591.7 121.5
## p_loo        18.9  11.3
## looic      1183.4 243.1
## ------
## Monte Carlo SE of elpd_loo is 0.3.
## 
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'fit_zinb1':
## 
## Computed from 4000 by 206 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo   -571.2 106.5
## p_loo        39.8  23.3
## looic      1142.5 213.0
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'fit_zinb2':
## 
## Computed from 4000 by 206 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo   -575.1 108.3
## p_loo        46.5  25.3
## looic      1150.3 216.7
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     203   98.5%   2         
##  (0.5, 0.7]   (ok)         3    1.5%   139       
##    (0.7, 1]   (bad)        0    0.0%   <NA>      
##    (1, Inf)   (very bad)   0    0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'fit_zinb3':
## 
## Computed from 4000 by 206 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo   -568.5 104.3
## p_loo        36.9  20.7
## looic      1137.0 208.5
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'fit_zinb4':
## 
## Computed from 4000 by 206 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo   -606.7 125.0
## p_loo        60.0  38.5
## looic      1213.4 250.1
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     203   98.5%   2         
##  (0.5, 0.7]   (ok)         3    1.5%   70        
##    (0.7, 1]   (bad)        0    0.0%   <NA>      
##    (1, Inf)   (very bad)   0    0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## Model comparisons:
##           elpd_diff se_diff
## fit_zinb3   0.0       0.0  
## fit_zinb1  -2.8       2.4  
## fit_zinb2  -6.7       4.5  
## fit_zinb0 -23.2      22.4  
## fit_zinb4 -38.2      26.4
print(fit_zinb1)
##  Family: zero_inflated_poisson 
##   Links: mu = log; zi = identity 
## Formula: insectos ~ Tratamiento + Finca 
##    Data: bat_vaca_conv (Number of observations: 206) 
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup samples = 4000
## 
## Population-Level Effects: 
##                    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept              1.41      0.11     1.19     1.63 1.00     2785     2606
## TratamientoPotrero     0.51      0.11     0.30     0.72 1.00     3056     2569
## FincaANDORRA2         -0.66      0.13    -0.91    -0.41 1.00     3078     2873
## FincaLAPRADERA        -1.17      0.18    -1.54    -0.84 1.00     3109     2731
## FincaPORVENIR         -0.98      0.20    -1.38    -0.61 1.00     2811     2854
## FincaROSANIA          -0.23      0.12    -0.47     0.01 1.00     2893     2608
## 
## Family Specific Parameters: 
##    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## zi     0.32      0.04     0.25     0.39 1.00     3180     2798
## 
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample 
## is a crude measure of effective sample size, and Rhat is the potential 
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(fit_zinb1) 

posterior <- as.array(fit_zinb1)
dimnames(posterior)
## $iterations
## NULL
## 
## $chains
## [1] "chain:1" "chain:2" "chain:3" "chain:4"
## 
## $parameters
## [1] "b_Intercept"          "b_TratamientoPotrero" "b_FincaANDORRA2"     
## [4] "b_FincaLAPRADERA"     "b_FincaPORVENIR"      "b_FincaROSANIA"      
## [7] "zi"                   "lp__"
mcmc_intervals(posterior, pars = c("b_TratamientoPotrero", "zi" ))

color_scheme_set("blue")
mcmc_hist(posterior, pars = c("b_Intercept",                "b_TratamientoPotrero" , "zi"))

plot(marginal_effects(fit_zinb1))

Si hay diferencias en la distribuciones de los posteriores de Encierro y Potrero. La difencia entre los dos es la cantidad que remueven los murcielagos