This document gives an introduction to the statistical analysis of insect data gathered in exclosure experiments.
#Libraries used (Packages)
library(ggplot2) # Used for data visualisation (i.e barcharts)
library(reshape2) # Manipulation of data frame
library(RColorBrewer) # Colours in bar charts
library(knitr) # Complies this dynamic report
library(vegan) # For the ANOSIM test
library(nortest) # Anderson-Darling test
library(mobsim) # rank abundance
library (readxl) # read excel data
library (tidyverse) # Data handling
library (plyr)
library (hrbrthemes)
# library(dplyr)
# library(tidyr)
library (viridis)
library (fitdistrplus)
library (MASS)
library (tmap) # nice maps
# library (unmarked) # occu analitics
# library (ggcorrplot) # corplot in ggplot
library (raster)
library (rasterVis)
library (knitr) #
library (kableExtra) # nice tables
library (sf) #sf
library (mapview)
library (stringr)
library (lubridate)
library (ggeffects) # test and plot glm
library (lme4) # GLMM
#R script to print the version of R used
# version$version.string
library(brms)
library(bayesplot)La linea punteada representa la media de los conteos.
bat_vaca <- bat_exclusure %>% filter(parasito_de=="vaca")
bat_pasto <- bat_exclusure %>% filter(parasito_de=="pasto")
bat_vaca$Tratamiento <- as.factor(bat_vaca$Tratamiento)
bat_vaca$Finca <- as.factor(bat_vaca$Finca)
bat_vaca$DIA <- as.factor(bat_vaca$DIA)
cdat_vaca2 <- ddply(bat_vaca, c("Sistema", "Tratamiento"), summarise, rating.mean=mean(insectos))
cdat_vaca1 <- ddply(bat_vaca, c("Tratamiento"), summarise, rating.mean=mean(insectos))
cdat_pasto2 <- ddply(bat_pasto, c("Sistema", "Tratamiento"), summarise, rating.mean=mean(insectos))
bat_exclusure %>%
ggplot( aes(x=insectos, fill=parasito_de)) +
stat_count() + #geom_point() +
facet_grid(Sistema~Tratamiento)bat_vaca %>%
ggplot( aes(x=insectos)) +
stat_count() + #geom_point() +
facet_grid(Tratamiento ~ Sistema) +
geom_vline(data=cdat_vaca2, aes(xintercept=rating.mean),
linetype="dashed", size=1) + ggtitle("Parasitos de Vaca")bat_vaca %>%
ggplot( aes(x=insectos, fill=Tratamiento)) +
geom_histogram(alpha=.5, position="identity") +
xlim(0, 25) +
geom_vline(data=cdat_vaca2, aes(xintercept=rating.mean),
linetype="dashed") +
facet_grid(Sistema ~ .) + ggtitle("Parasitos de Vaca")bat_pasto %>%
ggplot( aes(x=insectos, fill=Tratamiento)) +
geom_histogram(alpha=.5, position="identity") +
xlim(0, 25) +
geom_vline(data=cdat_pasto2, aes(xintercept=rating.mean),
linetype="dashed") +
facet_grid(Sistema ~ .) + ggtitle("Parasitos de Pasto")Pareciera que la magnitud de la diferencia es mayor en convencional que en silvopastoril.
# estimate paramters
library(MASS)
fit_g <- fitdist(bat_vaca$insectos, "geom")
fit_p <- fitdist(bat_vaca$insectos, "pois")
fit_nb <- fitdist(bat_vaca$insectos, "nbinom")
plot(fit_g) # Geometric# par(mfrow=c(2,2))
plot.legend <- c("geometric", "Poisson", "negative binomial")
denscomp(list(fit_g, fit_p, fit_nb), legendtext = plot.legend)Pareciara que los datos se ajustan un poco mas a una negative binomial que a Poisson o Geometrica. Sin embargo por facilidad modelamos los conteos de insectos como Poisson zero inflado. Dada la gran cantidad de ceros.
las observaciones 84 y 94 son problematicas.
## Output of model 'fit_zinb0':
##
## Computed from 4000 by 206 log-likelihood matrix
##
## Estimate SE
## elpd_loo -591.7 121.5
## p_loo 18.9 11.3
## looic 1183.4 243.1
## ------
## Monte Carlo SE of elpd_loo is 0.3.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'fit_zinb1':
##
## Computed from 4000 by 206 log-likelihood matrix
##
## Estimate SE
## elpd_loo -571.2 106.5
## p_loo 39.8 23.3
## looic 1142.5 213.0
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'fit_zinb2':
##
## Computed from 4000 by 206 log-likelihood matrix
##
## Estimate SE
## elpd_loo -575.1 108.3
## p_loo 46.5 25.3
## looic 1150.3 216.7
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 203 98.5% 2
## (0.5, 0.7] (ok) 3 1.5% 139
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'fit_zinb3':
##
## Computed from 4000 by 206 log-likelihood matrix
##
## Estimate SE
## elpd_loo -568.5 104.3
## p_loo 36.9 20.7
## looic 1137.0 208.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'fit_zinb4':
##
## Computed from 4000 by 206 log-likelihood matrix
##
## Estimate SE
## elpd_loo -606.7 125.0
## p_loo 60.0 38.5
## looic 1213.4 250.1
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 203 98.5% 2
## (0.5, 0.7] (ok) 3 1.5% 70
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## Model comparisons:
## elpd_diff se_diff
## fit_zinb3 0.0 0.0
## fit_zinb1 -2.8 2.4
## fit_zinb2 -6.7 4.5
## fit_zinb0 -23.2 22.4
## fit_zinb4 -38.2 26.4
## Family: zero_inflated_poisson
## Links: mu = log; zi = identity
## Formula: insectos ~ Tratamiento + Finca
## Data: bat_vaca_conv (Number of observations: 206)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.41 0.11 1.19 1.63 1.00 2785 2606
## TratamientoPotrero 0.51 0.11 0.30 0.72 1.00 3056 2569
## FincaANDORRA2 -0.66 0.13 -0.91 -0.41 1.00 3078 2873
## FincaLAPRADERA -1.17 0.18 -1.54 -0.84 1.00 3109 2731
## FincaPORVENIR -0.98 0.20 -1.38 -0.61 1.00 2811 2854
## FincaROSANIA -0.23 0.12 -0.47 0.01 1.00 2893 2608
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## zi 0.32 0.04 0.25 0.39 1.00 3180 2798
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## $iterations
## NULL
##
## $chains
## [1] "chain:1" "chain:2" "chain:3" "chain:4"
##
## $parameters
## [1] "b_Intercept" "b_TratamientoPotrero" "b_FincaANDORRA2"
## [4] "b_FincaLAPRADERA" "b_FincaPORVENIR" "b_FincaROSANIA"
## [7] "zi" "lp__"
color_scheme_set("blue")
mcmc_hist(posterior, pars = c("b_Intercept", "b_TratamientoPotrero" , "zi"))Si hay diferencias en la distribuciones de los posteriores de Encierro y Potrero. La difencia entre los dos es la cantidad que remueven los murcielagos