## =========================================================
## GLMM (impact = %ANTHROPIC_2024 − %ANTHROPIC_1985)
## =========================================================
library(readxl); library(dplyr); library(glmmTMB); library(emmeans)
##
## Anexando pacote: 'dplyr'
## Os seguintes objetos são mascarados por 'package:stats':
##
## filter, lag
## Os seguintes objetos são mascarados por 'package:base':
##
## intersect, setdiff, setequal, union
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
dat <- read_excel("Land_use_biome_250.xlsx") %>%
transmute(impact=`%IMPACT`, baseline=`%ANTHROPIC_1985`,
Biome=factor(Biome, levels=c("Atlantic_Forest","Caatinga","Cerrado","Pampa","Pantanal","Amazon")),
Protection_bin=factor(ifelse(Protection_status=="Unprotected","Unprotected","Protected"),
levels=c("Unprotected","Protected")),
Watersheds_9=factor(Watersheds_9)) %>%
filter(complete.cases(.)) %>%
mutate(baseline_z=as.numeric(scale(baseline)))
## New names:
## • `HISTO_0_1985` -> `HISTO_0_1985...29`
## • `HISTO_0_1985` -> `HISTO_0_1985...33`
## • `HISTO_0_2024` -> `HISTO_0_2024...59`
## • `HISTO_0_2024` -> `HISTO_0_2024...63`
count(dat, Biome, Protection_bin)
## # A tibble: 12 × 3
## Biome Protection_bin n
## <fct> <fct> <int>
## 1 Atlantic_Forest Unprotected 93
## 2 Atlantic_Forest Protected 26
## 3 Caatinga Unprotected 139
## 4 Caatinga Protected 7
## 5 Cerrado Unprotected 72
## 6 Cerrado Protected 11
## 7 Pampa Unprotected 236
## 8 Pampa Protected 15
## 9 Pantanal Unprotected 56
## 10 Pantanal Protected 3
## 11 Amazon Unprotected 41
## 12 Amazon Protected 13
fit <- function(form) glmmTMB(form, data=dat, family=glmmTMB::t_family(),
ziformula=~1, dispformula=~Biome + baseline_z)
m_final <- fit(impact ~ baseline_z + Biome*Protection_bin + (1|Watersheds_9))
m_add <- fit(impact ~ baseline_z + Biome + Protection_bin + (1|Watersheds_9))
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
p_int <- as.numeric(joint_tests(m_final, at=list(baseline_z=0))[
"Biome:Protection_bin","p.value"
])
m_model <- if (is.na(p_int) || p_int>0.05) m_add else m_final
summary(m_model)
## Family: t ( identity )
## Formula:
## impact ~ baseline_z + Biome + Protection_bin + (1 | Watersheds_9)
## Zero inflation: ~1
## Dispersion: ~Biome + baseline_z
## Data: dat
##
## AIC BIC logLik -2*log(L) df.resid
## NA NA NA NA 694
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Watersheds_9 (Intercept) 151.1 12.29
## Number of obs: 712, groups: Watersheds_9, 395
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 18.767 2.054 9.138 < 2e-16 ***
## baseline_z -15.148 1.161 -13.051 < 2e-16 ***
## BiomeCaatinga -5.026 2.816 -1.785 0.074336 .
## BiomeCerrado -2.696 2.837 -0.950 0.341965
## BiomePampa -9.269 2.553 -3.631 0.000283 ***
## BiomePantanal 1.300 4.978 0.261 0.793917
## BiomeAmazon 15.238 7.626 1.998 0.045701 *
## Protection_binProtected -9.821 3.618 -2.715 0.006634 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Zero-inflation model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.28657 0.09324 -13.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Dispersion model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.97234 0.09369 31.73 < 2e-16 ***
## BiomeCaatinga 0.16267 0.12363 1.32 0.188
## BiomeCerrado -0.52831 0.22709 -2.33 0.020 *
## BiomePampa 0.06132 0.11473 0.53 0.593
## BiomePantanal 0.18188 0.18043 1.01 0.313
## BiomeAmazon 0.25975 0.21967 1.18 0.237
## baseline_z -0.23662 0.05441 -4.35 1.37e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
at0 <- list(baseline_z=0)
emmeans(m_model, ~1, at=at0)
## 1 emmean SE df asymp.LCL asymp.UCL
## overall 13.8 2.29 Inf 9.3 18.3
##
## Results are averaged over the levels of: Biome, Protection_bin
## Confidence level used: 0.95
joint_tests(m_model, at=at0)
## model term df1 df2 F.ratio Chisq p.value
## Biome 5 Inf 14.097 70.485 <.0001
## Protection_bin 1 Inf 7.370 7.370 0.0066
pairs(emmeans(m_model, ~Biome, at=at0), adjust="tukey")
## contrast estimate SE df z.ratio p.value
## Atlantic_Forest - Caatinga 5.03 2.82 Inf 1.785 0.4758
## Atlantic_Forest - Cerrado 2.70 2.84 Inf 0.950 0.9333
## Atlantic_Forest - Pampa 9.27 2.55 Inf 3.631 0.0038
## Atlantic_Forest - Pantanal -1.30 4.98 Inf -0.261 0.9998
## Atlantic_Forest - Amazon -15.24 7.63 Inf -1.998 0.3432
## Caatinga - Cerrado -2.33 3.17 Inf -0.735 0.9777
## Caatinga - Pampa 4.24 3.33 Inf 1.276 0.7983
## Caatinga - Pantanal -6.33 5.94 Inf -1.066 0.8950
## Caatinga - Amazon -20.26 6.92 Inf -2.927 0.0400
## Cerrado - Pampa 6.57 2.99 Inf 2.196 0.2396
## Cerrado - Pantanal -4.00 5.65 Inf -0.707 0.9812
## Cerrado - Amazon -17.93 6.88 Inf -2.606 0.0957
## Pampa - Pantanal -10.57 5.87 Inf -1.800 0.4660
## Pampa - Amazon -24.51 6.46 Inf -3.791 0.0021
## Pantanal - Amazon -13.94 7.48 Inf -1.864 0.4249
##
## Results are averaged over the levels of: Protection_bin
## P value adjustment: tukey method for comparing a family of 6 estimates
emmeans(m_model, ~Protection_bin, at=at0)
## Protection_bin emmean SE df asymp.LCL asymp.UCL
## Unprotected 18.69 1.70 Inf 15.35 22.0
## Protected 8.87 3.76 Inf 1.51 16.2
##
## Results are averaged over the levels of: Biome
## Confidence level used: 0.95
## Diagnóstico DHARMa ----
library(DHARMa)
## This is DHARMa 0.4.7. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
set.seed(123)
res <- simulateResiduals(m_model, n=1000)
plot(res, quantreg=FALSE)

testUniformity(res); testDispersion(res); testZeroInflation(res); testOutliers(res)

##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.042697, p-value = 0.1491
## alternative hypothesis: two-sided

##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.0072, p-value = 0.892
## alternative hypothesis: two.sided

##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 1.0375, p-value = 0.646
## alternative hypothesis: two.sided

##
## DHARMa outlier test based on exact binomial test with approximate
## expectations
##
## data: res
## outliers at both margin(s) = 1, observations = 712, p-value = 1
## alternative hypothesis: true probability of success is not equal to 0.001998002
## 95 percent confidence interval:
## 3.555809e-05 7.800265e-03
## sample estimates:
## frequency of outliers (expected: 0.001998001998002 )
## 0.001404494
plotResiduals(res, dat$Biome, quantreg=FALSE)

plotResiduals(res, dat$baseline_z, quantreg=FALSE)

## 5) Inferência (baseline na média: baseline_z = 0) ----
at0 <- list(baseline_z = 0)
## H1: média global de impact > 0 (teste unilateral;H0: impact≤0; H1: impact>0)
emm_global <- emmeans(m_model, ~1, weights="proportional", at=at0)
summary(emm_global, infer=TRUE)
## 1 emmean SE df asymp.LCL asymp.UCL z.ratio p.value
## overall 14.4 1.28 Inf 11.9 16.9 11.215 <.0001
##
## Results are averaged over the levels of: Biome, Protection_bin
## Confidence level used: 0.95
test(emm_global, null=0, side=">")
## 1 emmean SE df z.ratio p.value
## overall 14.4 1.28 Inf 11.215 <.0001
##
## Results are averaged over the levels of: Biome, Protection_bin
## P values are right-tailed
## H2: diferenças entre biomas (efeito global + pares Tukey)
emm_biome <- emmeans(m_model, ~Biome, weights="proportional", at=at0)
joint_tests(m_model, at=at0) # teste global (Biome + Protection_bin)
## model term df1 df2 F.ratio Chisq p.value
## Biome 5 Inf 14.097 70.485 <.0001
## Protection_bin 1 Inf 7.370 7.370 0.0066
pairs(emm_biome, adjust="tukey") # pares entre biomas
## contrast estimate SE df z.ratio p.value
## Atlantic_Forest - Caatinga 5.03 2.82 Inf 1.785 0.4758
## Atlantic_Forest - Cerrado 2.70 2.84 Inf 0.950 0.9333
## Atlantic_Forest - Pampa 9.27 2.55 Inf 3.631 0.0038
## Atlantic_Forest - Pantanal -1.30 4.98 Inf -0.261 0.9998
## Atlantic_Forest - Amazon -15.24 7.63 Inf -1.998 0.3432
## Caatinga - Cerrado -2.33 3.17 Inf -0.735 0.9777
## Caatinga - Pampa 4.24 3.33 Inf 1.276 0.7983
## Caatinga - Pantanal -6.33 5.94 Inf -1.066 0.8950
## Caatinga - Amazon -20.26 6.92 Inf -2.927 0.0400
## Cerrado - Pampa 6.57 2.99 Inf 2.196 0.2396
## Cerrado - Pantanal -4.00 5.65 Inf -0.707 0.9812
## Cerrado - Amazon -17.93 6.88 Inf -2.606 0.0957
## Pampa - Pantanal -10.57 5.87 Inf -1.800 0.4660
## Pampa - Amazon -24.51 6.46 Inf -3.791 0.0021
## Pantanal - Amazon -13.94 7.48 Inf -1.864 0.4249
##
## Results are averaged over the levels of: Protection_bin
## P value adjustment: tukey method for comparing a family of 6 estimates
## H3: Protected vs Unprotected (efeito global + (opcional) efeito simples por bioma)
emm_prot <- emmeans(m_model, ~Protection_bin, weights="proportional", at=at0)
summary(emm_prot, infer=TRUE)
## Protection_bin emmean SE df asymp.LCL asymp.UCL z.ratio p.value
## Unprotected 15.4 1.31 Inf 12.85 18.0 11.749 <.0001
## Protected 5.6 3.56 Inf -1.39 12.6 1.570 0.1163
##
## Results are averaged over the levels of: Biome
## Confidence level used: 0.95
pairs(emm_prot) # Unprotected - Protected
## contrast estimate SE df z.ratio p.value
## Unprotected - Protected 9.82 3.62 Inf 2.715 0.0066
##
## Results are averaged over the levels of: Biome