## =========================================================
## GLMM (impact = %ANTHROPIC_2024 − %ANTHROPIC_1985)
## =========================================================

library(readxl); library(dplyr); library(glmmTMB); library(emmeans)
## 
## Anexando pacote: 'dplyr'
## Os seguintes objetos são mascarados por 'package:stats':
## 
##     filter, lag
## Os seguintes objetos são mascarados por 'package:base':
## 
##     intersect, setdiff, setequal, union
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
dat <- read_excel("Land_use_biome_250.xlsx") %>%
  transmute(impact=`%IMPACT`, baseline=`%ANTHROPIC_1985`,
            Biome=factor(Biome, levels=c("Atlantic_Forest","Caatinga","Cerrado","Pampa","Pantanal","Amazon")),
            Protection_bin=factor(ifelse(Protection_status=="Unprotected","Unprotected","Protected"),
                                  levels=c("Unprotected","Protected")),
            Watersheds_9=factor(Watersheds_9)) %>%
  filter(complete.cases(.)) %>%
  mutate(baseline_z=as.numeric(scale(baseline)))
## New names:
## • `HISTO_0_1985` -> `HISTO_0_1985...29`
## • `HISTO_0_1985` -> `HISTO_0_1985...33`
## • `HISTO_0_2024` -> `HISTO_0_2024...59`
## • `HISTO_0_2024` -> `HISTO_0_2024...63`
count(dat, Biome, Protection_bin)
## # A tibble: 12 × 3
##    Biome           Protection_bin     n
##    <fct>           <fct>          <int>
##  1 Atlantic_Forest Unprotected       93
##  2 Atlantic_Forest Protected         26
##  3 Caatinga        Unprotected      139
##  4 Caatinga        Protected          7
##  5 Cerrado         Unprotected       72
##  6 Cerrado         Protected         11
##  7 Pampa           Unprotected      236
##  8 Pampa           Protected         15
##  9 Pantanal        Unprotected       56
## 10 Pantanal        Protected          3
## 11 Amazon          Unprotected       41
## 12 Amazon          Protected         13
fit <- function(form) glmmTMB(form, data=dat, family=glmmTMB::t_family(),
                              ziformula=~1, dispformula=~Biome + baseline_z)

m_final <- fit(impact ~ baseline_z + Biome*Protection_bin + (1|Watersheds_9))
m_add   <- fit(impact ~ baseline_z + Biome + Protection_bin + (1|Watersheds_9))
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; non-positive-definite Hessian matrix. See vignette('troubleshooting')
p_int <- as.numeric(joint_tests(m_final, at=list(baseline_z=0))[
  "Biome:Protection_bin","p.value"
])

m_model <- if (is.na(p_int) || p_int>0.05) m_add else m_final
summary(m_model)
##  Family: t  ( identity )
## Formula:          
## impact ~ baseline_z + Biome + Protection_bin + (1 | Watersheds_9)
## Zero inflation:          ~1
## Dispersion:              ~Biome + baseline_z
## Data: dat
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##        NA        NA        NA        NA       694 
## 
## Random effects:
## 
## Conditional model:
##  Groups       Name        Variance Std.Dev.
##  Watersheds_9 (Intercept) 151.1    12.29   
## Number of obs: 712, groups:  Watersheds_9, 395
## 
## Conditional model:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               18.767      2.054   9.138  < 2e-16 ***
## baseline_z               -15.148      1.161 -13.051  < 2e-16 ***
## BiomeCaatinga             -5.026      2.816  -1.785 0.074336 .  
## BiomeCerrado              -2.696      2.837  -0.950 0.341965    
## BiomePampa                -9.269      2.553  -3.631 0.000283 ***
## BiomePantanal              1.300      4.978   0.261 0.793917    
## BiomeAmazon               15.238      7.626   1.998 0.045701 *  
## Protection_binProtected   -9.821      3.618  -2.715 0.006634 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Zero-inflation model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.28657    0.09324   -13.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Dispersion model:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    2.97234    0.09369   31.73  < 2e-16 ***
## BiomeCaatinga  0.16267    0.12363    1.32    0.188    
## BiomeCerrado  -0.52831    0.22709   -2.33    0.020 *  
## BiomePampa     0.06132    0.11473    0.53    0.593    
## BiomePantanal  0.18188    0.18043    1.01    0.313    
## BiomeAmazon    0.25975    0.21967    1.18    0.237    
## baseline_z    -0.23662    0.05441   -4.35 1.37e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
at0 <- list(baseline_z=0)
emmeans(m_model, ~1, at=at0)
##  1       emmean   SE  df asymp.LCL asymp.UCL
##  overall   13.8 2.29 Inf       9.3      18.3
## 
## Results are averaged over the levels of: Biome, Protection_bin 
## Confidence level used: 0.95
joint_tests(m_model, at=at0)
##  model term     df1 df2 F.ratio  Chisq p.value
##  Biome            5 Inf  14.097 70.485  <.0001
##  Protection_bin   1 Inf   7.370  7.370  0.0066
pairs(emmeans(m_model, ~Biome, at=at0), adjust="tukey")
##  contrast                   estimate   SE  df z.ratio p.value
##  Atlantic_Forest - Caatinga     5.03 2.82 Inf   1.785  0.4758
##  Atlantic_Forest - Cerrado      2.70 2.84 Inf   0.950  0.9333
##  Atlantic_Forest - Pampa        9.27 2.55 Inf   3.631  0.0038
##  Atlantic_Forest - Pantanal    -1.30 4.98 Inf  -0.261  0.9998
##  Atlantic_Forest - Amazon     -15.24 7.63 Inf  -1.998  0.3432
##  Caatinga - Cerrado            -2.33 3.17 Inf  -0.735  0.9777
##  Caatinga - Pampa               4.24 3.33 Inf   1.276  0.7983
##  Caatinga - Pantanal           -6.33 5.94 Inf  -1.066  0.8950
##  Caatinga - Amazon            -20.26 6.92 Inf  -2.927  0.0400
##  Cerrado - Pampa                6.57 2.99 Inf   2.196  0.2396
##  Cerrado - Pantanal            -4.00 5.65 Inf  -0.707  0.9812
##  Cerrado - Amazon             -17.93 6.88 Inf  -2.606  0.0957
##  Pampa - Pantanal             -10.57 5.87 Inf  -1.800  0.4660
##  Pampa - Amazon               -24.51 6.46 Inf  -3.791  0.0021
##  Pantanal - Amazon            -13.94 7.48 Inf  -1.864  0.4249
## 
## Results are averaged over the levels of: Protection_bin 
## P value adjustment: tukey method for comparing a family of 6 estimates
emmeans(m_model, ~Protection_bin, at=at0)
##  Protection_bin emmean   SE  df asymp.LCL asymp.UCL
##  Unprotected     18.69 1.70 Inf     15.35      22.0
##  Protected        8.87 3.76 Inf      1.51      16.2
## 
## Results are averaged over the levels of: Biome 
## Confidence level used: 0.95
## Diagnóstico DHARMa ----
library(DHARMa)
## This is DHARMa 0.4.7. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
set.seed(123)
res <- simulateResiduals(m_model, n=1000)
plot(res, quantreg=FALSE)

testUniformity(res); testDispersion(res); testZeroInflation(res); testOutliers(res)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.042697, p-value = 0.1491
## alternative hypothesis: two-sided

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.0072, p-value = 0.892
## alternative hypothesis: two.sided

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 1.0375, p-value = 0.646
## alternative hypothesis: two.sided

## 
##  DHARMa outlier test based on exact binomial test with approximate
##  expectations
## 
## data:  res
## outliers at both margin(s) = 1, observations = 712, p-value = 1
## alternative hypothesis: true probability of success is not equal to 0.001998002
## 95 percent confidence interval:
##  3.555809e-05 7.800265e-03
## sample estimates:
## frequency of outliers (expected: 0.001998001998002 ) 
##                                          0.001404494
plotResiduals(res, dat$Biome, quantreg=FALSE)

plotResiduals(res, dat$baseline_z, quantreg=FALSE)

## 5) Inferência (baseline na média: baseline_z = 0) ----
at0 <- list(baseline_z = 0)

## H1: média global de impact > 0 (teste unilateral;H0: impact≤0; H1: impact>0)
emm_global <- emmeans(m_model, ~1, weights="proportional", at=at0)
summary(emm_global, infer=TRUE)
##  1       emmean   SE  df asymp.LCL asymp.UCL z.ratio p.value
##  overall   14.4 1.28 Inf      11.9      16.9  11.215  <.0001
## 
## Results are averaged over the levels of: Biome, Protection_bin 
## Confidence level used: 0.95
test(emm_global, null=0, side=">")
##  1       emmean   SE  df z.ratio p.value
##  overall   14.4 1.28 Inf  11.215  <.0001
## 
## Results are averaged over the levels of: Biome, Protection_bin 
## P values are right-tailed
## H2: diferenças entre biomas (efeito global + pares Tukey)
emm_biome <- emmeans(m_model, ~Biome, weights="proportional", at=at0)
joint_tests(m_model, at=at0)                     # teste global (Biome + Protection_bin)
##  model term     df1 df2 F.ratio  Chisq p.value
##  Biome            5 Inf  14.097 70.485  <.0001
##  Protection_bin   1 Inf   7.370  7.370  0.0066
pairs(emm_biome, adjust="tukey")                 # pares entre biomas
##  contrast                   estimate   SE  df z.ratio p.value
##  Atlantic_Forest - Caatinga     5.03 2.82 Inf   1.785  0.4758
##  Atlantic_Forest - Cerrado      2.70 2.84 Inf   0.950  0.9333
##  Atlantic_Forest - Pampa        9.27 2.55 Inf   3.631  0.0038
##  Atlantic_Forest - Pantanal    -1.30 4.98 Inf  -0.261  0.9998
##  Atlantic_Forest - Amazon     -15.24 7.63 Inf  -1.998  0.3432
##  Caatinga - Cerrado            -2.33 3.17 Inf  -0.735  0.9777
##  Caatinga - Pampa               4.24 3.33 Inf   1.276  0.7983
##  Caatinga - Pantanal           -6.33 5.94 Inf  -1.066  0.8950
##  Caatinga - Amazon            -20.26 6.92 Inf  -2.927  0.0400
##  Cerrado - Pampa                6.57 2.99 Inf   2.196  0.2396
##  Cerrado - Pantanal            -4.00 5.65 Inf  -0.707  0.9812
##  Cerrado - Amazon             -17.93 6.88 Inf  -2.606  0.0957
##  Pampa - Pantanal             -10.57 5.87 Inf  -1.800  0.4660
##  Pampa - Amazon               -24.51 6.46 Inf  -3.791  0.0021
##  Pantanal - Amazon            -13.94 7.48 Inf  -1.864  0.4249
## 
## Results are averaged over the levels of: Protection_bin 
## P value adjustment: tukey method for comparing a family of 6 estimates
## H3: Protected vs Unprotected (efeito global + (opcional) efeito simples por bioma)
emm_prot <- emmeans(m_model, ~Protection_bin, weights="proportional", at=at0)
summary(emm_prot, infer=TRUE)
##  Protection_bin emmean   SE  df asymp.LCL asymp.UCL z.ratio p.value
##  Unprotected      15.4 1.31 Inf     12.85      18.0  11.749  <.0001
##  Protected         5.6 3.56 Inf     -1.39      12.6   1.570  0.1163
## 
## Results are averaged over the levels of: Biome 
## Confidence level used: 0.95
pairs(emm_prot)                                  # Unprotected - Protected
##  contrast                estimate   SE  df z.ratio p.value
##  Unprotected - Protected     9.82 3.62 Inf   2.715  0.0066
## 
## Results are averaged over the levels of: Biome