generalized linear mixd effect models

library packages

library(tidyverse)

bring in the data

# #plot metadata
meta<-read.csv("Plot_meta.csv", header = T)
# head(meta)
meta <- meta %>% select(PlotID, Trt)


#AG counts per transect 
ag <- read.csv("final_outputs/ag_count.csv")
#make uniform column names
ag$LineKey<-ag$PrimaryKey_line
ag$PlotID <- ag$PrimaryKey


#shrub counts per tranect
shrubs<- read.csv("shrub_count/lg_shrub_count.csv", header=T)
head(shrubs)

##   old_name PlotID Num Trt SH_GR pasture old_point  X X.1  T LineKey shrub_size
## 1 HSHG_C_1   105T 105   T  HSHG      HG         3 NA     NA    <NA>       <NA>
## 2 HSHG_C_5   103C 103   C  HSHG      KC        35 NA      2  103C_2         LG
## 3 HSHG_C_5   103C 103   C  HSHG      KC        35 NA     15 103C_15         LG
## 4 HSHG_C_5   103C 103   C  HSHG      KC        35 NA     28 103C_28         LG
## 5 HSHG_C_6   104C 104   C  HSHG      KC        36 NA      2  104C_2         LG
## 6 HSHG_C_6   104C 104   C  HSHG      KC        36 NA     15 104C_15         LG
##   size_count
## 1          0
## 2         96
## 3         87
## 4        129
## 5        124
## 6         65

shrubs$shrub_count <- shrubs$size_count
shrubs <- shrubs %>% select(PlotID,LineKey, shrub_count)



#percent functional group cover for shrub cover
PFXcover<- read.csv("PFXcover.csv", header=T)
head(PFXcover)

##   PrimaryKey LineKey AF_I     AF_N     AG_I      GAP OTHER     PF_N PG_I
## 1       101C 101C_15    5 1.666667 5.000000 36.66667   100 5.000000    0
## 2       101C  101C_2    0 3.333333 1.666667 18.33333   100 5.000000    0
## 3       101C 101C_28    0 0.000000 0.000000 35.00000   100 5.000000    0
## 4       101T 101T_15    0 1.666667 1.666667 30.00000   100 6.666667    0
## 5       101T  101T_2    0 0.000000 0.000000 36.66667   100 6.666667    0
## 6       101T 101T_28    0 0.000000 1.666667 31.66667   100 3.333333    0
##       PG_N     SH_N PlotID   Yr Trt SH_GR shrub_count PF_I
## 1 23.33333 36.66667   101C 2021   C  HSHG    57.33333   NA
## 2 25.00000 63.33333   101C 2021   C  HSHG    57.33333   NA
## 3 15.00000 51.66667   101C 2021   C  HSHG    57.33333   NA
## 4 31.66667 43.33333   101T 2021   T  HSHG    61.00000   NA
## 5 30.00000 38.33333   101T 2021   T  HSHG    61.00000   NA
## 6 23.33333 48.33333   101T 2021   T  HSHG    61.00000   NA

SHcov <- PFXcover %>% select(PlotID, LineKey, SH_N)

Merge the data by transect

ag_res <- left_join(ag, shrubs, 
                    by = c("PlotID", "LineKey"))

# add shrub cover 
ag_res <- left_join(ag_res, SHcov, 
                    by = c("PlotID", "LineKey"))

# add shrub cover 
ag_res <- left_join(ag_res, meta, 
                    by = c("PlotID"))


head(ag)

##   PrimaryKey_line PrimaryKey BRTE BRJA both   Yr LineKey PlotID
## 1         101C_15       101C    3   NA    3 2021 101C_15   101C
## 2          101C_2       101C    1   NA    1 2021  101C_2   101C
## 3         101T_15       101T    1   NA    1 2021 101T_15   101T
## 4         101T_28       101T    1   NA    1 2021 101T_28   101T
## 5         102T_15       102T    2   NA    2 2021 102T_15   102T
## 6          102T_2       102T    2   NA    2 2021  102T_2   102T

clean up the data

#  remove 403C and T
ag_res <- ag_res %>% filter(!(PlotID == "403C"|
                                PlotID == "403T"))

#fill any NAs with 0
ag_res$shrub_count[is.na(ag_res$shrub_count)] <- 0

Explore colinearity

cor(ag_res$SH_N, ag_res$shrub_count)

## [1] 0.7126749

standardize continuous model inputs to hopefully assist in model convergence

ag_res$Shrub_count_stan <- scale(ag_res$shrub_count)
ag_res$Shrub_cov_stan <- scale(ag_res$SH_N)

test random effects structure

prepare the data

ag_res$Yr <- factor(ag_res$Yr)

# Trt is C (control) or T (treatment) with herbicide indaziflam 
ag_res$Trt <- factor(ag_res$Trt)

library(lme4)

## Warning: package 'lme4' was built under R version 4.0.5

## Loading required package: Matrix

## Warning: package 'Matrix' was built under R version 4.0.5

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

glm1<- glmer(both ~ Trt * Shrub_count_stan +(1|PlotID),
           family= poisson,
           data=ag_res)

glm2<- glmer(both ~ Trt * Shrub_count_stan +(1|Yr),
           family= poisson,
           data=ag_res)
glm3<- glmer(both ~ Trt * Shrub_count_stan +(1|PlotID:Yr),
           family= poisson,
           data=ag_res)

anova(glm1, glm2, glm3)

## Data: ag_res
## Models:
## glm1: both ~ Trt * Shrub_count_stan + (1 | PlotID)
## glm2: both ~ Trt * Shrub_count_stan + (1 | Yr)
## glm3: both ~ Trt * Shrub_count_stan + (1 | PlotID:Yr)
##      npar    AIC    BIC   logLik deviance   Chisq Df Pr(>Chisq)
## glm1    5 2201.2 2219.0 -1095.60   2191.2                      
## glm2    5 2144.9 2162.8 -1067.47   2134.9  56.256  0           
## glm3    5 1629.5 1647.4  -809.77   1619.5 515.408  0

AIC(glm1, glm2, glm3) #glm3 is best, with nested random effects

##      df      AIC
## glm1  5 2201.195
## glm2  5 2144.938
## glm3  5 1629.530

library(lme4)

glm4<- glmer(both ~ Trt * SH_N +(1|PlotID),
           family= poisson,
           data=ag_res)

glm5<- glmer(both ~ Trt * SH_N +(1|Yr),
           family= poisson,
           data=ag_res)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?

glm6<- glmer(both ~ Trt * SH_N +(1|PlotID:Yr),
           family= poisson,
           data=ag_res)

anova(glm4, glm5, glm6)

## Data: ag_res
## Models:
## glm4: both ~ Trt * SH_N + (1 | PlotID)
## glm5: both ~ Trt * SH_N + (1 | Yr)
## glm6: both ~ Trt * SH_N + (1 | PlotID:Yr)
##      npar    AIC    BIC   logLik deviance  Chisq Df Pr(>Chisq)
## glm4    5 2150.2 2168.1 -1070.12   2140.2                     
## glm5    5 2210.6 2228.4 -1100.30   2200.6   0.00  0           
## glm6    5 1574.4 1592.3  -782.22   1564.4 636.17  0

AIC(glm4, glm5, glm6) #glm6 is best, with nested random effects

##      df      AIC
## glm4  5 2150.250
## glm5  5 2210.603
## glm6  5 1574.431

compare shrub cover to count

#3 is count 
#6 is cover


anova(glm3, glm6)

## Data: ag_res
## Models:
## glm3: both ~ Trt * Shrub_count_stan + (1 | PlotID:Yr)
## glm6: both ~ Trt * SH_N + (1 | PlotID:Yr)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## glm3    5 1629.5 1647.4 -809.77   1619.5                     
## glm6    5 1574.4 1592.3 -782.22   1564.4 55.099  0

AIC(glm3, glm6) #glm6 is best, with nested random effects

##      df      AIC
## glm3  5 1629.530
## glm6  5 1574.431

#shrub cover wins

compare intercept only to slope only

#random intercept only
glm6<- glmer(both ~ Trt * SH_N +(1|PlotID:Yr),
           family= poisson,
           data=ag_res)

#slope and intercept
glm7<- glmer(both ~ Trt * SH_N +(1|PlotID:Yr)+
               (0+SH_N|PlotID:Yr),
           family= poisson,
           data=ag_res)


anova(glm6, glm7)

## Data: ag_res
## Models:
## glm6: both ~ Trt * SH_N + (1 | PlotID:Yr)
## glm7: both ~ Trt * SH_N + (1 | PlotID:Yr) + (0 + SH_N | PlotID:Yr)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## glm6    5 1574.4 1592.3 -782.22   1564.4                         
## glm7    6 1522.5 1544.0 -755.27   1510.5 53.893  1  2.118e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

AIC(glm6, glm7)

##      df      AIC
## glm6  5 1574.431
## glm7  6 1522.538

#glm7 is best, with nested random effects, shrub cover, and slope and intercept
#shrub cover wins

final model

#slope and intercept
m_final<- glmer(both ~ Trt * SH_N +(1|PlotID:Yr)+
               (0+SH_N|PlotID:Yr),
           family= poisson,
           data=ag_res)
summary(m_final)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: both ~ Trt * SH_N + (1 | PlotID:Yr) + (0 + SH_N | PlotID:Yr)
##    Data: ag_res
## 
##      AIC      BIC   logLik deviance df.resid 
##   1522.5   1543.9   -755.3   1510.5      256 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0941 -0.6190 -0.1387  0.4596  3.6453 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev.
##  PlotID.Yr   (Intercept) 0.5070838 0.71210 
##  PlotID.Yr.1 SH_N        0.0003657 0.01912 
## Number of obs: 262, groups:  PlotID:Yr, 55
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  2.149744   0.195437  11.000  < 2e-16 ***
## TrtT        -0.972843   0.276243  -3.522 0.000429 ***
## SH_N         0.010556   0.005186   2.035 0.041817 *  
## TrtT:SH_N   -0.006581   0.007469  -0.881 0.378279    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr) TrtT   SH_N  
## TrtT      -0.679              
## SH_N      -0.457  0.303       
## TrtT:SH_N  0.296 -0.458 -0.679

vizualizing glmer https://stackoverflow.com/questions/53255211/plotting-random-effects-for-a-binomial-glmer-in-ggplot/53447278#53447278

library(ggeffects)
library(sjPlot)

## #refugeeswelcome

mydf <- ggpredict(m_final, terms = c("SH_N", "Trt"))

gg<- ggplot(mydf, aes(x, y = predicted, color = group)) +
    geom_line(size=2) +
  scale_color_brewer(palette="Dark2", name = "Indaziflam application",
                     labels = c("Control", "Treatment"))+
    geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .1)+
  labs(x = "Shrub % cover", y= "Predicted annual grass abundance (count)")+
  scale_y_continuous(limits=c(0,55), expand=c(0,0))+
  scale_x_continuous(limits=c(0,91), expand=c(0,0))+
  theme_classic()+
  theme(legend.position = "top")
gg

# 
# mydf1 <- ggpredict(m_final, terms = c("SH_N", "Trt", "Yr"))
# 
# gg<- ggplot(mydf1, aes(x, y = predicted, color = group)) +
#     geom_line(size=2) +
#   scale_color_brewer(palette="Dark2", labels = c("Control", "Treatment"))+
#     geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .1)+
#   labs(x = "Shrub % cover", y= "predicted AG count")+
#   scale_y_continuous(limits=c(0,45), expand=c(0,0))+
#   scale_x_continuous(limits=c(0,90), expand=c(0,0))+
#   theme_classic()+
#   theme(legend.position = "top")+
#   facet_grid(~facet)
# gg

plot data outside of the model

ggag<-ggplot(data=ag_res, aes(x=SH_N, y=both, color=Trt))+
  geom_point(size=3)+
  labs(x="shrub cover", y="Annual grass abundance (count)")+
  theme_classic()+
  scale_y_continuous(limits=c(0,55), expand=c(0,0))+
  scale_x_continuous(limits=c(0,91), expand=c(0,0))+
  scale_color_brewer(palette="Dark2", name = "Indaziflam application",
                    labels = c("Control", "Treatment"))+
  theme(legend.position = "top")
summary(ag_res$both)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   3.000   7.000   9.893  12.000  63.000

ggag

## Warning: Removed 1 rows containing missing values (geom_point).