getting % cover data fully ready for analysis

library(tidyverse)
library(nlme)
library(lattice)
library(lme4)

read in data

PFXcover <- read.csv("PFXcover.csv", header=T)

# plot info included in the datasets
# Yr is sample year, either 2021 or 2022
PFXcover$Yr <- factor(PFXcover$Yr)

# SH_GR is plant community strata
PFXcover$SH_GR <- factor(PFXcover$SH_GR)

# Trt is C (control) or T (treatment) with herbicide indaziflam 
PFXcover$Trt <- factor(PFXcover$Trt)

shrub_count is average count of shrubs > 15 cm within 2 x 30 m belt transect this is a proxy for shrub abundance I need to add in other environmental factors like soil type, aspect, and slope but this is what we are working with right now

Not that this is nested transect data. 3 observations (transects) per plot, per year for two consecutive years

research question:

Did indaziflam treatment areas have lower annual grass cover? How did this differ by year and veg structure?

stage 1

check for colinearity between shrub count and shrub % cover

#head(PFXcover)
cor(PFXcover$shrub_count, PFXcover$SH_N)

## [1] 0.6899687

seems like yes, there is some colinearily between big shrub count and % cover worth noting that % cover of shrub is unique by year, count is not.

stage 2

standardize continuous model inputs to hopefully assist in model convergence

PFXcover$Shrub_count_stan <- scale(PFXcover$shrub_count)
PFXcover$Shrub_cov_stan <- scale(PFXcover$SH_N)

stage 3

test if shrub count or shrub cover is better using AIC

also test two different random effects

need to use poisson because response variable (annual grass cover is generated from count data, and is bound between 0 and 100, and is mostly small numbers)

is this the right way to specify random effects I want it to estimate for each combination of year and either SH_GR or PlotID

models with shrub count

m1<- lme(AG_I ~ Trt * Shrub_count_stan,
         random = (~1|SH_GR),
         data=PFXcover)
m2<- lme(AG_I ~ Trt * Shrub_count_stan,
         random = (~1|PlotID),
         data=PFXcover)
m3<- lme(AG_I ~ Trt * Shrub_count_stan,
         random = (~1|Yr),
         data=PFXcover)


m4<- lme(AG_I ~ Trt * Shrub_count_stan,
         random = list(SH_GR=~1, Yr=~Yr),
         data=PFXcover)

m5<- lme(AG_I ~ Trt * Shrub_count_stan,
         random = list(SH_GR=~1, Yr=~Yr),
         data=PFXcover)

# m5<- lme(AG_I ~ Trt * Shrub_count_stan,
#          random = ~1+ PlotID | Yr, 
#          data=PFXcover)
# too many paramteres

compare models

anova(m1, m2, m3, m4, m5)

##    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m1     1  6 1480.758 1499.781 -734.3790                        
## m2     2  6 1466.583 1485.606 -727.2916                        
## m3     3  6 1443.034 1462.057 -715.5170                        
## m4     4  9 1451.342 1479.876 -716.6708 3 vs 4 2.307653  0.5111
## m5     5  9 1451.342 1479.876 -716.6708

AIC(m1, m2, m3, m4, m5)

##    df      AIC
## m1  6 1480.758
## m2  6 1466.583
## m3  6 1443.034
## m4  9 1451.342
## m5  9 1451.342

#m5 is best

models with shrub cover

m6<- lme(AG_I ~ Trt * Shrub_cov_stan,
         random = (~1|SH_GR),
         data=PFXcover)
m7<- lme(AG_I ~ Trt * Shrub_cov_stan,
         random = (~1|PlotID),
         data=PFXcover)
m8<- lme(AG_I ~ Trt * Shrub_cov_stan,
         random = (~1|Yr),
         data=PFXcover)


#m9<- lme(AG_I ~ Trt * Shrub_cov_stan,
#         random = list(SH_GR=~1, Yr=~Yr),
#         data=PFXcover)
# convergence error

m10<- lme(AG_I ~ Trt * Shrub_cov_stan,
         random = list(PlotID=~1, Yr=~Yr),
         data=PFXcover)

compare models

anova(m6, m7, m8, m10)

##     Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m6      1  6 1489.550 1508.573 -738.7751                        
## m7      2  6 1472.328 1491.351 -730.1639                        
## m8      3  6 1456.646 1475.669 -722.3230                        
## m10     4  9 1408.530 1437.065 -695.2651 3 vs 4 54.11581  <.0001

AIC(m6, m7, m8, m10)

##     df      AIC
## m6   6 1489.550
## m7   6 1472.328
## m8   6 1456.646
## m10  9 1408.530

#m10 is best

compare shrub count and shrub cover

anova (m5, m10)

## Warning in anova.lme(m5, m10): fitted objects with different fixed effects. REML
## comparisons are not meaningful.

##     Model df      AIC      BIC    logLik
## m5      1  9 1451.342 1479.876 -716.6708
## m10     2  9 1408.530 1437.065 -695.2651

AIC(m5, m10)

##     df      AIC
## m5   9 1451.342
## m10  9 1408.530

shrub count (m5) is better, we will proceed with that

m5<- lme(AG_I ~ Trt * Shrub_count_stan,
         random = list(PlotID=~1, Yr=~Yr),
         data=PFXcover)

m5

## Linear mixed-effects model fit by REML
##   Data: PFXcover 
##   Log-restricted-likelihood: -693.7601
##   Fixed: AG_I ~ Trt * Shrub_count_stan 
##           (Intercept)                  TrtT      Shrub_count_stan 
##             13.924803             -8.869317             -5.879935 
## TrtT:Shrub_count_stan 
##              3.977184 
## 
## Random effects:
##  Formula: ~1 | PlotID
##         (Intercept)
## StdDev: 0.001944343
## 
##  Formula: ~Yr | Yr %in% PlotID
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev    Corr  
## (Intercept)  5.153773 (Intr)
## Yr2022      17.262658 0.145 
## Residual     8.907162       
## 
## Number of Observations: 180
## Number of Groups: 
##         PlotID Yr %in% PlotID 
##             33             62

summary(m5)

## Linear mixed-effects model fit by REML
##   Data: PFXcover 
##       AIC      BIC    logLik
##   1405.52 1434.055 -693.7601
## 
## Random effects:
##  Formula: ~1 | PlotID
##         (Intercept)
## StdDev: 0.001944343
## 
##  Formula: ~Yr | Yr %in% PlotID
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev    Corr  
## (Intercept)  5.153773 (Intr)
## Yr2022      17.262658 0.145 
## Residual     8.907162       
## 
## Fixed effects:  AG_I ~ Trt * Shrub_count_stan 
##                           Value Std.Error  DF   t-value p-value
## (Intercept)           13.924803  1.873649 118  7.431918  0.0000
## TrtT                  -8.869317  2.529591  29 -3.506225  0.0015
## Shrub_count_stan      -5.879935  2.391783  29 -2.458390  0.0202
## TrtT:Shrub_count_stan  3.977184  2.808892  29  1.415927  0.1674
##  Correlation: 
##                       (Intr) TrtT   Shrb__
## TrtT                  -0.741              
## Shrub_count_stan      -0.095  0.070       
## TrtT:Shrub_count_stan  0.081 -0.073 -0.852
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.75724059 -0.42765642 -0.08717075  0.24094590  4.35314237 
## 
## Number of Observations: 180
## Number of Groups: 
##         PlotID Yr %in% PlotID 
##             33             62

stage 4 variance structures

#make simple linear regression to plot the residuals
m1 <- lm(AG_I ~ Trt * Shrub_count_stan, data = PFXcover)
#plot the residuals

op <- par(mfrow = c(2,2), mar=c(4,4,2,2))
plot(m1, which=c(1), col=1, add.smooth=F, caption="")
plot(resid(m1) ~ PFXcover$Trt, xlab = "Trt", ylab = "residuals")
plot(resid(m1) ~ PFXcover$Shrub_count_stan, xlab = "shrub count", ylab = "residuals")
par(op)

try some different variance structures to see if that helps anything

#with no var structures
m5<- lme(AG_I ~ Trt * Shrub_count_stan,
         random = list(PlotID=~1, Yr=~Yr),
         data=PFXcover)

# try var indent for each categorical variance
M5v1 <- lme(AG_I ~ Trt * Shrub_count_stan,
         random = list(PlotID=~1, Yr=~Yr),
         data = PFXcover,
          weights = varIdent(form = ~1 | Trt))

# # from previous versions I know varExp(form =~ shrub_count) might be good
# M5v2 <- lme(AG_I ~ Trt * Shrub_count_stan,
#          random = list(PlotID=~1, Yr=~Yr),
#           data = PFXcover, 
#           weights = varExp(form =~Shrub_count_stan))
# 
# #combination of all 
# M5v3 <- lme(AG_I ~ Trt * Shrub_count_stan,
#          random = list(PlotID=~1, Yr=~Yr),
#           weights = varComb(varIdent(form = ~1 | Trt),
#                             varExp(form =~ Shrub_count_stan)))

anova(m5, M5v1)

##      Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m5       1  9 1405.520 1434.054 -693.7601                        
## M5v1     2 10 1369.828 1401.532 -674.9138 1 vs 2 37.69264  <.0001

AIC(m5, M5v1)

##      df      AIC
## m5    9 1405.520
## M5v1 10 1369.828

plot the updated residuals

op <- par(mfrow = c(2,2), mar=c(4,4,2,2))
plot(m1, which=c(1), col=1, add.smooth=F, caption="")
plot(resid(m5) ~ PFXcover$Trt, xlab = "Trt", ylab = "residuals")
plot(resid(m5) ~ PFXcover$Shrub_count_stan, xlab = "shrub count", ylab = "residuals")



E1 <- resid(M5v1)
coplot(E1 ~ Shrub_count_stan |Trt, data = PFXcover,
       ylab = "residuals")

E2 <- resid(M5v1, type = "normalized")
coplot(E2 ~ Shrub_count_stan |Trt, data = PFXcover,
       ylab = "Normalised residuals")

final model: M5v1

mfinal <- lme(AG_I ~ Trt * Shrub_count_stan,
         random = list(PlotID=~1, Yr=~Yr),
         data = PFXcover,
          weights = varIdent(form = ~1 | Trt))
summary(mfinal)

## Linear mixed-effects model fit by REML
##   Data: PFXcover 
##        AIC      BIC    logLik
##   1369.828 1401.532 -674.9138
## 
## Random effects:
##  Formula: ~1 | PlotID
##         (Intercept)
## StdDev: 0.004350801
## 
##  Formula: ~Yr | Yr %in% PlotID
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev    Corr  
## (Intercept)  2.204853 (Intr)
## Yr2022      19.156764 -0.503
## Residual    12.627728       
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | Trt 
##  Parameter estimates:
##         C         T 
## 1.0000000 0.4447316 
## Fixed effects:  AG_I ~ Trt * Shrub_count_stan 
##                           Value Std.Error  DF   t-value p-value
## (Intercept)           14.120560  1.949583 118  7.242860  0.0000
## TrtT                  -9.719851  2.172447  29 -4.474149  0.0001
## Shrub_count_stan      -5.940999  2.486329  29 -2.389466  0.0236
## TrtT:Shrub_count_stan  4.245233  2.621562  29  1.619352  0.1162
##  Correlation: 
##                       (Intr) TrtT   Shrb__
## TrtT                  -0.897              
## Shrub_count_stan      -0.093  0.084       
## TrtT:Shrub_count_stan  0.088 -0.085 -0.948
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.9413178 -0.5451188 -0.1137243  0.3045648  3.5761729 
## 
## Number of Observations: 180
## Number of Groups: 
##         PlotID Yr %in% PlotID 
##             33             62

how do I update ggplot box plot with these new values?

is this good enough? What more needs to be done?

#without trend lines
ggag<-ggplot(data=PFXcover, aes(x=Shrub_count_stan, y=AG_I, color=Trt))+
  geom_point(size=3)+
  labs(x="standardized shrub abundance", y="Annual Grass cover (%)")+
  theme_classic()+
  scale_y_continuous(limits=c(0,70), expand=c(0,0))+
  scale_color_brewer(palette="Dark2", name = "Indaziflam application",
                    labels = c("Control", "Treatment"))+
  theme(legend.position = "top")
ggag

## Warning: Removed 1 rows containing missing values (geom_point).

#with trend lines
ggag<-ggag + geom_smooth(method = "lm", se=FALSE)
ggag

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Removed 1 rows containing missing values (geom_point).

#facet by year
ggag<-ggag + facet_wrap(~Yr)
ggag

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Removed 1 rows containing missing values (geom_point).

## Warning: Removed 3 rows containing missing values (geom_smooth).

adding lme to ggplot

PFXcover$mfinal_predicted <- predict(mfinal, re.form=NA) ## population level
PFXcover$mfinal_predicted_fit <- predict(mfinal) ## individual  level


#without trend lines
ggag<-ggplot(data=PFXcover, aes(x=Shrub_count_stan, y=AG_I, color=Trt))+
  geom_point(size=3)+
  labs(x="standardized shrub abundance", y="Annual Grass cover (%)")+
  theme_classic()+
  scale_y_continuous(limits=c(0,70), expand=c(0,0))+
  scale_color_brewer(palette="Dark2", name = "Indaziflam application",
                    labels = c("Control", "Treatment"))+
  theme(legend.position = "top")

# add prediction 
ggag <- ggag + 
    geom_line(aes(y=mfinal_predicted, color=Trt))+
  geom_line(aes(y=mfinal_predicted_fit, color=Trt))

ggag

## Warning: Removed 1 rows containing missing values (geom_point).

ggsh<-ggplot(data=PFXcover, aes(x=SH_GR, y=AG_I, 
                                fill=Trt))+
  geom_boxplot()+
  labs(x="Plant Community Type", y="Annual Grass cover (%)")+
  scale_y_continuous(limits=c(0,75), expand=c(0,0))+
  scale_fill_brewer(palette="Dark2", name = "Indaziflam application",
                    labels = c("Control", "Treatment"))+
  theme_classic()+ theme(legend.position = "top")
ggsh

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

#annual grass cover 
#with year
# maybe should not include year because did not model variance
ggyr<-ggplot(data=PFXcover, aes(x=SH_GR, y=AG_I, 
                                fill=Trt))+
  geom_boxplot()+facet_grid(~Yr)+
  labs(x="Plant Community Type", y="Annual Grass cover (%)")+
  scale_y_continuous(limits=c(0,70), expand=c(0,0))+
  scale_fill_brewer(palette="Dark2", name = "Indaziflam application",
                    labels = c("Control", "Treatment"))+
  theme_classic()+
  theme(legend.position = "top")
ggyr

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).