library(tidyverse)
library(nlme)
library(lattice)
library(lme4)
PFXcover <- read.csv("PFXcover.csv", header=T)
# plot info included in the datasets
# Yr is sample year, either 2021 or 2022
PFXcover$Yr <- factor(PFXcover$Yr)
# SH_GR is plant community strata
PFXcover$SH_GR <- factor(PFXcover$SH_GR)
# Trt is C (control) or T (treatment) with herbicide indaziflam
PFXcover$Trt <- factor(PFXcover$Trt)
shrub_count is average count of shrubs > 15 cm within 2 x 30 m belt transect this is a proxy for shrub abundance I need to add in other environmental factors like soil type, aspect, and slope but this is what we are working with right now
Not that this is nested transect data. 3 observations (transects) per plot, per year for two consecutive years
#head(PFXcover)
cor(PFXcover$shrub_count, PFXcover$SH_N)
## [1] 0.6899687
seems like yes, there is some colinearily between big shrub count and % cover worth noting that % cover of shrub is unique by year, count is not.
PFXcover$Shrub_count_stan <- scale(PFXcover$shrub_count)
PFXcover$Shrub_cov_stan <- scale(PFXcover$SH_N)
need to use poisson because response variable (annual grass cover is generated from count data, and is bound between 0 and 100, and is mostly small numbers)
is this the right way to specify random effects I want it to estimate for each combination of year and either SH_GR or PlotID
models with shrub count
m1<- lme(AG_I ~ Trt * Shrub_count_stan,
random = (~1|SH_GR),
data=PFXcover)
m2<- lme(AG_I ~ Trt * Shrub_count_stan,
random = (~1|PlotID),
data=PFXcover)
m3<- lme(AG_I ~ Trt * Shrub_count_stan,
random = (~1|Yr),
data=PFXcover)
m4<- lme(AG_I ~ Trt * Shrub_count_stan,
random = list(SH_GR=~1, Yr=~Yr),
data=PFXcover)
m5<- lme(AG_I ~ Trt * Shrub_count_stan,
random = list(SH_GR=~1, Yr=~Yr),
data=PFXcover)
# m5<- lme(AG_I ~ Trt * Shrub_count_stan,
# random = ~1+ PlotID | Yr,
# data=PFXcover)
# too many paramteres
compare models
anova(m1, m2, m3, m4, m5)
## Model df AIC BIC logLik Test L.Ratio p-value
## m1 1 6 1480.758 1499.781 -734.3790
## m2 2 6 1466.583 1485.606 -727.2916
## m3 3 6 1443.034 1462.057 -715.5170
## m4 4 9 1451.342 1479.876 -716.6708 3 vs 4 2.307653 0.5111
## m5 5 9 1451.342 1479.876 -716.6708
AIC(m1, m2, m3, m4, m5)
## df AIC
## m1 6 1480.758
## m2 6 1466.583
## m3 6 1443.034
## m4 9 1451.342
## m5 9 1451.342
#m5 is best
models with shrub cover
m6<- lme(AG_I ~ Trt * Shrub_cov_stan,
random = (~1|SH_GR),
data=PFXcover)
m7<- lme(AG_I ~ Trt * Shrub_cov_stan,
random = (~1|PlotID),
data=PFXcover)
m8<- lme(AG_I ~ Trt * Shrub_cov_stan,
random = (~1|Yr),
data=PFXcover)
#m9<- lme(AG_I ~ Trt * Shrub_cov_stan,
# random = list(SH_GR=~1, Yr=~Yr),
# data=PFXcover)
# convergence error
m10<- lme(AG_I ~ Trt * Shrub_cov_stan,
random = list(PlotID=~1, Yr=~Yr),
data=PFXcover)
compare models
anova(m6, m7, m8, m10)
## Model df AIC BIC logLik Test L.Ratio p-value
## m6 1 6 1489.550 1508.573 -738.7751
## m7 2 6 1472.328 1491.351 -730.1639
## m8 3 6 1456.646 1475.669 -722.3230
## m10 4 9 1408.530 1437.065 -695.2651 3 vs 4 54.11581 <.0001
AIC(m6, m7, m8, m10)
## df AIC
## m6 6 1489.550
## m7 6 1472.328
## m8 6 1456.646
## m10 9 1408.530
#m10 is best
anova (m5, m10)
## Warning in anova.lme(m5, m10): fitted objects with different fixed effects. REML
## comparisons are not meaningful.
## Model df AIC BIC logLik
## m5 1 9 1451.342 1479.876 -716.6708
## m10 2 9 1408.530 1437.065 -695.2651
AIC(m5, m10)
## df AIC
## m5 9 1451.342
## m10 9 1408.530
shrub count (m5) is better, we will proceed with that
m5<- lme(AG_I ~ Trt * Shrub_count_stan,
random = list(PlotID=~1, Yr=~Yr),
data=PFXcover)
m5
## Linear mixed-effects model fit by REML
## Data: PFXcover
## Log-restricted-likelihood: -693.7601
## Fixed: AG_I ~ Trt * Shrub_count_stan
## (Intercept) TrtT Shrub_count_stan
## 13.924803 -8.869317 -5.879935
## TrtT:Shrub_count_stan
## 3.977184
##
## Random effects:
## Formula: ~1 | PlotID
## (Intercept)
## StdDev: 0.001944343
##
## Formula: ~Yr | Yr %in% PlotID
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 5.153773 (Intr)
## Yr2022 17.262658 0.145
## Residual 8.907162
##
## Number of Observations: 180
## Number of Groups:
## PlotID Yr %in% PlotID
## 33 62
summary(m5)
## Linear mixed-effects model fit by REML
## Data: PFXcover
## AIC BIC logLik
## 1405.52 1434.055 -693.7601
##
## Random effects:
## Formula: ~1 | PlotID
## (Intercept)
## StdDev: 0.001944343
##
## Formula: ~Yr | Yr %in% PlotID
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 5.153773 (Intr)
## Yr2022 17.262658 0.145
## Residual 8.907162
##
## Fixed effects: AG_I ~ Trt * Shrub_count_stan
## Value Std.Error DF t-value p-value
## (Intercept) 13.924803 1.873649 118 7.431918 0.0000
## TrtT -8.869317 2.529591 29 -3.506225 0.0015
## Shrub_count_stan -5.879935 2.391783 29 -2.458390 0.0202
## TrtT:Shrub_count_stan 3.977184 2.808892 29 1.415927 0.1674
## Correlation:
## (Intr) TrtT Shrb__
## TrtT -0.741
## Shrub_count_stan -0.095 0.070
## TrtT:Shrub_count_stan 0.081 -0.073 -0.852
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.75724059 -0.42765642 -0.08717075 0.24094590 4.35314237
##
## Number of Observations: 180
## Number of Groups:
## PlotID Yr %in% PlotID
## 33 62
#make simple linear regression to plot the residuals
m1 <- lm(AG_I ~ Trt * Shrub_count_stan, data = PFXcover)
#plot the residuals
op <- par(mfrow = c(2,2), mar=c(4,4,2,2))
plot(m1, which=c(1), col=1, add.smooth=F, caption="")
plot(resid(m1) ~ PFXcover$Trt, xlab = "Trt", ylab = "residuals")
plot(resid(m1) ~ PFXcover$Shrub_count_stan, xlab = "shrub count", ylab = "residuals")
par(op)
try some different variance structures to see if that helps anything
#with no var structures
m5<- lme(AG_I ~ Trt * Shrub_count_stan,
random = list(PlotID=~1, Yr=~Yr),
data=PFXcover)
# try var indent for each categorical variance
M5v1 <- lme(AG_I ~ Trt * Shrub_count_stan,
random = list(PlotID=~1, Yr=~Yr),
data = PFXcover,
weights = varIdent(form = ~1 | Trt))
# # from previous versions I know varExp(form =~ shrub_count) might be good
# M5v2 <- lme(AG_I ~ Trt * Shrub_count_stan,
# random = list(PlotID=~1, Yr=~Yr),
# data = PFXcover,
# weights = varExp(form =~Shrub_count_stan))
#
# #combination of all
# M5v3 <- lme(AG_I ~ Trt * Shrub_count_stan,
# random = list(PlotID=~1, Yr=~Yr),
# weights = varComb(varIdent(form = ~1 | Trt),
# varExp(form =~ Shrub_count_stan)))
anova(m5, M5v1)
## Model df AIC BIC logLik Test L.Ratio p-value
## m5 1 9 1405.520 1434.054 -693.7601
## M5v1 2 10 1369.828 1401.532 -674.9138 1 vs 2 37.69264 <.0001
AIC(m5, M5v1)
## df AIC
## m5 9 1405.520
## M5v1 10 1369.828
plot the updated residuals
op <- par(mfrow = c(2,2), mar=c(4,4,2,2))
plot(m1, which=c(1), col=1, add.smooth=F, caption="")
plot(resid(m5) ~ PFXcover$Trt, xlab = "Trt", ylab = "residuals")
plot(resid(m5) ~ PFXcover$Shrub_count_stan, xlab = "shrub count", ylab = "residuals")
E1 <- resid(M5v1)
coplot(E1 ~ Shrub_count_stan |Trt, data = PFXcover,
ylab = "residuals")
E2 <- resid(M5v1, type = "normalized")
coplot(E2 ~ Shrub_count_stan |Trt, data = PFXcover,
ylab = "Normalised residuals")
final model: M5v1
mfinal <- lme(AG_I ~ Trt * Shrub_count_stan,
random = list(PlotID=~1, Yr=~Yr),
data = PFXcover,
weights = varIdent(form = ~1 | Trt))
summary(mfinal)
## Linear mixed-effects model fit by REML
## Data: PFXcover
## AIC BIC logLik
## 1369.828 1401.532 -674.9138
##
## Random effects:
## Formula: ~1 | PlotID
## (Intercept)
## StdDev: 0.004350801
##
## Formula: ~Yr | Yr %in% PlotID
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 2.204853 (Intr)
## Yr2022 19.156764 -0.503
## Residual 12.627728
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Trt
## Parameter estimates:
## C T
## 1.0000000 0.4447316
## Fixed effects: AG_I ~ Trt * Shrub_count_stan
## Value Std.Error DF t-value p-value
## (Intercept) 14.120560 1.949583 118 7.242860 0.0000
## TrtT -9.719851 2.172447 29 -4.474149 0.0001
## Shrub_count_stan -5.940999 2.486329 29 -2.389466 0.0236
## TrtT:Shrub_count_stan 4.245233 2.621562 29 1.619352 0.1162
## Correlation:
## (Intr) TrtT Shrb__
## TrtT -0.897
## Shrub_count_stan -0.093 0.084
## TrtT:Shrub_count_stan 0.088 -0.085 -0.948
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.9413178 -0.5451188 -0.1137243 0.3045648 3.5761729
##
## Number of Observations: 180
## Number of Groups:
## PlotID Yr %in% PlotID
## 33 62
#without trend lines
ggag<-ggplot(data=PFXcover, aes(x=Shrub_count_stan, y=AG_I, color=Trt))+
geom_point(size=3)+
labs(x="standardized shrub abundance", y="Annual Grass cover (%)")+
theme_classic()+
scale_y_continuous(limits=c(0,70), expand=c(0,0))+
scale_color_brewer(palette="Dark2", name = "Indaziflam application",
labels = c("Control", "Treatment"))+
theme(legend.position = "top")
ggag
## Warning: Removed 1 rows containing missing values (geom_point).
#with trend lines
ggag<-ggag + geom_smooth(method = "lm", se=FALSE)
ggag
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Removed 1 rows containing missing values (geom_point).
#facet by year
ggag<-ggag + facet_wrap(~Yr)
ggag
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Removed 1 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_smooth).
adding lme to ggplot
PFXcover$mfinal_predicted <- predict(mfinal, re.form=NA) ## population level
PFXcover$mfinal_predicted_fit <- predict(mfinal) ## individual level
#without trend lines
ggag<-ggplot(data=PFXcover, aes(x=Shrub_count_stan, y=AG_I, color=Trt))+
geom_point(size=3)+
labs(x="standardized shrub abundance", y="Annual Grass cover (%)")+
theme_classic()+
scale_y_continuous(limits=c(0,70), expand=c(0,0))+
scale_color_brewer(palette="Dark2", name = "Indaziflam application",
labels = c("Control", "Treatment"))+
theme(legend.position = "top")
# add prediction
ggag <- ggag +
geom_line(aes(y=mfinal_predicted, color=Trt))+
geom_line(aes(y=mfinal_predicted_fit, color=Trt))
ggag
## Warning: Removed 1 rows containing missing values (geom_point).
ggsh<-ggplot(data=PFXcover, aes(x=SH_GR, y=AG_I,
fill=Trt))+
geom_boxplot()+
labs(x="Plant Community Type", y="Annual Grass cover (%)")+
scale_y_continuous(limits=c(0,75), expand=c(0,0))+
scale_fill_brewer(palette="Dark2", name = "Indaziflam application",
labels = c("Control", "Treatment"))+
theme_classic()+ theme(legend.position = "top")
ggsh
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
#annual grass cover
#with year
# maybe should not include year because did not model variance
ggyr<-ggplot(data=PFXcover, aes(x=SH_GR, y=AG_I,
fill=Trt))+
geom_boxplot()+facet_grid(~Yr)+
labs(x="Plant Community Type", y="Annual Grass cover (%)")+
scale_y_continuous(limits=c(0,70), expand=c(0,0))+
scale_fill_brewer(palette="Dark2", name = "Indaziflam application",
labels = c("Control", "Treatment"))+
theme_classic()+
theme(legend.position = "top")
ggyr
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).