Set Up
Data Cleaning
## Recoding CogonStatus to numeric for modeling
shrubs$CogonStatusNumeric <- ifelse(shrubs$CogonStatus == "in", 1, 0)
## Convert categorical variables to factors
shrubs$ShrubType <- factor(shrubs$ShrubType)
shrubs$Common_Name <- factor(shrubs$Common_Name)
shrubs$CogonStatusNumeric <- factor(shrubs$CogonStatusNumeric)
shrubs$Patch <- factor(shrubs$Patch)
shrubs$PlantTag <- as.factor(shrubs$PlantTag)
## Calculate mean height for each shrub using the four height measurements
## Second measurement
shrubs$MeanHeight2 <- rowMeans(
shrubs[, c("TH2.1", "TH2.2", "TH2.3", "TH2.4")],
na.rm = TRUE
)
shrubs$MeanHeight2[is.nan(shrubs$MeanHeight2)] <- NA
Height Difference
# Calculate the difference between MeanHeight2 and MeanHeight
shrubs$HeightDiff <- shrubs$MeanHeight2 - shrubs$MeanHeight
###Remove the height with very high negative value (YH and MB)
#shrubs <- shrubs[!shrubs$PlantTag %in% c(9505, 9506), ] # Why are we removing these two plants?
# Convert MeanHeight and MeanHeight2 to long format
long_height <- shrubs %>%
pivot_longer(
cols = c(MeanHeight, MeanHeight2),
names_to = "SamplingPeriod",
values_to = "Height"
)
long_height$SamplingPeriod <- factor(
long_height$SamplingPeriod,
levels = c("MeanHeight", "MeanHeight2"),
labels = c("Pre", "Post") # Pre- or Post-Fire
)
####dropping row 57 in height
# Why is this row being dropped?
hist(long_height$Height)

model_data <- na.omit(long_height[, c("PlantTag", "HeightDiff", "Common_Name",
"CogonStatusNumeric", "SMavg",
"PercCanopy2", "SamplingPeriod", "Height")])
Height Models
model_dataH <- model_data %>%
filter(!PlantTag %in% c("9555", "9504", "9585", "9691"))
# Null
height_glmer0 <- glmer(Height ~ 1 + (1 | PlantTag),
data = model_dataH,
family = gaussian(link = "identity")
)
## Warning in glmer(Height ~ 1 + (1 | PlantTag), data = model_dataH, family =
## gaussian(link = "identity")): calling glmer() with family=gaussian (identity
## link) as a shortcut to lmer() is deprecated; please call lmer() directly
# Global Interaction
height_glmer1 <- glmer(Height ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2 +
(1 | PlantTag),
data = model_dataH,
family = gaussian(link = "identity")
)
## Warning in glmer(Height ~ Common_Name * CogonStatusNumeric + SMavg +
## PercCanopy2 + : calling glmer() with family=gaussian (identity link) as a
## shortcut to lmer() is deprecated; please call lmer() directly
# Global Additive
height_glmer2 <- glmer(Height ~ Common_Name + CogonStatusNumeric + SMavg + PercCanopy2 +
(1 | PlantTag),
data = model_dataH,
family = gaussian(link = "identity")
)
## Warning in glmer(Height ~ Common_Name + CogonStatusNumeric + SMavg +
## PercCanopy2 + : calling glmer() with family=gaussian (identity link) as a
## shortcut to lmer() is deprecated; please call lmer() directly
# Invasion Only
height_glmer3 <- glmer(Height ~ CogonStatusNumeric +
(1 | PlantTag),
data = model_dataH,
family = gaussian(link = "identity")
)
## Warning in glmer(Height ~ CogonStatusNumeric + (1 | PlantTag), data =
## model_dataH, : calling glmer() with family=gaussian (identity link) as a
## shortcut to lmer() is deprecated; please call lmer() directly
# Invasion by species
height_glmer4 <- glmer(Height ~ Common_Name * CogonStatusNumeric +
(1 | PlantTag),
data = model_dataH,
family = gaussian(link = "identity")
)
## Warning in glmer(Height ~ Common_Name * CogonStatusNumeric + (1 | PlantTag), :
## calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
## deprecated; please call lmer() directly
height_glmer5 <- glmer(Height ~ Common_Name + CogonStatusNumeric +
(1 | PlantTag),
data = model_dataH,
family = gaussian(link = "identity")
)
## Warning in glmer(Height ~ Common_Name + CogonStatusNumeric + (1 | PlantTag), :
## calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
## deprecated; please call lmer() directly
models <- list(
"Model 0" = height_glmer0,
"Model 1" = height_glmer1,
"Model 2" = height_glmer2,
"Model 3" = height_glmer3,
"Model 4" = height_glmer4,
"Model 5" = height_glmer5
)
mod_sel <- aictab(cand.set = models)
## Warning in aictab.AIClmerMod(cand.set = models):
## Model selection for fixed effects is only appropriate with ML estimation:
## REML (default) should only be used to select random effects for a constant set of fixed effects
mod_sel
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt Res.LL
## Model 4 8 1449.77 0.00 0.89 0.89 -716.43
## Model 1 10 1454.01 4.24 0.11 0.99 -716.30
## Model 5 6 1459.70 9.93 0.01 1.00 -723.58
## Model 2 8 1464.19 14.42 0.00 1.00 -723.64
## Model 3 4 1491.92 42.15 0.00 1.00 -741.83
## Model 0 3 1503.33 53.56 0.00 1.00 -748.59
height_glmer <- height_glmer4
#-------------#
## Best model- switched to lmer since it is the same fixed effects structure but allows for p-values to be calculated
height_lmer <- lmer(Height ~ Common_Name * CogonStatusNumeric + (1 | PlantTag),
data = model_dataH)
# This will now display the p-values
summary(height_lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Height ~ Common_Name * CogonStatusNumeric + (1 | PlantTag)
## Data: model_dataH
##
## REML criterion at convergence: 1432.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9870 -0.6758 -0.1694 0.5611 2.6195
##
## Random effects:
## Groups Name Variance Std.Dev.
## PlantTag (Intercept) 175.5 13.25
## Residual 267.7 16.36
## Number of obs: 166, groups: PlantTag, 83
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 62.224 4.701 77.000 13.237
## Common_NameMayberry -16.308 6.536 77.000 -2.495
## Common_NameYaupon_Holly -17.834 6.648 77.000 -2.683
## CogonStatusNumeric1 19.675 6.536 77.000 3.010
## Common_NameMayberry:CogonStatusNumeric1 -12.105 9.441 77.000 -1.282
## Common_NameYaupon_Holly:CogonStatusNumeric1 -10.192 9.414 77.000 -1.083
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## Common_NameMayberry 0.01474 *
## Common_NameYaupon_Holly 0.00894 **
## CogonStatusNumeric1 0.00353 **
## Common_NameMayberry:CogonStatusNumeric1 0.20362
## Common_NameYaupon_Holly:CogonStatusNumeric1 0.28233
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Cmm_NM Cm_NY_H CgnSN1 C_NM:C
## Cmmn_NmMybr -0.719
## Cmmn_NmYp_H -0.707 0.509
## CgnSttsNmr1 -0.719 0.517 0.509
## Cmm_NM:CSN1 0.498 -0.692 -0.352 -0.692
## C_NY_H:CSN1 0.499 -0.359 -0.706 -0.694 0.481
#------------#
## 1. Simulate residuals (standard is 250 simulations)
sim_residuals <- simulateResiduals(fittedModel = height_glmer, plot = FALSE)
## 2. Run the main diagnostic plots
plot(sim_residuals)

testUniformity(sim_residuals)

##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.045831, p-value = 0.8766
## alternative hypothesis: two-sided
testDispersion(sim_residuals)

##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.95221, p-value = 0.68
## alternative hypothesis: two.sided
testOutliers(sim_residuals) # Maybe remove the largest values?

##
## DHARMa outlier test based on exact binomial test with approximate
## expectations
##
## data: sim_residuals
## outliers at both margin(s) = 0, observations = 166, p-value = 0.6467
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
## 0.00000000 0.02197707
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 )
## 0
# or remove the negative values and use a gamma distribution
vif(height_glmer)
## GVIF Df GVIF^(1/(2*Df))
## Common_Name 3.725363 2 1.389288
## CogonStatusNumeric 2.861733 1 1.691666
## Common_Name:CogonStatusNumeric 6.917217 2 1.621746
Leave-One-Out Cross-Validation (LOOCV) for Height Model
# Get unique tags
tags <- unique(model_data$PlantTag)
n_tags <- length(tags)
# Initialize vector for predictions
loocv_preds <- numeric(nrow(model_data))
for(i in 1:n_tags) {
# 1. Identify rows for the current plant
test_indices <- which(model_data$PlantTag == tags[i])
# 2. Split data
train_data <- model_data[-test_indices, ]
test_data <- model_data[test_indices, ]
# 3. Refit the LMM
# (Note: Use lmer with the same formula as your summary)
m_fold <- lmer(Height ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2 + (1 | PlantTag),
data = train_data)
# 4. Predict for the held-out plant
# re.form = NA tells R to predict using only Fixed Effects (since PlantTag is new)
loocv_preds[test_indices] <- predict(m_fold, newdata = test_data, re.form = NA)
}
# 5. Calculate Performance
actual <- model_data$Height
rmse <- sqrt(mean((actual - loocv_preds)^2))
mae <- mean(abs(actual - loocv_preds))
r2_cv <- 1 - (sum((actual - loocv_preds)^2) / sum((actual - mean(actual))^2))
mae_cv <- mean(abs(model_data$Height - loocv_preds))
cat("Leave-One-Plant-Out MAE:", round(mae_cv, 3), "cm\n")
## Leave-One-Plant-Out MAE: 19.87 cm
cat("Leave-One-Plant-Out RMSE:", round(rmse, 3), "\n")
## Leave-One-Plant-Out RMSE: 25.926
cat("Leave-One-Plant-Out R-squared:", round(r2_cv, 3), "\n")
## Leave-One-Plant-Out R-squared: 0.088
Height- Predictive Plots
# Predictions
height_preds <- ggpredict(
height_lmer,
terms = "CogonStatusNumeric"
)
# Make status a factor for plotting raw points
model_data$CogonStatusNumeric <- factor(
model_data$CogonStatusNumeric,
levels = c(0, 1)
)
# Plot
height_plot <- ggplot() +
# Raw data points
geom_jitter(
data = model_data,
aes(x = CogonStatusNumeric,
y = Height,
color = CogonStatusNumeric),
width = 0.08,
alpha = 0.15,
size = 2
) +
# Predicted means
geom_point(
data = height_preds,
aes(x = x,
y = predicted,
color = factor(x)),
size = 4
) +
# Confidence intervals
geom_errorbar(
data = height_preds,
aes(x = x,
ymin = conf.low,
ymax = conf.high,
color = factor(x)),
width = 0.1,
linewidth = 1
) +
scale_x_discrete(
labels = c(
"0" = "Native",
"1" = "Invaded"
)
) +
scale_color_manual(
values = c(
"0" = "#0072B2",
"1" = "#D55E00"
),
guide = "none"
) +
labs(
x = "Cogongrass Status",
y = "Height (cm)"
) +
theme_classic(base_size = 15)
height_plot

## Save
ggsave(
filename = "C:/Users/DrewIvory/OneDrive - University of Florida/Desktop/School/PHD/01_Projects/10_Shrub/03_Figures/Height_Predictions.png",
plot = height_plot,
width = 4,
height = 6,
dpi = 300
)
Species Height Plot
# 1. Generate predictions by species
height_species_preds <- ggpredict(
height_lmer,
terms = c("Common_Name")
)
# Fix the underscore in the model predictions data frame so it matches the clean text
height_species_preds$x <- gsub("Yaupon_Holly", "Yaupon Holly", height_species_preds$x)
# 2. Fix the underscore in your raw data and set the factor levels
model_data$Common_Name <- gsub("Yaupon_Holly", "Yaupon Holly", model_data$Common_Name)
model_data$Common_Name <- factor(
model_data$Common_Name,
levels = c("Beautyberry", "Mayberry", "Yaupon Holly")
)
# 3. Build the professional, layered plot
height_species_plot <- ggplot() +
# Layer 1: Raw data points (faded in background to show variation)
geom_jitter(
data = model_data,
aes(x = Common_Name,
y = Height,
color = Common_Name),
width = 0.1, # Subtle horizontal spread
alpha = 0.15, # Highly transparent so it doesn't crowd the model means
size = 2
) +
# Layer 2: Model-predicted means (prominent foreground)
geom_point(
data = height_species_preds,
aes(x = factor(x, levels = c("Beautyberry", "Mayberry", "Yaupon Holly")),
y = predicted,
color = factor(x, levels = c("Beautyberry", "Mayberry", "Yaupon Holly"))),
size = 4
) +
# Layer 3: Model confidence intervals
geom_errorbar(
data = height_species_preds,
aes(x = factor(x, levels = c("Beautyberry", "Mayberry", "Yaupon Holly")),
ymin = conf.low,
ymax = conf.high,
color = factor(x, levels = c("Beautyberry", "Mayberry", "Yaupon Holly"))),
width = 0.1,
linewidth = 1
) +
# Color palette: Cohesive, colorblind-friendly academic tones
scale_color_manual(
values = c(
"Beautyberry" = "#009E73", # Clean Green
"Mayberry" = "#E69F00", # Muted Orange/Gold
"Yaupon Holly" = "#56B4E9" # Soft Blue
),
guide = "none" # Removes unnecessary legend since x-axis is labeled
) +
# Axis labels matching manuscript standard
labs(
x = "Species",
y = "Height (cm)"
) +
# Clean, minimal academic theme
theme_classic(base_size = 15)
# View the plot
height_species_plot

# 4. Save at publication-quality resolution (300 DPI)
ggsave(
filename = "C:/Users/DrewIvory/OneDrive - University of Florida/Desktop/School/PHD/01_Projects/10_Shrub/03_Figures/Species_Height_Predictions.png",
plot = height_species_plot,
width = 5, # Slightly wider than the single comparison plot to let 3 species breathe
height = 6,
dpi = 300
)
Global Model for Species Height Difference
model_dataD <- model_data %>%
distinct(PlantTag, .keep_all = TRUE)
model_dataD <- model_dataD %>%
filter(!PlantTag %in% c("9505", "9506", "9546", "9555", "9585",
"9589", "9691")) # had to remove the largest negative values.
# Null
heightD_lm0 <- lm(HeightDiff ~ 1,
data = model_dataD
)
# Global Interaction
heightD_lm1 <- lm(HeightDiff ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2,
data = model_dataD
)
# Global Additive
heightD_lm2 <- lm(HeightDiff ~ Common_Name + CogonStatusNumeric + SMavg + PercCanopy2,
data = model_dataD
)
# Invasion Only
heightD_lm3 <- lm(HeightDiff ~ CogonStatusNumeric,
data = model_dataD
)
# Invasion by species
heightD_lm4 <- lm(HeightDiff ~ Common_Name * CogonStatusNumeric,
data = model_dataD
)
heightD_lm5 <- lm(HeightDiff ~ Common_Name + CogonStatusNumeric,
data = model_dataD
)
## AIC comparison
models <- list(
"Model 0" = heightD_lm0,
"Model 1" = heightD_lm1,
"Model 2" = heightD_lm2,
"Model 3" = heightD_lm3,
"Model 4" = heightD_lm4,
"Model 5" = heightD_lm5
)
mod_sel <- aictab(cand.set = models)
mod_sel
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## Model 1 9 623.94 0.00 0.71 0.71 -301.69
## Model 4 7 625.80 1.86 0.28 0.99 -305.12
## Model 2 7 634.38 10.44 0.00 1.00 -309.41
## Model 5 5 634.48 10.54 0.00 1.00 -311.84
## Model 0 2 641.45 17.51 0.00 1.00 -318.65
## Model 3 3 642.54 18.60 0.00 1.00 -318.11
topmods <- list(heightD_lm1, heightD_lm4)
avg_heightD_mod <- model.avg(topmods)
summary(avg_heightD_mod)
##
## Call:
## model.avg(object = topmods)
##
## Component model call:
## lm(formula = <2 unique values>, data = model_dataD)
##
## Component models:
## df logLik AICc delta weight
## 12345 9 -301.69 623.94 0.00 0.72
## 125 7 -305.12 625.80 1.86 0.28
##
## Term codes:
## CogonStatusNumeric Common_Name
## 1 2
## PercCanopy2 SMavg
## 3 4
## CogonStatusNumeric:Common_Name
## 5
##
## Model-averaged coefficients:
## (full average)
## Estimate Std. Error Adjusted SE
## (Intercept) 28.39381 7.88564 7.95273
## Common_NameMayberry -5.12541 4.20529 4.27616
## Common_NameYaupon Holly -1.30090 4.28168 4.35329
## CogonStatusNumeric1 -6.97770 4.27156 4.34269
## SMavg -0.71063 0.63567 0.64120
## PercCanopy2 -0.05240 0.04860 0.04905
## CogonStatusNumeric1:Common_NameMayberry 9.75609 6.17277 6.27483
## CogonStatusNumeric1:Common_NameYaupon Holly 23.83278 6.22703 6.33169
## z value Pr(>|z|)
## (Intercept) 3.570 0.000356 ***
## Common_NameMayberry 1.199 0.230684
## Common_NameYaupon Holly 0.299 0.765068
## CogonStatusNumeric1 1.607 0.108105
## SMavg 1.108 0.267737
## PercCanopy2 1.068 0.285320
## CogonStatusNumeric1:Common_NameMayberry 1.555 0.119995
## CogonStatusNumeric1:Common_NameYaupon Holly 3.764 0.000167 ***
##
## (conditional average)
## Estimate Std. Error Adjusted SE
## (Intercept) 28.39381 7.88564 7.95273
## Common_NameMayberry -5.12541 4.20529 4.27616
## Common_NameYaupon Holly -1.30090 4.28168 4.35329
## CogonStatusNumeric1 -6.97770 4.27156 4.34269
## SMavg -0.99141 0.53420 0.54333
## PercCanopy2 -0.07311 0.04220 0.04292
## CogonStatusNumeric1:Common_NameMayberry 9.75609 6.17277 6.27483
## CogonStatusNumeric1:Common_NameYaupon Holly 23.83278 6.22703 6.33169
## z value Pr(>|z|)
## (Intercept) 3.570 0.000356 ***
## Common_NameMayberry 1.199 0.230684
## Common_NameYaupon Holly 0.299 0.765068
## CogonStatusNumeric1 1.607 0.108105
## SMavg 1.825 0.068049 .
## PercCanopy2 1.703 0.088512 .
## CogonStatusNumeric1:Common_NameMayberry 1.555 0.119995
## CogonStatusNumeric1:Common_NameYaupon Holly 3.764 0.000167 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals on the most complicated model to check assumptions
# Unable to simulate residuals on the averaged model
sim_residuals <- simulateResiduals(fittedModel = heightD_lm1, plot = FALSE)
# 2. Run the main diagnostic plots
plot(sim_residuals)

testUniformity(sim_residuals)

##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.068, p-value = 0.8532
## alternative hypothesis: two-sided
testDispersion(sim_residuals)

##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.90938, p-value = 0.624
## alternative hypothesis: two.sided
testOutliers(sim_residuals)

##
## DHARMa outlier test based on exact binomial test with approximate
## expectations
##
## data: sim_residuals
## outliers at both margin(s) = 1, observations = 80, p-value = 0.4727
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
## 0.0003164225 0.0676877845
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 )
## 0.0125
vif(heightD_lm1)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
## GVIF Df GVIF^(1/(2*Df))
## Common_Name 3.884060 2 1.403853
## CogonStatusNumeric 2.909929 1 1.705851
## SMavg 1.287353 1 1.134616
## PercCanopy2 1.079139 1 1.038816
## Common_Name:CogonStatusNumeric 7.692400 2 1.665388
LOOCV for model average
# Initialize a vector to store predictions
n <- nrow(model_dataD)
loocv_preds <- numeric(n)
for(i in 1:n) {
# 1. Split data
train_data <- model_dataD[-i, ]
test_point <- model_dataD[i, , drop = FALSE]
# 2. Refit the component models on training data
m1 <- lm(HeightDiff ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2,
data = train_data)
m2 <- lm(HeightDiff ~ Common_Name + CogonStatusNumeric + SMavg + PercCanopy2,
data = train_data)
m3 <- lm(HeightDiff ~ CogonStatusNumeric,
data = train_data)
m4 <- lm(HeightDiff ~ Common_Name * CogonStatusNumeric,
data = train_data)
# 3. Create the model average for this specific fold
m_list <- list(m1, m2, m3, m4)
avg_fold <- model.avg(m_list)
# 4. Predict the held-out point (using the full average)
loocv_preds[i] <- predict(avg_fold, newdata = test_point)
}
# 5. Calculate Performance Metrics
rmse <- sqrt(mean((model_dataD$HeightDiff - loocv_preds)^2))
mae <- mean(abs(model_dataD$HeightDiff - loocv_preds))
# 1. Calculate Total Sum of Squares (TSS)
tss <- sum((model_dataD$HeightDiff - mean(model_dataD$HeightDiff))^2)
# 2. Calculate Residual Sum of Squares from LOOCV (RSS)
rss_cv <- sum((model_dataD$HeightDiff - loocv_preds)^2)
# 3. Calculate LOOCV R-squared
r2_cv <- 1 - (rss_cv / tss)
cat("LOOCV R-squared:", round(r2_cv, 3), "\n")
## LOOCV R-squared: 0.176
cat("LOOCV RMSE:", round(rmse, 3), "\n")
## LOOCV RMSE: 11.791
cat("LOOCV MAE:", round(mae, 3), "\n")
## LOOCV MAE: 9.247
Height Difference Post-hoc Tests
# Generate predictions
heightD_preds <- ggpredict(
avg_heightD_mod,
terms = c("CogonStatusNumeric", "Common_Name")
)
heightD_preds <- heightD_preds %>%
mutate(group = dplyr::recode(group,
"Beautyberry" = "Beautyberry",
"Mayberry" = "Mayberry",
"Yaupon_Holly" = "Yaupon Holly"
))
# Make sure grouping variables are factors
model_data$CogonStatusNumeric <- factor(
model_data$CogonStatusNumeric,
levels = c(0, 1)
)
# Plot
heightD_plot <- ggplot() +
# Raw data points
geom_jitter(
data = model_data,
aes(x = CogonStatusNumeric,
y = HeightDiff,
color = CogonStatusNumeric),
width = 0.08,
alpha = 0.05,
size = 2
) +
# Predicted means
geom_point(
data = heightD_preds,
aes(x = x,
y = predicted,
color = factor(x)),
size = 4
) +
# Confidence intervals
geom_errorbar(
data = heightD_preds,
aes(x = x,
ymin = conf.low,
ymax = conf.high,
color = factor(x)),
width = 0.1,
linewidth = 1
) +
# Separate panels by species
facet_wrap(~group) +
scale_x_discrete(
labels = c(
"0" = "Native",
"1" = "Invaded"
)
) +
scale_color_manual(
values = c(
"0" = "#0072B2", # Native
"1" = "#D55E00" # Invaded
),
guide = "none"
) +
labs(
x = "Cogongrass Status",
y = "Height Difference (cm)"
) +
theme_classic(base_size = 15)
heightD_plot

## Save
ggsave(
filename = "C:/Users/DrewIvory/OneDrive - University of Florida/Desktop/School/PHD/01_Projects/10_Shrub/03_Figures/HeightDifference_Predictions.png",
plot = heightD_plot,
width = 8,
height = 6,
dpi = 300
)
Stem Count Analysis
Total Stem Count
Data Cleaning
# Calculate the difference between TotalStems2 and TotalStems
shrubs$StemDiff <- shrubs$TotalStems2 - shrubs$TotalStems
# Long Format
long_totalstems <- shrubs %>%
# 1. Keep only the columns you need (your ID vars + your measure vars)
dplyr::select(ShrubType, Common_Name, Patch, CogonStatusNumeric, SMavg,
PercCover2, PercCanopy2, PlantTag, Date, StemDiff,
TotalStems, TotalStems2) %>%
# 2. Pivot only the stem columns
pivot_longer(
cols = c(TotalStems, TotalStems2),
names_to = "SamplingPeriod",
values_to = "TotalStemsValue"
)
# Sample period is numeric
long_totalstems$SamplingPeriod <- factor(
long_totalstems$SamplingPeriod,
levels = c("TotalStems", "TotalStems2"),
labels = c("Pre", "Post") # Pre- or Post-Fire
)
model_data_stems <- long_totalstems %>%
dplyr::select(PlantTag, StemDiff, TotalStemsValue, ShrubType,
Common_Name, CogonStatusNumeric, SMavg, PercCanopy2) %>%
na.omit()
# Confirm Factors
model_data_stems$PlantTag <- as.factor(model_data_stems$PlantTag)
model_data_stems$Common_Name <- as.factor(model_data_stems$Common_Name)
model_data_stems$ShrubType <- as.factor(model_data_stems$ShrubType)
model_data_stems$CogonStatusNumeric <- as.factor(model_data_stems$CogonStatusNumeric)
Total Stem Count Model
# Null
stem_glm0 <- glmer(TotalStemsValue ~ 1 + (1 | PlantTag),
data = model_data_stems,
family = poisson(link = "log")
)
stem_glm1 <- glmer(TotalStemsValue ~ Common_Name * CogonStatusNumeric +
SMavg + PercCanopy2 + (1 | PlantTag),
data = model_data_stems,
family = poisson(link = "log")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0836651 (tol = 0.002, component 1)
stem_glm2 <- glmer(TotalStemsValue ~ Common_Name + CogonStatusNumeric +
SMavg + PercCanopy2 + (1 | PlantTag),
data = model_data_stems,
family = poisson(link = "log")
)
stem_glm3 <- glmer(TotalStemsValue ~ CogonStatusNumeric +
SMavg + PercCanopy2 + (1 | PlantTag),
data = model_data_stems,
family = poisson(link = "log")
)
stem_glm4 <- glmer(TotalStemsValue ~ Common_Name * CogonStatusNumeric +
(1 | PlantTag),
data = model_data_stems,
family = poisson(link = "log")
)
stem_glm5 <- glmer(TotalStemsValue ~ Common_Name + CogonStatusNumeric +
(1 | PlantTag),
data = model_data_stems,
family = poisson(link = "log")
)
models <- list(
"Model 0" = stem_glm0,
"Model 1" = stem_glm1,
"Model 2" = stem_glm2,
"Model 3" = stem_glm3,
"Model 4" = stem_glm4,
"Model 5" = stem_glm5
)
mod_sel <- aictab(cand.set = models)
mod_sel
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## Model 5 5 904.07 0.00 0.60 0.60 -446.86
## Model 2 7 905.88 1.81 0.24 0.84 -445.60
## Model 4 7 907.62 3.55 0.10 0.94 -446.47
## Model 1 9 908.66 4.59 0.06 1.00 -444.78
## Model 3 5 922.31 18.24 0.00 1.00 -455.98
## Model 0 2 927.23 23.16 0.00 1.00 -461.58
summary(stem_glm5)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: TotalStemsValue ~ Common_Name + CogonStatusNumeric + (1 | PlantTag)
## Data: model_data_stems
##
## AIC BIC logLik -2*log(L) df.resid
## 903.7 919.5 -446.9 893.7 169
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.79116 -0.42939 -0.09079 0.30804 2.31498
##
## Random effects:
## Groups Name Variance Std.Dev.
## PlantTag (Intercept) 0.2765 0.5258
## Number of obs: 174, groups: PlantTag, 87
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.38627 0.13184 10.515 < 2e-16 ***
## Common_NameMayberry -0.01423 0.16373 -0.087 0.930720
## Common_NameYaupon_Holly 0.63361 0.15401 4.114 3.89e-05 ***
## CogonStatusNumeric1 0.44112 0.12931 3.411 0.000647 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Cmm_NM C_NY_H
## Cmmn_NmMybr -0.608
## Cmmn_NmYp_H -0.624 0.499
## CgnSttsNmr1 -0.519 0.049 0.007
topmods <- list(heightD_lm5, heightD_lm2)
avg_stem_mod <- model.avg(topmods)
summary(avg_stem_mod)
##
## Call:
## model.avg(object = topmods)
##
## Component model call:
## lm(formula = <2 unique values>, data = model_dataD)
##
## Component models:
## df logLik AICc delta weight
## 1234 7 -309.41 634.38 0.0 0.51
## 12 5 -311.84 634.48 0.1 0.49
##
## Term codes:
## CogonStatusNumeric Common_Name PercCanopy2 SMavg
## 1 2 3 4
##
## Model-averaged coefficients:
## (full average)
## Estimate Std. Error Adjusted SE z value Pr(>|z|)
## (Intercept) 18.73570 6.56503 6.62614 2.828 0.00469 **
## Common_NameMayberry -0.51120 3.31339 3.36728 0.152 0.87933
## Common_NameYaupon Holly 10.15330 3.34575 3.40003 2.986 0.00282 **
## CogonStatusNumeric1 2.92920 2.85123 2.89805 1.011 0.31214
## SMavg -0.29008 0.48549 0.49084 0.591 0.55453
## PercCanopy2 -0.04450 0.05432 0.05465 0.814 0.41549
##
## (conditional average)
## Estimate Std. Error Adjusted SE z value Pr(>|z|)
## (Intercept) 18.73570 6.56503 6.62614 2.828 0.00469 **
## Common_NameMayberry -0.51120 3.31339 3.36728 0.152 0.87933
## Common_NameYaupon Holly 10.15330 3.34575 3.40003 2.986 0.00282 **
## CogonStatusNumeric1 2.92920 2.85123 2.89805 1.011 0.31214
## SMavg -0.56552 0.55112 0.56028 1.009 0.31281
## PercCanopy2 -0.08675 0.04568 0.04644 1.868 0.06176 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sim_residuals <- simulateResiduals(fittedModel = stem_glm2, plot = FALSE)
plot(sim_residuals)

vif(stem_glm2)
## GVIF Df GVIF^(1/(2*Df))
## Common_Name 1.024389 2 1.006042
## CogonStatusNumeric 1.175014 1 1.083981
## SMavg 1.171948 1 1.082565
## PercCanopy2 1.064814 1 1.031898
Total Stem Count Plots
# Predicted stem counts (marginal over species + covariates)
stem_preds <- ggpredict(
stem_glm5,
terms = "CogonStatusNumeric",
bias_correction = TRUE
)
# Ensure factor ordering
model_data_stems$CogonStatusNumeric <- factor(
model_data_stems$CogonStatusNumeric,
levels = c(0, 1)
)
# Plot
stem_plot <- ggplot() +
# raw data
geom_jitter(
data = model_data_stems,
aes(
x = CogonStatusNumeric,
y = TotalStemsValue,
color = CogonStatusNumeric
),
width = 0.08,
alpha = 0.15,
size = 2
) +
# predicted means
geom_point(
data = stem_preds,
aes(
x = x,
y = predicted,
color = factor(x)
),
size = 4
) +
# CI
geom_errorbar(
data = stem_preds,
aes(
x = x,
ymin = conf.low,
ymax = conf.high,
color = factor(x)
),
width = 0.1,
linewidth = 1
) +
scale_x_discrete(
labels = c("0" = "Native", "1" = "Invaded")
) +
scale_color_manual(
values = c(
"0" = "#0072B2",
"1" = "#D55E00"
),
guide = "none"
) +
labs(
x = "Cogongrass Status",
y = "Predicted Total Stem Count"
) +
theme_classic(base_size = 14)
stem_plot

## Save
ggsave(
filename = "C:/Users/DrewIvory/OneDrive - University of Florida/Desktop/School/PHD/01_Projects/10_Shrub/03_Figures/TotalStemCount_Predictions.png",
plot = stem_plot,
width = 8,
height = 6,
dpi = 300
)
Modeling Total Stem Count Difference
model_dataD_stems <- model_data_stems %>%
distinct(PlantTag, .keep_all = TRUE)
model_dataD_stems <- model_dataD_stems %>%
filter(!PlantTag %in% c("9555"))
# Null
stemD_lm0 <- lm(StemDiff ~ 1,
data = model_dataD_stems
)
stemD_lm1 <- lm(StemDiff ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2,
data = model_dataD_stems
)
summary(stemD_lm1)
##
## Call:
## lm(formula = StemDiff ~ Common_Name * CogonStatusNumeric + SMavg +
## PercCanopy2, data = model_dataD_stems)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1491 -1.3583 -0.0580 0.9044 8.3694
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -1.982159 1.365012 -1.452
## Common_NameMayberry 0.867466 0.914751 0.948
## Common_NameYaupon_Holly 0.870007 0.911111 0.955
## CogonStatusNumeric1 -0.258603 0.930901 -0.278
## SMavg 0.140575 0.111947 1.256
## PercCanopy2 -0.008093 0.009000 -0.899
## Common_NameMayberry:CogonStatusNumeric1 -0.664135 1.336040 -0.497
## Common_NameYaupon_Holly:CogonStatusNumeric1 -0.991344 1.301736 -0.762
## Pr(>|t|)
## (Intercept) 0.150
## Common_NameMayberry 0.346
## Common_NameYaupon_Holly 0.343
## CogonStatusNumeric1 0.782
## SMavg 0.213
## PercCanopy2 0.371
## Common_NameMayberry:CogonStatusNumeric1 0.621
## Common_NameYaupon_Holly:CogonStatusNumeric1 0.449
##
## Residual standard error: 2.48 on 78 degrees of freedom
## Multiple R-squared: 0.06051, Adjusted R-squared: -0.0238
## F-statistic: 0.7177 on 7 and 78 DF, p-value: 0.6572
stemD_lm2 <- lm(StemDiff ~ Common_Name + CogonStatusNumeric + SMavg + PercCanopy2,
data = model_dataD_stems
)
stemD_lm3 <- lm(StemDiff ~ CogonStatusNumeric,
data = model_dataD_stems
)
stemD_lm4 <- lm(StemDiff ~ Common_Name * CogonStatusNumeric,
data = model_dataD_stems
)
stemD_lm5 <- lm(StemDiff ~ Common_Name + CogonStatusNumeric,
data = model_dataD_stems
)
## AIC comparison
models <- list(
"Model 0" = stemD_lm0,
"Model 1" = stemD_lm1,
"Model 2" = stemD_lm2,
"Model 3" = stemD_lm3,
"Model 4" = stemD_lm4,
"Model 5" = stemD_lm5
)
mod_sel <- aictab(cand.set = models)
mod_sel
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## Model 0 2 401.41 0.00 0.60 0.60 -198.64
## Model 3 3 402.72 1.31 0.31 0.91 -198.22
## Model 5 5 406.07 4.65 0.06 0.97 -197.66
## Model 2 7 408.01 6.59 0.02 0.99 -196.29
## Model 4 7 410.21 8.79 0.01 1.00 -197.39
## Model 1 9 412.27 10.86 0.00 1.00 -195.95
stemD_lm <- stemD_lm3
summary(stemD_lm)
##
## Call:
## lm(formula = StemDiff ~ CogonStatusNumeric, data = model_dataD_stems)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3415 -1.3415 0.1778 1.1778 8.6585
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1778 0.3658 -0.486 0.628
## CogonStatusNumeric1 -0.4808 0.5298 -0.907 0.367
##
## Residual standard error: 2.454 on 84 degrees of freedom
## Multiple R-squared: 0.009708, Adjusted R-squared: -0.002081
## F-statistic: 0.8235 on 1 and 84 DF, p-value: 0.3668
# 1. Simulate residuals (standard is 250 simulations)
sim_residuals <- simulateResiduals(fittedModel = stemD_lm, plot = FALSE)
# 2. Run the main diagnostic plots
plot(sim_residuals)

testUniformity(sim_residuals)

##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.10595, p-value = 0.2892
## alternative hypothesis: two-sided
testDispersion(sim_residuals)

##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.98454, p-value = 0.96
## alternative hypothesis: two.sided
testOutliers(sim_residuals)

##
## DHARMa outlier test based on exact binomial test with approximate
## expectations
##
## data: sim_residuals
## outliers at both margin(s) = 1, observations = 86, p-value = 0.4974
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
## 0.0002943498 0.0630905163
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 )
## 0.01162791
Plot for Stem Count Difference
GAM for Stem Count Difference
stemD_gam1 <- gam(
StemDiff ~ Common_Name * CogonStatusNumeric +
s(SMavg, k = 5) +
s(PercCanopy2, k = 5),
family = scat(), # The Scaled-t family handles 'heavy tails' better than Gaussian
data = model_dataD_stems,
method = "REML"
)
stemD_gam2 <- gam(
StemDiff ~ Common_Name + CogonStatusNumeric +
s(SMavg, k = 5) +
s(PercCanopy2, k = 5),
family = scat(),
data = model_dataD_stems,
method = "REML"
)
stemD_gam3 <- gam(
StemDiff ~ CogonStatusNumeric,
family = scat(),
data = model_dataD_stems,
method = "REML"
)
stemD_gam4 <- gam(
StemDiff ~ Common_Name * CogonStatusNumeric,
family = scat(),
data = model_dataD_stems,
method = "REML"
)
# Model Selection
model_comparison <- AIC(stemD_gam1, stemD_gam2, stemD_gam3, stemD_gam4)
model_comparison
## df AIC
## stemD_gam1 11.782037 402.7777
## stemD_gam2 9.108303 400.3537
## stemD_gam3 4.000000 398.6947
## stemD_gam4 8.000000 402.1083
stemD_gam <- stemD_gam3
summary(stemD_gam)
##
## Family: Scaled t(4.586,1.889)
## Link function: identity
##
## Formula:
## StemDiff ~ CogonStatusNumeric
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2735 0.3282 -0.834 0.405
## CogonStatusNumeric1 -0.5336 0.4753 -1.123 0.262
##
##
## R-sq.(adj) = -0.0022 Deviance explained = 1.11%
## -REML = 195.71 Scale est. = 1 n = 86
# Re-check DHARMa
sim_residuals <- simulateResiduals(stemD_gam, plot = FALSE)
## Registered S3 method overwritten by 'mgcViz':
## method from
## +.gg ggplot2
plot(sim_residuals)

testUniformity(sim_residuals)

##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.052651, p-value = 0.971
## alternative hypothesis: two-sided
testDispersion(sim_residuals)

##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.96375, p-value = 0.976
## alternative hypothesis: two.sided
testOutliers(sim_residuals)

##
## DHARMa outlier test based on exact binomial test with approximate
## expectations
##
## data: sim_residuals
## outliers at both margin(s) = 1, observations = 86, p-value = 0.4974
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
## 0.0002943498 0.0630905163
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 )
## 0.01162791
Post-hoc Tests for Stem Count
# 1. Calculate marginal means for the interaction
#interaction_means_stems <- emmeans(global_stem_lm, ~ Common_Name *
#CogonStatusNumeric)
# 2. View the means
#interaction_means_stems
# 3. Perform pair-wise comparisons (Post-hoc Tukey test)
#pairs(interaction_means_stems, by = "Common_Name")
Predict