Set Up

Data Cleaning

## Recoding CogonStatus to numeric for modeling
shrubs$CogonStatusNumeric <- ifelse(shrubs$CogonStatus == "in", 1, 0)

## Convert categorical variables to factors
shrubs$ShrubType <- factor(shrubs$ShrubType)
shrubs$Common_Name <- factor(shrubs$Common_Name)
shrubs$CogonStatusNumeric <- factor(shrubs$CogonStatusNumeric)
shrubs$Patch <- factor(shrubs$Patch)
shrubs$PlantTag <- as.factor(shrubs$PlantTag)

## Calculate mean height for each shrub using the four height measurements
## Second measurement
shrubs$MeanHeight2 <- rowMeans(
  shrubs[, c("TH2.1", "TH2.2", "TH2.3", "TH2.4")],
  na.rm = TRUE
)

shrubs$MeanHeight2[is.nan(shrubs$MeanHeight2)] <- NA

Height Difference

# Calculate the difference between MeanHeight2 and MeanHeight
shrubs$HeightDiff <- shrubs$MeanHeight2 - shrubs$MeanHeight

###Remove the height with very high negative value (YH and MB)
#shrubs <- shrubs[!shrubs$PlantTag %in% c(9505, 9506), ] # Why are we removing these two plants?

# Convert MeanHeight and MeanHeight2 to long format

long_height <- shrubs %>%
  pivot_longer(
    cols = c(MeanHeight, MeanHeight2),
    names_to = "SamplingPeriod",
    values_to = "Height"
  )

long_height$SamplingPeriod <- factor(
  long_height$SamplingPeriod,
  levels = c("MeanHeight", "MeanHeight2"),
  labels = c("Pre", "Post") # Pre- or Post-Fire
)

####dropping row 57 in height
# Why is this row being dropped?

hist(long_height$Height)

model_data <- na.omit(long_height[, c("PlantTag", "HeightDiff", "Common_Name", 
                                      "CogonStatusNumeric", "SMavg",
                                      "PercCanopy2", "SamplingPeriod", "Height")])

Height Models

model_dataH <- model_data %>%
  filter(!PlantTag %in% c("9555", "9504", "9585", "9691"))


# Null
height_glmer0 <- glmer(Height ~ 1 + (1 | PlantTag),
                       data = model_dataH,
                       family = gaussian(link = "identity")
                       )

## Warning in glmer(Height ~ 1 + (1 | PlantTag), data = model_dataH, family =
## gaussian(link = "identity")): calling glmer() with family=gaussian (identity
## link) as a shortcut to lmer() is deprecated; please call lmer() directly

# Global Interaction

height_glmer1 <- glmer(Height ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2 +
                         (1 | PlantTag),
                       data = model_dataH,
                       family = gaussian(link = "identity")
)

## Warning in glmer(Height ~ Common_Name * CogonStatusNumeric + SMavg +
## PercCanopy2 + : calling glmer() with family=gaussian (identity link) as a
## shortcut to lmer() is deprecated; please call lmer() directly

# Global Additive
height_glmer2 <- glmer(Height ~ Common_Name + CogonStatusNumeric + SMavg + PercCanopy2 +
                       (1 | PlantTag),
                     data = model_dataH,
                     family = gaussian(link = "identity")
                     )

## Warning in glmer(Height ~ Common_Name + CogonStatusNumeric + SMavg +
## PercCanopy2 + : calling glmer() with family=gaussian (identity link) as a
## shortcut to lmer() is deprecated; please call lmer() directly

# Invasion Only
height_glmer3 <- glmer(Height ~ CogonStatusNumeric +
                       (1 | PlantTag),
                     data = model_dataH,
                     family = gaussian(link = "identity")
                     )

## Warning in glmer(Height ~ CogonStatusNumeric + (1 | PlantTag), data =
## model_dataH, : calling glmer() with family=gaussian (identity link) as a
## shortcut to lmer() is deprecated; please call lmer() directly

# Invasion by species
height_glmer4 <- glmer(Height ~ Common_Name * CogonStatusNumeric +
                       (1 | PlantTag),
                     data = model_dataH,
                     family = gaussian(link = "identity")
                     )

## Warning in glmer(Height ~ Common_Name * CogonStatusNumeric + (1 | PlantTag), :
## calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
## deprecated; please call lmer() directly

height_glmer5 <- glmer(Height ~ Common_Name + CogonStatusNumeric +
                       (1 | PlantTag),
                     data = model_dataH,
                     family = gaussian(link = "identity")
                     )

## Warning in glmer(Height ~ Common_Name + CogonStatusNumeric + (1 | PlantTag), :
## calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
## deprecated; please call lmer() directly

models <- list(
  "Model 0" = height_glmer0,
  "Model 1" = height_glmer1,
  "Model 2" = height_glmer2,
  "Model 3" = height_glmer3,
  "Model 4" = height_glmer4,
  "Model 5" = height_glmer5
)

mod_sel <- aictab(cand.set = models)

## Warning in aictab.AIClmerMod(cand.set = models): 
## Model selection for fixed effects is only appropriate with ML estimation:
## REML (default) should only be used to select random effects for a constant set of fixed effects

mod_sel

## 
## Model selection based on AICc:
## 
##          K    AICc Delta_AICc AICcWt Cum.Wt  Res.LL
## Model 4  8 1449.77       0.00   0.89   0.89 -716.43
## Model 1 10 1454.01       4.24   0.11   0.99 -716.30
## Model 5  6 1459.70       9.93   0.01   1.00 -723.58
## Model 2  8 1464.19      14.42   0.00   1.00 -723.64
## Model 3  4 1491.92      42.15   0.00   1.00 -741.83
## Model 0  3 1503.33      53.56   0.00   1.00 -748.59

height_glmer <- height_glmer4

#-------------#
## Best model- switched to lmer since it is the same fixed effects structure but allows for p-values to be calculated
height_lmer <- lmer(Height ~ Common_Name * CogonStatusNumeric + (1 | PlantTag),
                    data = model_dataH)

# This will now display the p-values
summary(height_lmer)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Height ~ Common_Name * CogonStatusNumeric + (1 | PlantTag)
##    Data: model_dataH
## 
## REML criterion at convergence: 1432.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9870 -0.6758 -0.1694  0.5611  2.6195 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  PlantTag (Intercept) 175.5    13.25   
##  Residual             267.7    16.36   
## Number of obs: 166, groups:  PlantTag, 83
## 
## Fixed effects:
##                                             Estimate Std. Error      df t value
## (Intercept)                                   62.224      4.701  77.000  13.237
## Common_NameMayberry                          -16.308      6.536  77.000  -2.495
## Common_NameYaupon_Holly                      -17.834      6.648  77.000  -2.683
## CogonStatusNumeric1                           19.675      6.536  77.000   3.010
## Common_NameMayberry:CogonStatusNumeric1      -12.105      9.441  77.000  -1.282
## Common_NameYaupon_Holly:CogonStatusNumeric1  -10.192      9.414  77.000  -1.083
##                                             Pr(>|t|)    
## (Intercept)                                  < 2e-16 ***
## Common_NameMayberry                          0.01474 *  
## Common_NameYaupon_Holly                      0.00894 ** 
## CogonStatusNumeric1                          0.00353 ** 
## Common_NameMayberry:CogonStatusNumeric1      0.20362    
## Common_NameYaupon_Holly:CogonStatusNumeric1  0.28233    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Cmm_NM Cm_NY_H CgnSN1 C_NM:C
## Cmmn_NmMybr -0.719                             
## Cmmn_NmYp_H -0.707  0.509                      
## CgnSttsNmr1 -0.719  0.517  0.509               
## Cmm_NM:CSN1  0.498 -0.692 -0.352  -0.692       
## C_NY_H:CSN1  0.499 -0.359 -0.706  -0.694  0.481

#------------#



## 1. Simulate residuals (standard is 250 simulations)
sim_residuals <- simulateResiduals(fittedModel = height_glmer, plot = FALSE)

## 2. Run the main diagnostic plots
plot(sim_residuals)

testUniformity(sim_residuals)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.045831, p-value = 0.8766
## alternative hypothesis: two-sided

testDispersion(sim_residuals)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.95221, p-value = 0.68
## alternative hypothesis: two.sided

testOutliers(sim_residuals) # Maybe remove the largest values?

## 
##  DHARMa outlier test based on exact binomial test with approximate
##  expectations
## 
## data:  sim_residuals
## outliers at both margin(s) = 0, observations = 166, p-value = 0.6467
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
##  0.00000000 0.02197707
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 ) 
##                                                      0

# or remove the negative values and use a gamma distribution

vif(height_glmer)

##                                    GVIF Df GVIF^(1/(2*Df))
## Common_Name                    3.725363  2        1.389288
## CogonStatusNumeric             2.861733  1        1.691666
## Common_Name:CogonStatusNumeric 6.917217  2        1.621746

Leave-One-Out Cross-Validation (LOOCV) for Height Model

# Get unique tags
tags <- unique(model_data$PlantTag)
n_tags <- length(tags)

# Initialize vector for predictions
loocv_preds <- numeric(nrow(model_data))

for(i in 1:n_tags) {
  # 1. Identify rows for the current plant
  test_indices <- which(model_data$PlantTag == tags[i])
  
  # 2. Split data
  train_data <- model_data[-test_indices, ]
  test_data  <- model_data[test_indices, ]
  
  # 3. Refit the LMM
  # (Note: Use lmer with the same formula as your summary)
  m_fold <- lmer(Height ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2 + (1 | PlantTag), 
                 data = train_data)
  
  # 4. Predict for the held-out plant 
  # re.form = NA tells R to predict using only Fixed Effects (since PlantTag is new)
  loocv_preds[test_indices] <- predict(m_fold, newdata = test_data, re.form = NA)
}

# 5. Calculate Performance
actual <- model_data$Height
rmse <- sqrt(mean((actual - loocv_preds)^2))
mae <- mean(abs(actual - loocv_preds))
r2_cv <- 1 - (sum((actual - loocv_preds)^2) / sum((actual - mean(actual))^2))
mae_cv <- mean(abs(model_data$Height - loocv_preds))


cat("Leave-One-Plant-Out MAE:", round(mae_cv, 3), "cm\n")

## Leave-One-Plant-Out MAE: 19.87 cm

cat("Leave-One-Plant-Out RMSE:", round(rmse, 3), "\n")

## Leave-One-Plant-Out RMSE: 25.926

cat("Leave-One-Plant-Out R-squared:", round(r2_cv, 3), "\n")

## Leave-One-Plant-Out R-squared: 0.088

Height- Predictive Plots

# Predictions
height_preds <- ggpredict(
  height_lmer,
  terms = "CogonStatusNumeric"
)

# Make status a factor for plotting raw points
model_data$CogonStatusNumeric <- factor(
  model_data$CogonStatusNumeric,
  levels = c(0, 1)
)

# Plot
height_plot <- ggplot() +
  
  # Raw data points
  geom_jitter(
    data = model_data,
    aes(x = CogonStatusNumeric,
        y = Height,
        color = CogonStatusNumeric),
    width = 0.08,
    alpha = 0.15,
    size = 2
  ) +
  
  # Predicted means
  geom_point(
    data = height_preds,
    aes(x = x,
        y = predicted,
        color = factor(x)),
    size = 4
  ) +
  
  # Confidence intervals
  geom_errorbar(
    data = height_preds,
    aes(x = x,
        ymin = conf.low,
        ymax = conf.high,
        color = factor(x)),
    width = 0.1,
    linewidth = 1
  ) +
  
  scale_x_discrete(
    labels = c(
      "0" = "Native",
      "1" = "Invaded"
    )
  ) +
  
  scale_color_manual(
    values = c(
      "0" = "#0072B2",
      "1" = "#D55E00"
    ),
    guide = "none"
  ) +
  
  labs(
    x = "Cogongrass Status",
    y = "Height (cm)"
  ) +
  
  theme_classic(base_size = 15)

height_plot

## Save
ggsave(
  filename = "C:/Users/DrewIvory/OneDrive - University of Florida/Desktop/School/PHD/01_Projects/10_Shrub/03_Figures/Height_Predictions.png",
  plot = height_plot,
  width = 4,
  height = 6,
  dpi = 300
)

Species Height Plot

# 1. Generate predictions by species
height_species_preds <- ggpredict(
  height_lmer,
  terms = c("Common_Name")
)

# Fix the underscore in the model predictions data frame so it matches the clean text
height_species_preds$x <- gsub("Yaupon_Holly", "Yaupon Holly", height_species_preds$x)

# 2. Fix the underscore in your raw data and set the factor levels
model_data$Common_Name <- gsub("Yaupon_Holly", "Yaupon Holly", model_data$Common_Name)
model_data$Common_Name <- factor(
  model_data$Common_Name,
  levels = c("Beautyberry", "Mayberry", "Yaupon Holly")
)

# 3. Build the professional, layered plot
height_species_plot <- ggplot() +
  
  # Layer 1: Raw data points (faded in background to show variation)
  geom_jitter(
    data = model_data,
    aes(x = Common_Name, 
        y = Height, 
        color = Common_Name),
    width = 0.1,      # Subtle horizontal spread
    alpha = 0.15,     # Highly transparent so it doesn't crowd the model means
    size = 2
  ) +
  
  # Layer 2: Model-predicted means (prominent foreground)
  geom_point(
    data = height_species_preds,
    aes(x = factor(x, levels = c("Beautyberry", "Mayberry", "Yaupon Holly")), 
        y = predicted, 
        color = factor(x, levels = c("Beautyberry", "Mayberry", "Yaupon Holly"))),
    size = 4
  ) +
  
  # Layer 3: Model confidence intervals
  geom_errorbar(
    data = height_species_preds,
    aes(x = factor(x, levels = c("Beautyberry", "Mayberry", "Yaupon Holly")), 
        ymin = conf.low, 
        ymax = conf.high, 
        color = factor(x, levels = c("Beautyberry", "Mayberry", "Yaupon Holly"))),
    width = 0.1,
    linewidth = 1
  ) +
  
  # Color palette: Cohesive, colorblind-friendly academic tones
  scale_color_manual(
    values = c(
      "Beautyberry"  = "#009E73",  # Clean Green
      "Mayberry"     = "#E69F00",  # Muted Orange/Gold
      "Yaupon Holly" = "#56B4E9"   # Soft Blue
    ),
    guide = "none"                 # Removes unnecessary legend since x-axis is labeled
  ) +
  
  # Axis labels matching manuscript standard
  labs(
    x = "Species", 
    y = "Height (cm)"
  ) +
  
  # Clean, minimal academic theme
  theme_classic(base_size = 15)

# View the plot
height_species_plot

# 4. Save at publication-quality resolution (300 DPI)
ggsave(
  filename = "C:/Users/DrewIvory/OneDrive - University of Florida/Desktop/School/PHD/01_Projects/10_Shrub/03_Figures/Species_Height_Predictions.png",
  plot = height_species_plot,
  width = 5,        # Slightly wider than the single comparison plot to let 3 species breathe
  height = 6,
  dpi = 300
)

Global Model for Species Height Difference

model_dataD <- model_data %>%
  distinct(PlantTag, .keep_all = TRUE)

model_dataD <- model_dataD %>%
  filter(!PlantTag %in% c("9505", "9506", "9546", "9555", "9585",
                          "9589", "9691")) # had to remove the largest negative values.

# Null
heightD_lm0 <- lm(HeightDiff ~ 1,
  data = model_dataD
)

# Global Interaction
heightD_lm1 <- lm(HeightDiff ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2,
  data = model_dataD
)

# Global Additive
heightD_lm2 <- lm(HeightDiff ~ Common_Name + CogonStatusNumeric + SMavg + PercCanopy2,
  data = model_dataD
)

# Invasion Only
heightD_lm3 <- lm(HeightDiff ~ CogonStatusNumeric,
  data = model_dataD
)

# Invasion by species
heightD_lm4 <- lm(HeightDiff ~ Common_Name * CogonStatusNumeric,
  data = model_dataD
)

heightD_lm5 <- lm(HeightDiff ~ Common_Name + CogonStatusNumeric,
  data = model_dataD
)

## AIC comparison
models <- list(
  "Model 0" = heightD_lm0,
  "Model 1" = heightD_lm1,
  "Model 2" = heightD_lm2,
  "Model 3" = heightD_lm3,
  "Model 4" = heightD_lm4,
  "Model 5" = heightD_lm5
)

mod_sel <- aictab(cand.set = models)
mod_sel

## 
## Model selection based on AICc:
## 
##         K   AICc Delta_AICc AICcWt Cum.Wt      LL
## Model 1 9 623.94       0.00   0.71   0.71 -301.69
## Model 4 7 625.80       1.86   0.28   0.99 -305.12
## Model 2 7 634.38      10.44   0.00   1.00 -309.41
## Model 5 5 634.48      10.54   0.00   1.00 -311.84
## Model 0 2 641.45      17.51   0.00   1.00 -318.65
## Model 3 3 642.54      18.60   0.00   1.00 -318.11

topmods <- list(heightD_lm1, heightD_lm4)

avg_heightD_mod <- model.avg(topmods)

summary(avg_heightD_mod)

## 
## Call:
## model.avg(object = topmods)
## 
## Component model call: 
## lm(formula = <2 unique values>, data = model_dataD)
## 
## Component models: 
##       df  logLik   AICc delta weight
## 12345  9 -301.69 623.94  0.00   0.72
## 125    7 -305.12 625.80  1.86   0.28
## 
## Term codes: 
##             CogonStatusNumeric                    Common_Name 
##                              1                              2 
##                    PercCanopy2                          SMavg 
##                              3                              4 
## CogonStatusNumeric:Common_Name 
##                              5 
## 
## Model-averaged coefficients:  
## (full average) 
##                                             Estimate Std. Error Adjusted SE
## (Intercept)                                 28.39381    7.88564     7.95273
## Common_NameMayberry                         -5.12541    4.20529     4.27616
## Common_NameYaupon Holly                     -1.30090    4.28168     4.35329
## CogonStatusNumeric1                         -6.97770    4.27156     4.34269
## SMavg                                       -0.71063    0.63567     0.64120
## PercCanopy2                                 -0.05240    0.04860     0.04905
## CogonStatusNumeric1:Common_NameMayberry      9.75609    6.17277     6.27483
## CogonStatusNumeric1:Common_NameYaupon Holly 23.83278    6.22703     6.33169
##                                             z value Pr(>|z|)    
## (Intercept)                                   3.570 0.000356 ***
## Common_NameMayberry                           1.199 0.230684    
## Common_NameYaupon Holly                       0.299 0.765068    
## CogonStatusNumeric1                           1.607 0.108105    
## SMavg                                         1.108 0.267737    
## PercCanopy2                                   1.068 0.285320    
## CogonStatusNumeric1:Common_NameMayberry       1.555 0.119995    
## CogonStatusNumeric1:Common_NameYaupon Holly   3.764 0.000167 ***
##  
## (conditional average) 
##                                             Estimate Std. Error Adjusted SE
## (Intercept)                                 28.39381    7.88564     7.95273
## Common_NameMayberry                         -5.12541    4.20529     4.27616
## Common_NameYaupon Holly                     -1.30090    4.28168     4.35329
## CogonStatusNumeric1                         -6.97770    4.27156     4.34269
## SMavg                                       -0.99141    0.53420     0.54333
## PercCanopy2                                 -0.07311    0.04220     0.04292
## CogonStatusNumeric1:Common_NameMayberry      9.75609    6.17277     6.27483
## CogonStatusNumeric1:Common_NameYaupon Holly 23.83278    6.22703     6.33169
##                                             z value Pr(>|z|)    
## (Intercept)                                   3.570 0.000356 ***
## Common_NameMayberry                           1.199 0.230684    
## Common_NameYaupon Holly                       0.299 0.765068    
## CogonStatusNumeric1                           1.607 0.108105    
## SMavg                                         1.825 0.068049 .  
## PercCanopy2                                   1.703 0.088512 .  
## CogonStatusNumeric1:Common_NameMayberry       1.555 0.119995    
## CogonStatusNumeric1:Common_NameYaupon Holly   3.764 0.000167 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Simulate residuals on the most complicated model to check assumptions
# Unable to simulate residuals on the averaged model
sim_residuals <- simulateResiduals(fittedModel = heightD_lm1, plot = FALSE)

# 2. Run the main diagnostic plots
plot(sim_residuals)

testUniformity(sim_residuals)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.068, p-value = 0.8532
## alternative hypothesis: two-sided

testDispersion(sim_residuals)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.90938, p-value = 0.624
## alternative hypothesis: two.sided

testOutliers(sim_residuals)

## 
##  DHARMa outlier test based on exact binomial test with approximate
##  expectations
## 
## data:  sim_residuals
## outliers at both margin(s) = 1, observations = 80, p-value = 0.4727
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
##  0.0003164225 0.0676877845
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 ) 
##                                                 0.0125

vif(heightD_lm1)

## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif

##                                    GVIF Df GVIF^(1/(2*Df))
## Common_Name                    3.884060  2        1.403853
## CogonStatusNumeric             2.909929  1        1.705851
## SMavg                          1.287353  1        1.134616
## PercCanopy2                    1.079139  1        1.038816
## Common_Name:CogonStatusNumeric 7.692400  2        1.665388

LOOCV for model average

# Initialize a vector to store predictions
n <- nrow(model_dataD)
loocv_preds <- numeric(n)

for(i in 1:n) {
  # 1. Split data
  train_data <- model_dataD[-i, ]
  test_point  <- model_dataD[i, , drop = FALSE]
  
  # 2. Refit the component models on training data
  m1 <- lm(HeightDiff ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2, 
            data = train_data)
  m2 <- lm(HeightDiff ~ Common_Name + CogonStatusNumeric + SMavg + PercCanopy2, 
            data = train_data)
  m3 <- lm(HeightDiff ~ CogonStatusNumeric, 
            data = train_data)
  m4 <- lm(HeightDiff ~ Common_Name * CogonStatusNumeric, 
            data = train_data)
  
  # 3. Create the model average for this specific fold
  m_list <- list(m1, m2, m3, m4)
  avg_fold <- model.avg(m_list)
  
  # 4. Predict the held-out point (using the full average)
  loocv_preds[i] <- predict(avg_fold, newdata = test_point)
}

# 5. Calculate Performance Metrics
rmse <- sqrt(mean((model_dataD$HeightDiff - loocv_preds)^2))
mae <- mean(abs(model_dataD$HeightDiff - loocv_preds))

# 1. Calculate Total Sum of Squares (TSS)
tss <- sum((model_dataD$HeightDiff - mean(model_dataD$HeightDiff))^2)

# 2. Calculate Residual Sum of Squares from LOOCV (RSS)
rss_cv <- sum((model_dataD$HeightDiff - loocv_preds)^2)

# 3. Calculate LOOCV R-squared
r2_cv <- 1 - (rss_cv / tss)

cat("LOOCV R-squared:", round(r2_cv, 3), "\n")

## LOOCV R-squared: 0.176

cat("LOOCV RMSE:", round(rmse, 3), "\n")

## LOOCV RMSE: 11.791

cat("LOOCV MAE:", round(mae, 3), "\n")

## LOOCV MAE: 9.247

Height Difference Post-hoc Tests

# Generate predictions
heightD_preds <- ggpredict(
  avg_heightD_mod,
  terms = c("CogonStatusNumeric", "Common_Name")
)

heightD_preds <- heightD_preds %>%
  mutate(group = dplyr::recode(group,
    "Beautyberry" = "Beautyberry",
    "Mayberry" = "Mayberry",
    "Yaupon_Holly" = "Yaupon Holly"
  ))


# Make sure grouping variables are factors
model_data$CogonStatusNumeric <- factor(
  model_data$CogonStatusNumeric,
  levels = c(0, 1)
)

# Plot
heightD_plot <- ggplot() +
  
  # Raw data points
  geom_jitter(
    data = model_data,
    aes(x = CogonStatusNumeric,
        y = HeightDiff,
        color = CogonStatusNumeric),
    width = 0.08,
    alpha = 0.05,
    size = 2
  ) +
  
  # Predicted means
  geom_point(
    data = heightD_preds,
    aes(x = x,
        y = predicted,
        color = factor(x)),
    size = 4
  ) +
  
  # Confidence intervals
  geom_errorbar(
    data = heightD_preds,
    aes(x = x,
        ymin = conf.low,
        ymax = conf.high,
        color = factor(x)),
    width = 0.1,
    linewidth = 1
  ) +
  
  # Separate panels by species
  facet_wrap(~group) +
  
  scale_x_discrete(
    labels = c(
      "0" = "Native",
      "1" = "Invaded"
    )
  ) +
  
  scale_color_manual(
    values = c(
      "0" = "#0072B2",  # Native
      "1" = "#D55E00"   # Invaded
    ),
    guide = "none"
  ) +
  
  labs(
    x = "Cogongrass Status",
    y = "Height Difference (cm)"
  ) +
  
  theme_classic(base_size = 15)

heightD_plot

## Save
ggsave(
  filename = "C:/Users/DrewIvory/OneDrive - University of Florida/Desktop/School/PHD/01_Projects/10_Shrub/03_Figures/HeightDifference_Predictions.png",
  plot = heightD_plot,
  width = 8,
  height = 6,
  dpi = 300
)

Stem Count Analysis

Total Stem Count

Data Cleaning

# Calculate the difference between TotalStems2 and TotalStems
shrubs$StemDiff <- shrubs$TotalStems2 - shrubs$TotalStems

# Long Format
long_totalstems <- shrubs %>%
  # 1. Keep only the columns you need (your ID vars + your measure vars)
  dplyr::select(ShrubType, Common_Name, Patch, CogonStatusNumeric, SMavg, 
         PercCover2, PercCanopy2, PlantTag, Date, StemDiff, 
         TotalStems, TotalStems2) %>%
  # 2. Pivot only the stem columns
  pivot_longer(
    cols = c(TotalStems, TotalStems2),
    names_to = "SamplingPeriod",
    values_to = "TotalStemsValue"
  )

# Sample period is numeric
long_totalstems$SamplingPeriod <- factor(
  long_totalstems$SamplingPeriod,
  levels = c("TotalStems", "TotalStems2"),
  labels = c("Pre", "Post") # Pre- or Post-Fire
)

model_data_stems <- long_totalstems %>%
  dplyr::select(PlantTag, StemDiff, TotalStemsValue, ShrubType,
                Common_Name, CogonStatusNumeric, SMavg, PercCanopy2) %>%
  na.omit()

# Confirm Factors
model_data_stems$PlantTag <- as.factor(model_data_stems$PlantTag)
model_data_stems$Common_Name <- as.factor(model_data_stems$Common_Name)
model_data_stems$ShrubType <- as.factor(model_data_stems$ShrubType)
model_data_stems$CogonStatusNumeric <- as.factor(model_data_stems$CogonStatusNumeric)

Total Stem Count Model

# Null
stem_glm0 <- glmer(TotalStemsValue ~ 1 + (1 | PlantTag),
                       data = model_data_stems,
                       family = poisson(link = "log")
                       )


stem_glm1 <- glmer(TotalStemsValue ~ Common_Name * CogonStatusNumeric +
                     SMavg + PercCanopy2 + (1 | PlantTag),
                   data = model_data_stems,
                   family = poisson(link = "log")
                   )

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0836651 (tol = 0.002, component 1)

stem_glm2 <- glmer(TotalStemsValue ~ Common_Name + CogonStatusNumeric +
                     SMavg + PercCanopy2 + (1 | PlantTag),
                   data = model_data_stems,
                   family = poisson(link = "log")
                   )

stem_glm3 <- glmer(TotalStemsValue ~ CogonStatusNumeric +
                     SMavg + PercCanopy2 + (1 | PlantTag),
                   data = model_data_stems,
                   family = poisson(link = "log")
                   )

stem_glm4 <- glmer(TotalStemsValue ~ Common_Name * CogonStatusNumeric +
                     (1 | PlantTag),
                   data = model_data_stems,
                   family = poisson(link = "log")
                   )

stem_glm5 <- glmer(TotalStemsValue ~ Common_Name + CogonStatusNumeric +
                     (1 | PlantTag),
                   data = model_data_stems,
                   family = poisson(link = "log")
                   )

                   
models <- list(
  "Model 0" = stem_glm0,
  "Model 1" = stem_glm1,
  "Model 2" = stem_glm2,
  "Model 3" = stem_glm3,
  "Model 4" = stem_glm4,
  "Model 5" = stem_glm5
)

mod_sel <- aictab(cand.set = models)
mod_sel

## 
## Model selection based on AICc:
## 
##         K   AICc Delta_AICc AICcWt Cum.Wt      LL
## Model 5 5 904.07       0.00   0.60   0.60 -446.86
## Model 2 7 905.88       1.81   0.24   0.84 -445.60
## Model 4 7 907.62       3.55   0.10   0.94 -446.47
## Model 1 9 908.66       4.59   0.06   1.00 -444.78
## Model 3 5 922.31      18.24   0.00   1.00 -455.98
## Model 0 2 927.23      23.16   0.00   1.00 -461.58

summary(stem_glm5)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: TotalStemsValue ~ Common_Name + CogonStatusNumeric + (1 | PlantTag)
##    Data: model_data_stems
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     903.7     919.5    -446.9     893.7       169 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.79116 -0.42939 -0.09079  0.30804  2.31498 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  PlantTag (Intercept) 0.2765   0.5258  
## Number of obs: 174, groups:  PlantTag, 87
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              1.38627    0.13184  10.515  < 2e-16 ***
## Common_NameMayberry     -0.01423    0.16373  -0.087 0.930720    
## Common_NameYaupon_Holly  0.63361    0.15401   4.114 3.89e-05 ***
## CogonStatusNumeric1      0.44112    0.12931   3.411 0.000647 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Cmm_NM C_NY_H
## Cmmn_NmMybr -0.608              
## Cmmn_NmYp_H -0.624  0.499       
## CgnSttsNmr1 -0.519  0.049  0.007

topmods <- list(heightD_lm5, heightD_lm2)

avg_stem_mod <- model.avg(topmods)

summary(avg_stem_mod)

## 
## Call:
## model.avg(object = topmods)
## 
## Component model call: 
## lm(formula = <2 unique values>, data = model_dataD)
## 
## Component models: 
##      df  logLik   AICc delta weight
## 1234  7 -309.41 634.38   0.0   0.51
## 12    5 -311.84 634.48   0.1   0.49
## 
## Term codes: 
## CogonStatusNumeric        Common_Name        PercCanopy2              SMavg 
##                  1                  2                  3                  4 
## 
## Model-averaged coefficients:  
## (full average) 
##                         Estimate Std. Error Adjusted SE z value Pr(>|z|)   
## (Intercept)             18.73570    6.56503     6.62614   2.828  0.00469 **
## Common_NameMayberry     -0.51120    3.31339     3.36728   0.152  0.87933   
## Common_NameYaupon Holly 10.15330    3.34575     3.40003   2.986  0.00282 **
## CogonStatusNumeric1      2.92920    2.85123     2.89805   1.011  0.31214   
## SMavg                   -0.29008    0.48549     0.49084   0.591  0.55453   
## PercCanopy2             -0.04450    0.05432     0.05465   0.814  0.41549   
##  
## (conditional average) 
##                         Estimate Std. Error Adjusted SE z value Pr(>|z|)   
## (Intercept)             18.73570    6.56503     6.62614   2.828  0.00469 **
## Common_NameMayberry     -0.51120    3.31339     3.36728   0.152  0.87933   
## Common_NameYaupon Holly 10.15330    3.34575     3.40003   2.986  0.00282 **
## CogonStatusNumeric1      2.92920    2.85123     2.89805   1.011  0.31214   
## SMavg                   -0.56552    0.55112     0.56028   1.009  0.31281   
## PercCanopy2             -0.08675    0.04568     0.04644   1.868  0.06176 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

sim_residuals <- simulateResiduals(fittedModel = stem_glm2, plot = FALSE)

plot(sim_residuals)

vif(stem_glm2)

##                        GVIF Df GVIF^(1/(2*Df))
## Common_Name        1.024389  2        1.006042
## CogonStatusNumeric 1.175014  1        1.083981
## SMavg              1.171948  1        1.082565
## PercCanopy2        1.064814  1        1.031898

Total Stem Count Plots

# Predicted stem counts (marginal over species + covariates)
stem_preds <- ggpredict(
  stem_glm5,
  terms = "CogonStatusNumeric",
  bias_correction = TRUE
)

# Ensure factor ordering
model_data_stems$CogonStatusNumeric <- factor(
  model_data_stems$CogonStatusNumeric,
  levels = c(0, 1)
)

# Plot
stem_plot <- ggplot() +
  
  # raw data
  geom_jitter(
    data = model_data_stems,
    aes(
      x = CogonStatusNumeric,
      y = TotalStemsValue,
      color = CogonStatusNumeric
    ),
    width = 0.08,
    alpha = 0.15,
    size = 2
  ) +
  
  # predicted means
  geom_point(
    data = stem_preds,
    aes(
      x = x,
      y = predicted,
      color = factor(x)
    ),
    size = 4
  ) +
  
  # CI
  geom_errorbar(
    data = stem_preds,
    aes(
      x = x,
      ymin = conf.low,
      ymax = conf.high,
      color = factor(x)
    ),
    width = 0.1,
    linewidth = 1
  ) +
  
  scale_x_discrete(
    labels = c("0" = "Native", "1" = "Invaded")
  ) +
  
  scale_color_manual(
    values = c(
      "0" = "#0072B2",
      "1" = "#D55E00"
    ),
    guide = "none"
  ) +
  
  labs(
    x = "Cogongrass Status",
    y = "Predicted Total Stem Count"
  ) +
  
  theme_classic(base_size = 14)

stem_plot

## Save
ggsave(
  filename = "C:/Users/DrewIvory/OneDrive - University of Florida/Desktop/School/PHD/01_Projects/10_Shrub/03_Figures/TotalStemCount_Predictions.png",
  plot = stem_plot,
  width = 8,
  height = 6,
  dpi = 300
)

Modeling Total Stem Count Difference

model_dataD_stems <- model_data_stems %>%
  distinct(PlantTag, .keep_all = TRUE)


model_dataD_stems <- model_dataD_stems %>%
  filter(!PlantTag %in% c("9555"))


# Null
stemD_lm0 <- lm(StemDiff ~ 1,
  data = model_dataD_stems
)

stemD_lm1 <- lm(StemDiff ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2,
  data = model_dataD_stems
)

summary(stemD_lm1)

## 
## Call:
## lm(formula = StemDiff ~ Common_Name * CogonStatusNumeric + SMavg + 
##     PercCanopy2, data = model_dataD_stems)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1491 -1.3583 -0.0580  0.9044  8.3694 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                 -1.982159   1.365012  -1.452
## Common_NameMayberry                          0.867466   0.914751   0.948
## Common_NameYaupon_Holly                      0.870007   0.911111   0.955
## CogonStatusNumeric1                         -0.258603   0.930901  -0.278
## SMavg                                        0.140575   0.111947   1.256
## PercCanopy2                                 -0.008093   0.009000  -0.899
## Common_NameMayberry:CogonStatusNumeric1     -0.664135   1.336040  -0.497
## Common_NameYaupon_Holly:CogonStatusNumeric1 -0.991344   1.301736  -0.762
##                                             Pr(>|t|)
## (Intercept)                                    0.150
## Common_NameMayberry                            0.346
## Common_NameYaupon_Holly                        0.343
## CogonStatusNumeric1                            0.782
## SMavg                                          0.213
## PercCanopy2                                    0.371
## Common_NameMayberry:CogonStatusNumeric1        0.621
## Common_NameYaupon_Holly:CogonStatusNumeric1    0.449
## 
## Residual standard error: 2.48 on 78 degrees of freedom
## Multiple R-squared:  0.06051,    Adjusted R-squared:  -0.0238 
## F-statistic: 0.7177 on 7 and 78 DF,  p-value: 0.6572

stemD_lm2 <- lm(StemDiff ~ Common_Name + CogonStatusNumeric + SMavg + PercCanopy2,
  data = model_dataD_stems
)

stemD_lm3 <- lm(StemDiff ~ CogonStatusNumeric,
  data = model_dataD_stems
)

stemD_lm4 <- lm(StemDiff ~ Common_Name * CogonStatusNumeric,
  data = model_dataD_stems
)

stemD_lm5 <- lm(StemDiff ~ Common_Name + CogonStatusNumeric,
  data = model_dataD_stems
)

## AIC comparison
models <- list(
  "Model 0" = stemD_lm0,
  "Model 1" = stemD_lm1,
  "Model 2" = stemD_lm2,
  "Model 3" = stemD_lm3,
  "Model 4" = stemD_lm4,
  "Model 5" = stemD_lm5
)

mod_sel <- aictab(cand.set = models)
mod_sel

## 
## Model selection based on AICc:
## 
##         K   AICc Delta_AICc AICcWt Cum.Wt      LL
## Model 0 2 401.41       0.00   0.60   0.60 -198.64
## Model 3 3 402.72       1.31   0.31   0.91 -198.22
## Model 5 5 406.07       4.65   0.06   0.97 -197.66
## Model 2 7 408.01       6.59   0.02   0.99 -196.29
## Model 4 7 410.21       8.79   0.01   1.00 -197.39
## Model 1 9 412.27      10.86   0.00   1.00 -195.95

stemD_lm <- stemD_lm3

summary(stemD_lm)

## 
## Call:
## lm(formula = StemDiff ~ CogonStatusNumeric, data = model_dataD_stems)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.3415 -1.3415  0.1778  1.1778  8.6585 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)          -0.1778     0.3658  -0.486    0.628
## CogonStatusNumeric1  -0.4808     0.5298  -0.907    0.367
## 
## Residual standard error: 2.454 on 84 degrees of freedom
## Multiple R-squared:  0.009708,   Adjusted R-squared:  -0.002081 
## F-statistic: 0.8235 on 1 and 84 DF,  p-value: 0.3668

# 1. Simulate residuals (standard is 250 simulations)
sim_residuals <- simulateResiduals(fittedModel = stemD_lm, plot = FALSE)

# 2. Run the main diagnostic plots
plot(sim_residuals)

testUniformity(sim_residuals)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.10595, p-value = 0.2892
## alternative hypothesis: two-sided

testDispersion(sim_residuals)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.98454, p-value = 0.96
## alternative hypothesis: two.sided

testOutliers(sim_residuals)

## 
##  DHARMa outlier test based on exact binomial test with approximate
##  expectations
## 
## data:  sim_residuals
## outliers at both margin(s) = 1, observations = 86, p-value = 0.4974
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
##  0.0002943498 0.0630905163
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 ) 
##                                             0.01162791

Plot for Stem Count Difference

GAM for Stem Count Difference

stemD_gam1 <- gam(
  StemDiff ~ Common_Name * CogonStatusNumeric + 
             s(SMavg, k = 5) + 
             s(PercCanopy2, k = 5),
  family = scat(), # The Scaled-t family handles 'heavy tails' better than Gaussian
  data = model_dataD_stems, 
  method = "REML"
)

stemD_gam2 <- gam(
  StemDiff ~ Common_Name + CogonStatusNumeric + 
             s(SMavg, k = 5) + 
             s(PercCanopy2, k = 5),
  family = scat(), 
  data = model_dataD_stems, 
  method = "REML"
)

stemD_gam3 <- gam(
  StemDiff ~ CogonStatusNumeric,
  family = scat(), 
  data = model_dataD_stems, 
  method = "REML"
)

stemD_gam4 <- gam(
  StemDiff ~ Common_Name * CogonStatusNumeric,
  family = scat(), 
  data = model_dataD_stems, 
  method = "REML"
)

# Model Selection
model_comparison <- AIC(stemD_gam1, stemD_gam2, stemD_gam3, stemD_gam4)
model_comparison

##                   df      AIC
## stemD_gam1 11.782037 402.7777
## stemD_gam2  9.108303 400.3537
## stemD_gam3  4.000000 398.6947
## stemD_gam4  8.000000 402.1083

stemD_gam <- stemD_gam3

summary(stemD_gam)

## 
## Family: Scaled t(4.586,1.889) 
## Link function: identity 
## 
## Formula:
## StemDiff ~ CogonStatusNumeric
## 
## Parametric coefficients:
##                     Estimate Std. Error z value Pr(>|z|)
## (Intercept)          -0.2735     0.3282  -0.834    0.405
## CogonStatusNumeric1  -0.5336     0.4753  -1.123    0.262
## 
## 
## R-sq.(adj) =  -0.0022   Deviance explained = 1.11%
## -REML = 195.71  Scale est. = 1         n = 86

# Re-check DHARMa
sim_residuals <- simulateResiduals(stemD_gam, plot = FALSE)

## Registered S3 method overwritten by 'mgcViz':
##   method from   
##   +.gg   ggplot2

plot(sim_residuals)

testUniformity(sim_residuals)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.052651, p-value = 0.971
## alternative hypothesis: two-sided

testDispersion(sim_residuals)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.96375, p-value = 0.976
## alternative hypothesis: two.sided

testOutliers(sim_residuals)

## 
##  DHARMa outlier test based on exact binomial test with approximate
##  expectations
## 
## data:  sim_residuals
## outliers at both margin(s) = 1, observations = 86, p-value = 0.4974
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
##  0.0002943498 0.0630905163
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 ) 
##                                             0.01162791

Post-hoc Tests for Stem Count

# 1. Calculate marginal means for the interaction
#interaction_means_stems <- emmeans(global_stem_lm, ~ Common_Name *
#CogonStatusNumeric)

# 2. View the means
#interaction_means_stems
# 3. Perform pair-wise comparisons (Post-hoc Tukey test)
#pairs(interaction_means_stems, by = "Common_Name")

Shrub Type Analysis

Alan Ivory

2026-05-20

Set Up

Data Cleaning

Height Difference

Height Models

Leave-One-Out Cross-Validation (LOOCV) for Height Model

Height- Predictive Plots

Species Height Plot

Global Model for Species Height Difference

LOOCV for model average

Height Difference Post-hoc Tests

Stem Count Analysis

Total Stem Count

Data Cleaning

Total Stem Count Model

Total Stem Count Plots

Modeling Total Stem Count Difference

Plot for Stem Count Difference

GAM for Stem Count Difference

Post-hoc Tests for Stem Count

Predict