Set Up

Data Cleaning

## Recoding CogonStatus to numeric for modeling
shrubs$CogonStatusNumeric <- ifelse(shrubs$CogonStatus == "in", 1, 0)

## Convert categorical variables to factors
shrubs$ShrubType <- factor(shrubs$ShrubType)
shrubs$Common_Name <- factor(shrubs$Common_Name)
shrubs$CogonStatusNumeric <- factor(shrubs$CogonStatusNumeric)
shrubs$Patch <- factor(shrubs$Patch)
shrubs$PlantTag <- as.factor(shrubs$PlantTag)

## Calculate mean height for each shrub using the four height measurements
## Second measurement
shrubs$MeanHeight2 <- rowMeans(
  shrubs[, c("TH2.1", "TH2.2", "TH2.3", "TH2.4")],
  na.rm = TRUE
)

shrubs$MeanHeight2[is.nan(shrubs$MeanHeight2)] <- NA

Height Difference

# Calculate the difference between MeanHeight2 and MeanHeight
shrubs$HeightDiff <- shrubs$MeanHeight2 - shrubs$MeanHeight

###Remove the height with very high negative value (YH and MB)
#shrubs <- shrubs[!shrubs$PlantTag %in% c(9505, 9506), ] # Why are we removing these two plants?

# Convert MeanHeight and MeanHeight2 to long format

long_height <- shrubs %>%
  pivot_longer(
    cols = c(MeanHeight, MeanHeight2),
    names_to = "SamplingPeriod",
    values_to = "Height"
  )

long_height$SamplingPeriod <- factor(
  long_height$SamplingPeriod,
  levels = c("MeanHeight", "MeanHeight2"),
  labels = c("Pre", "Post") # Pre- or Post-Fire
)

####dropping row 57 in height
# Why is this row being dropped?

hist(long_height$Height)

model_data <- na.omit(long_height[, c("PlantTag", "HeightDiff", "Common_Name", 
                                      "CogonStatusNumeric", "SMavg",
                                      "PercCanopy2", "SamplingPeriod", "Height")])

Height Models

model_dataH <- model_data %>%
  filter(!PlantTag %in% c("9555", "9504", "9585", "9691"))


# Null
height_glmer0 <- glmer(Height ~ 1 + (1 | PlantTag),
                       data = model_dataH,
                       family = gaussian(link = "identity")
                       )
## Warning in glmer(Height ~ 1 + (1 | PlantTag), data = model_dataH, family =
## gaussian(link = "identity")): calling glmer() with family=gaussian (identity
## link) as a shortcut to lmer() is deprecated; please call lmer() directly
# Global Interaction

height_glmer1 <- glmer(Height ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2 +
                         (1 | PlantTag),
                       data = model_dataH,
                       family = gaussian(link = "identity")
)
## Warning in glmer(Height ~ Common_Name * CogonStatusNumeric + SMavg +
## PercCanopy2 + : calling glmer() with family=gaussian (identity link) as a
## shortcut to lmer() is deprecated; please call lmer() directly
# Global Additive
height_glmer2 <- glmer(Height ~ Common_Name + CogonStatusNumeric + SMavg + PercCanopy2 +
                       (1 | PlantTag),
                     data = model_dataH,
                     family = gaussian(link = "identity")
                     )
## Warning in glmer(Height ~ Common_Name + CogonStatusNumeric + SMavg +
## PercCanopy2 + : calling glmer() with family=gaussian (identity link) as a
## shortcut to lmer() is deprecated; please call lmer() directly
# Invasion Only
height_glmer3 <- glmer(Height ~ CogonStatusNumeric +
                       (1 | PlantTag),
                     data = model_dataH,
                     family = gaussian(link = "identity")
                     )
## Warning in glmer(Height ~ CogonStatusNumeric + (1 | PlantTag), data =
## model_dataH, : calling glmer() with family=gaussian (identity link) as a
## shortcut to lmer() is deprecated; please call lmer() directly
# Invasion by species
height_glmer4 <- glmer(Height ~ Common_Name * CogonStatusNumeric +
                       (1 | PlantTag),
                     data = model_dataH,
                     family = gaussian(link = "identity")
                     )
## Warning in glmer(Height ~ Common_Name * CogonStatusNumeric + (1 | PlantTag), :
## calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
## deprecated; please call lmer() directly
height_glmer5 <- glmer(Height ~ Common_Name + CogonStatusNumeric +
                       (1 | PlantTag),
                     data = model_dataH,
                     family = gaussian(link = "identity")
                     )
## Warning in glmer(Height ~ Common_Name + CogonStatusNumeric + (1 | PlantTag), :
## calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
## deprecated; please call lmer() directly
models <- list(
  "Model 0" = height_glmer0,
  "Model 1" = height_glmer1,
  "Model 2" = height_glmer2,
  "Model 3" = height_glmer3,
  "Model 4" = height_glmer4,
  "Model 5" = height_glmer5
)

mod_sel <- aictab(cand.set = models)
## Warning in aictab.AIClmerMod(cand.set = models): 
## Model selection for fixed effects is only appropriate with ML estimation:
## REML (default) should only be used to select random effects for a constant set of fixed effects
mod_sel
## 
## Model selection based on AICc:
## 
##          K    AICc Delta_AICc AICcWt Cum.Wt  Res.LL
## Model 4  8 1449.77       0.00   0.89   0.89 -716.43
## Model 1 10 1454.01       4.24   0.11   0.99 -716.30
## Model 5  6 1459.70       9.93   0.01   1.00 -723.58
## Model 2  8 1464.19      14.42   0.00   1.00 -723.64
## Model 3  4 1491.92      42.15   0.00   1.00 -741.83
## Model 0  3 1503.33      53.56   0.00   1.00 -748.59
height_glmer <- height_glmer4

#-------------#
## Best model- switched to lmer since it is the same fixed effects structure but allows for p-values to be calculated
height_lmer <- lmer(Height ~ Common_Name * CogonStatusNumeric + (1 | PlantTag),
                    data = model_dataH)

# This will now display the p-values
summary(height_lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Height ~ Common_Name * CogonStatusNumeric + (1 | PlantTag)
##    Data: model_dataH
## 
## REML criterion at convergence: 1432.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9870 -0.6758 -0.1694  0.5611  2.6195 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  PlantTag (Intercept) 175.5    13.25   
##  Residual             267.7    16.36   
## Number of obs: 166, groups:  PlantTag, 83
## 
## Fixed effects:
##                                             Estimate Std. Error      df t value
## (Intercept)                                   62.224      4.701  77.000  13.237
## Common_NameMayberry                          -16.308      6.536  77.000  -2.495
## Common_NameYaupon_Holly                      -17.834      6.648  77.000  -2.683
## CogonStatusNumeric1                           19.675      6.536  77.000   3.010
## Common_NameMayberry:CogonStatusNumeric1      -12.105      9.441  77.000  -1.282
## Common_NameYaupon_Holly:CogonStatusNumeric1  -10.192      9.414  77.000  -1.083
##                                             Pr(>|t|)    
## (Intercept)                                  < 2e-16 ***
## Common_NameMayberry                          0.01474 *  
## Common_NameYaupon_Holly                      0.00894 ** 
## CogonStatusNumeric1                          0.00353 ** 
## Common_NameMayberry:CogonStatusNumeric1      0.20362    
## Common_NameYaupon_Holly:CogonStatusNumeric1  0.28233    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Cmm_NM Cm_NY_H CgnSN1 C_NM:C
## Cmmn_NmMybr -0.719                             
## Cmmn_NmYp_H -0.707  0.509                      
## CgnSttsNmr1 -0.719  0.517  0.509               
## Cmm_NM:CSN1  0.498 -0.692 -0.352  -0.692       
## C_NY_H:CSN1  0.499 -0.359 -0.706  -0.694  0.481
#------------#



## 1. Simulate residuals (standard is 250 simulations)
sim_residuals <- simulateResiduals(fittedModel = height_glmer, plot = FALSE)

## 2. Run the main diagnostic plots
plot(sim_residuals)

testUniformity(sim_residuals)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.045831, p-value = 0.8766
## alternative hypothesis: two-sided
testDispersion(sim_residuals)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.95221, p-value = 0.68
## alternative hypothesis: two.sided
testOutliers(sim_residuals) # Maybe remove the largest values?

## 
##  DHARMa outlier test based on exact binomial test with approximate
##  expectations
## 
## data:  sim_residuals
## outliers at both margin(s) = 0, observations = 166, p-value = 0.6467
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
##  0.00000000 0.02197707
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 ) 
##                                                      0
# or remove the negative values and use a gamma distribution

vif(height_glmer)
##                                    GVIF Df GVIF^(1/(2*Df))
## Common_Name                    3.725363  2        1.389288
## CogonStatusNumeric             2.861733  1        1.691666
## Common_Name:CogonStatusNumeric 6.917217  2        1.621746

Leave-One-Out Cross-Validation (LOOCV) for Height Model

# Get unique tags
tags <- unique(model_data$PlantTag)
n_tags <- length(tags)

# Initialize vector for predictions
loocv_preds <- numeric(nrow(model_data))

for(i in 1:n_tags) {
  # 1. Identify rows for the current plant
  test_indices <- which(model_data$PlantTag == tags[i])
  
  # 2. Split data
  train_data <- model_data[-test_indices, ]
  test_data  <- model_data[test_indices, ]
  
  # 3. Refit the LMM
  # (Note: Use lmer with the same formula as your summary)
  m_fold <- lmer(Height ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2 + (1 | PlantTag), 
                 data = train_data)
  
  # 4. Predict for the held-out plant 
  # re.form = NA tells R to predict using only Fixed Effects (since PlantTag is new)
  loocv_preds[test_indices] <- predict(m_fold, newdata = test_data, re.form = NA)
}

# 5. Calculate Performance
actual <- model_data$Height
rmse <- sqrt(mean((actual - loocv_preds)^2))
mae <- mean(abs(actual - loocv_preds))
r2_cv <- 1 - (sum((actual - loocv_preds)^2) / sum((actual - mean(actual))^2))
mae_cv <- mean(abs(model_data$Height - loocv_preds))


cat("Leave-One-Plant-Out MAE:", round(mae_cv, 3), "cm\n")
## Leave-One-Plant-Out MAE: 19.87 cm
cat("Leave-One-Plant-Out RMSE:", round(rmse, 3), "\n")
## Leave-One-Plant-Out RMSE: 25.926
cat("Leave-One-Plant-Out R-squared:", round(r2_cv, 3), "\n")
## Leave-One-Plant-Out R-squared: 0.088

Height- Predictive Plots

# Predictions
height_preds <- ggpredict(
  height_lmer,
  terms = "CogonStatusNumeric"
)

# Make status a factor for plotting raw points
model_data$CogonStatusNumeric <- factor(
  model_data$CogonStatusNumeric,
  levels = c(0, 1)
)

# Plot
height_plot <- ggplot() +
  
  # Raw data points
  geom_jitter(
    data = model_data,
    aes(x = CogonStatusNumeric,
        y = Height,
        color = CogonStatusNumeric),
    width = 0.08,
    alpha = 0.15,
    size = 2
  ) +
  
  # Predicted means
  geom_point(
    data = height_preds,
    aes(x = x,
        y = predicted,
        color = factor(x)),
    size = 4
  ) +
  
  # Confidence intervals
  geom_errorbar(
    data = height_preds,
    aes(x = x,
        ymin = conf.low,
        ymax = conf.high,
        color = factor(x)),
    width = 0.1,
    linewidth = 1
  ) +
  
  scale_x_discrete(
    labels = c(
      "0" = "Native",
      "1" = "Invaded"
    )
  ) +
  
  scale_color_manual(
    values = c(
      "0" = "#0072B2",
      "1" = "#D55E00"
    ),
    guide = "none"
  ) +
  
  labs(
    x = "Cogongrass Status",
    y = "Height (cm)"
  ) +
  
  theme_classic(base_size = 15)

height_plot

## Save
ggsave(
  filename = "C:/Users/DrewIvory/OneDrive - University of Florida/Desktop/School/PHD/01_Projects/10_Shrub/03_Figures/Height_Predictions.png",
  plot = height_plot,
  width = 4,
  height = 6,
  dpi = 300
)

Species Height Plot

# 1. Generate predictions by species
height_species_preds <- ggpredict(
  height_lmer,
  terms = c("Common_Name")
)

# Fix the underscore in the model predictions data frame so it matches the clean text
height_species_preds$x <- gsub("Yaupon_Holly", "Yaupon Holly", height_species_preds$x)

# 2. Fix the underscore in your raw data and set the factor levels
model_data$Common_Name <- gsub("Yaupon_Holly", "Yaupon Holly", model_data$Common_Name)
model_data$Common_Name <- factor(
  model_data$Common_Name,
  levels = c("Beautyberry", "Mayberry", "Yaupon Holly")
)

# 3. Build the professional, layered plot
height_species_plot <- ggplot() +
  
  # Layer 1: Raw data points (faded in background to show variation)
  geom_jitter(
    data = model_data,
    aes(x = Common_Name, 
        y = Height, 
        color = Common_Name),
    width = 0.1,      # Subtle horizontal spread
    alpha = 0.15,     # Highly transparent so it doesn't crowd the model means
    size = 2
  ) +
  
  # Layer 2: Model-predicted means (prominent foreground)
  geom_point(
    data = height_species_preds,
    aes(x = factor(x, levels = c("Beautyberry", "Mayberry", "Yaupon Holly")), 
        y = predicted, 
        color = factor(x, levels = c("Beautyberry", "Mayberry", "Yaupon Holly"))),
    size = 4
  ) +
  
  # Layer 3: Model confidence intervals
  geom_errorbar(
    data = height_species_preds,
    aes(x = factor(x, levels = c("Beautyberry", "Mayberry", "Yaupon Holly")), 
        ymin = conf.low, 
        ymax = conf.high, 
        color = factor(x, levels = c("Beautyberry", "Mayberry", "Yaupon Holly"))),
    width = 0.1,
    linewidth = 1
  ) +
  
  # Color palette: Cohesive, colorblind-friendly academic tones
  scale_color_manual(
    values = c(
      "Beautyberry"  = "#009E73",  # Clean Green
      "Mayberry"     = "#E69F00",  # Muted Orange/Gold
      "Yaupon Holly" = "#56B4E9"   # Soft Blue
    ),
    guide = "none"                 # Removes unnecessary legend since x-axis is labeled
  ) +
  
  # Axis labels matching manuscript standard
  labs(
    x = "Species", 
    y = "Height (cm)"
  ) +
  
  # Clean, minimal academic theme
  theme_classic(base_size = 15)

# View the plot
height_species_plot

# 4. Save at publication-quality resolution (300 DPI)
ggsave(
  filename = "C:/Users/DrewIvory/OneDrive - University of Florida/Desktop/School/PHD/01_Projects/10_Shrub/03_Figures/Species_Height_Predictions.png",
  plot = height_species_plot,
  width = 5,        # Slightly wider than the single comparison plot to let 3 species breathe
  height = 6,
  dpi = 300
)

Global Model for Species Height Difference

model_dataD <- model_data %>%
  distinct(PlantTag, .keep_all = TRUE)

model_dataD <- model_dataD %>%
  filter(!PlantTag %in% c("9505", "9506", "9546", "9555", "9585",
                          "9589", "9691")) # had to remove the largest negative values.

# Null
heightD_lm0 <- lm(HeightDiff ~ 1,
  data = model_dataD
)

# Global Interaction
heightD_lm1 <- lm(HeightDiff ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2,
  data = model_dataD
)

# Global Additive
heightD_lm2 <- lm(HeightDiff ~ Common_Name + CogonStatusNumeric + SMavg + PercCanopy2,
  data = model_dataD
)

# Invasion Only
heightD_lm3 <- lm(HeightDiff ~ CogonStatusNumeric,
  data = model_dataD
)

# Invasion by species
heightD_lm4 <- lm(HeightDiff ~ Common_Name * CogonStatusNumeric,
  data = model_dataD
)

heightD_lm5 <- lm(HeightDiff ~ Common_Name + CogonStatusNumeric,
  data = model_dataD
)

## AIC comparison
models <- list(
  "Model 0" = heightD_lm0,
  "Model 1" = heightD_lm1,
  "Model 2" = heightD_lm2,
  "Model 3" = heightD_lm3,
  "Model 4" = heightD_lm4,
  "Model 5" = heightD_lm5
)

mod_sel <- aictab(cand.set = models)
mod_sel
## 
## Model selection based on AICc:
## 
##         K   AICc Delta_AICc AICcWt Cum.Wt      LL
## Model 1 9 623.94       0.00   0.71   0.71 -301.69
## Model 4 7 625.80       1.86   0.28   0.99 -305.12
## Model 2 7 634.38      10.44   0.00   1.00 -309.41
## Model 5 5 634.48      10.54   0.00   1.00 -311.84
## Model 0 2 641.45      17.51   0.00   1.00 -318.65
## Model 3 3 642.54      18.60   0.00   1.00 -318.11
topmods <- list(heightD_lm1, heightD_lm4)

avg_heightD_mod <- model.avg(topmods)

summary(avg_heightD_mod)
## 
## Call:
## model.avg(object = topmods)
## 
## Component model call: 
## lm(formula = <2 unique values>, data = model_dataD)
## 
## Component models: 
##       df  logLik   AICc delta weight
## 12345  9 -301.69 623.94  0.00   0.72
## 125    7 -305.12 625.80  1.86   0.28
## 
## Term codes: 
##             CogonStatusNumeric                    Common_Name 
##                              1                              2 
##                    PercCanopy2                          SMavg 
##                              3                              4 
## CogonStatusNumeric:Common_Name 
##                              5 
## 
## Model-averaged coefficients:  
## (full average) 
##                                             Estimate Std. Error Adjusted SE
## (Intercept)                                 28.39381    7.88564     7.95273
## Common_NameMayberry                         -5.12541    4.20529     4.27616
## Common_NameYaupon Holly                     -1.30090    4.28168     4.35329
## CogonStatusNumeric1                         -6.97770    4.27156     4.34269
## SMavg                                       -0.71063    0.63567     0.64120
## PercCanopy2                                 -0.05240    0.04860     0.04905
## CogonStatusNumeric1:Common_NameMayberry      9.75609    6.17277     6.27483
## CogonStatusNumeric1:Common_NameYaupon Holly 23.83278    6.22703     6.33169
##                                             z value Pr(>|z|)    
## (Intercept)                                   3.570 0.000356 ***
## Common_NameMayberry                           1.199 0.230684    
## Common_NameYaupon Holly                       0.299 0.765068    
## CogonStatusNumeric1                           1.607 0.108105    
## SMavg                                         1.108 0.267737    
## PercCanopy2                                   1.068 0.285320    
## CogonStatusNumeric1:Common_NameMayberry       1.555 0.119995    
## CogonStatusNumeric1:Common_NameYaupon Holly   3.764 0.000167 ***
##  
## (conditional average) 
##                                             Estimate Std. Error Adjusted SE
## (Intercept)                                 28.39381    7.88564     7.95273
## Common_NameMayberry                         -5.12541    4.20529     4.27616
## Common_NameYaupon Holly                     -1.30090    4.28168     4.35329
## CogonStatusNumeric1                         -6.97770    4.27156     4.34269
## SMavg                                       -0.99141    0.53420     0.54333
## PercCanopy2                                 -0.07311    0.04220     0.04292
## CogonStatusNumeric1:Common_NameMayberry      9.75609    6.17277     6.27483
## CogonStatusNumeric1:Common_NameYaupon Holly 23.83278    6.22703     6.33169
##                                             z value Pr(>|z|)    
## (Intercept)                                   3.570 0.000356 ***
## Common_NameMayberry                           1.199 0.230684    
## Common_NameYaupon Holly                       0.299 0.765068    
## CogonStatusNumeric1                           1.607 0.108105    
## SMavg                                         1.825 0.068049 .  
## PercCanopy2                                   1.703 0.088512 .  
## CogonStatusNumeric1:Common_NameMayberry       1.555 0.119995    
## CogonStatusNumeric1:Common_NameYaupon Holly   3.764 0.000167 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals on the most complicated model to check assumptions
# Unable to simulate residuals on the averaged model
sim_residuals <- simulateResiduals(fittedModel = heightD_lm1, plot = FALSE)

# 2. Run the main diagnostic plots
plot(sim_residuals)

testUniformity(sim_residuals)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.068, p-value = 0.8532
## alternative hypothesis: two-sided
testDispersion(sim_residuals)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.90938, p-value = 0.624
## alternative hypothesis: two.sided
testOutliers(sim_residuals)

## 
##  DHARMa outlier test based on exact binomial test with approximate
##  expectations
## 
## data:  sim_residuals
## outliers at both margin(s) = 1, observations = 80, p-value = 0.4727
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
##  0.0003164225 0.0676877845
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 ) 
##                                                 0.0125
vif(heightD_lm1)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
##                                    GVIF Df GVIF^(1/(2*Df))
## Common_Name                    3.884060  2        1.403853
## CogonStatusNumeric             2.909929  1        1.705851
## SMavg                          1.287353  1        1.134616
## PercCanopy2                    1.079139  1        1.038816
## Common_Name:CogonStatusNumeric 7.692400  2        1.665388

LOOCV for model average

# Initialize a vector to store predictions
n <- nrow(model_dataD)
loocv_preds <- numeric(n)

for(i in 1:n) {
  # 1. Split data
  train_data <- model_dataD[-i, ]
  test_point  <- model_dataD[i, , drop = FALSE]
  
  # 2. Refit the component models on training data
  m1 <- lm(HeightDiff ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2, 
            data = train_data)
  m2 <- lm(HeightDiff ~ Common_Name + CogonStatusNumeric + SMavg + PercCanopy2, 
            data = train_data)
  m3 <- lm(HeightDiff ~ CogonStatusNumeric, 
            data = train_data)
  m4 <- lm(HeightDiff ~ Common_Name * CogonStatusNumeric, 
            data = train_data)
  
  # 3. Create the model average for this specific fold
  m_list <- list(m1, m2, m3, m4)
  avg_fold <- model.avg(m_list)
  
  # 4. Predict the held-out point (using the full average)
  loocv_preds[i] <- predict(avg_fold, newdata = test_point)
}

# 5. Calculate Performance Metrics
rmse <- sqrt(mean((model_dataD$HeightDiff - loocv_preds)^2))
mae <- mean(abs(model_dataD$HeightDiff - loocv_preds))

# 1. Calculate Total Sum of Squares (TSS)
tss <- sum((model_dataD$HeightDiff - mean(model_dataD$HeightDiff))^2)

# 2. Calculate Residual Sum of Squares from LOOCV (RSS)
rss_cv <- sum((model_dataD$HeightDiff - loocv_preds)^2)

# 3. Calculate LOOCV R-squared
r2_cv <- 1 - (rss_cv / tss)

cat("LOOCV R-squared:", round(r2_cv, 3), "\n")
## LOOCV R-squared: 0.176
cat("LOOCV RMSE:", round(rmse, 3), "\n")
## LOOCV RMSE: 11.791
cat("LOOCV MAE:", round(mae, 3), "\n")
## LOOCV MAE: 9.247

Height Difference Post-hoc Tests

# Generate predictions
heightD_preds <- ggpredict(
  avg_heightD_mod,
  terms = c("CogonStatusNumeric", "Common_Name")
)

heightD_preds <- heightD_preds %>%
  mutate(group = dplyr::recode(group,
    "Beautyberry" = "Beautyberry",
    "Mayberry" = "Mayberry",
    "Yaupon_Holly" = "Yaupon Holly"
  ))


# Make sure grouping variables are factors
model_data$CogonStatusNumeric <- factor(
  model_data$CogonStatusNumeric,
  levels = c(0, 1)
)

# Plot
heightD_plot <- ggplot() +
  
  # Raw data points
  geom_jitter(
    data = model_data,
    aes(x = CogonStatusNumeric,
        y = HeightDiff,
        color = CogonStatusNumeric),
    width = 0.08,
    alpha = 0.05,
    size = 2
  ) +
  
  # Predicted means
  geom_point(
    data = heightD_preds,
    aes(x = x,
        y = predicted,
        color = factor(x)),
    size = 4
  ) +
  
  # Confidence intervals
  geom_errorbar(
    data = heightD_preds,
    aes(x = x,
        ymin = conf.low,
        ymax = conf.high,
        color = factor(x)),
    width = 0.1,
    linewidth = 1
  ) +
  
  # Separate panels by species
  facet_wrap(~group) +
  
  scale_x_discrete(
    labels = c(
      "0" = "Native",
      "1" = "Invaded"
    )
  ) +
  
  scale_color_manual(
    values = c(
      "0" = "#0072B2",  # Native
      "1" = "#D55E00"   # Invaded
    ),
    guide = "none"
  ) +
  
  labs(
    x = "Cogongrass Status",
    y = "Height Difference (cm)"
  ) +
  
  theme_classic(base_size = 15)

heightD_plot

## Save
ggsave(
  filename = "C:/Users/DrewIvory/OneDrive - University of Florida/Desktop/School/PHD/01_Projects/10_Shrub/03_Figures/HeightDifference_Predictions.png",
  plot = heightD_plot,
  width = 8,
  height = 6,
  dpi = 300
)

Stem Count Analysis

Total Stem Count

Data Cleaning

# Calculate the difference between TotalStems2 and TotalStems
shrubs$StemDiff <- shrubs$TotalStems2 - shrubs$TotalStems

# Long Format
long_totalstems <- shrubs %>%
  # 1. Keep only the columns you need (your ID vars + your measure vars)
  dplyr::select(ShrubType, Common_Name, Patch, CogonStatusNumeric, SMavg, 
         PercCover2, PercCanopy2, PlantTag, Date, StemDiff, 
         TotalStems, TotalStems2) %>%
  # 2. Pivot only the stem columns
  pivot_longer(
    cols = c(TotalStems, TotalStems2),
    names_to = "SamplingPeriod",
    values_to = "TotalStemsValue"
  )

# Sample period is numeric
long_totalstems$SamplingPeriod <- factor(
  long_totalstems$SamplingPeriod,
  levels = c("TotalStems", "TotalStems2"),
  labels = c("Pre", "Post") # Pre- or Post-Fire
)

model_data_stems <- long_totalstems %>%
  dplyr::select(PlantTag, StemDiff, TotalStemsValue, ShrubType,
                Common_Name, CogonStatusNumeric, SMavg, PercCanopy2) %>%
  na.omit()

# Confirm Factors
model_data_stems$PlantTag <- as.factor(model_data_stems$PlantTag)
model_data_stems$Common_Name <- as.factor(model_data_stems$Common_Name)
model_data_stems$ShrubType <- as.factor(model_data_stems$ShrubType)
model_data_stems$CogonStatusNumeric <- as.factor(model_data_stems$CogonStatusNumeric)

Total Stem Count Model

# Null
stem_glm0 <- glmer(TotalStemsValue ~ 1 + (1 | PlantTag),
                       data = model_data_stems,
                       family = poisson(link = "log")
                       )


stem_glm1 <- glmer(TotalStemsValue ~ Common_Name * CogonStatusNumeric +
                     SMavg + PercCanopy2 + (1 | PlantTag),
                   data = model_data_stems,
                   family = poisson(link = "log")
                   )
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0836651 (tol = 0.002, component 1)
stem_glm2 <- glmer(TotalStemsValue ~ Common_Name + CogonStatusNumeric +
                     SMavg + PercCanopy2 + (1 | PlantTag),
                   data = model_data_stems,
                   family = poisson(link = "log")
                   )

stem_glm3 <- glmer(TotalStemsValue ~ CogonStatusNumeric +
                     SMavg + PercCanopy2 + (1 | PlantTag),
                   data = model_data_stems,
                   family = poisson(link = "log")
                   )

stem_glm4 <- glmer(TotalStemsValue ~ Common_Name * CogonStatusNumeric +
                     (1 | PlantTag),
                   data = model_data_stems,
                   family = poisson(link = "log")
                   )

stem_glm5 <- glmer(TotalStemsValue ~ Common_Name + CogonStatusNumeric +
                     (1 | PlantTag),
                   data = model_data_stems,
                   family = poisson(link = "log")
                   )

                   
models <- list(
  "Model 0" = stem_glm0,
  "Model 1" = stem_glm1,
  "Model 2" = stem_glm2,
  "Model 3" = stem_glm3,
  "Model 4" = stem_glm4,
  "Model 5" = stem_glm5
)

mod_sel <- aictab(cand.set = models)
mod_sel
## 
## Model selection based on AICc:
## 
##         K   AICc Delta_AICc AICcWt Cum.Wt      LL
## Model 5 5 904.07       0.00   0.60   0.60 -446.86
## Model 2 7 905.88       1.81   0.24   0.84 -445.60
## Model 4 7 907.62       3.55   0.10   0.94 -446.47
## Model 1 9 908.66       4.59   0.06   1.00 -444.78
## Model 3 5 922.31      18.24   0.00   1.00 -455.98
## Model 0 2 927.23      23.16   0.00   1.00 -461.58
summary(stem_glm5)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: TotalStemsValue ~ Common_Name + CogonStatusNumeric + (1 | PlantTag)
##    Data: model_data_stems
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     903.7     919.5    -446.9     893.7       169 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.79116 -0.42939 -0.09079  0.30804  2.31498 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  PlantTag (Intercept) 0.2765   0.5258  
## Number of obs: 174, groups:  PlantTag, 87
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              1.38627    0.13184  10.515  < 2e-16 ***
## Common_NameMayberry     -0.01423    0.16373  -0.087 0.930720    
## Common_NameYaupon_Holly  0.63361    0.15401   4.114 3.89e-05 ***
## CogonStatusNumeric1      0.44112    0.12931   3.411 0.000647 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Cmm_NM C_NY_H
## Cmmn_NmMybr -0.608              
## Cmmn_NmYp_H -0.624  0.499       
## CgnSttsNmr1 -0.519  0.049  0.007
topmods <- list(heightD_lm5, heightD_lm2)

avg_stem_mod <- model.avg(topmods)

summary(avg_stem_mod)
## 
## Call:
## model.avg(object = topmods)
## 
## Component model call: 
## lm(formula = <2 unique values>, data = model_dataD)
## 
## Component models: 
##      df  logLik   AICc delta weight
## 1234  7 -309.41 634.38   0.0   0.51
## 12    5 -311.84 634.48   0.1   0.49
## 
## Term codes: 
## CogonStatusNumeric        Common_Name        PercCanopy2              SMavg 
##                  1                  2                  3                  4 
## 
## Model-averaged coefficients:  
## (full average) 
##                         Estimate Std. Error Adjusted SE z value Pr(>|z|)   
## (Intercept)             18.73570    6.56503     6.62614   2.828  0.00469 **
## Common_NameMayberry     -0.51120    3.31339     3.36728   0.152  0.87933   
## Common_NameYaupon Holly 10.15330    3.34575     3.40003   2.986  0.00282 **
## CogonStatusNumeric1      2.92920    2.85123     2.89805   1.011  0.31214   
## SMavg                   -0.29008    0.48549     0.49084   0.591  0.55453   
## PercCanopy2             -0.04450    0.05432     0.05465   0.814  0.41549   
##  
## (conditional average) 
##                         Estimate Std. Error Adjusted SE z value Pr(>|z|)   
## (Intercept)             18.73570    6.56503     6.62614   2.828  0.00469 **
## Common_NameMayberry     -0.51120    3.31339     3.36728   0.152  0.87933   
## Common_NameYaupon Holly 10.15330    3.34575     3.40003   2.986  0.00282 **
## CogonStatusNumeric1      2.92920    2.85123     2.89805   1.011  0.31214   
## SMavg                   -0.56552    0.55112     0.56028   1.009  0.31281   
## PercCanopy2             -0.08675    0.04568     0.04644   1.868  0.06176 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sim_residuals <- simulateResiduals(fittedModel = stem_glm2, plot = FALSE)

plot(sim_residuals)

vif(stem_glm2)
##                        GVIF Df GVIF^(1/(2*Df))
## Common_Name        1.024389  2        1.006042
## CogonStatusNumeric 1.175014  1        1.083981
## SMavg              1.171948  1        1.082565
## PercCanopy2        1.064814  1        1.031898

Total Stem Count Plots

# Predicted stem counts (marginal over species + covariates)
stem_preds <- ggpredict(
  stem_glm5,
  terms = "CogonStatusNumeric",
  bias_correction = TRUE
)

# Ensure factor ordering
model_data_stems$CogonStatusNumeric <- factor(
  model_data_stems$CogonStatusNumeric,
  levels = c(0, 1)
)

# Plot
stem_plot <- ggplot() +
  
  # raw data
  geom_jitter(
    data = model_data_stems,
    aes(
      x = CogonStatusNumeric,
      y = TotalStemsValue,
      color = CogonStatusNumeric
    ),
    width = 0.08,
    alpha = 0.15,
    size = 2
  ) +
  
  # predicted means
  geom_point(
    data = stem_preds,
    aes(
      x = x,
      y = predicted,
      color = factor(x)
    ),
    size = 4
  ) +
  
  # CI
  geom_errorbar(
    data = stem_preds,
    aes(
      x = x,
      ymin = conf.low,
      ymax = conf.high,
      color = factor(x)
    ),
    width = 0.1,
    linewidth = 1
  ) +
  
  scale_x_discrete(
    labels = c("0" = "Native", "1" = "Invaded")
  ) +
  
  scale_color_manual(
    values = c(
      "0" = "#0072B2",
      "1" = "#D55E00"
    ),
    guide = "none"
  ) +
  
  labs(
    x = "Cogongrass Status",
    y = "Predicted Total Stem Count"
  ) +
  
  theme_classic(base_size = 14)

stem_plot

## Save
ggsave(
  filename = "C:/Users/DrewIvory/OneDrive - University of Florida/Desktop/School/PHD/01_Projects/10_Shrub/03_Figures/TotalStemCount_Predictions.png",
  plot = stem_plot,
  width = 8,
  height = 6,
  dpi = 300
)

Modeling Total Stem Count Difference

model_dataD_stems <- model_data_stems %>%
  distinct(PlantTag, .keep_all = TRUE)


model_dataD_stems <- model_dataD_stems %>%
  filter(!PlantTag %in% c("9555"))


# Null
stemD_lm0 <- lm(StemDiff ~ 1,
  data = model_dataD_stems
)

stemD_lm1 <- lm(StemDiff ~ Common_Name * CogonStatusNumeric + SMavg + PercCanopy2,
  data = model_dataD_stems
)

summary(stemD_lm1)
## 
## Call:
## lm(formula = StemDiff ~ Common_Name * CogonStatusNumeric + SMavg + 
##     PercCanopy2, data = model_dataD_stems)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1491 -1.3583 -0.0580  0.9044  8.3694 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                 -1.982159   1.365012  -1.452
## Common_NameMayberry                          0.867466   0.914751   0.948
## Common_NameYaupon_Holly                      0.870007   0.911111   0.955
## CogonStatusNumeric1                         -0.258603   0.930901  -0.278
## SMavg                                        0.140575   0.111947   1.256
## PercCanopy2                                 -0.008093   0.009000  -0.899
## Common_NameMayberry:CogonStatusNumeric1     -0.664135   1.336040  -0.497
## Common_NameYaupon_Holly:CogonStatusNumeric1 -0.991344   1.301736  -0.762
##                                             Pr(>|t|)
## (Intercept)                                    0.150
## Common_NameMayberry                            0.346
## Common_NameYaupon_Holly                        0.343
## CogonStatusNumeric1                            0.782
## SMavg                                          0.213
## PercCanopy2                                    0.371
## Common_NameMayberry:CogonStatusNumeric1        0.621
## Common_NameYaupon_Holly:CogonStatusNumeric1    0.449
## 
## Residual standard error: 2.48 on 78 degrees of freedom
## Multiple R-squared:  0.06051,    Adjusted R-squared:  -0.0238 
## F-statistic: 0.7177 on 7 and 78 DF,  p-value: 0.6572
stemD_lm2 <- lm(StemDiff ~ Common_Name + CogonStatusNumeric + SMavg + PercCanopy2,
  data = model_dataD_stems
)

stemD_lm3 <- lm(StemDiff ~ CogonStatusNumeric,
  data = model_dataD_stems
)

stemD_lm4 <- lm(StemDiff ~ Common_Name * CogonStatusNumeric,
  data = model_dataD_stems
)

stemD_lm5 <- lm(StemDiff ~ Common_Name + CogonStatusNumeric,
  data = model_dataD_stems
)

## AIC comparison
models <- list(
  "Model 0" = stemD_lm0,
  "Model 1" = stemD_lm1,
  "Model 2" = stemD_lm2,
  "Model 3" = stemD_lm3,
  "Model 4" = stemD_lm4,
  "Model 5" = stemD_lm5
)

mod_sel <- aictab(cand.set = models)
mod_sel
## 
## Model selection based on AICc:
## 
##         K   AICc Delta_AICc AICcWt Cum.Wt      LL
## Model 0 2 401.41       0.00   0.60   0.60 -198.64
## Model 3 3 402.72       1.31   0.31   0.91 -198.22
## Model 5 5 406.07       4.65   0.06   0.97 -197.66
## Model 2 7 408.01       6.59   0.02   0.99 -196.29
## Model 4 7 410.21       8.79   0.01   1.00 -197.39
## Model 1 9 412.27      10.86   0.00   1.00 -195.95
stemD_lm <- stemD_lm3

summary(stemD_lm)
## 
## Call:
## lm(formula = StemDiff ~ CogonStatusNumeric, data = model_dataD_stems)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.3415 -1.3415  0.1778  1.1778  8.6585 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)          -0.1778     0.3658  -0.486    0.628
## CogonStatusNumeric1  -0.4808     0.5298  -0.907    0.367
## 
## Residual standard error: 2.454 on 84 degrees of freedom
## Multiple R-squared:  0.009708,   Adjusted R-squared:  -0.002081 
## F-statistic: 0.8235 on 1 and 84 DF,  p-value: 0.3668
# 1. Simulate residuals (standard is 250 simulations)
sim_residuals <- simulateResiduals(fittedModel = stemD_lm, plot = FALSE)

# 2. Run the main diagnostic plots
plot(sim_residuals)

testUniformity(sim_residuals)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.10595, p-value = 0.2892
## alternative hypothesis: two-sided
testDispersion(sim_residuals)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.98454, p-value = 0.96
## alternative hypothesis: two.sided
testOutliers(sim_residuals)

## 
##  DHARMa outlier test based on exact binomial test with approximate
##  expectations
## 
## data:  sim_residuals
## outliers at both margin(s) = 1, observations = 86, p-value = 0.4974
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
##  0.0002943498 0.0630905163
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 ) 
##                                             0.01162791

Plot for Stem Count Difference

GAM for Stem Count Difference

stemD_gam1 <- gam(
  StemDiff ~ Common_Name * CogonStatusNumeric + 
             s(SMavg, k = 5) + 
             s(PercCanopy2, k = 5),
  family = scat(), # The Scaled-t family handles 'heavy tails' better than Gaussian
  data = model_dataD_stems, 
  method = "REML"
)

stemD_gam2 <- gam(
  StemDiff ~ Common_Name + CogonStatusNumeric + 
             s(SMavg, k = 5) + 
             s(PercCanopy2, k = 5),
  family = scat(), 
  data = model_dataD_stems, 
  method = "REML"
)

stemD_gam3 <- gam(
  StemDiff ~ CogonStatusNumeric,
  family = scat(), 
  data = model_dataD_stems, 
  method = "REML"
)

stemD_gam4 <- gam(
  StemDiff ~ Common_Name * CogonStatusNumeric,
  family = scat(), 
  data = model_dataD_stems, 
  method = "REML"
)

# Model Selection
model_comparison <- AIC(stemD_gam1, stemD_gam2, stemD_gam3, stemD_gam4)
model_comparison
##                   df      AIC
## stemD_gam1 11.782037 402.7777
## stemD_gam2  9.108303 400.3537
## stemD_gam3  4.000000 398.6947
## stemD_gam4  8.000000 402.1083
stemD_gam <- stemD_gam3

summary(stemD_gam)
## 
## Family: Scaled t(4.586,1.889) 
## Link function: identity 
## 
## Formula:
## StemDiff ~ CogonStatusNumeric
## 
## Parametric coefficients:
##                     Estimate Std. Error z value Pr(>|z|)
## (Intercept)          -0.2735     0.3282  -0.834    0.405
## CogonStatusNumeric1  -0.5336     0.4753  -1.123    0.262
## 
## 
## R-sq.(adj) =  -0.0022   Deviance explained = 1.11%
## -REML = 195.71  Scale est. = 1         n = 86
# Re-check DHARMa
sim_residuals <- simulateResiduals(stemD_gam, plot = FALSE)
## Registered S3 method overwritten by 'mgcViz':
##   method from   
##   +.gg   ggplot2
plot(sim_residuals)

testUniformity(sim_residuals)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.052651, p-value = 0.971
## alternative hypothesis: two-sided
testDispersion(sim_residuals)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.96375, p-value = 0.976
## alternative hypothesis: two.sided
testOutliers(sim_residuals)

## 
##  DHARMa outlier test based on exact binomial test with approximate
##  expectations
## 
## data:  sim_residuals
## outliers at both margin(s) = 1, observations = 86, p-value = 0.4974
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
##  0.0002943498 0.0630905163
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 ) 
##                                             0.01162791

Post-hoc Tests for Stem Count

# 1. Calculate marginal means for the interaction
#interaction_means_stems <- emmeans(global_stem_lm, ~ Common_Name *
#CogonStatusNumeric)

# 2. View the means
#interaction_means_stems
# 3. Perform pair-wise comparisons (Post-hoc Tukey test)
#pairs(interaction_means_stems, by = "Common_Name")

Predict