Multi-Platform Lidar Analysis of Understory Vegetation Density

Author

Mickey Campbell

Published

April 22, 2025

Reference Data

The MLS data will serve as the reference data, giving us the truest representation of understory vegetation density among the three platforms, given the near-ground scanning perspective and the very high point density in relevant aboveground height ranges. Let’s first explore our data. Here is a summary of the data:

Code
library(rmarkdown)
library(knitr)
library(stringr)
library(dplyr)
library(RColorBrewer)
library(colorspace)
library(lme4)

# read in the data
df <- read.csv("S:/ursa/jones/NRD/metrics_combined_mc.csv")

# create veg type lut
lut <- matrix(c(
  "BTMAPLE", "BTMAPL", "Bigtooth Maple",
  "DRYFIR",  "DRYFIR", "Dry Mixed Conifer",
  "GAMBLOAK", "GAMOAK", "Gambel Oak",
  "GBRPJ", "GRBNPJ", "Pinyon-Juniper",
  "LODGEP", "LODGEP", "Lodgepole Pine",
  "QASPEN", "QASPEN", "Quaking Aspen",
  "SPRFIR", "SPRFIR", "Spruce-Fir"
), byrow = T, ncol = 3) |> 
  as.data.frame()
colnames(lut) <- c("veg_type", "veg_type_fix", "veg_name")

# merge
df <- merge(df, lut)

# fix veg type
df$veg_type <- df$veg_type_fix
df$veg_type_fix <- NULL

# fix plot id
plot_num <- str_sub(df$plot_id, -1, -1)
df$plot_id <- paste0(df$veg_type, "_", plot_num)

# create summary table
df_n_subp <- df |>
  group_by(veg_name, plot_id) |>
  summarize(
    n_radii = length(unique(radius)),
    n_subp = n()
  ) |>
  as.data.frame()

# print the table out
kable(df_n_subp)
veg_name plot_id n_radii n_subp
Bigtooth Maple BTMAPL_1 8 800
Bigtooth Maple BTMAPL_2 8 800
Bigtooth Maple BTMAPL_3 8 800
Dry Mixed Conifer DRYFIR_1 8 800
Dry Mixed Conifer DRYFIR_2 8 800
Dry Mixed Conifer DRYFIR_3 8 800
Dry Mixed Conifer DRYFIR_4 8 800
Gambel Oak GAMOAK_1 8 800
Gambel Oak GAMOAK_2 8 800
Gambel Oak GAMOAK_3 8 800
Lodgepole Pine LODGEP_1 8 800
Lodgepole Pine LODGEP_2 8 800
Lodgepole Pine LODGEP_3 8 800
Lodgepole Pine LODGEP_4 8 800
Lodgepole Pine LODGEP_5 8 800
Pinyon-Juniper GRBNPJ_1 8 800
Pinyon-Juniper GRBNPJ_2 8 800
Pinyon-Juniper GRBNPJ_3 8 800
Pinyon-Juniper GRBNPJ_4 8 800
Quaking Aspen QASPEN_1 8 800
Quaking Aspen QASPEN_2 8 800
Quaking Aspen QASPEN_3 8 800
Quaking Aspen QASPEN_4 8 800
Spruce-Fir SPRFIR_1 8 800
Spruce-Fir SPRFIR_2 8 800
Spruce-Fir SPRFIR_3 8 800

Quick stats:

  • Number of unique vegetation types: 7
  • Number of plots: 26
  • Number of subplot radii tested per plot: 8
    • 1, 2, 3, 4, 5, 6, 7, and 8m
  • Number of subplots generated per radius: 100

Distributions of reference density values

Let’s take a look at how the MLS understory density values (i.e., the primary response variable for all modeling in this study) vary by vegetation type, first using the 1m radius subplot data:

Code
# create summary table
df_med_dns <- df |>
  group_by(veg_name) |>
  summarize(med_dns = median(mls_vol_dens)) |>
  as.data.frame()

# get veg names by order of density (ascending)
dns_order <- df_med_dns$veg_name[order(df_med_dns$med_dns)]

# set up plot
par(mar = c(5,8,1,1), las = 1)

# create empty plot
plot(
  x = range(df$mls_vol_dens),
  y = c(0.5,7.5),
  type = "n",
  xlab = "MLS-Measured Understory Density",
  ylab = NA,
  main = "Subplot Radius = 1m",
  axes = F
)

# add axis and vertical lines
abline(v = axTicks(1), col = "lightgray", lty = 3)
axis(1)

# subset data frame to subplot radius
df_rad <- df[df$radius == 1,]

# get colors
cols <- brewer.pal(length(dns_order), "Set3")
cols_line <- darken(cols, 0.1)
cols_fill <- lighten(cols, 0.1)

# get maximum densities
dy_max <- lapply(dns_order, function(x){
  d <- density(df_rad$mls_vol_dens[df_rad$veg_name == x], bw = 0.04)
  dy <- d$y
  return(max(dy))
}) |> 
  unlist() |>
  max(0)

# loop through veg types
for (i in 1:length(dns_order)){
  
  # get veg name
  veg_name <- dns_order[i]
  
  # subset data frame to veg type
  df_vt <- df_rad[df_rad$veg_name == veg_name,]
  
  # get density
  d <- density(df_vt$mls_vol_dens, bw = 0.04, cut = 0)
  dx <- d$x
  dy <- d$y
  
  # adjust dy
  dy <- dy / (2 * dy_max)

  # add violin polygon
  polygon(
    x = c(dx, rev(dx)),
    y = c(dy, rev(dy) * -1) + i,
    border = cols_line[i],
    col = cols_fill[i]
  )
  
  # add median point
  points(
    x = median(df_vt$mls_vol_dens),
    y = i,
    pch = 16,
    col = cols_line[i]
  )
  
  # add veg type label
  mtext(veg_name, side = 2, at = i, col = cols_line[i])
  
}

Now we’ll take a look at the opposite extreme, with the 8m-radius subplot data:

Code
# create summary table
df_med_dns <- df |>
  group_by(veg_name) |>
  summarize(med_dns = median(mls_vol_dens)) |>
  as.data.frame()

# get veg names by order of density (ascending)
dns_order <- df_med_dns$veg_name[order(df_med_dns$med_dns)]

# set up plot
par(mar = c(5,8,1,1), las = 1)

# create empty plot
plot(
  x = range(df$mls_vol_dens),
  y = c(0.5,7.5),
  type = "n",
  xlab = "MLS-Measured Understory Density",
  ylab = NA,
  main = "Subplot Radius = 8m",
  axes = F
)

# add axis and vertical lines
abline(v = axTicks(1), col = "lightgray", lty = 3)
axis(1)

# subset data frame to subplot radius
df_rad <- df[df$radius == 8,]

# get colors
cols <- brewer.pal(length(dns_order), "Set3")
cols_line <- darken(cols, 0.1)
cols_fill <- lighten(cols, 0.1)

# get maximum densities
dy_max <- lapply(dns_order, function(x){
  d <- density(df_rad$mls_vol_dens[df_rad$veg_name == x], bw = 0.04)
  dy <- d$y
  return(max(dy))
}) |> 
  unlist() |>
  max(0)

# loop through veg types
for (i in 1:length(dns_order)){
  
  # get veg name
  veg_name <- dns_order[i]
  
  # subset data frame to veg type
  df_vt <- df_rad[df_rad$veg_name == veg_name,]
  
  # get density
  d <- density(df_vt$mls_vol_dens, bw = 0.04, cut = 0)
  dx <- d$x
  dy <- d$y
  
  # adjust dy
  dy <- dy / (2 * dy_max)

  # add violin polygon
  polygon(
    x = c(dx, rev(dx)),
    y = c(dy, rev(dy) * -1) + i,
    border = cols_line[i],
    col = cols_fill[i]
  )
  
  # add median point
  points(
    x = median(df_vt$mls_vol_dens),
    y = i,
    pch = 16,
    col = cols_line[i]
  )
  
  # add veg type label
  mtext(veg_name, side = 2, at = i, col = cols_line[i])
  
}

Very clear differences. At 1m, there are a whole lot more zero or near-zero values, given the increased likelihood of having gaps in the understory with little or no vegetation within a 1m radius subplot area. But there are also a lot more extreme values, given the clumpiness of vegetation, and the associated increased likelihood of very high local densities emerging. Conversely, at 8m, the values are averaged out over a much larger area, yielding generally more moderate values with a narrower range.

ALS/ULS NRD vs MLS Density

In the best-case scenario, our reference MLS density values could be predicted in a single, ordinary least-squares regression model, with ULS or ALS NRD values as the sole predictor variable. If that worked, that would tell us that ULS and ALS can both be used to predict understory density reliably. The odds of that are low, but it’s worth testing to establish a baseline, foundational relationship between NRD and MLS-measured density. First, ALS:

Code
# set up plot
par(mfrow = c(2,4), mar = rep(0,4), oma = c(5,5,1,1), las = 1)

# loop through radii
radii <- 1:8
for (radius in radii){
  
  # subset the data by subplot radius
  df_rad <- df[df$radius == radius,]
  
  # plot the data
  plot(
    x = df_rad$als_nrd_under,
    y = df_rad$mls_vol_dens,
    xlim = c(0,1),
    ylim = c(0,1),
    xlab = "ALS NRD",
    ylab = "MLS Density",
    type = "n",
    xaxt = "n",
    yaxt = "n"
  )
  grid()
  points(
    x = df_rad$als_nrd_under,
    y = df_rad$mls_vol_dens,
    pch = 16,
    cex = 0.5,
    col = rgb(0,0,0,0.25)
  )
  
  # build model
  mod <- lm(mls_vol_dens ~ als_nrd_under, data = df_rad)
  
  # add regression line
  abline(mod, lwd = 2, col = 2)
  
  # add radius
  legend(
    "topright",
    legend = paste0(radius, "m"),
    bty = "n",
    x.intersp = 0,
    text.font = 2
  )
  
  # add r2
  r2 <- summary(mod)$adj.r.squared |> round(2)
  legend(
    "topleft",
    legend = bquote(R^2==.(r2)),
    bty = "n",
    x.intersp = 0,
    text.col = 2
  )
  
  # add axes conditionally
  if (radius %in% c(1,5)) axis(2)
  if (radius %in% 5:8) axis(1)
  
}

# add axis labels
mtext("ALS NRD", 1, 3, outer = T)
mtext("MLS Density", 2, 3, outer = T, las = 0)

Next, ULS:

Code
# set up plot
par(mfrow = c(2,4), mar = rep(0,4), oma = c(5,5,1,1), las = 1)

# loop through radii
radii <- 1:8
for (radius in radii){
  
  # subset the data by subplot radius
  df_rad <- df[df$radius == radius,]
  
  # plot the data
  plot(
    x = df_rad$uls_nrd_under,
    y = df_rad$mls_vol_dens,
    xlim = c(0,1),
    ylim = c(0,1),
    xlab = "uls NRD",
    ylab = "MLS Density",
    type = "n",
    xaxt = "n",
    yaxt = "n"
  )
  grid()
  points(
    x = df_rad$uls_nrd_under,
    y = df_rad$mls_vol_dens,
    pch = 16,
    cex = 0.5,
    col = rgb(0,0,0,0.25)
  )
  
  # build model
  mod <- lm(mls_vol_dens ~ uls_nrd_under, data = df_rad)
  
  # add regression line
  abline(mod, lwd = 2, col = 2)
  
  # add radius
  legend(
    "topright",
    legend = paste0(radius, "m"),
    bty = "n",
    x.intersp = 0,
    text.font = 2
  )
  
  # add r2
  r2 <- summary(mod)$adj.r.squared |> round(2)
  legend(
    "topleft",
    legend = bquote(R^2==.(r2)),
    bty = "n",
    x.intersp = 0,
    text.col = 2
  )
  
  # add axes conditionally
  if (radius %in% c(1,5)) axis(2)
  if (radius %in% 5:8) axis(1)
  
}

# add axis labels
mtext("ULS NRD", 1, 3, outer = T)
mtext("MLS Density", 2, 3, outer = T, las = 0)

Interesting that ALS performs at least as well as ULS in most cases. Arguably better on the whole. The one exception is at the finest resolution (1m subplots), where ALS has a poor relationship and ULS has comparably high predictive ability. That makes sense, given the higher point density of ULS. So, overall perhaps ULS’ abilities are somewhat worse, but more adaptable to different resolutions?

Here’s another way to view those results:

Code
# create empty data frame
df_r2 <- data.frame(
  radius = 1:8,
  r2_als = rep(NA,8),
  r2_uls = rep(NA,8)
)

# loop through radii
radii <- 1:8
for (radius in radii){
  
  # subset the data by subplot radius
  df_rad <- df[df$radius == radius,]
  
  # get r2 vals
  r2_als <- summary(lm(mls_vol_dens ~ als_nrd_under, data = df_rad))$adj.r.squared
  r2_uls <- summary(lm(mls_vol_dens ~ uls_nrd_under, data = df_rad))$adj.r.squared
  
  # add results to df
  df_r2$r2_als[df_r2$radius == radius] <- r2_als
  df_r2$r2_uls[df_r2$radius == radius] <- r2_uls
  
}

# set up plot
par(mar = c(5,5,1,1), las = 1)

# create empty plot
plot(
  x = c(1,8),
  y = range(df_r2[,2:3]),
  xlab = "Subplot Radius (m)",
  ylab = expression(R^2),
  type = "n"
)
grid()

# add lines
lines(
  x = df_r2$radius,
  y = df_r2$r2_als,
  lwd = 2,
  col = 2
)
lines(
  x = df_r2$radius,
  y = df_r2$r2_uls,
  lwd = 2,
  col = 4
)

# add legend
legend(
  "bottomright",
  legend = c("ALS", "ULS"),
  lwd = 2,
  col = c(2,4),
  bty = "n"
)

ALS saw its best performance at 5m subplot radius whereas ULS saw its best performance at 4m subplot radius.

Sanity Check on NRD vs ORD

We have long believed NRD to be superior to ORD with respect to density estimation, so since we have the data, I thought it would be a worthwhile exercise to make sure we’ve hitched our wagon to the best horse.

Code
# create empty data frame
df_r2 <- data.frame(
  radius = 1:8,
  r2_als_nrd = rep(NA,8),
  r2_uls_nrd = rep(NA,8),
  r2_als_ord = rep(NA,8),
  r2_uls_ord = rep(NA,8)
)

# loop through radii
radii <- 1:8
for (radius in radii){
  
  # subset the data by subplot radius
  df_rad <- df[df$radius == radius,]
  
  # get r2 vals
  r2_als_nrd <- summary(
    lm(mls_vol_dens ~ als_nrd_under, data = df_rad)
  )$adj.r.squared
  r2_uls_nrd <- summary(
    lm(mls_vol_dens ~ uls_nrd_under, data = df_rad)
  )$adj.r.squared
  r2_als_ord <- summary(
    lm(mls_vol_dens ~ als_ord_under, data = df_rad)
  )$adj.r.squared
  r2_uls_ord <- summary(
    lm(mls_vol_dens ~ uls_ord_under, data = df_rad)
  )$adj.r.squared
  
  # add results to df
  df_r2$r2_als_nrd[df_r2$radius == radius] <- r2_als_nrd
  df_r2$r2_uls_nrd[df_r2$radius == radius] <- r2_uls_nrd
  df_r2$r2_als_ord[df_r2$radius == radius] <- r2_als_ord
  df_r2$r2_uls_ord[df_r2$radius == radius] <- r2_uls_ord
  
}

# set up plot
par(mar = c(5,5,1,1), las = 1)

# create empty plot
plot(
  x = c(1,8),
  y = range(df_r2[,2:5]),
  xlab = "Subplot Radius (m)",
  ylab = expression(R^2),
  type = "n"
)
grid()

# add lines
lines(
  x = df_r2$radius,
  y = df_r2$r2_als_nrd,
  lwd = 2,
  col = 2
)
lines(
  x = df_r2$radius,
  y = df_r2$r2_uls_nrd,
  lwd = 2,
  col = 4
)
lines(
  x = df_r2$radius,
  y = df_r2$r2_als_ord,
  lwd = 2,
  col = 2,
  lty = 3
)
lines(
  x = df_r2$radius,
  y = df_r2$r2_uls_ord,
  lwd = 2,
  col = 4,
  lty = 3
)

# add legend
legend(
  "bottomright",
  legend = c("ALS NRD", "ULS NRD", "ALS ORD", "ULS ORD"),
  lwd = 2,
  col = c(2,4,2,4),
  lty = c(1,1,3,3),
  bty = "n"
)

Excellent! NRD still reigns supreme.

For the sake of simplicity, moving forward, all analyses will be conducted at 5m, as this was the radius with the highest average performance between ULS and ALS platforms.

Examining the Effect of Overstory Conditions

An underlying hypothesis of this research is that overstory conditions affect the ability to accurately quantify understory density. A denser overstory should occlude lidar pulse energy, limiting the amount that can reach the understory. With less pulse energy reaching the understory, there may be a diminished ability to precisely quantify understory structure. This may also be true of taller overstories as well. Those were two of the main findings in Campbell et al. (2018). Following the approach taken in that paper, we can use a Monte Carlo resampling approach, where we iteratively take a subset of the data, build a linear model between predictor (ALS or ULS NRD) and response (MLS density) variables, and compare the resulting R2 value to the mean overstory density or canopy height within the subset of data.

Code
# subset the data to just 5m radius subplot
df_rad <- df[df$radius == 5,]

# define number of iterations
n_iter <- 10000

# create empty results data frame
df_over <- data.frame(
  r2_als = rep(NA, n_iter),
  r2_uls = rep(NA, n_iter),
  od_als = rep(NA, n_iter),
  od_uls = rep(NA, n_iter),
  ht_als = rep(NA, n_iter),
  ht_uls = rep(NA, n_iter)
)

# begin monte carlo loop
for (i in 1:n_iter){
  
  # take sample of the data (1/10)
  df_samp <- df_rad[sample(nrow(df_rad), nrow(df_rad) * 0.1),]
  
  # get r2s
  df_over$r2_als[i] <- summary(
    lm(mls_vol_dens ~ als_nrd_under, data = df_samp)
  )$adj.r.squared
  df_over$r2_uls[i] <- summary(
    lm(mls_vol_dens ~ uls_nrd_under, data = df_samp)
  )$adj.r.squared
  
  # get overstory density and height
  df_over$od_als[i] <- mean(df_samp$als_ord_over, na.rm = T)
  df_over$od_uls[i] <- mean(df_samp$uls_ord_over, na.rm = T)
  df_over$ht_als[i] <- mean(df_samp$als_ht_over, na.rm = T)
  df_over$ht_uls[i] <- mean(df_samp$uls_ht_over, na.rm = T)
  
}

# set up the plot
par(mfrow = c(2,2), mar = c(4.5,4.5,0.5,0.5), las = 1)

# define plotting function
plot_fun <- function(x,y,col,xlim,xlab,als_or_uls){
  plot(x, y, ylim = range(df_over[,1:2]), xlim = xlim, pch = 16, cex = 0.5,
       col = rgb(0,0,0,0.1), xlab = xlab, ylab = expression(R^2))
  grid()
  mod <- lm(y ~ x)
  abline(mod, lwd = 2, col = col)
  p <- summary(mod)$coefficients[2,4] |> format.pval(2,0.01)
  legend("topright", legend = paste0("p", p), text.col = col, bty = "n",
         x.intersp = 0)
  legend("bottomleft", legend = als_or_uls, text.font = 2, bty = "n", 
         text.col = col, x.intersp = 0)
}

# plot the data out
plot_fun(df_over$od_als, df_over$r2_als, 2, range(df_over[,c("od_als", "od_uls")]),
         "Mean Overstory Density", "ALS")
plot_fun(df_over$ht_als, df_over$r2_als, 2, range(df_over[,c("ht_als", "ht_uls")]),
         "Mean Overstory Height", "ALS")
plot_fun(df_over$od_uls, df_over$r2_uls, 4, range(df_over[,c("od_uls", "od_uls")]),
         "Mean Overstory Density", "ULS")
plot_fun(df_over$ht_uls, df_over$r2_uls, 4, range(df_over[,c("ht_uls", "ht_uls")]),
         "Mean Overstory Height", "ULS")

Pretty weak relationships, but all statistically significantly non-zero slopes, suggesting the existence of a robust, negative impact of both height and overstory density on model performance.

Breaking it Down by Vegetation Type

Let’s see how models built for individual vegetation types perform. First for ALS:

Code
# get veg names
veg_names <- dns_order

# get colors
cols <- brewer.pal(length(dns_order), "Set3")
cols_line <- darken(cols, 0.1)
cols_fill <- lighten(cols, 0.1)

# set up plot
par(mfrow = c(2,4), mar = rep(0,4), oma = c(5,5,1,1), las = 1)

# loop through veg names
for (i in 1:length(veg_names)){
  
  # get veg name
  veg_name <- veg_names[i]
  
  # create subset df
  df_vn <- df_rad[df_rad$veg_name == veg_name,]
  
  # create empty plot
  plot(
    x = range(df_rad$als_nrd_under),
    y = range(df_rad$mls_vol_dens),
    type = "n",
    xlab = NA,
    ylab = NA,
    xaxt = "n",
    yaxt = "n"
  )
  grid()
  
  # add points
  points(
    x = df_vn$als_nrd_under,
    y = df_vn$mls_vol_dens,
    pch = 16,
    cex = 0.5,
    col = cols_fill[i]
  )
  
  # build model
  mod <- lm(mls_vol_dens ~ als_nrd_under, data = df_vn)
  
  # add line
  abline(mod, lwd = 2, col = cols_line[i])
  
  # add veg name
  legend(
    "topleft",
    legend = veg_name,
    text.col = cols_line[i],
    bty = "n",
    x.intersp = 0,
    text.font = 2
  )
  
  # add r2
  r2 <- summary(mod)$adj.r.squared |> round(2)
  legend(
    "bottomright",
    legend = bquote(R^2==.(r2)),
    text.col = cols_line[i],
    bty = "n",
    x.intersp = 0
  )
  
  # add axes conditionally
  if (i %in% c(1,5)) axis(2)
  if (i %in% 5:8) axis(1)
  
}

# add axis labels
mtext("ALS NRD", 1, 3, T)
mtext("MLS Density", 2, 3, T, las = 0)

And now for ULS:

Code
# get veg names
veg_names <- dns_order

# get colors
cols <- brewer.pal(length(dns_order), "Set3")
cols_line <- darken(cols, 0.1)
cols_fill <- lighten(cols, 0.1)

# set up plot
par(mfrow = c(2,4), mar = rep(0,4), oma = c(5,5,1,1), las = 1)

# loop through veg names
for (i in 1:length(veg_names)){
  
  # get veg name
  veg_name <- veg_names[i]
  
  # create subset df
  df_vn <- df_rad[df_rad$veg_name == veg_name,]
  
  # create empty plot
  plot(
    x = range(df_rad$uls_nrd_under),
    y = range(df_rad$mls_vol_dens),
    type = "n",
    xlab = NA,
    ylab = NA,
    xaxt = "n",
    yaxt = "n"
  )
  grid()
  
  # add points
  points(
    x = df_vn$uls_nrd_under,
    y = df_vn$mls_vol_dens,
    pch = 16,
    cex = 0.5,
    col = cols_fill[i]
  )
  
  # build model
  mod <- lm(mls_vol_dens ~ uls_nrd_under, data = df_vn)
  
  # add line
  abline(mod, lwd = 2, col = cols_line[i])
  
  # add veg name
  legend(
    "topleft",
    legend = veg_name,
    text.col = cols_line[i],
    bty = "n",
    x.intersp = 0,
    text.font = 2
  )
  
  # add r2
  r2 <- summary(mod)$adj.r.squared |> round(2)
  legend(
    "bottomright",
    legend = bquote(R^2==.(r2)),
    text.col = cols_line[i],
    bty = "n",
    x.intersp = 0
  )
  
  # add axes conditionally
  if (i %in% c(1,5)) axis(2)
  if (i %in% 5:8) axis(1)
  
}

# add axis labels
mtext("ULS NRD", 1, 3, T)
mtext("MLS Density", 2, 3, T, las = 0)

Across platforms, aspen and maple were the most challenging – especially with the ULS. Lodgepole pine performed way better with ALS than ULS. And spruce-fir, pinyon-juniper, dry mixed conifer, and Gambel oak all performed better with ULS than ALS. Difficult to discern drivers of performance, but interesting results nonetheless.

Mixed Effects Modeling: One(-ish) Model to Rule them All?

As evident in these previous plots, the differences in y-intercepts and slopes of the regression lines above demonstrate why a singular, across-vegetation-type model may struggle to capture variance in observed (MLS) density. But, we have a handy-dandy tool at our disposal to account for these different slopes and y-intercepts: mixed effects modeling (MEM). MEM allows us to create a single model where y-intercepts and slopes vary according to some random (i.e., grouping) effects. In our case, the random effect is vegetation type. Let’s see how well a singular MEM model performs. To do this, we’ll assess the model performance in a more robust way than we have so far. We’ll perform a leave-one-plot-out cross-validation, whichs will help avoid issues with spatial autocorrelation.

Code
# get list of plots
plot_ids <- df_rad$plot_id
plot_ids_unq <- unique(plot_ids)

# loop through them
for (plot_id in plot_ids_unq){
  
  # split training and validation data
  df_train <- df_rad[plot_ids != plot_id,]
  df_valid <- df_rad[plot_ids == plot_id,]
  
  # build model
  mod_als <- lmer(mls_vol_dens ~ (als_nrd_under | veg_name), data = df_train)
  mod_uls <- lmer(mls_vol_dens ~ (uls_nrd_under | veg_name), data = df_train)
  
  # make predictions
  pred_als <- predict(mod_als, df_valid)
  pred_uls <- predict(mod_uls, df_valid)
  
  # compile predictions and observations
  if (plot_id == plot_ids_unq[1]){
    df_pred_obs <- data.frame(
      pred_als = pred_als,
      pred_uls = pred_uls,
      obs = df_valid$mls_vol_dens
    )
  } else {
    df_pred_obs <- rbind(
      df_pred_obs,
      data.frame(
        pred_als = pred_als,
        pred_uls = pred_uls,
        obs = df_valid$mls_vol_dens
      )
    )
  }
  
}

# set up plot
par(mfrow = c(1,2), mar = c(0,0,2,0), oma = c(5,5,0,1), las = 1)

#----------------ALS

# plot als
plot(
  x = range(df_pred_obs),
  y = range(df_pred_obs),
  type = "n",
  xlab = NA,
  ylab = NA,
  main = "ALS"
)
grid()

# add points
points(
  x = df_pred_obs$obs,
  y = df_pred_obs$pred_als,
  cex = 0.5,
  pch = 16,
  col = rgb(0,0,0,0.25)
)

# get performance
mod <- lm(pred_als ~ obs, data = df_pred_obs)
r2 <- summary(mod)$adj.r.squared |> round(2)
abline(mod, lwd = 2, col = 2)
legend(
  "topleft",
  legend = bquote(R^2==.(r2)),
  text.col = 2,
  bty = "n",
  x.intersp = 0
)

#----------------uls

# plot uls
plot(
  x = range(df_pred_obs),
  y = range(df_pred_obs),
  type = "n",
  xlab = NA,
  ylab = NA,
  yaxt = "n",
  main = "ULS"
)
grid()

# add points
points(
  x = df_pred_obs$obs,
  y = df_pred_obs$pred_uls,
  pch = 16,
  cex = 0.5,
  col = rgb(0,0,0,0.25)
)

# get performance
mod <- lm(pred_uls ~ obs, data = df_pred_obs)
r2 <- summary(mod)$adj.r.squared |> round(2)
abline(mod, lwd = 2, col = 4)
legend(
  "topleft",
  legend = bquote(R^2==.(r2)),
  text.col = 4,
  bty = "n",
  x.intersp = 0
)

# add axis labels
mtext("Observed MLS Density", 1, 3, T)
mtext("Predicted MLS Density", 2, 3, T, las = 0)

Both performing admirably. ALS with slightly better performance than ULS. In both cases, MEM outperforms the simpler linear models that incorporate all of the data without any knowledge of vegetation type.