Multi-Platform Lidar Analysis of Understory Vegetation Density
Author
Mickey Campbell
Published
April 22, 2025
Reference Data
The MLS data will serve as the reference data, giving us the truest representation of understory vegetation density among the three platforms, given the near-ground scanning perspective and the very high point density in relevant aboveground height ranges. Let’s first explore our data. Here is a summary of the data:
Let’s take a look at how the MLS understory density values (i.e., the primary response variable for all modeling in this study) vary by vegetation type, first using the 1m radius subplot data:
Code
# create summary tabledf_med_dns <- df |>group_by(veg_name) |>summarize(med_dns =median(mls_vol_dens)) |>as.data.frame()# get veg names by order of density (ascending)dns_order <- df_med_dns$veg_name[order(df_med_dns$med_dns)]# set up plotpar(mar =c(5,8,1,1), las =1)# create empty plotplot(x =range(df$mls_vol_dens),y =c(0.5,7.5),type ="n",xlab ="MLS-Measured Understory Density",ylab =NA,main ="Subplot Radius = 1m",axes = F)# add axis and vertical linesabline(v =axTicks(1), col ="lightgray", lty =3)axis(1)# subset data frame to subplot radiusdf_rad <- df[df$radius ==1,]# get colorscols <-brewer.pal(length(dns_order), "Set3")cols_line <-darken(cols, 0.1)cols_fill <-lighten(cols, 0.1)# get maximum densitiesdy_max <-lapply(dns_order, function(x){ d <-density(df_rad$mls_vol_dens[df_rad$veg_name == x], bw =0.04) dy <- d$yreturn(max(dy))}) |>unlist() |>max(0)# loop through veg typesfor (i in1:length(dns_order)){# get veg name veg_name <- dns_order[i]# subset data frame to veg type df_vt <- df_rad[df_rad$veg_name == veg_name,]# get density d <-density(df_vt$mls_vol_dens, bw =0.04, cut =0) dx <- d$x dy <- d$y# adjust dy dy <- dy / (2* dy_max)# add violin polygonpolygon(x =c(dx, rev(dx)),y =c(dy, rev(dy) *-1) + i,border = cols_line[i],col = cols_fill[i] )# add median pointpoints(x =median(df_vt$mls_vol_dens),y = i,pch =16,col = cols_line[i] )# add veg type labelmtext(veg_name, side =2, at = i, col = cols_line[i])}
Now we’ll take a look at the opposite extreme, with the 8m-radius subplot data:
Code
# create summary tabledf_med_dns <- df |>group_by(veg_name) |>summarize(med_dns =median(mls_vol_dens)) |>as.data.frame()# get veg names by order of density (ascending)dns_order <- df_med_dns$veg_name[order(df_med_dns$med_dns)]# set up plotpar(mar =c(5,8,1,1), las =1)# create empty plotplot(x =range(df$mls_vol_dens),y =c(0.5,7.5),type ="n",xlab ="MLS-Measured Understory Density",ylab =NA,main ="Subplot Radius = 8m",axes = F)# add axis and vertical linesabline(v =axTicks(1), col ="lightgray", lty =3)axis(1)# subset data frame to subplot radiusdf_rad <- df[df$radius ==8,]# get colorscols <-brewer.pal(length(dns_order), "Set3")cols_line <-darken(cols, 0.1)cols_fill <-lighten(cols, 0.1)# get maximum densitiesdy_max <-lapply(dns_order, function(x){ d <-density(df_rad$mls_vol_dens[df_rad$veg_name == x], bw =0.04) dy <- d$yreturn(max(dy))}) |>unlist() |>max(0)# loop through veg typesfor (i in1:length(dns_order)){# get veg name veg_name <- dns_order[i]# subset data frame to veg type df_vt <- df_rad[df_rad$veg_name == veg_name,]# get density d <-density(df_vt$mls_vol_dens, bw =0.04, cut =0) dx <- d$x dy <- d$y# adjust dy dy <- dy / (2* dy_max)# add violin polygonpolygon(x =c(dx, rev(dx)),y =c(dy, rev(dy) *-1) + i,border = cols_line[i],col = cols_fill[i] )# add median pointpoints(x =median(df_vt$mls_vol_dens),y = i,pch =16,col = cols_line[i] )# add veg type labelmtext(veg_name, side =2, at = i, col = cols_line[i])}
Very clear differences. At 1m, there are a whole lot more zero or near-zero values, given the increased likelihood of having gaps in the understory with little or no vegetation within a 1m radius subplot area. But there are also a lot more extreme values, given the clumpiness of vegetation, and the associated increased likelihood of very high local densities emerging. Conversely, at 8m, the values are averaged out over a much larger area, yielding generally more moderate values with a narrower range.
ALS/ULS NRD vs MLS Density
In the best-case scenario, our reference MLS density values could be predicted in a single, ordinary least-squares regression model, with ULS or ALS NRD values as the sole predictor variable. If that worked, that would tell us that ULS and ALS can both be used to predict understory density reliably. The odds of that are low, but it’s worth testing to establish a baseline, foundational relationship between NRD and MLS-measured density. First, ALS:
Code
# set up plotpar(mfrow =c(2,4), mar =rep(0,4), oma =c(5,5,1,1), las =1)# loop through radiiradii <-1:8for (radius in radii){# subset the data by subplot radius df_rad <- df[df$radius == radius,]# plot the dataplot(x = df_rad$als_nrd_under,y = df_rad$mls_vol_dens,xlim =c(0,1),ylim =c(0,1),xlab ="ALS NRD",ylab ="MLS Density",type ="n",xaxt ="n",yaxt ="n" )grid()points(x = df_rad$als_nrd_under,y = df_rad$mls_vol_dens,pch =16,cex =0.5,col =rgb(0,0,0,0.25) )# build model mod <-lm(mls_vol_dens ~ als_nrd_under, data = df_rad)# add regression lineabline(mod, lwd =2, col =2)# add radiuslegend("topright",legend =paste0(radius, "m"),bty ="n",x.intersp =0,text.font =2 )# add r2 r2 <-summary(mod)$adj.r.squared |>round(2)legend("topleft",legend =bquote(R^2==.(r2)),bty ="n",x.intersp =0,text.col =2 )# add axes conditionallyif (radius %in%c(1,5)) axis(2)if (radius %in%5:8) axis(1)}# add axis labelsmtext("ALS NRD", 1, 3, outer = T)mtext("MLS Density", 2, 3, outer = T, las =0)
Next, ULS:
Code
# set up plotpar(mfrow =c(2,4), mar =rep(0,4), oma =c(5,5,1,1), las =1)# loop through radiiradii <-1:8for (radius in radii){# subset the data by subplot radius df_rad <- df[df$radius == radius,]# plot the dataplot(x = df_rad$uls_nrd_under,y = df_rad$mls_vol_dens,xlim =c(0,1),ylim =c(0,1),xlab ="uls NRD",ylab ="MLS Density",type ="n",xaxt ="n",yaxt ="n" )grid()points(x = df_rad$uls_nrd_under,y = df_rad$mls_vol_dens,pch =16,cex =0.5,col =rgb(0,0,0,0.25) )# build model mod <-lm(mls_vol_dens ~ uls_nrd_under, data = df_rad)# add regression lineabline(mod, lwd =2, col =2)# add radiuslegend("topright",legend =paste0(radius, "m"),bty ="n",x.intersp =0,text.font =2 )# add r2 r2 <-summary(mod)$adj.r.squared |>round(2)legend("topleft",legend =bquote(R^2==.(r2)),bty ="n",x.intersp =0,text.col =2 )# add axes conditionallyif (radius %in%c(1,5)) axis(2)if (radius %in%5:8) axis(1)}# add axis labelsmtext("ULS NRD", 1, 3, outer = T)mtext("MLS Density", 2, 3, outer = T, las =0)
Interesting that ALS performs at least as well as ULS in most cases. Arguably better on the whole. The one exception is at the finest resolution (1m subplots), where ALS has a poor relationship and ULS has comparably high predictive ability. That makes sense, given the higher point density of ULS. So, overall perhaps ULS’ abilities are somewhat worse, but more adaptable to different resolutions?
Here’s another way to view those results:
Code
# create empty data framedf_r2 <-data.frame(radius =1:8,r2_als =rep(NA,8),r2_uls =rep(NA,8))# loop through radiiradii <-1:8for (radius in radii){# subset the data by subplot radius df_rad <- df[df$radius == radius,]# get r2 vals r2_als <-summary(lm(mls_vol_dens ~ als_nrd_under, data = df_rad))$adj.r.squared r2_uls <-summary(lm(mls_vol_dens ~ uls_nrd_under, data = df_rad))$adj.r.squared# add results to df df_r2$r2_als[df_r2$radius == radius] <- r2_als df_r2$r2_uls[df_r2$radius == radius] <- r2_uls}# set up plotpar(mar =c(5,5,1,1), las =1)# create empty plotplot(x =c(1,8),y =range(df_r2[,2:3]),xlab ="Subplot Radius (m)",ylab =expression(R^2),type ="n")grid()# add lineslines(x = df_r2$radius,y = df_r2$r2_als,lwd =2,col =2)lines(x = df_r2$radius,y = df_r2$r2_uls,lwd =2,col =4)# add legendlegend("bottomright",legend =c("ALS", "ULS"),lwd =2,col =c(2,4),bty ="n")
ALS saw its best performance at 5m subplot radius whereas ULS saw its best performance at 4m subplot radius.
Sanity Check on NRD vs ORD
We have long believed NRD to be superior to ORD with respect to density estimation, so since we have the data, I thought it would be a worthwhile exercise to make sure we’ve hitched our wagon to the best horse.
Code
# create empty data framedf_r2 <-data.frame(radius =1:8,r2_als_nrd =rep(NA,8),r2_uls_nrd =rep(NA,8),r2_als_ord =rep(NA,8),r2_uls_ord =rep(NA,8))# loop through radiiradii <-1:8for (radius in radii){# subset the data by subplot radius df_rad <- df[df$radius == radius,]# get r2 vals r2_als_nrd <-summary(lm(mls_vol_dens ~ als_nrd_under, data = df_rad) )$adj.r.squared r2_uls_nrd <-summary(lm(mls_vol_dens ~ uls_nrd_under, data = df_rad) )$adj.r.squared r2_als_ord <-summary(lm(mls_vol_dens ~ als_ord_under, data = df_rad) )$adj.r.squared r2_uls_ord <-summary(lm(mls_vol_dens ~ uls_ord_under, data = df_rad) )$adj.r.squared# add results to df df_r2$r2_als_nrd[df_r2$radius == radius] <- r2_als_nrd df_r2$r2_uls_nrd[df_r2$radius == radius] <- r2_uls_nrd df_r2$r2_als_ord[df_r2$radius == radius] <- r2_als_ord df_r2$r2_uls_ord[df_r2$radius == radius] <- r2_uls_ord}# set up plotpar(mar =c(5,5,1,1), las =1)# create empty plotplot(x =c(1,8),y =range(df_r2[,2:5]),xlab ="Subplot Radius (m)",ylab =expression(R^2),type ="n")grid()# add lineslines(x = df_r2$radius,y = df_r2$r2_als_nrd,lwd =2,col =2)lines(x = df_r2$radius,y = df_r2$r2_uls_nrd,lwd =2,col =4)lines(x = df_r2$radius,y = df_r2$r2_als_ord,lwd =2,col =2,lty =3)lines(x = df_r2$radius,y = df_r2$r2_uls_ord,lwd =2,col =4,lty =3)# add legendlegend("bottomright",legend =c("ALS NRD", "ULS NRD", "ALS ORD", "ULS ORD"),lwd =2,col =c(2,4,2,4),lty =c(1,1,3,3),bty ="n")
Excellent! NRD still reigns supreme.
For the sake of simplicity, moving forward, all analyses will be conducted at 5m, as this was the radius with the highest average performance between ULS and ALS platforms.
Examining the Effect of Overstory Conditions
An underlying hypothesis of this research is that overstory conditions affect the ability to accurately quantify understory density. A denser overstory should occlude lidar pulse energy, limiting the amount that can reach the understory. With less pulse energy reaching the understory, there may be a diminished ability to precisely quantify understory structure. This may also be true of taller overstories as well. Those were two of the main findings in Campbell et al. (2018). Following the approach taken in that paper, we can use a Monte Carlo resampling approach, where we iteratively take a subset of the data, build a linear model between predictor (ALS or ULS NRD) and response (MLS density) variables, and compare the resulting R2 value to the mean overstory density or canopy height within the subset of data.
Code
# subset the data to just 5m radius subplotdf_rad <- df[df$radius ==5,]# define number of iterationsn_iter <-10000# create empty results data framedf_over <-data.frame(r2_als =rep(NA, n_iter),r2_uls =rep(NA, n_iter),od_als =rep(NA, n_iter),od_uls =rep(NA, n_iter),ht_als =rep(NA, n_iter),ht_uls =rep(NA, n_iter))# begin monte carlo loopfor (i in1:n_iter){# take sample of the data (1/10) df_samp <- df_rad[sample(nrow(df_rad), nrow(df_rad) *0.1),]# get r2s df_over$r2_als[i] <-summary(lm(mls_vol_dens ~ als_nrd_under, data = df_samp) )$adj.r.squared df_over$r2_uls[i] <-summary(lm(mls_vol_dens ~ uls_nrd_under, data = df_samp) )$adj.r.squared# get overstory density and height df_over$od_als[i] <-mean(df_samp$als_ord_over, na.rm = T) df_over$od_uls[i] <-mean(df_samp$uls_ord_over, na.rm = T) df_over$ht_als[i] <-mean(df_samp$als_ht_over, na.rm = T) df_over$ht_uls[i] <-mean(df_samp$uls_ht_over, na.rm = T)}# set up the plotpar(mfrow =c(2,2), mar =c(4.5,4.5,0.5,0.5), las =1)# define plotting functionplot_fun <-function(x,y,col,xlim,xlab,als_or_uls){plot(x, y, ylim =range(df_over[,1:2]), xlim = xlim, pch =16, cex =0.5,col =rgb(0,0,0,0.1), xlab = xlab, ylab =expression(R^2))grid() mod <-lm(y ~ x)abline(mod, lwd =2, col = col) p <-summary(mod)$coefficients[2,4] |>format.pval(2,0.01)legend("topright", legend =paste0("p", p), text.col = col, bty ="n",x.intersp =0)legend("bottomleft", legend = als_or_uls, text.font =2, bty ="n", text.col = col, x.intersp =0)}# plot the data outplot_fun(df_over$od_als, df_over$r2_als, 2, range(df_over[,c("od_als", "od_uls")]),"Mean Overstory Density", "ALS")plot_fun(df_over$ht_als, df_over$r2_als, 2, range(df_over[,c("ht_als", "ht_uls")]),"Mean Overstory Height", "ALS")plot_fun(df_over$od_uls, df_over$r2_uls, 4, range(df_over[,c("od_uls", "od_uls")]),"Mean Overstory Density", "ULS")plot_fun(df_over$ht_uls, df_over$r2_uls, 4, range(df_over[,c("ht_uls", "ht_uls")]),"Mean Overstory Height", "ULS")
Pretty weak relationships, but all statistically significantly non-zero slopes, suggesting the existence of a robust, negative impact of both height and overstory density on model performance.
Breaking it Down by Vegetation Type
Let’s see how models built for individual vegetation types perform. First for ALS:
Code
# get veg namesveg_names <- dns_order# get colorscols <-brewer.pal(length(dns_order), "Set3")cols_line <-darken(cols, 0.1)cols_fill <-lighten(cols, 0.1)# set up plotpar(mfrow =c(2,4), mar =rep(0,4), oma =c(5,5,1,1), las =1)# loop through veg namesfor (i in1:length(veg_names)){# get veg name veg_name <- veg_names[i]# create subset df df_vn <- df_rad[df_rad$veg_name == veg_name,]# create empty plotplot(x =range(df_rad$als_nrd_under),y =range(df_rad$mls_vol_dens),type ="n",xlab =NA,ylab =NA,xaxt ="n",yaxt ="n" )grid()# add pointspoints(x = df_vn$als_nrd_under,y = df_vn$mls_vol_dens,pch =16,cex =0.5,col = cols_fill[i] )# build model mod <-lm(mls_vol_dens ~ als_nrd_under, data = df_vn)# add lineabline(mod, lwd =2, col = cols_line[i])# add veg namelegend("topleft",legend = veg_name,text.col = cols_line[i],bty ="n",x.intersp =0,text.font =2 )# add r2 r2 <-summary(mod)$adj.r.squared |>round(2)legend("bottomright",legend =bquote(R^2==.(r2)),text.col = cols_line[i],bty ="n",x.intersp =0 )# add axes conditionallyif (i %in%c(1,5)) axis(2)if (i %in%5:8) axis(1)}# add axis labelsmtext("ALS NRD", 1, 3, T)mtext("MLS Density", 2, 3, T, las =0)
And now for ULS:
Code
# get veg namesveg_names <- dns_order# get colorscols <-brewer.pal(length(dns_order), "Set3")cols_line <-darken(cols, 0.1)cols_fill <-lighten(cols, 0.1)# set up plotpar(mfrow =c(2,4), mar =rep(0,4), oma =c(5,5,1,1), las =1)# loop through veg namesfor (i in1:length(veg_names)){# get veg name veg_name <- veg_names[i]# create subset df df_vn <- df_rad[df_rad$veg_name == veg_name,]# create empty plotplot(x =range(df_rad$uls_nrd_under),y =range(df_rad$mls_vol_dens),type ="n",xlab =NA,ylab =NA,xaxt ="n",yaxt ="n" )grid()# add pointspoints(x = df_vn$uls_nrd_under,y = df_vn$mls_vol_dens,pch =16,cex =0.5,col = cols_fill[i] )# build model mod <-lm(mls_vol_dens ~ uls_nrd_under, data = df_vn)# add lineabline(mod, lwd =2, col = cols_line[i])# add veg namelegend("topleft",legend = veg_name,text.col = cols_line[i],bty ="n",x.intersp =0,text.font =2 )# add r2 r2 <-summary(mod)$adj.r.squared |>round(2)legend("bottomright",legend =bquote(R^2==.(r2)),text.col = cols_line[i],bty ="n",x.intersp =0 )# add axes conditionallyif (i %in%c(1,5)) axis(2)if (i %in%5:8) axis(1)}# add axis labelsmtext("ULS NRD", 1, 3, T)mtext("MLS Density", 2, 3, T, las =0)
Across platforms, aspen and maple were the most challenging – especially with the ULS. Lodgepole pine performed way better with ALS than ULS. And spruce-fir, pinyon-juniper, dry mixed conifer, and Gambel oak all performed better with ULS than ALS. Difficult to discern drivers of performance, but interesting results nonetheless.
Mixed Effects Modeling: One(-ish) Model to Rule them All?
As evident in these previous plots, the differences in y-intercepts and slopes of the regression lines above demonstrate why a singular, across-vegetation-type model may struggle to capture variance in observed (MLS) density. But, we have a handy-dandy tool at our disposal to account for these different slopes and y-intercepts: mixed effects modeling (MEM). MEM allows us to create a single model where y-intercepts and slopes vary according to some random (i.e., grouping) effects. In our case, the random effect is vegetation type. Let’s see how well a singular MEM model performs. To do this, we’ll assess the model performance in a more robust way than we have so far. We’ll perform a leave-one-plot-out cross-validation, whichs will help avoid issues with spatial autocorrelation.
Code
# get list of plotsplot_ids <- df_rad$plot_idplot_ids_unq <-unique(plot_ids)# loop through themfor (plot_id in plot_ids_unq){# split training and validation data df_train <- df_rad[plot_ids != plot_id,] df_valid <- df_rad[plot_ids == plot_id,]# build model mod_als <-lmer(mls_vol_dens ~ (als_nrd_under | veg_name), data = df_train) mod_uls <-lmer(mls_vol_dens ~ (uls_nrd_under | veg_name), data = df_train)# make predictions pred_als <-predict(mod_als, df_valid) pred_uls <-predict(mod_uls, df_valid)# compile predictions and observationsif (plot_id == plot_ids_unq[1]){ df_pred_obs <-data.frame(pred_als = pred_als,pred_uls = pred_uls,obs = df_valid$mls_vol_dens ) } else { df_pred_obs <-rbind( df_pred_obs,data.frame(pred_als = pred_als,pred_uls = pred_uls,obs = df_valid$mls_vol_dens ) ) }}# set up plotpar(mfrow =c(1,2), mar =c(0,0,2,0), oma =c(5,5,0,1), las =1)#----------------ALS# plot alsplot(x =range(df_pred_obs),y =range(df_pred_obs),type ="n",xlab =NA,ylab =NA,main ="ALS")grid()# add pointspoints(x = df_pred_obs$obs,y = df_pred_obs$pred_als,cex =0.5,pch =16,col =rgb(0,0,0,0.25))# get performancemod <-lm(pred_als ~ obs, data = df_pred_obs)r2 <-summary(mod)$adj.r.squared |>round(2)abline(mod, lwd =2, col =2)legend("topleft",legend =bquote(R^2==.(r2)),text.col =2,bty ="n",x.intersp =0)#----------------uls# plot ulsplot(x =range(df_pred_obs),y =range(df_pred_obs),type ="n",xlab =NA,ylab =NA,yaxt ="n",main ="ULS")grid()# add pointspoints(x = df_pred_obs$obs,y = df_pred_obs$pred_uls,pch =16,cex =0.5,col =rgb(0,0,0,0.25))# get performancemod <-lm(pred_uls ~ obs, data = df_pred_obs)r2 <-summary(mod)$adj.r.squared |>round(2)abline(mod, lwd =2, col =4)legend("topleft",legend =bquote(R^2==.(r2)),text.col =4,bty ="n",x.intersp =0)# add axis labelsmtext("Observed MLS Density", 1, 3, T)mtext("Predicted MLS Density", 2, 3, T, las =0)
Both performing admirably. ALS with slightly better performance than ULS. In both cases, MEM outperforms the simpler linear models that incorporate all of the data without any knowledge of vegetation type.