INTRODUCTION

The objective of this document is to try to gain a better understanding of what FIA data will be needed for a project aimed at mapping the distribution of piñon and juniper tree species in the US.

# load libraries
library(stringr)
library(magrittr)
library(data.table)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

READING IN FIA DATA

I have downloaded CSV files for plot, subplot, and tree data for all states that may contain P or J. I will first read them all in and merge them together.

# define main directory
main.dir <- "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112"

# list file by table type and print them
csvs.plot <- list.files(path = main.dir, 
                        pattern = "*_PLOT.csv",
                        full.names = T)
csvs.plot
##  [1] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/AZ_PLOT.csv"
##  [2] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/CA_PLOT.csv"
##  [3] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/CO_PLOT.csv"
##  [4] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/ID_PLOT.csv"
##  [5] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/KS_PLOT.csv"
##  [6] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/MT_PLOT.csv"
##  [7] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/ND_PLOT.csv"
##  [8] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/NE_PLOT.csv"
##  [9] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/NM_PLOT.csv"
## [10] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/NV_PLOT.csv"
## [11] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/OK_PLOT.csv"
## [12] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/OR_PLOT.csv"
## [13] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/SD_PLOT.csv"
## [14] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/TX_PLOT.csv"
## [15] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/UT_PLOT.csv"
## [16] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/WA_PLOT.csv"
## [17] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/WY_PLOT.csv"
csvs.subp <- list.files(path = main.dir, 
                        pattern = "*_SUBPLOT.csv",
                        full.names = T)
csvs.subp
##  [1] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/AZ_SUBPLOT.csv"
##  [2] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/CA_SUBPLOT.csv"
##  [3] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/CO_SUBPLOT.csv"
##  [4] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/ID_SUBPLOT.csv"
##  [5] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/KS_SUBPLOT.csv"
##  [6] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/MT_SUBPLOT.csv"
##  [7] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/ND_SUBPLOT.csv"
##  [8] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/NE_SUBPLOT.csv"
##  [9] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/NM_SUBPLOT.csv"
## [10] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/NV_SUBPLOT.csv"
## [11] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/OK_SUBPLOT.csv"
## [12] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/OR_SUBPLOT.csv"
## [13] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/SD_SUBPLOT.csv"
## [14] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/TX_SUBPLOT.csv"
## [15] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/UT_SUBPLOT.csv"
## [16] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/WA_SUBPLOT.csv"
## [17] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/WY_SUBPLOT.csv"
csvs.tree <- list.files(path = main.dir, 
                        pattern = "*_TREE.csv",
                        full.names = T)
csvs.tree
##  [1] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/AZ_TREE.csv"
##  [2] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/CA_TREE.csv"
##  [3] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/CO_TREE.csv"
##  [4] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/ID_TREE.csv"
##  [5] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/KS_TREE.csv"
##  [6] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/MT_TREE.csv"
##  [7] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/ND_TREE.csv"
##  [8] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/NE_TREE.csv"
##  [9] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/NM_TREE.csv"
## [10] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/NV_TREE.csv"
## [11] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/OK_TREE.csv"
## [12] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/OR_TREE.csv"
## [13] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/SD_TREE.csv"
## [14] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/TX_TREE.csv"
## [15] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/UT_TREE.csv"
## [16] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/WA_TREE.csv"
## [17] "S:/ursa/campbell/NIP/Data/fia/FIA_CSVs_20230112/WY_TREE.csv"
# get list of states and print them
states <- list.files(path = main.dir,
                     pattern = "*.csv") %>%
  str_sub(1,2) %>%
  unique()
states
##  [1] "AZ" "CA" "CO" "ID" "KS" "MT" "ND" "NE" "NM" "NV" "OK" "OR" "SD" "TX" "UT"
## [16] "WA" "WY"
# define the fields that are needed for each table type
flds.plot <- c("CN", "INVYR", "STATECD", "UNITCD", "COUNTYCD", "PLOT",
               "PLOT_STATUS_CD", "MEASYEAR", "DESIGNCD", "LAT", "LON")
flds.subp <- c("PLT_CN", "SUBP", "SUBPCOND")
flds.tree <- c("PLT_CN", "STATUSCD", "SPCD", "TPA_UNADJ", "CARBON_AG")

# read in the data
df.plot <- do.call(rbind, 
                   lapply(csvs.plot, 
                          function(x) fread(x, select = flds.plot))) %>%
  as.data.frame
df.subp <- do.call(rbind, 
                   lapply(csvs.subp, 
                          function(x) fread(x, select = flds.subp))) %>%
  as.data.frame
df.tree <- do.call(rbind, 
                   lapply(csvs.tree, 
                          function(x) fread(x, select = flds.tree))) %>%
  as.data.frame

# convert plot cns to character
df.plot$CN <- as.character(df.plot$CN)
df.subp$PLT_CN <- as.character(df.subp$PLT_CN)
df.tree$PLT_CN <- as.character(df.tree$PLT_CN)

Data summary

Prior to any data filtering…

  • The plot data frame has 689127 records.
  • The subplot data frame has 2153634 records.
  • The tree data frame has 4517190 records.

DATA FILTERING

I will perform some basic filtering operations to ensure that the best data are being used to drive the modeling process.

Filter #1: Only take plots that were visited

Plots that have PLOT_STATUS_CD of 1 indicate:

Sampled - at least one accessible forest land condition present on plot.

Plots that have PLOT_STATUS_CD of 2 indicate:

Sampled - no accessible forest land condition present on plot.

Plots that have PLOT_STATUS_CD of 3 indicate:

Nonsampled.

So, we’ll keep 1 and 2.

# remove plots with status code of 3
df.plot <- df.plot[df.plot$PLOT_STATUS_CD != 3,]

# remove the same plots in the other tables as well
df.subp <- df.subp[df.subp$PLT_CN %in% df.plot$CN,]
df.tree <- df.tree[df.tree$PLT_CN %in% df.plot$CN,]

Data summary

After this filter…

  • The plot data frame has 669496 records.
  • The subplot data frame has 2079676 records.
  • The tree data frame has 4453196 records.

Filter #2: Only take the most recent measurement

In many cases, the same plot will be remeasured and recorded within the same dataset. However, we only want one survey per plot location, so I will filter to only the most recent plot measurement.

# get unique plot identifier
df.plot$UNIQUEID <- paste0(df.plot$STATECD, "-",
                           df.plot$UNITCD, "-",
                           df.plot$COUNTYCD, "-",
                           df.plot$PLOT)

# sort based on measyear
df.plot <- df.plot[order(df.plot$MEASYEAR, decreasing = T),]

# remove duplicate plots (gets rid of previous measurements)
df.plot <- df.plot[!duplicated(df.plot$UNIQUEID),]

# remove the same plots in the other tables as well
df.subp <- df.subp[df.subp$PLT_CN %in% df.plot$CN,]
df.tree <- df.tree[df.tree$PLT_CN %in% df.plot$CN,]

Data summary

After this filter…

  • The plot data frame has 427264 records.
  • The subplot data frame has 1137120 records.
  • The tree data frame has 2701269 records.

Filter #3: Only take plots with four subplots

I’m not entirely sure this is a necessary step, but I did it for the BWA analysis, so I’ve repeated it here….

# join subplots to plots
df.plot.subp <- merge(df.plot, df.subp, by.x = "CN", by.y = "PLT_CN")

# get the number of subplots and remove plots without four
df.nsubp <- aggregate(list(NSUBP = df.plot.subp$CN),
                        list(CN = df.plot.subp$CN),
                        length)
df.nsubp <- df.nsubp[df.nsubp$NSUBP == 4,]

# filter data
df.plot <- df.plot[df.plot$CN %in% df.nsubp$CN,]
df.subp <- df.subp[df.subp$PLT_CN %in% df.nsubp$CN,]
df.tree <- df.tree[df.tree$PLT_CN %in% df.nsubp$CN,]

Data summary

After this filter…

  • The plot data frame has 232057 records.
  • The subplot data frame has 928228 records.
  • The tree data frame has 2186937 records.

Filter #4: Only take single condition plots

For the sake of simple presence/absence modeling, I don’t think this is critical, but for the subsequent analysis steps (e.g., determining whether or not PJ species are dominant), I want to avoid having messy plots (i.e., plots that fall along edges of different conditions). So, it seems like a valuable step?

# join subplots to plots
df.plot.subp <- merge(df.plot, df.subp, by.x = "CN", by.y = "PLT_CN")

# get the number of conditions within subplots and remove plots with more than 1
df.ncond <- aggregate(list(ncond = df.plot.subp$SUBPCOND),
                        list(CN = df.plot.subp$CN),
                        max)
df.ncond <- df.ncond[df.ncond$ncond == 1,]

# filter data
df.plot <- df.plot[df.plot$CN %in% df.ncond$CN,]
df.subp <- df.subp[df.subp$PLT_CN %in% df.ncond$CN,]
df.tree <- df.tree[df.tree$PLT_CN %in% df.ncond$CN,]

Data summary

After this filter…

  • The plot data frame has 210914 records.
  • The subplot data frame has 843656 records.
  • The tree data frame has 1710515 records.

Possible additional filters to consider

There may be value to doing some additional filtering. I would be interested in having a conversation about the virtue of filtering for…

  • Disturbances/Treatments: is it worth focusing on areas that have had minimal changes to vegetation condition/structure/health for the sake of developing an accurate distribution model? For example if someone removed all the P and J from a plot to promote ponderosa pine, then it would suggest that that plot is “bad” habitat for P and J (since it wouldn’t be in the tree data any more post-treatment), when in fact it might be great PJ habitat…
  • Inventory/Measurement Years: since my time series analysis is 1984-2022, it may be worth focusing on plot data from that range.
  • Plot Designs: the filtering to four subplots kind of enforces that I’m only using plots that are similar to the national design. But, is it worth being even more explicit?

GETTING PJ SPECIES

With the data somewhat filtered, I’m going to shift gears now and focus on PJ data specifically. To pull out the target species, I’m going to use the FIA’s master tree list. As a starting point, I will extract all of the “woodland” species belonging to the genus Pinus and Juniperus.

# read in the fia master species list
csv.spec <- "S:/ursa/campbell/NIP/Data/fia/Master Species_ver_2-0__9_2022_final.csv"
df.spec <- read.csv(csv.spec)

# get all pinyons and junipers
df.spec.p <- df.spec[df.spec$woodland == "w" & df.spec$genus == "Pinus",]
df.spec.j <- df.spec[df.spec$woodland == "w" & df.spec$genus == "Juniperus",]
df.spec.pj <- rbind(df.spec.p, df.spec.j)
df.spec.pj
##      woodland fia.code                    common.name     genus      species
## 1541        w      140            Mexican pinyon pine     Pinus   cembroides
## 1546        w      134                  border pinyon     Pinus     discolor
## 1548        w      106    common or two-needle pinyon     Pinus       edulis
## 1561        w      133              singleleaf pinyon     Pinus   monophylla
## 1562        w      143            Arizona pinyon pine     Pinus   monophylla
## 1574        w      138 four-leaf or Parry pinyon pine     Pinus  quadrifolia
## 1576        w      141         papershell pinyon pine     Pinus       remota
## 1093        w       61                   Ashe juniper Juniperus        ashei
## 1095        w       62             California juniper Juniperus  californica
## 1097        w       59               redberry juniper Juniperus coahuilensis
## 1098        w       63              alligator juniper Juniperus     deppeana
## 1099        w       60               Drooping juniper Juniperus     flaccida
## 1100        w       69                oneseed juniper Juniperus   monosperma
## 1102        w       65                   Utah juniper Juniperus  osteosperma
## 1103        w       58                Pinchot juniper Juniperus    pinchotii
## 1104        w       66         Rocky Mountain juniper Juniperus   scopulorum
##      variety subspecies plants.code
## 1541                           PICE
## 1546                          PIDI3
## 1548                           PIED
## 1561                           PIMO
## 1562  fallax                  PIMOF
## 1574                           PIQU
## 1576                          PIRE5
## 1093                           JUAS
## 1095                          JUCA7
## 1097                         JUCO11
## 1098                          JUDE2
## 1099                           JUFL
## 1100                           JUMO
## 1102                           JUOS
## 1103                           JUPI
## 1104                          JUSC2

DETERMINING PJ PRESENCE/ABSENCE

With those species isolated, I will now go plot by plot within the tree data and flag whether or not each plot contains each tree species in a binary (presence vs. absence) manner.

# load in states shapefile
states <- st_read("S:/ursa/campbell/NIP/Data/Census/a02_extracted/tl_2020_us_state.shp")
## Reading layer `tl_2020_us_state' from data source 
##   `S:\ursa\campbell\NIP\Data\Census\a02_extracted\tl_2020_us_state.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 14 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS:  NAD83
# loop through pj species
pj.codes <- df.spec.pj$fia.code
pj.abbvs <- df.spec.pj$plants.code
for (i in seq(1,length(pj.codes))){
  
  # find plots with that species
  pj.code <- pj.codes[i]
  pj.abbv <- pj.abbvs[i]
  df.tree.spec <- df.tree[df.tree$SPCD == pj.code,]
  
  # add presence/absence column to plot df
  fld <- paste0(pj.abbv, "_PRESABS")
  df.plot[,fld] <- 0
  df.plot[,fld][df.plot$CN %in% df.tree.spec$PLT_CN] <- 1
  
}

# plot it out
par(mfrow = c(4,4),
    mar = c(0,0,2,0),
    oma = rep(0,4),
    las = 1)
xlim <- range(df.plot$LON)
ylim <- range(df.plot$LAT)
for (i in seq(1,length(pj.codes))){
  pj.abbv <- pj.abbvs[i]
  fld <- paste0(pj.abbv, "_PRESABS")
  df.plot.i <- df.plot[df.plot[,fld] == 1,]
  if (nrow(df.plot.i) == 0) next
  plot(LAT ~ LON, data = df.plot.i, col = "red", xlim = xlim, ylim = ylim,
       main = pj.abbv, pch = 16,  cex = 0.5, xlab = NA, ylab = NA, 
       xaxt = "n", yaxt = "n")
  plot(st_geometry(states), add = T)
}

GETTING CONVEX HULLS

I don’t want this to just be a presence-only type analysis. So, I need more than just the plots with P and J present. I also need absence plots. But, I don’t want to use ALL of the plots where P and J aren’t present in ALL of the states’ data, so I think it makes sense to spatially refine the candidate plot region. To do this, I am going to generate some convex hulls, which will represent an encompassing geometric feature within which all of the plot data containing P and J species are found.

First, I’ll generate convex hulls around each species individually…

# loop through species and get convex hulls
for (i in seq(1,length(pj.codes))){
  pj.abbv <- pj.abbvs[i]
  fld <- paste0(pj.abbv, "_PRESABS")
  df.plot.i <- df.plot[df.plot[,fld] == 1,]
  if (nrow(df.plot.i) == 0) next
  sf.plot.i <- st_as_sf(df.plot.i, coords = c("LON", "LAT"), crs = 4269)
  hull <- st_convex_hull(st_union(sf.plot.i)) %>% st_as_sf
  hull$species <- pj.abbv
  if (i == 1){
    hulls <- hull
  } else {
    hulls <- rbind(hulls, hull)
  }
}

# plot it out
par(mfrow = c(4,4),
    mar = c(0,0,2,0),
    oma = rep(0,4),
    las = 1)
xlim <- range(df.plot$LON)
ylim <- range(df.plot$LAT)
for (i in seq(1,nrow(hulls))){
  pj.abbv <- hulls$species[i]
  plot(st_geometry(hulls[i,]), col = "red", xlim = xlim, ylim = ylim,
       main = pj.abbv, pch = 16,  cex = 0.5, xlab = NA, ylab = NA, 
       xaxt = "n", yaxt = "n")
  plot(st_geometry(states), add = T)
}

Then, I’ll create combined hulls in two ways:

  1. The union of the individual species hulls (more restrictive)
  2. A convex hull around all plots with any PJ species (more inclusive)

In both cases, I will buffer by 20 km, assuming that the FIA data may not have caught the absolute range extremes due to the relatively sparse spatial sampling design. This will also allow for some “other wooded lands” inclusion, given the fact that there will likley be a gradient from >10% cover to 0% cover over a distance.

# combine individual hulls to create combined hull
hull.comb.1 <- st_union(hulls) %>%
  st_transform(crs = 5070) %>%
  st_buffer(20000) %>%
  st_transform(crs = 4269)

# create hull around all pj plots
presabs.cols <- colnames(df.plot)[grep("_PRESABS", colnames(df.plot))]
presabs.cols <- presabs.cols[presabs.cols != "PJ_PRESABS"]
df.plot$PJ_PRESABS <- rowSums(df.plot[,presabs.cols])
df.plot.pj <- df.plot[df.plot$PJ_PRESABS >= 1,]
sf.plot.pj <- st_as_sf(df.plot.pj, coords = c("LON", "LAT"), crs = 4269)
hull.comb.2 <- st_convex_hull(st_union(sf.plot.pj)) %>% 
  st_as_sf %>%
  st_transform(crs = 5070) %>%
  st_buffer(20000) %>%
  st_transform(crs = 4269)

# plot it out
par(mar = c(5,5,1,1),
    las = 1)
xlim <- range(df.plot$LON)
ylim <- range(df.plot$LAT)
plot(st_geometry(states), xlim = xlim, ylim = ylim,
     xlab = "LON", ylab = "LAT")
box()
axis(1)
axis(2)
plot(st_geometry(hull.comb.1), border = "blue", add = T)
plot(st_geometry(hull.comb.2), border = "red", add = T)
legend("topright", legend = c("Individual", "Combined"), lty = 1,
       col = c("blue", "red"))

Given the relatively small difference and greater geometric simplicity, it likely makes sense to use the more inclusive hull.

However, this does include Rocky Mountain juniper (JUSC2), which is arguably not really a “PJ woodland” species. If I remove JUSC2 from the analysis…

# combine individual hulls to create combined hull
hulls <- hulls[hulls$species != "JUSC2",]
hull.comb.3 <- st_union(hulls) %>%
  st_transform(crs = 5070) %>%
  st_buffer(20000) %>%
  st_transform(crs = 4269)

# create hull around all pj plots
presabs.cols <- colnames(df.plot)[grep("_PRESABS", colnames(df.plot))]
presabs.cols <- presabs.cols[presabs.cols != "PJ_PRESABS"]
presabs.cols <- presabs.cols[presabs.cols != "JUSC2_PRESABS"]
df.plot$PJ_PRESABS <- rowSums(df.plot[,presabs.cols])
df.plot.pj <- df.plot[df.plot$PJ_PRESABS >= 1,]
sf.plot.pj <- st_as_sf(df.plot.pj, coords = c("LON", "LAT"), crs = 4269)
hull.comb.4 <- st_convex_hull(st_union(sf.plot.pj)) %>% 
  st_as_sf %>%
  st_transform(crs = 5070) %>%
  st_buffer(20000) %>%
  st_transform(crs = 4269)

# plot it out
par(mar = c(5,5,1,1),
    las = 1)
xlim <- range(df.plot$LON)
ylim <- range(df.plot$LAT)
plot(st_geometry(states), xlim = xlim, ylim = ylim,
     xlab = "LON", ylab = "LAT")
box()
axis(1)
axis(2)
plot(st_geometry(hull.comb.3), border = "blue", add = T)
plot(st_geometry(hull.comb.4), border = "red", add = T)
legend("topright", legend = c("Individual", "Combined"), lty = 1,
       col = c("blue", "red"))

Although these hulls are certainly more refined, I worry about the effect on the eventual impact of a resulting paper if JUSC2 is removed. That would change a theoretical paper title from:

Mapping the distribution of all woodland piñon and juniper species in the Western US

to:

Mapping the distribution of all woodland piñon and juniper species in the Western US (except Rocky Mountain Juniper)

Kidding, of course, but it does raise a question of “why not include them all?” If the answer is just “the extent was too big”, that’s not a great answer. Though perhaps an understandable one.

For the sake of the rest of this document, at least, I’m going to go with the most inclusive mask (the full hull on all P and J species).

GETTING FIA PLOTS WITHIN HULL

Now I’m going to figure out which plots are needed within the target convex hull. This would be a list of plots that I would eventually send to Erin for predictor data extraction.

# add pj presence-absence column to plot data
presabs.cols <- colnames(df.plot)[grep("_PRESABS", colnames(df.plot))]
presabs.cols <- presabs.cols[presabs.cols != "PJ_PRESABS"]
df.plot$PJ_PRESABS <- rowSums(df.plot[,presabs.cols])

# clip fia plots to hull
sf.plot <- st_as_sf(df.plot, coords = c("LON", "LAT"), crs = 4269) %>%
  st_intersection(hull.comb.2)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
# plot the data out
sf.plot <- sf.plot[order(rev(sf.plot$PLOT_STATUS_CD), sf.plot$PJ_PRESABS),]
cols <- rep("red", nrow(sf.plot))
cols[sf.plot$PLOT_STATUS_CD == 1] <- "green"
cols[sf.plot$PJ_PRESABS >= 1] <- "blue"
plot(st_geometry(sf.plot), col = cols, pch = 16, cex = 0.1)
plot(st_geometry(states), add = T)
box()
axis(1)
axis(2)
legend("topleft", legend = c("Non-Forest", "Non-PJ", "PJ"), pch = 16,
       col = c("red", "green", "blue"))

# create statecd lookup table
statecds <- c(4,6,8,16,20,30,31,32,35,38,40,41,46,48,49,53,56)
statenms <- c("AZ", "CA", "CO", "ID", "KS", "MT", "NE", "NV", "NM", "ND", 
              "OK", "OR", "SD", "TX", "UT", "WA", "WY")

# create a tabular summary of plot counts by state
plots.by.state <- tapply(sf.plot$STATECD, sf.plot$STATECD, length)
names(plots.by.state) <- statenms
plots.by.state
##    AZ    CA    CO    ID    KS    MT    NE    NV    NM    ND    OK    OR    SD 
## 18913 11083 10179  8384  6116 12348  6692 11920 21921  1913  4385  3827  5555 
##    TX    UT    WA    WY 
## 15291  9628    79 17966

DISTINGUISHING BETWEEN PJ PRESENCE AND DOMINANCE

It will be important to not only model where PJ species are present, but also where they are dominant, as the final mapping mask should not include areas where P and J might be relatively small components. I will do this using the per-tree carbon estimates in conjunction with the expansion factors.

# apply expansion factor to aboveground carbon
df.tree$CARBON_AG_DENS <- df.tree$CARBON_AG * df.tree$TPA_UNADJ

# get total live pj carbon with each plot
df.tree.carb <- df.tree %>%
  group_by(PLT_CN) %>%
  filter(STATUSCD == 1) %>%
  summarize(CARBON_AG_DENS_TOTAL = sum(CARBON_AG_DENS),
            CARBON_AG_DENS_PJ = sum(CARBON_AG_DENS[SPCD %in% pj.codes]),
            CARBON_AG_DENS_OTHER = sum(CARBON_AG_DENS[!SPCD %in% pj.codes])) %>%
  mutate(CARBON_AG_DENS_PJ_PROP = CARBON_AG_DENS_PJ / CARBON_AG_DENS_TOTAL) %>%
  as.data.frame()

# determine which plots are pj dominant by carbon
df.tree.carb$PJ_DOM <- 0
df.tree.carb$PJ_DOM[df.tree.carb$CARBON_AG_DENS_PJ_PROP >= 0.5] <- 1

# join back to plots
sf.plot <- merge(sf.plot, df.tree.carb, by.x = "CN", by.y = "PLT_CN", all.x = T)

# plot it out
sf.plot <- sf.plot[order(rev(sf.plot$PLOT_STATUS_CD), 
                         sf.plot$PJ_PRESABS, 
                         sf.plot$PJ_DOM),]
cols <- rep("red", nrow(sf.plot))
cols[sf.plot$PLOT_STATUS_CD == 1] <- "green"
cols[sf.plot$PJ_PRESABS >= 1] <- "blue"
cols[sf.plot$PJ_DOM >= 1] <- "yellow"
plot(st_geometry(sf.plot), col = cols, pch = 16, cex = 0.1)
plot(st_geometry(states), add = T)
box()
axis(1)
axis(2)
legend("topleft", legend = c("Non-Forest", "Non-PJ", "PJ Present", "PJ Dominant"),
       pch = 16, col = c("red", "green", "blue", "yellow"))