Setup

# Loading in LiDAR data using lidR readLAS function

# First, the four tiles for the 2017 data: 2017 data uses smaller tiles than the 2014 data, so it was necessary to have four 2017 tiles to approximate one 2014 tile

a2017 <- readLAS("~/Desktop/Data Viz with R/R Spatial Project Alex/2017/30165.las")
## Warning: There are 3702152 points flagged 'withheld'.
b2017 <- readLAS("~/Desktop/Data Viz with R/R Spatial Project Alex/2017/30167.las")
## Warning: There are 3377152 points flagged 'withheld'.
c2017 <- readLAS("~/Desktop/Data Viz with R/R Spatial Project Alex/2017/32165.las")
## Warning: There are 2028861 points flagged 'withheld'.
d2017 <- readLAS("~/Desktop/Data Viz with R/R Spatial Project Alex/2017/32167.las")
## Warning: There are 3185123 points flagged 'withheld'.
tile2014 <- readLAS("~/Desktop/Data Viz with R/R Spatial Project Alex//18TWK985970.las")
# First experimental plot using 2014 data, copied from lidR documentation to begin learning how to use plots

# Plot object by scan angle, 
# make the background white, 
# display XYZ axis and  scale colors
lidR::plot(tile2014, color = "ScanAngleRank", bg = "white", axis = TRUE, legend = TRUE)

Data Cleaning

# Validating data, following steps in lidR documentation
las_check(tile2014)
## 
##  Checking the data
##   - Checking coordinates... ✓
##   - Checking coordinates type... ✓
##   - Checking coordinates range... ✓
##   - Checking coordinates quantization... ✓
##   - Checking attributes type... ✓
##   - Checking ReturnNumber validity... ✓
##   - Checking NumberOfReturns validity... ✓
##   - Checking ReturnNumber vs. NumberOfReturns... ✓
##   - Checking RGB validity... ✓
##   - Checking absence of NAs... ✓
##   - Checking duplicated points...
##     âš  2404 points are duplicated and share XYZ coordinates with other points
##   - Checking degenerated ground points...
##     âš  There were 1 degenerated ground points. Some X Y Z coordinates were repeated
##   - Checking attribute population... ✓
##   - Checking gpstime incoherances
##     ✗ 4864066 pulses (points with the same gpstime) have points with identical ReturnNumber
##   - Checking flag attributes... ✓
##   - Checking user data attribute... ✓
##  Checking the header
##   - Checking header completeness... ✓
##   - Checking scale factor validity... ✓
##   - Checking point data format ID validity... ✓
##   - Checking extra bytes attributes validity... ✓
##   - Checking the bounding box validity... ✓
##   - Checking coordinate reference system... ✓
##  Checking header vs data adequacy
##   - Checking attributes vs. point format... ✓
##   - Checking header bbox vs. actual content... ✓
##   - Checking header number of points vs. actual content... ✓
##   - Checking header return number vs. actual content... ✓
##  Checking coordinate reference system...
##   - Checking if the CRS was understood by R...
##     ✗ EPSG code 32767 unknown
##  Checking preprocessing already done 
##   - Checking ground classification... yes
##   - Checking normalization... no
##   - Checking negative outliers...
##     âš  1927436 points below 0
##   - Checking flightline classification... yes
##  Checking compression
##   - Checking attribute compression...
##    -  Synthetic_flag is compressed
##    -  Keypoint_flag is compressed
##    -  Withheld_flag is compressed
##    -  UserData is compressed
# Cleaning up 2014 data based on las_check()

# Step 1: Fix the issue with the CRS being unknown

st_crs(tile2014) <- 6347


# Step 2: Getting rid of the 2404 duplicate points
tile2014 <- filter_duplicates(tile2014)
# Repeating las_check to see what's fixed and what's still a problem

las_check(tile2014)
## 
##  Checking the data
##   - Checking coordinates... ✓
##   - Checking coordinates type... ✓
##   - Checking coordinates range... ✓
##   - Checking coordinates quantization... ✓
##   - Checking attributes type... ✓
##   - Checking ReturnNumber validity... ✓
##   - Checking NumberOfReturns validity... ✓
##   - Checking ReturnNumber vs. NumberOfReturns... ✓
##   - Checking RGB validity... ✓
##   - Checking absence of NAs... ✓
##   - Checking duplicated points... ✓
##   - Checking degenerated ground points... ✓
##   - Checking attribute population... ✓
##   - Checking gpstime incoherances
##     ✗ 4862387 pulses (points with the same gpstime) have points with identical ReturnNumber
##   - Checking flag attributes... ✓
##   - Checking user data attribute... ✓
##  Checking the header
##   - Checking header completeness... ✓
##   - Checking scale factor validity... ✓
##   - Checking point data format ID validity... ✓
##   - Checking extra bytes attributes validity... ✓
##   - Checking the bounding box validity... ✓
##   - Checking coordinate reference system... ✓
##  Checking header vs data adequacy
##   - Checking attributes vs. point format... ✓
##   - Checking header bbox vs. actual content... ✓
##   - Checking header number of points vs. actual content... ✓
##   - Checking header return number vs. actual content... ✓
##  Checking coordinate reference system...
##   - Checking if the CRS was understood by R... ✓
##  Checking preprocessing already done 
##   - Checking ground classification... yes
##   - Checking normalization... no
##   - Checking negative outliers...
##     âš  1927213 points below 0
##   - Checking flightline classification... yes
##  Checking compression
##   - Checking attribute compression...
##    -  Synthetic_flag is compressed
##    -  Keypoint_flag is compressed
##    -  Withheld_flag is compressed
##    -  UserData is compressed
print(tile2014)
## class        : LAS (v1.2 format 1)
## memory       : 782.9 Mb 
## extent       : 598500, 6e+05, 4497000, 4498500 (xmin, xmax, ymin, ymax)
## coord. ref.  : NAD83(2011) / UTM zone 18N 
## area         : 2.24 km²
## points       : 12.83 million points
## density      : 5.71 points/m²
## density      : 5.31 pulses/m²
# Negative elevation points are probably fine because we're looking at shoreline

# Normalization...no
# In the documentation, it mentions the goal of normalization is to get flat terrain, which we don't need cause we're focusing on ground points/bare earth


# Looking at classifications
unique(tile2014$Classification)
## [1]  1  9  7 10  2
# Filtering out all but last returns, as per project instructions 

tile2014 <- filter_poi(tile2014, ReturnNumber == NumberOfReturns)

print(tile2014)
## class        : LAS (v1.2 format 1)
## memory       : 727 Mb 
## extent       : 598500, 6e+05, 4497000, 4498500 (xmin, xmax, ymin, ymax)
## coord. ref.  : NAD83(2011) / UTM zone 18N 
## area         : 2.24 km²
## points       : 11.91 million points
## density      : 5.31 points/m²
## density      : 4.91 pulses/m²
# Merging 2017 tiles into one tile for easier processing later on

# Using LAScatalog processing engine for better results, based on what it recommends in the documentation 

#removing smaller datasets to free up space, since am going with this new approach

rm(a2017)
rm(b2017)
rm(c2017)
rm(d2017)

ctg2017 <- readLAScatalog("~/Desktop/Data Viz with R/R Spatial Project Alex/2017")
las2017 <- readLAS(ctg2017)
## Warning: There are 12293288 points flagged 'withheld'.
las_check(las2017)
## 
##  Checking the data
##   - Checking coordinates... ✓
##   - Checking coordinates type... ✓
##   - Checking coordinates range... ✓
##   - Checking coordinates quantization... ✓
##   - Checking attributes type... ✓
##   - Checking ReturnNumber validity... ✓
##   - Checking NumberOfReturns validity... ✓
##   - Checking ReturnNumber vs. NumberOfReturns... ✓
##   - Checking RGB validity... ✓
##   - Checking absence of NAs... ✓
##   - Checking duplicated points...
##     âš  19267 points are duplicated and share XYZ coordinates with other points
##   - Checking degenerated ground points...
##     âš  There were 1 degenerated ground points. Some X Y Z coordinates were repeated
##     âš  There were 4 degenerated ground points. Some X Y coordinates were repeated but with different Z coordinates
##   - Checking attribute population... ✓
##   - Checking gpstime incoherances
##     ✗ 6336543 pulses (points with the same gpstime) have points with identical ReturnNumber
##   - Checking flag attributes...
##     🛈 12293288 points flagged 'withheld'
##   - Checking user data attribute...
##     🛈 36203865 points have a non 0 UserData attribute. This probably has a meaning
##  Checking the header
##   - Checking header completeness... ✓
##   - Checking scale factor validity... ✓
##   - Checking point data format ID validity... ✓
##   - Checking extra bytes attributes validity... ✓
##   - Checking the bounding box validity... ✓
##   - Checking coordinate reference system... ✓
##  Checking header vs data adequacy
##   - Checking attributes vs. point format... ✓
##   - Checking header bbox vs. actual content... ✓
##   - Checking header number of points vs. actual content... ✓
##   - Checking header return number vs. actual content... ✓
##  Checking coordinate reference system...
##   - Checking if the CRS was understood by R... ✓
##  Checking preprocessing already done 
##   - Checking ground classification... yes
##   - Checking normalization... no
##   - Checking negative outliers...
##     âš  24165921 points below 0
##   - Checking flightline classification... yes
##  Checking compression
##   - Checking attribute compression...
##    -  Synthetic_flag is compressed
##    -  Keypoint_flag is compressed
# Checking CRS as advised

print(las2017)
## class        : LAS (v1.4 format 6)
## memory       : 3.6 Gb 
## extent       : 1030000, 1035000, 165000, 170000 (xmin, xmax, ymin, ymax)
## coord. ref.  : NAD83 / New York Long Island (ftUS) + NAVD88 height (ftUS) 
## area         : 21.62 kus-ft²
## points       : 46.29 million points
## density      : 2.14 points/us-ft²
## density      : 1.61 pulses/us-ft²
#Based on an online search, EPSG is EPSG:2263
las2017 <- filter_duplicates(las2017)
#Checking to see what the classification value for withheld points is, so I can remove them
unique(las2017$Classification)
## [1]  7  2  1 10  9 45 40 41
# Getting rid of withheld points - based on online resources, the code for withheld points should be 7

las2017 <- filter_poi(las2017, !Withheld_flag)
las_check(las2017)
## 
##  Checking the data
##   - Checking coordinates... ✓
##   - Checking coordinates type... ✓
##   - Checking coordinates range... ✓
##   - Checking coordinates quantization... ✓
##   - Checking attributes type... ✓
##   - Checking ReturnNumber validity... ✓
##   - Checking NumberOfReturns validity... ✓
##   - Checking ReturnNumber vs. NumberOfReturns... ✓
##   - Checking RGB validity... ✓
##   - Checking absence of NAs... ✓
##   - Checking duplicated points... ✓
##   - Checking degenerated ground points...
##     âš  There were 4 degenerated ground points. Some X Y coordinates were repeated but with different Z coordinates
##   - Checking attribute population... ✓
##   - Checking gpstime incoherances
##     ✗ 4290368 pulses (points with the same gpstime) have points with identical ReturnNumber
##   - Checking flag attributes... ✓
##   - Checking user data attribute...
##     🛈 26546448 points have a non 0 UserData attribute. This probably has a meaning
##  Checking the header
##   - Checking header completeness... ✓
##   - Checking scale factor validity... ✓
##   - Checking point data format ID validity... ✓
##   - Checking extra bytes attributes validity... ✓
##   - Checking the bounding box validity... ✓
##   - Checking coordinate reference system... ✓
##  Checking header vs data adequacy
##   - Checking attributes vs. point format... ✓
##   - Checking header bbox vs. actual content... ✓
##   - Checking header number of points vs. actual content... ✓
##   - Checking header return number vs. actual content... ✓
##  Checking coordinate reference system...
##   - Checking if the CRS was understood by R... ✓
##  Checking preprocessing already done 
##   - Checking ground classification... yes
##   - Checking normalization... no
##   - Checking negative outliers...
##     âš  17514707 points below 0
##   - Checking flightline classification... yes
##  Checking compression
##   - Checking attribute compression...
##    -  Synthetic_flag is compressed
##    -  Keypoint_flag is compressed
# Seems all good for now
# Filtering by last returns

las2017 <- filter_poi(las2017, ReturnNumber == NumberOfReturns)

unique(las2017$Classification)
## [1]  2  1 10  9 45 40 41
plot(las2017)
# Based on the NY state website with the original datasets, the 2014 data and the combined 2017 data do not fully overlap. 
 
# I will need to clip one to the other to make sure I'm analyzing the same area. 

# This means I need to reproject one of the two to the other's projection first

# For 2014, current projection is EPSG:6347, for 2017, current projection is EPSG:2263

plot(tile2014)
# Decided to use a TIN initially to interpolate the LiDAR data for efficiency, but considering other methods and weighing pros and cons of different options
DEM2014 <- rasterize_terrain(tile2014, res = 1, algorithm = tin())
## Delaunay rasterization[==============------------------------------------] 29% (1 threads)Delaunay rasterization[===============-----------------------------------] 30% (1 threads)Delaunay rasterization[===============-----------------------------------] 31% (1 threads)Delaunay rasterization[================----------------------------------] 32% (1 threads)Delaunay rasterization[================----------------------------------] 33% (1 threads)Delaunay rasterization[=================---------------------------------] 34% (1 threads)Delaunay rasterization[=================---------------------------------] 35% (1 threads)Delaunay rasterization[==================--------------------------------] 36% (1 threads)Delaunay rasterization[==================--------------------------------] 37% (1 threads)Delaunay rasterization[===================-------------------------------] 38% (1 threads)Delaunay rasterization[===================-------------------------------] 39% (1 threads)Delaunay rasterization[====================------------------------------] 40% (1 threads)Delaunay rasterization[====================------------------------------] 41% (1 threads)Delaunay rasterization[=====================-----------------------------] 42% (1 threads)Delaunay rasterization[=====================-----------------------------] 43% (1 threads)Delaunay rasterization[======================----------------------------] 44% (1 threads)Delaunay rasterization[======================----------------------------] 45% (1 threads)Delaunay rasterization[=======================---------------------------] 46% (1 threads)Delaunay rasterization[=======================---------------------------] 47% (1 threads)Delaunay rasterization[========================--------------------------] 48% (1 threads)Delaunay rasterization[========================--------------------------] 49% (1 threads)Delaunay rasterization[=========================-------------------------] 50% (1 threads)Delaunay rasterization[=========================-------------------------] 51% (1 threads)Delaunay rasterization[==========================------------------------] 52% (1 threads)Delaunay rasterization[==========================------------------------] 53% (1 threads)Delaunay rasterization[===========================-----------------------] 54% (1 threads)Delaunay rasterization[===========================-----------------------] 55% (1 threads)Delaunay rasterization[============================----------------------] 56% (1 threads)Delaunay rasterization[============================----------------------] 57% (1 threads)Delaunay rasterization[=============================---------------------] 58% (1 threads)Delaunay rasterization[=============================---------------------] 59% (1 threads)Delaunay rasterization[==============================--------------------] 60% (1 threads)Delaunay rasterization[==============================--------------------] 61% (1 threads)Delaunay rasterization[===============================-------------------] 62% (1 threads)Delaunay rasterization[===============================-------------------] 63% (1 threads)Delaunay rasterization[================================------------------] 64% (1 threads)Delaunay rasterization[================================------------------] 65% (1 threads)Delaunay rasterization[=================================-----------------] 66% (1 threads)Delaunay rasterization[=================================-----------------] 67% (1 threads)Delaunay rasterization[==================================----------------] 68% (1 threads)Delaunay rasterization[==================================----------------] 69% (1 threads)Delaunay rasterization[===================================---------------] 70% (1 threads)Delaunay rasterization[===================================---------------] 71% (1 threads)Delaunay rasterization[====================================--------------] 72% (1 threads)Delaunay rasterization[====================================--------------] 73% (1 threads)Delaunay rasterization[=====================================-------------] 74% (1 threads)Delaunay rasterization[=====================================-------------] 75% (1 threads)Delaunay rasterization[======================================------------] 76% (1 threads)Delaunay rasterization[======================================------------] 77% (1 threads)Delaunay rasterization[=======================================-----------] 78% (1 threads)Delaunay rasterization[=======================================-----------] 79% (1 threads)Delaunay rasterization[========================================----------] 80% (1 threads)Delaunay rasterization[========================================----------] 81% (1 threads)Delaunay rasterization[=========================================---------] 82% (1 threads)Delaunay rasterization[=========================================---------] 83% (1 threads)Delaunay rasterization[==========================================--------] 84% (1 threads)Delaunay rasterization[==========================================--------] 85% (1 threads)Delaunay rasterization[===========================================-------] 86% (1 threads)Delaunay rasterization[===========================================-------] 87% (1 threads)Delaunay rasterization[============================================------] 88% (1 threads)Delaunay rasterization[============================================------] 89% (1 threads)Delaunay rasterization[=============================================-----] 90% (1 threads)Delaunay rasterization[=============================================-----] 91% (1 threads)Delaunay rasterization[==============================================----] 92% (1 threads)Delaunay rasterization[==============================================----] 93% (1 threads)Delaunay rasterization[===============================================---] 94% (1 threads)Delaunay rasterization[===============================================---] 95% (1 threads)Delaunay rasterization[================================================--] 96% (1 threads)Delaunay rasterization[================================================--] 97% (1 threads)Delaunay rasterization[=================================================-] 98% (1 threads)Delaunay rasterization[=================================================-] 99% (1 threads)Delaunay rasterization[==================================================] 100% (1 threads)
plot(DEM2014)

#las2017_transf <- st_transform(las2017, 6347)

#commented out temporarily to knit because it takes a long time
#DEM2017 <- rasterize_terrain(las2017_transf, res = 1, algorithm = tin())

#commented out temporarily to knit because it takes a long time
#plot(DEM2017)