#load libraries
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
library(units)
## udunits database from /Users/morgan/Library/R/arm64/4.5/library/units/share/udunits/udunits2.xml
library(nngeo)
library(ggplot2)
#Acorn Woodpecker
occ_sf <- st_read(
"/Volumes/SanDisk4TB/UrbanEcosystemsPub/GBIF/acornwoodpecker/occ_woodpecker_26911.shp"
)
## Reading layer `occ_woodpecker_26911' from data source
## `/Volumes/SanDisk4TB/UrbanEcosystemsPub/GBIF/acornwoodpecker/occ_woodpecker_26911.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1239 features and 230 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 346146.2 ymin: 3738981 xmax: 393538.8 ymax: 3799830
## Projected CRS: NAD83 / UTM zone 11N
st_crs(occ_sf)
## Coordinate Reference System:
## User input: NAD83 / UTM zone 11N
## wkt:
## PROJCRS["NAD83 / UTM zone 11N",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["UTM zone 11N",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",0,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",-117,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",0.9996,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",500000,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## ID["EPSG",26911]]
summary(occ_sf$coordinate)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.00 6.25 20.00 1956.61 114.75 50575.00 381
range(occ_sf$coordinate, na.rm = TRUE)
## [1] 1 50575
occ_sf <- occ_sf %>%
rename(coord_unc_m = coordinate)
#Keep only records with reported uncertainty
occ_sf_unc <- occ_sf %>%
filter(!is.na(coord_unc_m))
##QC check
cat("Total records:", nrow(occ_sf), "\n")
## Total records: 1239
cat("Records with uncertainty:", nrow(occ_sf_unc), "\n")
## Records with uncertainty: 858
cat("Proportion retained:",
round(nrow(occ_sf_unc) / nrow(occ_sf), 3), "\n")
## Proportion retained: 0.692
##summary statistics
summary(occ_sf_unc$coord_unc_m)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 6.25 20.00 1956.61 114.75 50575.00
quantile(
occ_sf_unc$coord_unc_m,
probs = c(0.25, 0.5, 0.75, 0.9, 0.95)
)
## 25% 50% 75% 90% 95%
## 6.25 20.00 114.75 690.80 28874.00
##Threshold analysis
thresholds <- c(30, 50, 100, 250, 500, 1000, 2000, 5000)
retention_df <- data.frame(
threshold_m = thresholds,
n_retained = sapply(thresholds, function(t) {
sum(occ_sf_unc$coord_unc_m <= t)
})
)
retention_df$proportion_retained <-
retention_df$n_retained / nrow(occ_sf_unc)
retention_df
## threshold_m n_retained proportion_retained
## 1 30 495 0.5769231
## 2 50 584 0.6806527
## 3 100 637 0.7424242
## 4 250 716 0.8344988
## 5 500 757 0.8822844
## 6 1000 779 0.9079254
## 7 2000 789 0.9195804
## 8 5000 802 0.9347319
occ_sf <- st_read(
"/Volumes/SanDisk4TB/UrbanEcosystemsPub/GBIF/westernbluebird/occ_bluebird_clip_26911.shp"
)
## Reading layer `occ_bluebird_clip_26911' from data source
## `/Volumes/SanDisk4TB/UrbanEcosystemsPub/GBIF/westernbluebird/occ_bluebird_clip_26911.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1532 features and 230 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 346216.4 ymin: 3733013 xmax: 394340.9 ymax: 3799322
## Projected CRS: NAD83 / UTM zone 11N
st_crs(occ_sf)
## Coordinate Reference System:
## User input: NAD83 / UTM zone 11N
## wkt:
## PROJCRS["NAD83 / UTM zone 11N",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["UTM zone 11N",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",0,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",-117,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",0.9996,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",500000,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## ID["EPSG",26911]]
summary(occ_sf$coordinate)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1 6 16 1374 82 62461 316
range(occ_sf$coordinate, na.rm = TRUE)
## [1] 1 62461
occ_sf <- occ_sf %>%
rename(coord_unc_m = coordinate)
#Keep only records with reported uncertainty
occ_sf_unc <- occ_sf %>%
filter(!is.na(coord_unc_m))
##QC check
cat("Total records:", nrow(occ_sf), "\n")
## Total records: 1532
cat("Records with uncertainty:", nrow(occ_sf_unc), "\n")
## Records with uncertainty: 1216
cat("Proportion retained:",
round(nrow(occ_sf_unc) / nrow(occ_sf), 3), "\n")
## Proportion retained: 0.794
##summary statistics
summary(occ_sf_unc$coord_unc_m)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 6 16 1374 82 62461
quantile(
occ_sf_unc$coord_unc_m,
probs = c(0.25, 0.5, 0.75, 0.9, 0.95)
)
## 25% 50% 75% 90% 95%
## 6.00 16.00 82.00 400.00 2003.75
##Threshold analysis
thresholds <- c(30, 50, 100, 250, 500, 1000, 2000, 5000)
retention_df <- data.frame(
threshold_m = thresholds,
n_retained = sapply(thresholds, function(t) {
sum(occ_sf_unc$coord_unc_m <= t)
})
)
retention_df$proportion_retained <-
retention_df$n_retained / nrow(occ_sf_unc)
retention_df
## threshold_m n_retained proportion_retained
## 1 30 740 0.6085526
## 2 50 825 0.6784539
## 3 100 926 0.7615132
## 4 250 1054 0.8667763
## 5 500 1114 0.9161184
## 6 1000 1135 0.9333882
## 7 2000 1155 0.9498355
## 8 5000 1164 0.9572368