#load libraries

library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
library(units)
## udunits database from /Users/morgan/Library/R/arm64/4.5/library/units/share/udunits/udunits2.xml
library(nngeo)
library(ggplot2)

#Acorn Woodpecker

read in woodpecker occ records & clean up data

occ_sf <- st_read(
  "/Volumes/SanDisk4TB/UrbanEcosystemsPub/GBIF/acornwoodpecker/occ_woodpecker_26911.shp"
)
## Reading layer `occ_woodpecker_26911' from data source 
##   `/Volumes/SanDisk4TB/UrbanEcosystemsPub/GBIF/acornwoodpecker/occ_woodpecker_26911.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1239 features and 230 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 346146.2 ymin: 3738981 xmax: 393538.8 ymax: 3799830
## Projected CRS: NAD83 / UTM zone 11N
st_crs(occ_sf)
## Coordinate Reference System:
##   User input: NAD83 / UTM zone 11N 
##   wkt:
## PROJCRS["NAD83 / UTM zone 11N",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["UTM zone 11N",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",0,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",-117,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",0.9996,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",500000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     ID["EPSG",26911]]
summary(occ_sf$coordinate)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##     1.00     6.25    20.00  1956.61   114.75 50575.00      381
range(occ_sf$coordinate, na.rm = TRUE)
## [1]     1 50575
occ_sf <- occ_sf %>%
  rename(coord_unc_m = coordinate)

#Keep only records with reported uncertainty
occ_sf_unc <- occ_sf %>%
  filter(!is.na(coord_unc_m))

##QC check

cat("Total records:", nrow(occ_sf), "\n")
## Total records: 1239
cat("Records with uncertainty:", nrow(occ_sf_unc), "\n")
## Records with uncertainty: 858
cat("Proportion retained:",
    round(nrow(occ_sf_unc) / nrow(occ_sf), 3), "\n")
## Proportion retained: 0.692

##summary statistics

summary(occ_sf_unc$coord_unc_m)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     1.00     6.25    20.00  1956.61   114.75 50575.00
quantile(
  occ_sf_unc$coord_unc_m,
  probs = c(0.25, 0.5, 0.75, 0.9, 0.95)
)
##      25%      50%      75%      90%      95% 
##     6.25    20.00   114.75   690.80 28874.00

##Threshold analysis

thresholds <- c(30, 50, 100, 250, 500, 1000, 2000, 5000)

retention_df <- data.frame(
  threshold_m = thresholds,
  n_retained = sapply(thresholds, function(t) {
    sum(occ_sf_unc$coord_unc_m <= t)
  })
)

retention_df$proportion_retained <-
  retention_df$n_retained / nrow(occ_sf_unc)

retention_df
##   threshold_m n_retained proportion_retained
## 1          30        495           0.5769231
## 2          50        584           0.6806527
## 3         100        637           0.7424242
## 4         250        716           0.8344988
## 5         500        757           0.8822844
## 6        1000        779           0.9079254
## 7        2000        789           0.9195804
## 8        5000        802           0.9347319

western bluebird

occ_sf <- st_read(
  "/Volumes/SanDisk4TB/UrbanEcosystemsPub/GBIF/westernbluebird/occ_bluebird_clip_26911.shp"
)
## Reading layer `occ_bluebird_clip_26911' from data source 
##   `/Volumes/SanDisk4TB/UrbanEcosystemsPub/GBIF/westernbluebird/occ_bluebird_clip_26911.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1532 features and 230 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 346216.4 ymin: 3733013 xmax: 394340.9 ymax: 3799322
## Projected CRS: NAD83 / UTM zone 11N
st_crs(occ_sf)
## Coordinate Reference System:
##   User input: NAD83 / UTM zone 11N 
##   wkt:
## PROJCRS["NAD83 / UTM zone 11N",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["UTM zone 11N",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",0,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",-117,
##             ANGLEUNIT["Degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",0.9996,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",500000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     ID["EPSG",26911]]
summary(occ_sf$coordinate)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       1       6      16    1374      82   62461     316
range(occ_sf$coordinate, na.rm = TRUE)
## [1]     1 62461
occ_sf <- occ_sf %>%
  rename(coord_unc_m = coordinate)

#Keep only records with reported uncertainty
occ_sf_unc <- occ_sf %>%
  filter(!is.na(coord_unc_m))

##QC check

cat("Total records:", nrow(occ_sf), "\n")
## Total records: 1532
cat("Records with uncertainty:", nrow(occ_sf_unc), "\n")
## Records with uncertainty: 1216
cat("Proportion retained:",
    round(nrow(occ_sf_unc) / nrow(occ_sf), 3), "\n")
## Proportion retained: 0.794

##summary statistics

summary(occ_sf_unc$coord_unc_m)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       6      16    1374      82   62461
quantile(
  occ_sf_unc$coord_unc_m,
  probs = c(0.25, 0.5, 0.75, 0.9, 0.95)
)
##     25%     50%     75%     90%     95% 
##    6.00   16.00   82.00  400.00 2003.75

##Threshold analysis

thresholds <- c(30, 50, 100, 250, 500, 1000, 2000, 5000)

retention_df <- data.frame(
  threshold_m = thresholds,
  n_retained = sapply(thresholds, function(t) {
    sum(occ_sf_unc$coord_unc_m <= t)
  })
)

retention_df$proportion_retained <-
  retention_df$n_retained / nrow(occ_sf_unc)

retention_df
##   threshold_m n_retained proportion_retained
## 1          30        740           0.6085526
## 2          50        825           0.6784539
## 3         100        926           0.7615132
## 4         250       1054           0.8667763
## 5         500       1114           0.9161184
## 6        1000       1135           0.9333882
## 7        2000       1155           0.9498355
## 8        5000       1164           0.9572368