library(dplyr)
library(ggplot2)
library(here)
library(lwgeom)
library(readr)
#library(rgdal)
library(sf)
ecoreg<-st_read(here::here("WWF_Ecoregions/official/wwf_terr_ecos.shp"))
## Reading layer `wwf_terr_ecos' from data source `D:\Fiona\SHEFS\WWF_Ecoregions\official\wwf_terr_ecos.shp' using driver `ESRI Shapefile'
## Simple feature collection with 14458 features and 21 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -180 ymin: -89.89197 xmax: 180 ymax: 83.62313
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
ecoreg<-ecoreg %>% 
    select(OBJECTID, ECO_NAME)   ##just selecting out the columns we're interested in 

occ_files<-list.files(here::here("GBIF/Occurences/Network_Groups/"), pattern = "*.csv", full.names = TRUE, recursive = TRUE)  

head(read.csv(occ_files[1]))
##          key                     scientificName decimalLatitude
## 1 2339121318 Ciconia episcopus (Boddaert, 1783)       -22.45792
## 2 2339162388 Ciconia episcopus (Boddaert, 1783)       -22.37458
## 3 2339239492 Ciconia episcopus (Boddaert, 1783)       -25.37458
## 4 2349904494 Ciconia episcopus (Boddaert, 1783)       -29.37458
## 5 2005648505 Ciconia episcopus (Boddaert, 1783)        -3.29125
## 6 2339326045 Ciconia episcopus (Boddaert, 1783)       -25.12458
##   decimalLongitude         issues
## 1         31.20792 cdround,gass84
## 2         31.20792 cdround,gass84
## 3         31.87458 cdround,gass84
## 4         30.12458 cdround,gass84
## 5         39.95792 cdround,gass84
## 6         31.87458 cdround,gass84
background_sampler<-function(occ_file, no_pnts){
  
  xy<-read_csv(occ_file) %>%
    select("decimalLongitude", "decimalLatitude") %>% 
    distinct() %>% 
    st_as_sf(.,coords =c("decimalLongitude", "decimalLatitude"), crs = 4326) %>% 
    st_intersection(., ecoreg)

    sf_int<-st_intersection(xy, ecoreg)

        
  bkg_ecoreg<-ecoreg %>% 
    filter(ECO_NAME %in% sf_int$ECO_NAME) %>% 
    st_sample(., size = no_pnts, type = "random")
  
  print(basename(occ_file))
  return(bkg_ecoreg)  
}

Checking the background points are where you expect for a random species

rsp<-runif(1,min = 1, max = length(occ_files))

check<-background_sampler(occ_files[rsp], 10000)
## [1] "Gynanisa_maja.csv"
pnt_buff<-0.15

xy<-read_csv(occ_files[rsp]) %>%
  select("decimalLongitude", "decimalLatitude") %>% 
  distinct() %>% 
  st_as_sf(.,coords =c("decimalLongitude", "decimalLatitude"), crs = 4326) %>% 
  st_intersection(., ecoreg)

ggplot(data = ecoreg)+
  geom_sf(aes(fill = ECO_NAME, alpha = 0.6))+
  geom_sf(data = check, alpha = 0.05)+
  geom_sf(data = xy, colour = "red", size = 2)+
  coord_sf(xlim = c(st_bbox(check)[1] - pnt_buff, st_bbox(check)[3]+pnt_buff), ylim = c(st_bbox(check)[2] - pnt_buff, st_bbox(check)[4]+pnt_buff))+
  theme_bw()+ 
  theme(legend.position = "none")+
  ggtitle(gsub(".csv","",paste0(basename(occ_files[rsp]))))