1. Load dataset of species occurrence

Load your dataset with records of species occurrences.

setwd("C:/Users/juliana.stropp/Dropbox (UFAL-ECO)/Tiago_R/Dados_Maio_2018")
df_occ<-read.csv("Dados Completeness para análise.csv", header = TRUE, sep = ";")
head(df_occ)
##       IND Database                                       Species Latitude
## 1 IND0001     GBIF Auchenipterus nuchalis (Spix & Agassiz, 1829)  -2.0431
## 2 IND0002     GBIF          Centromochlus existimatus Mees, 1974  -2.0431
## 3 IND0003     GBIF          Centromochlus existimatus Mees, 1974  -2.1961
## 4 IND0005     GBIF Auchenipterichthys longimanus (Günther, 1864)  -2.5919
## 5 IND0006     GBIF Auchenipterus nuchalis (Spix & Agassiz, 1829)  -2.0564
## 6 IND0007     GBIF     Centromochlus heckelii (De Filippi, 1853)  -2.0431
##   Longitude Date_accuracy Records         Ecoregion
## 1  -55.4097      Accurate  Unique Amazonas Lowlands
## 2  -55.4097      Accurate  Unique Amazonas Lowlands
## 3  -55.1072      Accurate  Unique Amazonas Lowlands
## 4  -55.4728      Accurate  Unique Amazonas Lowlands
## 5  -55.4192      Accurate  Unique Amazonas Lowlands
## 6  -55.4097      Accurate  Unique Amazonas Lowlands

2. Load the shapefile

Here, we will load the shape file of the geographical boundaries of the areas for which we want to calculate inventory completeness. First, we need to load the package rgdal that we will sue the manipulate spatial data

library(rgdal)
## Warning: package 'rgdal' was built under R version 3.3.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.3.3
## rgdal: version: 1.2-16, (SVN revision 701)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.0, released 2017/04/28
##  Path to GDAL shared files: C:/Users/juliana.stropp/Documents/R/win-library/3.3/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/juliana.stropp/Documents/R/win-library/3.3/rgdal/proj
##  Linking to sp version: 1.2-7

This are warning messages and info that you get after loading the package rgdal.

Now, we will load and plot the shape file with ecoregions.

ECO<-readOGR(dsn="C:/Users/juliana.stropp/Dropbox (UFAL-ECO)/Tiago_R/Dados_Maio_2018/SouthAmericaEcoregions",layer="Ecoregions Auchenipteridae")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:/Users/juliana.stropp/Dropbox (UFAL-ECO)/Tiago_R/Dados_Maio_2018/SouthAmericaEcoregions", layer: "Ecoregions Auchenipteridae"
## with 52 features
## It has 14 fields
## Integer64 fields read as strings:  tessellate extrude visibility drawOrder
proj4string(ECO)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

And this is the info we get after laoding the shapefile.

plot(ECO)

3. Overlay points with polygons

We will overlay points of species occurrences with polygons of ecoregions in order to get the ecorregion that coincide with the geographical location of each record of species occurrence. For this, we will first need to convert the dataframe “df_occ” to a SpatialPointsDataFrame.

lon<-df_occ[,c("Longitude")]
lat<-df_occ[,c("Latitude")]
coords<-SpatialPoints(cbind(lon,lat))
df1_SPpoints <- SpatialPointsDataFrame(coords, df_occ)
class(df1_SPpoints)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"

Now, we need to define the projection of this SpatialPointsDataFrame, we will use the same projection of the shapefile of ecoregions.

proj4string(df1_SPpoints) = CRS("+proj=longlat +datum=WGS84 +no_defs")
proj4string(df1_SPpoints) <- proj4string(ECO) # define that SpatialPointsDataFrame and shapefile have the same projection

We can now overlay points with polygons.

names(ECO@data) # check fields in the attribute table of the shape file.
##  [1] "Name"       "descriptio" "timestamp"  "begin"      "end"       
##  [6] "altitudeMo" "tessellate" "extrude"    "visibility" "drawOrder" 
## [11] "icon"       "snippet"    "Species"    "Records"
df1_SPpoints$ECO_Name<- over(df1_SPpoints, ECO)$Name # Add the column "Name" from the shape file to the SpatiaPointsDataFrame
names(df1_SPpoints@data)# check the attribute table of the SpatialPointsDataFrame. 
## [1] "IND"           "Database"      "Species"       "Latitude"     
## [5] "Longitude"     "Date_accuracy" "Records"       "Ecoregion"    
## [9] "ECO_Name"
head(df1_SPpoints@data)
##       IND Database                                       Species Latitude
## 1 IND0001     GBIF Auchenipterus nuchalis (Spix & Agassiz, 1829)  -2.0431
## 2 IND0002     GBIF          Centromochlus existimatus Mees, 1974  -2.0431
## 3 IND0003     GBIF          Centromochlus existimatus Mees, 1974  -2.1961
## 4 IND0005     GBIF Auchenipterichthys longimanus (Günther, 1864)  -2.5919
## 5 IND0006     GBIF Auchenipterus nuchalis (Spix & Agassiz, 1829)  -2.0564
## 6 IND0007     GBIF     Centromochlus heckelii (De Filippi, 1853)  -2.0431
##   Longitude Date_accuracy Records         Ecoregion          ECO_Name
## 1  -55.4097      Accurate  Unique Amazonas Lowlands Amazonas Lowlands
## 2  -55.4097      Accurate  Unique Amazonas Lowlands Amazonas Lowlands
## 3  -55.1072      Accurate  Unique Amazonas Lowlands Amazonas Lowlands
## 4  -55.4728      Accurate  Unique Amazonas Lowlands Amazonas Lowlands
## 5  -55.4192      Accurate  Unique Amazonas Lowlands Amazonas Lowlands
## 6  -55.4097      Accurate  Unique Amazonas Lowlands Amazonas Lowlands

Note that the column ECO_NAME stores the name of the ecoregion after the overlay we did with this script. The field “Ecoregion” stores the name of the ecoregion that was in the original file of species occurrence.

4. Calculate inventory completeness for each ecoregion.

First, we will convert the SpatialPointsDataFrame to data.frame

df_occ2<-as.data.frame(df1_SPpoints)
class(df_occ2)
## [1] "data.frame"

Second, we will check whether each records as a unique ID.

nrow(df_occ2)
## [1] 5467
length(unique(df_occ2$IND))
## [1] 3689

We need to create an unique ID for each record.

df_occ2$id<-row.names(df_occ2)

We will also need to create a column called “abundance” filled with “1”

df_occ2$abundance<-1
df_occ2$abundance<-as.integer(df_occ2$abundance)

We will check whether there is a record with value NA in the field ECO_name.

sum(is.na(df_occ2$ECO_Name))
## [1] 0

Great!!! There are no records with NA in the field ECO_Name.

Now, we will load the R pakcages that we will use to calculate inventory completeness.

library(vegan)
## Warning: package 'vegan' was built under R version 3.3.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 3.3.2
## Loading required package: lattice
## This is vegan 2.4-3
library(spaa)
## Warning: package 'spaa' was built under R version 3.3.3

We will now quantify inventory completeness using the final slope of smoothed species accumulation curve (see Lobo et al. 2018 - Ecological Indicators). This metric indicates the rate at which new species enter the species accumulation curve; a slope of 0.05, for example, suggests a flat curve where the sampling of 20 specimens, results in one new tree species (Hortal et al. 2004 - Ecography). We defined well-sampled cells as those with at least 100 unique occurrences and a slope smaller than 0.05. We calculated smoothed SACs with the method ‘exact’ of the function ‘specaccum’ in the R package vegan (Oksanen et al., 2017).

library(vegan)
library(spaa)

First, we need to define the parameters that will be used in the loop to calculate completeness

eco = unique(df_occ2$ECO_Name) # a vector with unique names of ecoregions
res1 = list() # a list that we will store our Site x Species matrix (as requered by vegan)
spaccum = list() # a list to store the results of the species accumulation curve
slope = list() # a list to store the slopes
names(df_occ2)
##  [1] "IND"           "Database"      "Species"       "Latitude"     
##  [5] "Longitude"     "Date_accuracy" "Records"       "Ecoregion"    
##  [9] "ECO_Name"      "lon"           "lat"           "id"           
## [13] "abundance"

Now, we can calculate completeness.

for (i in 1:length(eco)) {  
  dat2 = df_occ2[df_occ2$ECO_Name == eco[i], c("id", "Species", "abundance")]
  res1[[i]] = data2mat(dat2)  
  spaccum[[i]] = specaccum (res1[[i]], method = "exact")
  slope[[i]]<-specslope(spaccum[[i]], length(spaccum[[i]][[4]])-1)
}
## Warning in sqrt(V + cv): Si è prodotto un NaN

## Warning in sqrt(V + cv): Si è prodotto un NaN

## Warning in sqrt(V + cv): Si è prodotto un NaN

## Warning in sqrt(V + cv): Si è prodotto un NaN
## No actual accumulation since only 1 site provided
## Warning in sqrt(V + cv): Si è prodotto un NaN
## No actual accumulation since only 1 site provided
## No actual accumulation since only 1 site provided

Now, we will obtain the slopes_

ss<-as.data.frame(unlist(slope)) # Convert list of slope values to data.frame
head(ss)
##   unlist(slope)
## 1   0.003420267
## 2   0.016158094
## 3   0.003546011
## 4   0.018181820
## 5   0.028571429
## 6   0.286111111
cc<-as.data.frame(eco) # Convert vector of unique names of Ecoregions to data.frame
head(cc)
##                           eco
## 1           Amazonas Lowlands
## 2 Northeastern Mata Atlantica
## 3                Upper Parana
## 4                S. Francisco
## 5              Paraiba do Sul
## 6       Tramandai - Mampituba
slope_eco<-cbind(cc,ss) # bind these two objects
colnames(slope_eco)[2]<-"slope_exact" # rename columns
colnames(slope_eco)[1]<-"ECO_Name" # rename columns
head(slope_eco)
##                      ECO_Name slope_exact
## 1           Amazonas Lowlands 0.003420267
## 2 Northeastern Mata Atlantica 0.016158094
## 3                Upper Parana 0.003546011
## 4                S. Francisco 0.018181820
## 5              Paraiba do Sul 0.028571429
## 6       Tramandai - Mampituba 0.286111111
write.csv(slope_eco, "slope_eco.txt") # This is your file with a slope value for each ecoregion.