Here we test the function setup_sdmdata() and its different parametrization options. This function prepares the data to run the models with do_any() or do_many() functions. It creates an object in the workspace to be used in do_any() or do_many() and writes two csv files with metadata and sdmdata.
Loading required packages.
# Packages used by this test script.
library(rJava)
library(raster)
#library(modleR)
library(dplyr)
# I am using a local copy of the package for development
# (loaded with devtools::load_all() instead of the library() call commented out above)
devtools::load_all("../../1_modleR")
library(maps)
library(maptools)
We use a standard dataset included in the modleR package. First, from the example_occs object we select only the records of one species, Abarema langsdorffii, and split them into a training set (70% of the records) and a test set (the remaining 30%).
## Creating an object with species names
especies <- names(example_occs)[1]
# Selecting only coordinates for the first species
coord1sp <- example_occs[[1]]
head(coord1sp)
## sp lon lat
## 343 Abarema_langsdorffii -40.615 -19.921
## 344 Abarema_langsdorffii -40.729 -20.016
## 345 Abarema_langsdorffii -41.174 -20.303
## 346 Abarema_langsdorffii -41.740 -20.493
## 347 Abarema_langsdorffii -42.482 -20.701
## 348 Abarema_langsdorffii -40.855 -17.082
dim(coord1sp)
## [1] 104 3
# Subsetting data into training and test
# Number of records that go into the training set (70%, rounded up)
ceiling(0.7 * nrow(coord1sp))
## [1] 73
# Making a sample of 70% of species' records.
# seq_len() is used instead of 1:nrow() so that an empty table yields an
# empty index vector rather than c(1, 0).
# NOTE(review): no set.seed() call is visible before sample(), so the split
# presumably differs between runs -- confirm whether that is intended.
set <- sample(seq_len(nrow(coord1sp)), size = ceiling(0.7 * nrow(coord1sp)))
# Creating training data set (70% of species' records)
train_set <- coord1sp[set, ]
# Creating test data set (other 30%): every row index not drawn into `set`
test_set <- coord1sp[setdiff(seq_len(nrow(coord1sp)), set), ]
Now let's check our data points. We plot the occurrence records (training and test sets together) over the first axis of the environmental PCA stored in the object example_vars.
# selecting only the first PCA axis
predictor <- example_vars[[1]]
# transforming the data frame with the coordinates (columns 2 and 3,
# i.e. lon and lat) into a spatial object
pts <- SpatialPoints(coord1sp[, c(2, 3)])
# plotting the environmental layer without its legend
# (FALSE is spelled out because F is an ordinary variable that can be reassigned)
plot(predictor, legend = FALSE)
# overlaying all occurrence records of the species
points(lat ~ lon, data = coord1sp)
Let's explore the different arguments of the setup_sdmdata() function.
## function (species_name, occurrences, predictors, lon = "lon",
## lat = "lat", models_dir = "./models", real_absences = NULL,
## buffer_type = NULL, dist_buf = NULL, env_buffer = FALSE,
## env_distance = "centroid", dist_min = NULL, buffer_shape = NULL,
## max_env_dist = 0.5, write_buffer = FALSE, seed = NULL, clean_dupl = FALSE,
## clean_nas = FALSE, clean_uni = FALSE, geo_filt = FALSE, geo_filt_dist = NULL,
## select_variables = FALSE, cutoff = 0.8, percent = 0.8, plot_sdmdata = TRUE,
## n_back = 1000, partition_type = c("bootstrap"), boot_n = 1,
## boot_proportion = 0.7, cv_n = NULL, cv_partitions = NULL)
## NULL
First, running with default options. Pseudo-absences are generated in the entire area of the predictor variables and data is partitioned following bootstrap procedures.
# Setup with no buffer and no real absences; the partition arguments are
# left at their defaults.
# Logical flags are written as TRUE rather than T, which is a reassignable
# variable and not a reserved word.
a <- setup_sdmdata(
  species_name = especies[1], # species #01
  occurrences = coord1sp[, -1], # drop the first column (sp), keep lon/lat
  predictors = example_vars,
  models_dir = "./setupsdmdata/setupsdmdatam",
  real_absences = NULL,
  buffer_type = NULL,
  clean_dupl = TRUE,
  clean_nas = TRUE,
  seed = 512
)
Exploring the output object from setup_sdmdata().
# First rows of the object returned by setup_sdmdata(); the printed columns
# are boot1, pa, lon, lat and the six predictor layers.
head(a)
## boot1 pa lon lat layer.1 layer.2 layer.3 layer.4
## 1 1 1 -40.615 -19.921 -8.961108 6.114213 1.4063037 -7.387877
## 2 0 1 -40.729 -20.016 -8.721493 5.877643 1.3779029 -7.611304
## 3 0 1 -41.174 -20.303 -8.256238 5.837558 1.2663769 -8.370461
## 4 0 1 -41.740 -20.493 -7.275032 3.612645 0.8156436 -8.967305
## 5 0 1 -42.482 -20.701 -7.878113 5.980754 1.2590602 -9.314089
## 6 1 1 -40.855 -17.082 -8.830172 8.385015 1.2544242 -7.260952
## layer.5 layer.6
## 1 -1.498280 -5.799044
## 2 -1.470197 -5.797889
## 3 -1.451739 -5.968802
## 4 -1.260719 -5.530104
## 5 -1.418201 -6.134574
## 6 -1.759634 -6.927485
# Displaying the sdmdata figure found in the data_setup folder of this run
# (presumably written by setup_sdmdata() itself -- TODO confirm).
knitr::include_graphics("./setupsdmdata/setupsdmdatam/Abarema_langsdorffii/present/data_setup/sdmdata_Abarema_langsdorffii.png")
For cross-validation we need to specify the number of runs with cv_n and the number of partitions with cv_partitions.
# Same setup as the default run, but partitioning the data by
# cross-validation: one run (cv_n) with five partitions (cv_partitions).
# Logical flags are written as TRUE rather than T, which is a reassignable
# variable and not a reserved word.
aa <- setup_sdmdata(
  species_name = especies[1], # species #01
  occurrences = coord1sp[, -1], # drop the first column (sp), keep lon/lat
  predictors = example_vars,
  models_dir = "./setupsdmdata/setupsdmdatam_cross",
  partition_type = "crossvalidation",
  cv_n = 1,
  cv_partitions = 5,
  real_absences = NULL,
  buffer_type = NULL,
  clean_dupl = TRUE,
  clean_nas = TRUE,
  seed = 512
)
Exploring the new output object.
# First rows of the cross-validation setup. In the printed output the first
# column is cv_0 (values between 2 and 5 in these rows) instead of boot1;
# presumably it holds the partition each record was assigned to -- TODO confirm.
head(aa)
## cv_0 pa lon lat layer.1 layer.2 layer.3 layer.4 layer.5
## 1 3 1 -40.615 -19.921 -8.961108 6.114213 1.4063037 -7.387877 -1.498280
## 2 3 1 -40.729 -20.016 -8.721493 5.877643 1.3779029 -7.611304 -1.470197
## 3 4 1 -41.174 -20.303 -8.256238 5.837558 1.2663769 -8.370461 -1.451739
## 4 4 1 -41.740 -20.493 -7.275032 3.612645 0.8156436 -8.967305 -1.260719
## 5 2 1 -42.482 -20.701 -7.878113 5.980754 1.2590602 -9.314089 -1.418201
## 6 5 1 -40.855 -17.082 -8.830172 8.385015 1.2544242 -7.260952 -1.759634
## layer.6
## 1 -5.799044
## 2 -5.797889
## 3 -5.968802
## 4 -5.530104
## 5 -6.134574
## 6 -6.927485
buffer_type: buffer for pseudo-absence generation based on the mean distance between occurrence points.
# Setup using buffer_type = "mean".
# Logical flags are written as TRUE rather than T, which is a reassignable
# variable and not a reserved word.
b <- setup_sdmdata(
  species_name = especies[1],
  occurrences = coord1sp[, -1], # drop the first column (sp), keep lon/lat
  predictors = example_vars,
  models_dir = "./setupsdmdata/setupsdmdatan",
  real_absences = NULL,
  buffer_type = "mean",
  clean_dupl = TRUE,
  clean_nas = TRUE,
  seed = 512
)
# First rows of the resulting object
head(b)
## boot1 pa lon lat layer.1 layer.2 layer.3 layer.4
## 1 1 1 -40.615 -19.921 -8.961108 6.114213 1.4063037 -7.387877
## 2 0 1 -40.729 -20.016 -8.721493 5.877643 1.3779029 -7.611304
## 3 0 1 -41.174 -20.303 -8.256238 5.837558 1.2663769 -8.370461
## 4 0 1 -41.740 -20.493 -7.275032 3.612645 0.8156436 -8.967305
## 5 0 1 -42.482 -20.701 -7.878113 5.980754 1.2590602 -9.314089
## 6 1 1 -40.855 -17.082 -8.830172 8.385015 1.2544242 -7.260952
## layer.5 layer.6
## 1 -1.498280 -5.799044
## 2 -1.470197 -5.797889
## 3 -1.451739 -5.968802
## 4 -1.260719 -5.530104
## 5 -1.418201 -6.134574
## 6 -1.759634 -6.927485
# Displaying the sdmdata figure found in the data_setup folder of this run
knitr::include_graphics("./setupsdmdata/setupsdmdatan/Abarema_langsdorffii/present/data_setup/sdmdata_Abarema_langsdorffii.png")
Buffer for pseudo-absence generation based on the median distance between occurrence points.
# Setup using buffer_type = "median".
# Logical flags are written as TRUE rather than T, which is a reassignable
# variable and not a reserved word.
# NOTE(review): the result is stored in `c`, which masks the name of base::c()
# as a variable (function calls such as c(1, 2) still resolve to the function);
# the name is kept because code outside this chunk may refer to it.
c <- setup_sdmdata(
  species_name = especies[1],
  occurrences = coord1sp[, -1], # drop the first column (sp), keep lon/lat
  predictors = example_vars,
  models_dir = "./setupsdmdata/setupsdmdatap",
  real_absences = NULL,
  buffer_type = "median",
  clean_dupl = TRUE,
  clean_nas = TRUE,
  seed = 512
)
# First rows of the resulting object
head(c)
## boot1 pa lon lat layer.1 layer.2 layer.3 layer.4
## 1 1 1 -40.615 -19.921 -8.961108 6.114213 1.4063037 -7.387877
## 2 0 1 -40.729 -20.016 -8.721493 5.877643 1.3779029 -7.611304
## 3 0 1 -41.174 -20.303 -8.256238 5.837558 1.2663769 -8.370461
## 4 0 1 -41.740 -20.493 -7.275032 3.612645 0.8156436 -8.967305
## 5 0 1 -42.482 -20.701 -7.878113 5.980754 1.2590602 -9.314089
## 6 1 1 -40.855 -17.082 -8.830172 8.385015 1.2544242 -7.260952
## layer.5 layer.6
## 1 -1.498280 -5.799044
## 2 -1.470197 -5.797889
## 3 -1.451739 -5.968802
## 4 -1.260719 -5.530104
## 5 -1.418201 -6.134574
## 6 -1.759634 -6.927485
# Displaying the sdmdata figure found in the data_setup folder of this run
knitr::include_graphics("./setupsdmdata/setupsdmdatap/Abarema_langsdorffii/present/data_setup/sdmdata_Abarema_langsdorffii.png")
Buffer for pseudo-absence generation based on the maximum distance between occurrence points.
# Setup using buffer_type = "max".
# Logical flags are written as TRUE rather than T, which is a reassignable
# variable and not a reserved word.
d <- setup_sdmdata(
  species_name = especies[1],
  occurrences = coord1sp[, -1], # drop the first column (sp), keep lon/lat
  predictors = example_vars,
  models_dir = "./setupsdmdata/setupsdmdatax",
  real_absences = NULL,
  buffer_type = "max",
  clean_dupl = TRUE,
  clean_nas = TRUE,
  seed = 512
)
# First rows of the resulting object
head(d)
## boot1 pa lon lat layer.1 layer.2 layer.3 layer.4
## 1 1 1 -40.615 -19.921 -8.961108 6.114213 1.4063037 -7.387877
## 2 0 1 -40.729 -20.016 -8.721493 5.877643 1.3779029 -7.611304
## 3 0 1 -41.174 -20.303 -8.256238 5.837558 1.2663769 -8.370461
## 4 0 1 -41.740 -20.493 -7.275032 3.612645 0.8156436 -8.967305
## 5 0 1 -42.482 -20.701 -7.878113 5.980754 1.2590602 -9.314089
## 6 1 1 -40.855 -17.082 -8.830172 8.385015 1.2544242 -7.260952
## layer.5 layer.6
## 1 -1.498280 -5.799044
## 2 -1.470197 -5.797889
## 3 -1.451739 -5.968802
## 4 -1.260719 -5.530104
## 5 -1.418201 -6.134574
## 6 -1.759634 -6.927485
# Displaying the sdmdata figure found in the data_setup folder of this run
knitr::include_graphics("./setupsdmdata/setupsdmdatax/Abarema_langsdorffii/present/data_setup/sdmdata_Abarema_langsdorffii.png")
Buffer for pseudo-absence generation based on a specific distance between occurrence points. In this case the dist_buf argument must be specified.
# Setup using buffer_type = "distance", with the distance given by dist_buf.
# Logical flags are written as TRUE rather than T, which is a reassignable
# variable and not a reserved word.
e <- setup_sdmdata(
  species_name = especies[1],
  occurrences = coord1sp[, -1], # drop the first column (sp), keep lon/lat
  predictors = example_vars,
  models_dir = "./setupsdmdata/setupsdmdataxd",
  real_absences = NULL,
  buffer_type = "distance",
  dist_buf = 4, # NOTE(review): units are not shown here -- confirm
  clean_dupl = TRUE,
  clean_nas = TRUE,
  seed = 512
)
# First rows of the resulting object
head(e)
## boot1 pa lon lat layer.1 layer.2 layer.3 layer.4
## 1 1 1 -40.615 -19.921 -8.961108 6.114213 1.4063037 -7.387877
## 2 0 1 -40.729 -20.016 -8.721493 5.877643 1.3779029 -7.611304
## 3 0 1 -41.174 -20.303 -8.256238 5.837558 1.2663769 -8.370461
## 4 0 1 -41.740 -20.493 -7.275032 3.612645 0.8156436 -8.967305
## 5 0 1 -42.482 -20.701 -7.878113 5.980754 1.2590602 -9.314089
## 6 1 1 -40.855 -17.082 -8.830172 8.385015 1.2544242 -7.260952
## layer.5 layer.6
## 1 -1.498280 -5.799044
## 2 -1.470197 -5.797889
## 3 -1.451739 -5.968802
## 4 -1.260719 -5.530104
## 5 -1.418201 -6.134574
## 6 -1.759634 -6.927485
# Displaying the sdmdata figure found in the data_setup folder of this run
knitr::include_graphics("./setupsdmdata/setupsdmdataxd/Abarema_langsdorffii/present/data_setup/sdmdata_Abarema_langsdorffii.png")
# Setup using a user-supplied buffer: the shapefile read below is passed as
# buffer_shape together with buffer_type = "user".
myshapefile <- rgdal::readOGR("../data/myshapefile.shp")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/andreasancheztapia/Documents/1_modleR/1_repos_modleR/4_modleR_tests/data/myshapefile.shp", layer: "myshapefile"
## with 1 features
## It has 1 fields
# Logical flags are written as TRUE rather than T, which is a reassignable
# variable and not a reserved word.
f <- setup_sdmdata(
  species_name = especies[1],
  occurrences = coord1sp[, -1], # drop the first column (sp), keep lon/lat
  predictors = example_vars,
  models_dir = "./setupsdmdata/setupsdmdataub",
  real_absences = NULL,
  buffer_type = "user",
  buffer_shape = myshapefile,
  clean_dupl = TRUE,
  clean_nas = TRUE,
  seed = 512
)
# First rows of the resulting object
head(f)
## boot1 pa lon lat layer.1 layer.2 layer.3 layer.4
## 1 1 1 -40.615 -19.921 -8.961108 6.114213 1.4063037 -7.387877
## 2 0 1 -40.729 -20.016 -8.721493 5.877643 1.3779029 -7.611304
## 3 0 1 -41.174 -20.303 -8.256238 5.837558 1.2663769 -8.370461
## 4 0 1 -41.740 -20.493 -7.275032 3.612645 0.8156436 -8.967305
## 5 0 1 -42.482 -20.701 -7.878113 5.980754 1.2590602 -9.314089
## 6 1 1 -40.855 -17.082 -8.830172 8.385015 1.2544242 -7.260952
## layer.5 layer.6
## 1 -1.498280 -5.799044
## 2 -1.470197 -5.797889
## 3 -1.451739 -5.968802
## 4 -1.260719 -5.530104
## 5 -1.418201 -6.134574
## 6 -1.759634 -6.927485
# Displaying the sdmdata figure found in the data_setup folder of this run
knitr::include_graphics("./setupsdmdata/setupsdmdataub/Abarema_langsdorffii/present/data_setup/sdmdata_Abarema_langsdorffii.png")