# install.packages('rnaturalearth') #high res world map
# # On my version of R, I have to install this package from Github
# install_github('ropensci/rnaturalearthhires') #This is the higher version of the package above (they both need to be installed)
# install.packages('gdalUtils')
library(rnaturalearth)
library(rnaturalearthdata)
library(rnaturalearthhires)
library(gdalUtils)
library(devtools)
library(ENMGadgets)
require(ENMGadgets)
library(rgeos)
library(rgdal)
library(sf) # This package allows you to import shapefiles
library(RStoolbox) #This package allows you to perform PCA on rasters or raster stacks
library(ggplot2)
library(reshape2)
library(maptools)
library(readr)
library(usdm)
library(spThin)
library(dplyr)
library(dismo)
library(maxnet)
library(mapview)
library(MASS) # for 2D kernel density function
library(magrittr) # for piping functionality, i.e., %>%
library(maptools) # reading shapefiles
library(ncdf4)
library(raster)
require(raster)
library(brainGraph)
library(corrplot)
## Importing occurrence data
le_ca_occ <- read_delim("data/l_carianatus_GBIF_occurrence.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
## .default = col_logical(),
## gbifID = col_double(),
## accessRights = col_character(),
## bibliographicCitation = col_character(),
## created = col_date(format = ""),
## identifier = col_character(),
## language = col_character(),
## license = col_character(),
## modified = col_datetime(format = ""),
## references = col_character(),
## rights = col_character(),
## rightsHolder = col_character(),
## type = col_character(),
## institutionID = col_character(),
## collectionID = col_double(),
## institutionCode = col_character(),
## collectionCode = col_character(),
## datasetName = col_character(),
## ownerInstitutionCode = col_character(),
## basisOfRecord = col_character(),
## informationWithheld = col_character()
## # ... with 96 more columns
## )
## See spec(...) for full column specifications.
## Warning: 1132 parsing failures.
## row col expected actual file
## 1436 collectionID a double http://grbio.org/cool/fra1-wrdm | http://grbio.org/cool/vg5r-57jw 'data/l_carianatus_GBIF_occurrence.txt'
## 1462 datasetID 1/0/T/F/TRUE/FALSE HerpDWC 'data/l_carianatus_GBIF_occurrence.txt'
## 1463 datasetID 1/0/T/F/TRUE/FALSE HerpDWC 'data/l_carianatus_GBIF_occurrence.txt'
## 1464 datasetID 1/0/T/F/TRUE/FALSE HerpDWC 'data/l_carianatus_GBIF_occurrence.txt'
## 1542 collectionID a double http://grbio.org/cool/6e5v-fta9 'data/l_carianatus_GBIF_occurrence.txt'
## .... ............ .................. ................................................................. .......................................
## See problems(...) for more details.
squam_occ <- read_delim("data/squamate_occ.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
## .default = col_logical(),
## gbifID = col_double(),
## accessRights = col_character(),
## bibliographicCitation = col_character(),
## identifier = col_character(),
## language = col_character(),
## license = col_character(),
## modified = col_datetime(format = ""),
## publisher = col_character(),
## references = col_character(),
## type = col_character(),
## institutionID = col_character(),
## collectionID = col_character(),
## institutionCode = col_character(),
## collectionCode = col_character(),
## datasetName = col_character(),
## basisOfRecord = col_character(),
## dynamicProperties = col_character(),
## occurrenceID = col_character(),
## catalogNumber = col_double(),
## recordNumber = col_character()
## # ... with 80 more columns
## )
## See spec(...) for full column specifications.
## Warning: 456165 parsing failures.
## row col expected actual file
## 1 -- 241 columns 240 columns 'data/squamate_occ.txt'
## 2 -- 241 columns 240 columns 'data/squamate_occ.txt'
## 3 -- 241 columns 240 columns 'data/squamate_occ.txt'
## 4 -- 241 columns 240 columns 'data/squamate_occ.txt'
## 5 -- 241 columns 240 columns 'data/squamate_occ.txt'
## ... ... ........... ........... .......................
## See problems(...) for more details.
dim(le_ca_occ)
## [1] 3975 241
unique(le_ca_occ$basisOfRecord)
## [1] "HUMAN_OBSERVATION" "MACHINE_OBSERVATION" "PRESERVED_SPECIMEN"
## [4] "FOSSIL_SPECIMEN"
unique(le_ca_occ$scientificName)
## [1] "Leiocephalus carinatus Gray, 1827"
## [2] "Leiocephalus carinatus aquarius Schwartz & Ogren, 1956"
## [3] "Leiocephalus carinatus carinatus"
## [4] "Leiocephalus carinatus armouri Barbour & Shreve, 1935"
## [5] "Leiocephalus carinatus varius Garman, 1887"
## [6] "Leiocephalus carinatus labrossytus Schwartz, 1959"
## [7] "Liocephalus carinatus Boulenger, 1885"
## [8] "Leiocephalus carinatus hodsdoni"
## [9] "Leiocephalus carinatus mogotensis Schwartz, 1959"
## [10] "Leiocephalus carinatus coryi Schmidt, 1936"
## [11] "Leiocephalus carinatus granti Rabb, 1957"
## [12] "Leiocephalus carinatus virescens Stejneger, 1901"
## [13] "Leiocephalus carinatus microcyon Schwartz, 1959"
## [14] "Leiocephalus carinatus hodsoni Schmidt, 1936"
## [15] "Liocephalus varius Garman, 1887"
## [16] "Leiocephalus carinatus cayensis Schwartz, 1959"
## [17] "Leiocephalus carinatus zayasi Schwartz, 1959"
unique(le_ca_occ$specificEpithet)
## [1] "carinatus" "varius"
list.files('data/', full.names = FALSE)
## [1] "air.mon.mean.nc" "citations.txt"
## [3] "clean_le_ca_df.csv" "dataset"
## [5] "desktop.ini" "l_carianatus_GBIF_occurrence.txt"
## [7] "l_carinatus_10km_thin1.csv" "l_carinatus_1km_thin1.csv"
## [9] "l_carinatus_5km_thin1.csv" "l_carinatus_IUCN.cpg"
## [11] "l_carinatus_IUCN.dbf" "l_carinatus_IUCN.prj"
## [13] "l_carinatus_IUCN.shp" "l_carinatus_IUCN.shx"
## [15] "le_ca_pres_na.csv" "meta.xml"
## [17] "metadata.xml" "multimedia.txt"
## [19] "rights.txt" "squamate_occ.txt"
## [21] "test_L_carinatus.csv" "tl_2016_12_cousub.cpg"
## [23] "tl_2016_12_cousub.dbf" "tl_2016_12_cousub.prj"
## [25] "tl_2016_12_cousub.shp" "tl_2016_12_cousub.shx"
## [27] "verbatim.txt"
#Importing IUCN range map - We will use this to exclude all L. carinatus points outside of this area
le_ca_shapefile<-shapefile('data/l_carinatus_IUCN.shp')
plot(le_ca_shapefile)
projection(le_ca_shapefile)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
## Define required fields (GBIF gives a ton of superfluous variables)
gbif_fields = c("gbifID", "basisOfRecord", "catalogNumber", "collectionCode", "collectionID", "recordedBy", "continent", "countryCode", "stateProvince",
"county", "locality", "year", "month", "day", "decimalLatitude", "decimalLongitude", "coordinateUncertaintyInMeters", "establishmentMeans" ,
"scientificName", "class", "order", "family", "genus", "specificEpithet", "infraspecificEpithet", "taxonomicStatus")
## Take the subset of fields from the data
le_ca_reduced <- le_ca_occ[, gbif_fields]
squam_reduced <- squam_occ[, gbif_fields]
#Now I am reducing the bias layer data to see all of the families within the dataset
squam_reduced %>%
filter(family == " ") %>%
distinct(scientificName)
## # A tibble: 0 x 1
## # ... with 1 variable: scientificName <chr>
#Here I am subsetting to only include those families of interest (@Natalie, if you think some of these should be excluded, let me know)
squam_guild <- data.frame(
squam_reduced %>%
filter(family == 'Agamidae' | family == 'Chamaeleonidae' | family == 'Cordylidae' |
family == 'Corytophanidae' | family == 'Crotaphytidae' | family == 'Gerrhosauridae' |
family == 'Iguanidae' | family == 'Lacertidae' | family == 'Leiocephalidae' |
family == 'Phrynosomatidae' | family == 'Scincidae' | family == 'Teiidae' |
family == 'Tropiduridae' | family == 'Varanidae')
)
# Because some of these records are not field collected specimens, we want to remove those
squam_guild_no_fossils <- squam_guild %>%
filter(basisOfRecord != 'FOSSIL_SPECIMEN' & basisOfRecord != 'MACHINE_OBSERVATION')
## Remove occurrences with NA values.
NA_index = which(is.na(le_ca_reduced$decimalLatitude) == TRUE | is.na(le_ca_reduced$decimalLatitude) == TRUE)
le_ca_reduced_no_NAs = le_ca_reduced[-NA_index, ]
###Creating a map
north_am_map <- ne_countries(scale = "large", continent = 'north america')
##Matching projections first
projection(north_am_map)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
projection(le_ca_shapefile)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
crs(north_am_map) <- crs(le_ca_shapefile)
plot(north_am_map, axes=TRUE, col="light yellow",
xlim = c(-90,-70),
ylim = c(15, 30))
points(squam_guild_no_fossils$decimalLongitude,
squam_guild_no_fossils$decimalLatitude,
col='orange', pch=20, cex=0.75)
points(le_ca_reduced_no_NAs$decimalLongitude,
le_ca_reduced_no_NAs$decimalLatitude,
col='blue', pch=3, cex=0.75)
plot(le_ca_shapefile,add = TRUE)
#subsetting the spatial data to only include florida
us_map <- ne_states(country = 'United States of America')
FL_map <- us_map[us_map$name == 'Florida',]
plot(FL_map)
#Subsetting the spatial data to only include Caribbean Islands
NR_map <- north_am_map[north_am_map$admin == 'The Bahamas' |
north_am_map$admin == 'Cayman Islands' |
north_am_map$admin == 'Cuba',]
plot(NR_map)
#Matching projections
crs(FL_map) <- crs(le_ca_shapefile)
crs(NR_map) <- crs(le_ca_shapefile)
#Converting to spatial dataframe
coordinates(le_ca_reduced_no_NAs) <- ~ decimalLongitude + decimalLatitude
coordinates(squam_guild_no_fossils) <- ~ decimalLongitude + decimalLatitude
head(le_ca_reduced_no_NAs)
## # A tibble: 6 x 24
## gbifID basisOfRecord catalogNumber collectionCode collectionID recordedBy
## <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 2.58e9 HUMAN_OBSERV~ 40241530 Observations NA doggoleo
## 2 2.58e9 HUMAN_OBSERV~ 40250912 Observations NA lizardtiff
## 3 2.58e9 HUMAN_OBSERV~ 40164474 Observations NA Matt
## 4 2.58e9 HUMAN_OBSERV~ 40164475 Observations NA Matt
## 5 2.58e9 HUMAN_OBSERV~ 40194513 Observations NA reeseabro~
## 6 2.58e9 HUMAN_OBSERV~ 40105337 Observations NA zara_natu~
## # ... with 18 more variables: continent <chr>, countryCode <chr>,
## # stateProvince <chr>, county <chr>, locality <chr>, year <dbl>, month <dbl>,
## # day <dbl>, coordinateUncertaintyInMeters <dbl>, establishmentMeans <chr>,
## # scientificName <chr>, class <chr>, order <chr>, family <chr>, genus <chr>,
## # specificEpithet <chr>, infraspecificEpithet <chr>, taxonomicStatus <chr>
class(le_ca_reduced_no_NAs)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
#Changing the projection of occurrences to match the IUCN shapefile
crs(le_ca_reduced_no_NAs) <- crs(le_ca_shapefile)
crs(squam_guild_no_fossils) <- crs(le_ca_shapefile)
projection(squam_guild_no_fossils)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
#Using the IUCN shapefile to clip occurrences outside of those bounds
le_ca_reduced_no_NAs_IUCN_native_range <- le_ca_reduced_no_NAs[le_ca_shapefile,]
NR_squam_guild_no_fossils <- squam_guild_no_fossils[NR_map,]
FL_squam_guild_no_fossils <- squam_guild_no_fossils[FL_map,] # THis will be for when we incorporate FL into the models
## Remove duplicate records for L_carinatus.
le_ca_reduced_no_NAs_IUCN_native_range<-data.frame(le_ca_reduced_no_NAs_IUCN_native_range)
dups <- duplicated(le_ca_reduced_no_NAs_IUCN_native_range[, c("decimalLatitude", "decimalLongitude")])
dups_index = which(dups == TRUE)
le_ca_reduced_no_NAs_IUCN_native_range = le_ca_reduced_no_NAs_IUCN_native_range[-dups_index, ]
dim(le_ca_reduced_no_NAs_IUCN_native_range)
## [1] 364 27
## Visually inspect the final cleaned records
plot(NR_map, axes=TRUE, col="light yellow")
points(le_ca_reduced_no_NAs_IUCN_native_range$decimalLongitude,
le_ca_reduced_no_NAs_IUCN_native_range$decimalLatitude,
col='orange', pch=20, cex=0.75)
####### SAVE THE FINAL CLEANED OCCURRENCE FILE
# write.csv(le_ca_reduced_no_NAs_IUCN_native_range, "data/clean_le_ca_df.csv", row.names = FALSE)
NR_squam_guild_no_fossils<-data.frame(NR_squam_guild_no_fossils)
dups2 <- duplicated(NR_squam_guild_no_fossils[, c("decimalLatitude", "decimalLongitude")])
dups_index2 = which(dups2 == TRUE)
NR_squam_guild_no_fossils = NR_squam_guild_no_fossils[-dups_index2, ]
dim(NR_squam_guild_no_fossils)
## [1] 172 27
## Visually inspect the final cleaned records
plot(NR_map, axes=TRUE, col="light yellow")
points(NR_squam_guild_no_fossils$decimalLongitude,
NR_squam_guild_no_fossils$decimalLatitude,
col='orange', pch=20, cex=0.75)
####### SAVE THE FINAL CLEANED OCCURRENCE FILE
# write.csv(NR_squam_guild_no_fossils, "data/clean_NR_squam_guild_bias_layer_df.csv", row.names = FALSE)
FL_squam_guild_no_fossils<-data.frame(FL_squam_guild_no_fossils)
dups3 <- duplicated(FL_squam_guild_no_fossils[, c("decimalLatitude", "decimalLongitude")])
dups_index3 = which(dups3 == TRUE)
FL_squam_guild_no_fossils = FL_squam_guild_no_fossils[-dups_index3, ]
dim(FL_squam_guild_no_fossils)
## [1] 2269 27
## Visually inspect the final cleaned records
plot(FL_map, axes=TRUE, col="light yellow")
points(FL_squam_guild_no_fossils$decimalLongitude,
FL_squam_guild_no_fossils$decimalLatitude,
col='orange', pch=20, cex=0.75)
####### SAVE THE FINAL CLEANED OCCURRENCE FILE
# write.csv(FL_squam_guild_no_fossils, "data/clean_FL_squam_guild_bias_layer_df.csv", row.names = FALSE)
# Downloading/ cropping BIOclim variables
bioclim2.5 <- getData("worldclim",var="bio",res=2.5)
projection(bioclim2.5)
## [1] "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
projection(FL_map)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
projection(NR_map)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
#Changing projection of bioclim variables
crs(bioclim2.5) <- crs(NR_map)
# Cropping/ masking the bioclim variables for all of native range countries
NR_countries_bioclim2.5 <- crop(bioclim2.5, NR_map)
NR_countries_bioclim2.5 <- mask(NR_countries_bioclim2.5, NR_map)
plot(NR_countries_bioclim2.5$bio1)
FL_bioclim2.5 <- crop(bioclim2.5, FL_map)
FL_bioclim2.5 <- mask(FL_bioclim2.5, FL_map)
plot(FL_bioclim2.5$bio1)
#Showing the files to be imported
list.files('DEMs/', full.names = FALSE)
## [1] "10n090w_20101117_gmted_mea075.tif" "10n090w_20101117_gmted_mea300.tif"
## [3] "10n090w_20101117_gmted_med075.tif" "30n090w_20101117_gmted_mea075.tif"
## [5] "30n090w_20101117_gmted_mea300.tif" "30n090w_20101117_gmted_med075.tif"
#loading/ viewing the files
Caribbean_dem <- raster('DEMs/10n090w_20101117_gmted_mea300.tif')
US_dem <- raster('DEMs/30n090w_20101117_gmted_mea300.tif')
#Checking projections
projection(Caribbean_dem)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
projection(US_dem)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
projection(FL_bioclim2.5)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
#Cropping/ mapping the DEMs to the relevant areas
NR_dem <- crop(Caribbean_dem, NR_map)
NR_dem <- mask(NR_dem, NR_map)
FL_dem <- crop(mosaic(Caribbean_dem, US_dem, fun=mean), FL_map)
FL_dem <- mask(FL_dem, FL_map)
NR_dem_resampled_2.5 <- resample(NR_dem, NR_countries_bioclim2.5, method = 'bilinear')
plot(NR_dem_resampled_2.5)
FL_dem_resampled_2.5 <- resample(FL_dem, FL_bioclim2.5, method = 'bilinear')
plot(FL_dem_resampled_2.5)
# Stacking the rasters
NR_bioclim_dem_stack <- stack(NR_countries_bioclim2.5, NR_dem_resampled_2.5)
names(NR_bioclim_dem_stack)[20] <- "mean_elevation"
FL_bioclim_dem_stack <- stack(FL_bioclim2.5, FL_dem_resampled_2.5)
names(FL_bioclim_dem_stack)[20] <- "mean_elevation"
#Importing/ cropping/ matching extents/ stacking impervious surface raster
imp_surf <- raster('gmis_impervious_surface_percentage_geographic_250m.tif')
FL_imp_surf <- crop(imp_surf, FL_bioclim_dem_stack)
FL_imp_surf_resam <- resample(FL_imp_surf, FL_bioclim_dem_stack$bio1, method ='bilinear')
plot(FL_imp_surf_resam)
FL_bioclim_dem__imp_stack <- stack(FL_bioclim_dem_stack, FL_imp_surf_resam)
NR_imp_surf <- crop(imp_surf, NR_bioclim_dem_stack)
NR_imp_surf_resam <- resample(NR_imp_surf, NR_bioclim_dem_stack$bio1, method = "bilinear")
NR_imp_surf_resam_mask <- mask(NR_imp_surf_resam, NR_bioclim_dem_stack$bio1)
plot(NR_imp_surf_resam_mask)
NR_bioclim_dem__imp_stack <- stack(NR_bioclim_dem_stack, NR_imp_surf_resam_mask)
#First we need to run a function. This function is available in a package, but R will not let me open it, so I had to find the function itselt. Essentially, it just lets you to sample points from a raster based upon spatial probabilities.
sampleRast <- function(x, n, adjArea = TRUE, replace = TRUE, prob = TRUE) {
val <- as.vector(x[[1]])
# adjust probabilities for cell area and/or cell values
if (adjArea) {
areas <- raster::area(x[[1]], na.rm=TRUE)
areas <- as.vector(areas)
probs <- if (prob) {
val * areas
} else {
areas
}
} else if (!adjArea & prob) {
if (any(probs < 0)) warning('Some probabilities are < 0.', .immediate=TRUE)
probs <- val
} else if (!adjArea & !prob) {
probs <- rep(1, length(val))
}
cellNum <- 1:raster::ncell(x)
cellNum <- cellNum[!is.na(val)]
probs <- probs[!is.na(val)]
sites <- sample(cellNum, size=n, replace=replace, prob=probs)
xy <- raster::xyFromCell(x, sites)
xy
}
#Converting the dataframe to a coordinate object
coordinates(NR_squam_guild_no_fossils) <- ~ decimalLongitude + decimalLatitude
#Creating a raster of all point data
occur.ras <- rasterize(NR_squam_guild_no_fossils, NR_bioclim_dem_stack, 1)
plot(occur.ras)
#Creating a point dataframe
presences <- which(values(occur.ras) == 1)
pres.locs <- coordinates(occur.ras)[presences, ]
plot(pres.locs)
#Estimating kernel density. Here you want the limits to be set to the boundaries of your accessible area. 'h' refers to bandwidth. I think that we want a lower bandwidth, as this creates greater contrast between areas with and without sampling.
dens <- kde2d(pres.locs[,1], pres.locs[,2],
h = rep(1,1),
n = c(nrow(occur.ras), ncol(occur.ras)),
lims = c(range(-84.95833, -72.75), range(19.25, 26.91667)))
dens.ras <- raster(dens)
# The resample command will change both the extent AND the resolution to match a desired file. Here, I want to make sure that the heatmap resolution matches the resolution of our bioclim raster
dens.ras_resam <- resample(dens.ras, NR_bioclim_dem_stack, method='bilinear')
#Now I want to make sure the projections match, and mask all values outside our area of interest
crs(dens.ras_resam) = crs(NR_bioclim_dem_stack$bio1)
dens.ras_resam_mask <- mask(dens.ras_resam, NR_bioclim_dem_stack$bio1)
plot(dens.ras_resam_mask)
#Now we have to put the above map in point form so that it can be run in the Maxent command
#I am using the function we created earlier. Essentially, we are selecting samples from our accessible area, only weighted by the density of Anuran occurrence points (from our heatmap). I am using 300 here, but when we run all this in higher resolution, wil will increase the number.
background_T<-sampleRast(dens.ras_resam_mask,300,adjArea=TRUE, replace=FALSE,prob=TRUE)
nrow(dens.ras_resam_mask)
## [1] 184
plot(dens.ras_resam_mask)
points(background_T)
plot(le_ca_shapefile, add = TRUE)
le_ca_pres <- data.frame(le_ca_reduced_no_NAs_IUCN_native_range$decimalLongitude,
le_ca_reduced_no_NAs_IUCN_native_range$decimalLatitude)
colnames(le_ca_pres) <- c('lon', 'lat')
#First we find all values in our occurrence data with NA preidictor values
find_na <- raster::extract(NR_bioclim_dem__imp_stack, le_ca_pres)
#Binding that to the original dataframe to get locations
le_ca_pres_na <- cbind(le_ca_pres, find_na)
#You can see we lost about half of all those points (not a big deal given we still have more than enough)
dim(le_ca_pres_na)
## [1] 364 23
dim(na.omit(le_ca_pres_na))
## [1] 198 23
#Removing the rows with NA values, and then removing all predictor variables
le_ca_pres_no_na_preds <- na.omit(le_ca_pres_na)
le_ca_pres_no_na_preds <- data.frame(le_ca_pres_no_na_preds$lon, le_ca_pres_no_na_preds$lat)
colnames(le_ca_pres_no_na_preds) <- c('lon', 'lat')
# write.csv(le_ca_pres_no_na_preds, 'data/le_ca_pres_na.csv')
## Generate random number to separate training and testing occurrence data (70% training, 30% test)
i = sample(seq(1,nrow(le_ca_pres_no_na_preds)),
nrow(le_ca_pres_no_na_preds)* 0.30, replace = FALSE)
le_ca_pres_no_na_preds[, "scientificName"] <- "L_carinatus"
## training and testing data separated, only 3
test_occr = le_ca_pres_no_na_preds[i, c("scientificName", "lon", "lat")]
train_occr = le_ca_pres_no_na_preds[-i, c("scientificName", "lon", "lat")]
dim(test_occr)
## [1] 59 3
dim(train_occr)
## [1] 139 3
# Saving these data so that we can access it later
# write.csv(test_occr, "data/test_L_carinatus.csv", row.names =FALSE)
# write.csv(train_occr, "data/train_L_carinatus.csv", row.names =FALSE)
## Get presences data but only latitiude and longitude.
occur <- train_occr[,-1]
names(occur)<-c("lon","lat")
Here we will be using package dismo
#Subsetting that raster based on you a priori approach
NR_bioclim_dem__imp_stack_reduced<- stack(NR_bioclim_dem__imp_stack[[c(2,5,6,7,10,11,15,20,21),]])
FL_bioclim_dem__imp_stack_reduced <- stack(FL_bioclim_dem__imp_stack[[c(2,5,6,7,10,11,15,20,21),]])
#Matching all projections
crs(NR_bioclim_dem__imp_stack_reduced)="+proj=longlat +datum=WGS84"
crs(FL_bioclim_dem__imp_stack_reduced) ="+proj=longlat +datum=WGS84"
## Maxent arguments for dismo is listed here. https://urldefense.proofpoint.com/v2/url?u=https-3A__groups.google.com_forum_-23-21topic_maxent_yRBlvZ1-5F9rQ&d=DwIGaQ&c=sJ6xIWYx-zLMB3EPkvcnVg&r=nNnY6X5WY58RgeQXOKDQ61FWZnWJi1m_cldgGu59VxM&m=skM0YtecCzIghUzqVpUqwFdpGX3fReLIMh72UsSFje8&s=Ug_bbiC0ZCyJyEIlwiEMNQh8aW-vBSsd6uTAqY755Qw&e= .
## only linear and regularization is 1
arguments <- c("-J", "-P", "-q", "-p", "-h", "randomtestpoints=30",
"replicates=5", "betamultiplier=1", "askoverwrite=false",
"threads=6")
mod_lin1 <- maxent(x=NR_bioclim_dem__imp_stack_reduced, p=coordinates(occur), a = background_T, args=arguments)
## Loading required namespace: rJava
#Running the code below will generate an html site that gives the results of the Maxent model
# mod_lin1
#Using the model to predict over the native range (using the mean function to create a composite of all 5 replicates)
NR_Pred1 = predict(mod_lin1, NR_bioclim_dem__imp_stack_reduced)
plot(mean(NR_Pred1))
#Finding the threshold to set occurrence
mean_nr_pred <- mean(NR_Pred1)
pred_at_occ<- raster::extract(mean_nr_pred, coordinates(occur)) # Here we are extracting those relative (0-1) suitability values at each of our occurrence points
quantile(pred_at_occ, 0.05) #Now we are determining the 0.05 quantile, as we will set the presence/ absence threshold to that
## 5%
## 0.3610937
#Predicting occurrence
FL_predict1 = predict(mod_lin1,FL_bioclim_dem__imp_stack_reduced)
plot(mean(FL_predict1))
mean_fl_pred <- mean(FL_predict1)
#Setting the presence/ absence threshold to the 0.05 quantile calulated above
mean_fl_pred[mean_fl_pred <= quantile(pred_at_occ, 0.05) ] = 0
mean_fl_pred[mean_fl_pred >quantile(pred_at_occ, 0.05) ] = 1
plot(mean_fl_pred)
####As you can see, there is apparently very little area predicted as suitable in FL. However,I think a lot of this is driven the the %impervious surface raster. So, for the second model, we will remove that. We can talk more about it, but I have a feeling that not having very much impervious surface is penalizing predicted suitability too severely
#Subsetting that raster based on you a priori approach
NR_bioclim_no_imp_reduced<- stack(NR_bioclim_dem__imp_stack[[c(2,5,6,7,10,11,15,20),]])
FL_bioclim_no_imp_reduced <- stack(FL_bioclim_dem__imp_stack[[c(2,5,6,7,10,11,15,20),]])
#Matching all projections
crs(NR_bioclim_no_imp_reduced)="+proj=longlat +datum=WGS84"
crs(FL_bioclim_no_imp_reduced) ="+proj=longlat +datum=WGS84"
## only linear and regularization is 1
arguments <- c("-J", "-P", "-q", "-p", "-h", "randomtestpoints=30",
"replicates=5", "betamultiplier=1", "askoverwrite=false",
"threads=6")
mod_lin2 <- maxent(x=NR_bioclim_no_imp_reduced, p=coordinates(occur), a = background_T, args=arguments)
#Running the code below will generate an html site that gives the results of the Maxent model
# mod_lin2
#Using the model to predict over the native range (using the mean function to create a composite of all 5 replicates)
NR_Pred2 = predict(mod_lin2, NR_bioclim_no_imp_reduced)
plot(mean(NR_Pred2))
#Finding the threshold to set occurrence
mean_nr_pred2 <- mean(NR_Pred2)
pred_at_occ2<- raster::extract(mean_nr_pred2, coordinates(occur)) # Here we are extracting those relative (0-1) suitability values at each of our occurrence points
quantile(pred_at_occ2, 0.05) #Now we are determining the 0.05 quantile, as we will set the presence/ absence threshold to that
## 5%
## 0.4027691
#Predicting occurrence
FL_predict2 = predict(mod_lin2,FL_bioclim_no_imp_reduced)
plot(mean(FL_predict2))
mean_fl_pred2 <- mean(FL_predict2)
#Setting the presence/ absence threshold to the 0.05 quantile calulated above
mean_fl_pred2[mean_fl_pred2 <= quantile(pred_at_occ, 0.05) ] = 0
mean_fl_pred2[mean_fl_pred2 >quantile(pred_at_occ, 0.05) ] = 1
plot(mean_fl_pred2)
NR_bioclim_precip_reduced<- stack(NR_bioclim_dem__imp_stack[[c(2,5,7,11,13,15,18,20),]])
FL_bioclim_precip_reduced <- stack(FL_bioclim_dem__imp_stack[[c(2,5,7,11,13,15,18,20),]])
#Matching all projections
crs(NR_bioclim_precip_reduced)="+proj=longlat +datum=WGS84"
crs(FL_bioclim_precip_reduced) ="+proj=longlat +datum=WGS84"
## only linear and regularization is 1
arguments <- c("-J", "-P", "-q", "-p", "-h", "randomtestpoints=30",
"replicates=5", "betamultiplier=1", "askoverwrite=false",
"threads=6")
mod_lin3 <- maxent(x=NR_bioclim_precip_reduced, p=coordinates(occur), a = background_T, args=arguments)
#Running the code below will generate an html site that gives the results of the Maxent model
# mod_lin3
#Using the model to predict over the native range (using the mean function to create a composite of all 5 replicates)
NR_Pred3 = predict(mod_lin3, NR_bioclim_precip_reduced)
plot(mean(NR_Pred3))
#Finding the threshold to set occurrence
mean_nr_pred3 <- mean(NR_Pred3)
pred_at_occ3<- raster::extract(mean_nr_pred3, coordinates(occur)) # Here we are extracting those relative (0-1) suitability values at each of our occurrence points
quantile(pred_at_occ3, 0.05) #Now we are determining the 0.05 quantile, as we will set the presence/ absence threshold to that
## 5%
## 0.3629274
#Predicting occurrence
FL_predict3 = predict(mod_lin3,FL_bioclim_precip_reduced)
plot(mean(FL_predict3))
mean_fl_pred3 <- mean(FL_predict3)
#Setting the presence/ absence threshold to the 0.05 quantile calulated above
mean_fl_pred3[mean_fl_pred3 <= quantile(pred_at_occ, 0.05) ] = 0
mean_fl_pred3[mean_fl_pred3 >quantile(pred_at_occ, 0.05) ] = 1
plot(mean_fl_pred3)
#I think part of the issue here is the loss of Bio6 (min temp of coldest month). While this variable (as well as mean temp of coldest quarter) are not super important in the models so far, this is largely because there is not that much variation in these variables in the native range. But, Obviously we know this is going to be a drastic limiting factor based upon the CT data alone.