Create a global SDM for weighting pseudoabsences
Read in the global occurrence data for the target species that was
downloaded using “global_download.Rmd”
[1] "Vaccinium corymbosum"
[1] "2882849"
Specify paths for output (defaults to file structure in ReadMe)
Filter global occurrence data
#remove unverified records
identificationVerificationStatus_to_discard <- c("unverified", "unvalidated","not able to validate","control could not be conclusive due to insufficient knowledge")
#enter value for max coordinate uncertainty in meters.
global.occ<-global %>%
filter(speciesKey==taxonkey) %>% #using taxonKey filters out accepted synonyms
filter(is.na(coordinateUncertaintyInMeters)| coordinateUncertaintyInMeters< 1000) %>%
filter(!str_to_lower(identificationVerificationStatus) %in% identificationVerificationStatus_to_discard)
global.occ$lon_dplaces<-sapply(global.occ$decimalLongitude, function(x) decimalplaces(x))
global.occ$lat_dplaces<-sapply(global.occ$decimalLatitude, function(x) decimalplaces(x))
global.occ[global.occ$lon_dplaces < 4& global.occ$lat_dplaces < 4 , ]<-NA
global.occ<-global.occ[ which(!is.na(global.occ$lon_dplaces)),]
global.occ<-within(global.occ,rm("lon_dplaces","lat_dplaces"))
global.occ<-global.occ[which( global.occ$year > 1975 & global.occ$year < 2020),]
Convert global occurrences to spatial points needed for
modelling

Flag and remove centroids and invalid georeferenced points
Testing coordinate validity
Flagged 0 records.
Testing zero coordinates
Warning: GEOS support is provided by the sf and terra packages among othersFlagged 0 records.
Testing country capitals
Flagged 8 records.
Testing country centroids
Flagged 0 records.
Testing sea coordinates
trying URL 'https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_land.zip'
Content type 'application/zip' length 457183 bytes (446 KB)
downloaded 446 KB
Flagged 44 records.
Testing GBIF headquarters, flagging records around Copenhagen
Flagged 0 records.
Testing biodiversity institutions
Flagged 3 records.
Flagged 54 of 1400 records, EQ = 0.04.
Testing coordinate validity
Flagged 0 records.
Testing zero coordinates
Warning: GEOS support is provided by the sf and terra packages among othersFlagged 0 records.
Testing country capitals
Flagged 8 records.
Testing country centroids
Flagged 0 records.
Testing sea coordinates
trying URL 'https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_land.zip'
Content type 'application/zip' length 457183 bytes (446 KB)
downloaded 446 KB
Flagged 44 records.
Testing GBIF headquarters, flagging records around Copenhagen
Flagged 0 records.
Testing biodiversity institutions
Flagged 3 records.
Flagged 54 of 1400 records, EQ = 0.04.
Create global rasterstack using CHELSA data for model building
globalclimrasters <- list.files((here("./data/external/climate/trias_CHELSA")),pattern='tif',full.names = T) #import CHELSA data
globalclimpreds <- stack(globalclimrasters)
Use SDMtab command from the SDMPlay package to remove duplicates per
grid cell
global.SDMtable<- SDMPlay:::SDMtab(global.occ.LL.cleaned, globalclimpreds, unique.data = TRUE,background.nb= 0) #
numb.pseudoabs <-length(global.SDMtable$id) #sets the number of pseudoabsences equal to number of unique presences
global.occ.sp<-global.SDMtable[c("longitude", "latitude")]
coordinates(global.occ.sp)<- c("longitude", "latitude")
global.occ.sp$species<- rep(1,length(global.occ.sp$latitude)) #adds columns indicating species presence needed for modeling
Select wwf ecoregions that contain global occurrence points
Specify and import bias grids for relevant taxonomic group (e.g
vascular plants)
biasgrid<-raster(here("./data/external/bias_grids/final/trias/plants_1deg_min5.tif"))### specify appropriate bias grid here
Subset bias grid by ecoregions containing occurrence points
ext_wwf_ecoSub<-extent(wwf_ecoSub1)
biasgrid_crop<-crop(biasgrid,ext_wwf_ecoSub)
biasgrid_sub<-mask(biasgrid_crop,wwf_ecoSub1)
plot(biasgrid_sub)
plot(wwf_ecoSub1,add=TRUE)

NA
Use randomPoints function from dismo package to locate
pseduobasences within the bias grid subset
set.seed(728)
global_points<-randomPoints(biasgrid_sub, numb.pseudoabs, global.occ.sp, ext=NULL, extf=1.1, excludep=TRUE, prob=FALSE, cellnumbers=FALSE, tryf=70, warn=2, lonlatCorrection=TRUE)
# will throw a warning if randomPoints generated is less than numb.pseudoabs. If this happens, increase the number of tryf or ignore bias grid and sample from ecoregion only.
OPTIONAL: Sample from ecoregion only
# wwf_grid<-raster(here("./data/external/GIS/wwf_ecoregions_v1.tif"))
# ecoregions_raster<-mask(wwf_grid,wwf_ecoSub1)
# set.seed(768)
# global_points<-randomPoints(ecoregions_raster, numb.pseudoabs, global.occ.sp, ext=NULL, extf=1.1, excludep=TRUE, prob=FALSE, cellnumbers=FALSE, tryf=150, warn=2, lonlatCorrection=TRUE)
Extract generated pseudo absences and create presence-pseudobasence
dataset
Warning: OGR support is provided by the sf and terra packages among othersWarning: OGR support is provided by the sf and terra packages among others
Remove highly correlated predictors from dataframe
Correct global clim preds values from integer format
Use caretList from Caret package to run multiple machine learning
models
control <- trainControl(method="repeatedcv",number=10, repeats=10, savePredictions="final", preProc=c("center","scale"),classProbs=TRUE)
classList1 <- c("glm","gbm","rf","knn", "earth")
set.seed(457)
hideoutput<-capture.output(
global_train <- caretList(
species~., data= global.data.df.uncor,
trControl=control,
methodList=classList1
))
modelResults<-resamples(global_train)
summary(modelResults)# displays accuracy of each model
Call:
summary.resamples(object = modelResults)
Models: glm, gbm, rf, knn, earth
Number of resamples: 100
Accuracy
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
glm 0.6009852 0.6403941 0.6600985 0.6601555 0.6798030 0.7450980 0
gbm 0.7623762 0.8027118 0.8168317 0.8144422 0.8316832 0.8719212 0
rf 0.8078818 0.8323050 0.8465347 0.8471209 0.8627451 0.8970588 0
knn 0.7772277 0.8137255 0.8366337 0.8319377 0.8514851 0.8916256 0
earth 0.7029703 0.7496921 0.7728381 0.7720725 0.7948457 0.8423645 0
Kappa
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
glm 0.2022995 0.2806485 0.3199825 0.3203036 0.3597457 0.4901961 0
gbm 0.5247525 0.6053303 0.6336634 0.6288757 0.6633663 0.7438113 0
rf 0.6157356 0.6644820 0.6930693 0.6942432 0.7254902 0.7941176 0
knn 0.5544554 0.6274510 0.6732673 0.6638629 0.7029703 0.7832249 0
earth 0.4059406 0.4994847 0.5454726 0.5441390 0.5896913 0.6848132 0
modelCor(resamples(global_train))# shows correlation among models.Weakly correlated algorithms are persuasive for stacking them in ensemble.
glm gbm rf knn earth
glm 1.0000000 0.2949202 0.1911769 0.1942191 0.2081093
gbm 0.2949202 1.0000000 0.5516553 0.5781952 0.6029141
rf 0.1911769 0.5516553 1.0000000 0.5889041 0.3030091
knn 0.1942191 0.5781952 0.5889041 1.0000000 0.3025351
earth 0.2081093 0.6029141 0.3030091 0.3025351 1.0000000
Create ensemble model (combine individual models into one)
set.seed(478)
global_stack <- caretStack(
global_train,
method="glm",
trControl=trainControl(method="cv",
number=10,
savePredictions= "final" ))
print(global_stack)
A glm ensemble of 5 base models: glm, gbm, rf, knn, earth
Ensemble results:
Generalized Linear Model
20280 samples
5 predictor
2 classes: 'present', 'absent'
No pre-processing
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 18252, 18252, 18252, 18252, 18252, 18252, ...
Resampling results:
Accuracy Kappa
0.8476824 0.6953649
Create rasterstack of CHELSA climate data clipped to European
modeling extent for prediction
euclimrasters <- list.files((here("./data/external/climate/chelsa_eu_clips")),pattern='tif',full.names = T)
eu_climpreds<-stack(euclimrasters)
eu_climpreds.10<-divide10(eu_climpreds) # correct for integer format of Chelsa preds
Restrict global model prediction to the extent of Europe

Create European subset

Create RasterStack of European climate variables from RMI
rmiclimrasters <- list.files((here("./data/external/climate/rmi_corrected")),pattern='tif',full.names = T)
rmiclimpreds <- stack(rmiclimrasters)
Use SDMtab command from the SDMPlay package to remove duplicates per
1km grid cell
# convert a single raster to LL for use in SDMtab
LLproj<-CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
rmiclimpredLL<-projectRaster(rmiclimpreds$anntemp_eea, crs=LLproj)
rmiclimpredLL2<-projectRaster(rmiclimpreds$annprecip_eea, crs=LLproj)
rmipredstackLL<-stack(rmiclimpredLL,rmiclimpredLL2)
# Create SpatialPoints dataframe needed for SDMtab command.
euocc<-occ.eu@coords
euocc1<-data.frame(euocc)[c(1:2)]
names(euocc1)<-c("longitude", "latitude")
#run SDMtab command to remove duplicates
euocc.SDMtable<- SDMPlay:::SDMtab(euocc1, rmipredstackLL, unique.data = TRUE,same=TRUE)
numb.pseudoabs <- length(euocc.SDMtable$id)
Clip bias grid to European extent
studyextent<-euboundary
ecoregions_eu<-crop(biasgrid_sub,studyextent)
biasgrid_eu<-projectRaster(ecoregions_eu,rmiclimpreds)
plot(biasgrid_eu)
plot(studyextent,add=TRUE)

Mask areas of high habitat suitability from global climate
model
#Read in global raster from earlier step if needed
#globalfilename<-paste(here("./data/processed/geotiffs/GlobalEnsEU_"), taxonkey, ".tif",sep="")
#global_model<-raster("C:/Users/Amy.Davis/OneDrive - Ipsos/Desktop/xps15/risk-modelling-and-mapping/data/processed/geotiffs/GlobalEnsEU_2882849.tif")
wgs84_gcs<-CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
crs(global_model)<-wgs84_gcs
m<-global_model >.5
global_mask<-mask(global_model,m,maskvalue=TRUE)
global_masked_proj<-projectRaster(global_mask,biasgrid_eu)
Overlay low predicted habitat suitability on bias grid to exclude
low sampled areas
pseudoSamplingArea<-mask(biasgrid_eu,global_masked_proj)
plot(pseudoSamplingArea)

Randomly locate pseudo absences within “pseudoSamplingArea”
set.seed=120
euocc_points<-randomPoints(pseudoSamplingArea, numb.pseudoabs, euocc, ext=NULL, extf=1.1, excludep=TRUE, prob=FALSE,cellnumbers=FALSE, tryf=50, warn=2, lonlatCorrection=TRUE)
euocc_pseudoAbs<-as.data.frame(euocc_points)
coordinates(euocc_pseudoAbs)<-c("x","y")
euocc_pseudoAbs$occ<-rep(0,length(euocc_pseudoAbs$x))
crs(euocc_pseudoAbs)<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,-0,-0,-0,0 +units=m +no_defs ")
Join eu occurrences with pseudo absences to create eu level
presence-pseudoabsence dataset
eu_presabs<- spRbind(euocc1,euocc_pseudoAbs)
crs(eu_presabs)<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,-0,-0,-0,0 +units=m +no_defs ")
#confirm equal number of presences and absences
table(eu_presabs@data$occ)
0 1
234 234
#export occ points as shapefile
writeOGR(eu_presabs, dsn=(here("./data/processed/sdm_occ_pts/europe")), layer=paste(taxonName,"_EUpresabs",sep=""), driver="ESRI Shapefile", overwrite_layer = TRUE)
Warning: OGR support is provided by the sf and terra packages among othersWarning: OGR support is provided by the sf and terra packages among others
Create SDM data object for European data
class : sdmdata
===========================================================
number of species : 1
species names : occ
number of features : 13
feature names : anngdd100, annprecip_eea, annpvarrecip_eea, ...
type : Presence-Absence
has independet test data? : FALSE
number of records : 455
has Coordinates? : TRUE
Add habitat and anthropogenic predictors
class : sdmdata
===========================================================
number of species : 1
species names : occ
number of features : 12
feature names : anngdd100, annpvarrecip_eea, dristprec, ...
type : Presence-Absence
has independet test data? : FALSE
number of records : 454
has Coordinates? : TRUE
Build models with climate and habitat data
# uncomment 2nd control options for LOOCV (leave one out cross validation, which is aka as "jacknife" ) which should be used when occurrences are smaller than n=10 for each predictor in the model)
occeu.full.data.df1$occ<-as.factor(occeu.full.data.df1$occ)
levels(occeu.full.data.df1$occ)<-c("absent","present")
#control<-trainControl(method="LOOCV",savePredictions="final", preProc=c("center","scale"),classProbs=TRUE)
control <- trainControl(method="repeatedcv",number=10, repeats=10,savePredictions="final",classProbs=TRUE)
mylist<-list(
glm =caretModelSpec(method = "glm",maxit=100),
gbm= caretModelSpec(method = "gbm"),
rf = caretModelSpec(method = "rf", importance = TRUE),
knn = caretModelSpec(method = "knn"),
earth= caretModelSpec(method = "earth"))
set.seed(157)
hideout<-capture.output(model_train_habitat <- caretList(
occ~., data=occeu.full.data.df1,
trControl=control,
tuneList=mylist))
Display model evaluation statistics
modelResults1<-resamples(model_train_habitat)
summary(modelResults1)
Call:
summary.resamples(object = modelResults1)
Models: glm, gbm, rf, knn, earth
Number of resamples: 100
Accuracy
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
glm 0.5217391 0.5869565 0.6222222 0.6308180 0.6739130 0.8043478 0
gbm 0.5777778 0.6666667 0.7111111 0.7107276 0.7555556 0.8695652 0
rf 0.6000000 0.6956522 0.7391304 0.7406948 0.7789855 0.9148936 0
knn 0.4000000 0.5777778 0.6263285 0.6312433 0.6756475 0.8478261 0
earth 0.5000000 0.6666667 0.7173913 0.7189498 0.7608696 0.9130435 0
Kappa
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
glm 0.043478261 0.1739130 0.2463054 0.2625767 0.3478261 0.6145251 0
gbm 0.152626363 0.3375859 0.4247788 0.4224862 0.5103858 0.7396226 0
rf 0.201183432 0.3910161 0.4782609 0.4830374 0.5604220 0.8303249 0
knn -0.194690265 0.1584621 0.2558021 0.2634112 0.3534978 0.6956522 0
earth 0.003766478 0.3372602 0.4347826 0.4392122 0.5221895 0.8250951 0
modelCor(resamples(model_train_habitat))
glm gbm rf knn earth
glm 1.0000000 0.2923840 0.3033874 0.1371341 0.2154149
gbm 0.2923840 1.0000000 0.6348932 0.2323403 0.5495479
rf 0.3033874 0.6348932 1.0000000 0.2509761 0.4975596
knn 0.1371341 0.2323403 0.2509761 1.0000000 0.1037231
earth 0.2154149 0.5495479 0.4975596 0.1037231 1.0000000
Create ensemble model
set.seed(456)
hideoutput<-capture.output(
lm_ens_hab<-caretEnsemble(model_train_habitat1, trControl=trainControl(method="cv", number=10,savePredictions= "final",classProbs = TRUE)))
lm_ens_hab
A glm ensemble of 5 base models: glm, gbm, rf, knn, earth
Ensemble results:
Generalized Linear Model
454 samples
5 predictor
2 classes: 'absent', 'present'
No pre-processing
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 409, 408, 408, 409, 407, 409, ...
Resampling results:
Accuracy Kappa
0.7550961 0.511276
variableImportance<-varImp(lm_ens_hab)
kable(variableImportance,digits=2,caption="Variable Importance") %>%
kable_styling(bootstrap_options = c("striped"))
Variable Importance
| |
overall |
glm |
gbm |
rf |
knn |
earth |
| corine_perWetland |
0.63 |
4.05 |
0.00 |
0.00 |
2.81 |
0.00 |
| corine_perdeciduous |
1.49 |
6.13 |
3.13 |
0.83 |
0.00 |
0.00 |
| corine_perConiferous |
1.93 |
1.72 |
1.66 |
2.35 |
2.79 |
0.00 |
| ESM1000m_singleINT_ext |
5.24 |
0.00 |
3.85 |
7.04 |
10.00 |
0.00 |
| corine_perAgriculture |
6.05 |
17.53 |
3.75 |
6.01 |
5.92 |
0.00 |
| corine_pergrass |
6.34 |
14.96 |
2.14 |
6.73 |
10.47 |
0.00 |
| varSolRad100 |
8.56 |
4.88 |
8.52 |
11.44 |
6.89 |
0.00 |
| wettprec |
8.67 |
1.44 |
10.94 |
11.76 |
6.20 |
0.00 |
| dristprec |
9.46 |
3.95 |
15.08 |
11.39 |
8.32 |
0.00 |
| annpvarrecip_eea |
11.19 |
11.58 |
12.14 |
12.79 |
15.87 |
0.00 |
| anngdd100 |
17.00 |
25.53 |
17.10 |
14.98 |
13.44 |
21.35 |
| temprang |
23.43 |
8.22 |
21.69 |
14.69 |
17.28 |
78.65 |
write.csv(variableImportance,file = paste0(genOutput,taxonkey,"_varImp.csv"))
Use EU level ensemble model (ensModel) to predict for Europe
ens_pred_hab_eu<-raster::predict(fullstack,lm_ens_hab,type="prob")
ens_pred_hab_eu1<-1-ens_pred_hab_eu
writeRaster(ens_pred_hab_eu1,filename=file.path(rasterOutput,paste("eu_",taxonkey, "_hist.tif",sep="")) , format="GTiff",overwrite=TRUE)
exportPNG(ens_pred_hab_eu1,taxonkey,taxonName=taxonName,"hist.png")
brks <- seq(0, 1, by=0.1)
nb <- length(brks)-1
pal <- colorRampPalette(rev(brewer.pal(8, 'Spectral')))
cols<-pal(nb)
plot(ens_pred_hab_eu1, breaks=brks, col=cols,lab.breaks=brks)

NA
Predict for Belgium only using EU-level ensemble model to forecast
risk under historical climate conditions
laea_grs80<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,-0,-0,-0,0 +units=m +no_defs ")
ens_pred_hab<-raster::predict(fullstack_be,lm_ens_hab,type="prob")
ens_pred_hab1<-1-ens_pred_hab
crs(ens_pred_hab1)<-laea_grs80
writeRaster(ens_pred_hab1, filename=file.path(rasterOutput,paste("be_",taxonkey, "_hist.tif",sep="")), format="GTiff",overwrite=TRUE)
exportPDF(ens_pred_hab1,taxonkey,taxonName=taxonName,"eu_hist.pdf")
exportPNG(ens_pred_hab1,taxonkey,taxonName=taxonName,"eu_hist.png")
brks <- seq(0, 1, by=0.1)
nb <- length(brks)-1
pal <- colorRampPalette(rev(brewer.pal(11, 'Spectral')))
cols<-pal(nb)
plot(ens_pred_hab1, breaks=brks, col=cols,lab.breaks=brks)
plot(euocc,pch=3,cex=0.75,add=TRUE)

Clip habitat stack to Belgium
use this stack for vertebrates or anything likely to be influenced
by water availability
# habitat_stack<-stack(habitat,dist2water)
# habitat_only_stack<-crop(habitat_stack,country)
# habitat_only_stack_be<-crop(habitat_only_stack,country)
use this stack for plants
habitat_stack<-stack(habitat)
habitat_only_stack<-crop(habitat_stack,country)
habitat_only_stack_be<-crop(habitat_only_stack,country)
Create individual RCP (2.6, 4.5, 8.5) climate data stacks for
Belgium
be26 <- list.files((here("./data/external/climate/byEEA_finalRCP/belgium_rcps/rcp26")),pattern='tif',full.names = T)
belgium_stack26 <- stack(be26)
be45 <- list.files((here("./data/external/climate/byEEA_finalRCP/belgium_rcps/rcp45")),pattern='tif',full.names = T)
belgium_stack45 <- stack(be45)
be85 <- list.files((here("./data/external/climate/byEEA_finalRCP/belgium_rcps/rcp85")),pattern='tif',full.names = T)
belgium_stack85 <- stack(be85)
Combine habitat stacks with climate stacks for each RCP
scenario
fullstack26<-stack(be26,habitat_only_stack_be)
fullstack45<-stack(be45,habitat_only_stack_be)
fullstack85<-stack(be85,habitat_only_stack_be)
Create and export RCP risk maps for each RCP scenario
ens_pred_hab26<-raster::predict(fullstack26,lm_ens_hab,type="prob")
ens_pred_hab26_1<-1-ens_pred_hab26
crs(ens_pred_hab26_1)<-laea_grs80
writeRaster(ens_pred_hab26_1, filename=file.path(rasterOutput,paste("be_",taxonkey, "_rcp26.tif",sep="")), format="GTiff",overwrite=TRUE)
exportPDF(ens_pred_hab26_1,taxonkey,taxonName=taxonName,"rcp26.pdf")
null device
1
ens_pred_hab45<-raster::predict(fullstack45,lm_ens_hab,type="prob")
ens_pred_hab45_1<-1-ens_pred_hab45
crs(ens_pred_hab45_1)<-laea_grs80
writeRaster(ens_pred_hab45_1, filename=file.path(rasterOutput,paste("be_",taxonkey, "_rcp45.tif",sep="")), format="GTiff",overwrite=TRUE)
exportPDF(ens_pred_hab45_1,taxonkey,taxonName=taxonName,"rcp45.pdf")
null device
1
ens_pred_hab85<-raster::predict(fullstack85,lm_ens_hab,type="prob")
ens_pred_hab85_1<-1-ens_pred_hab85
crs(ens_pred_hab85_1)<-laea_grs80
writeRaster(ens_pred_hab85_1, filename=file.path(rasterOutput,paste("be_",taxonkey, "_rcp85.tif",sep="")), format="GTiff",overwrite=TRUE)
exportPDF(ens_pred_hab85_1,taxonkey,taxonName=taxonName,"rcp85.pdf")
null device
1
Display RCP risk maps
plot(ens_pred_hab26_1,breaks=brks, col=cols,lab.breaks=brks)
plot(ens_pred_hab45_1,breaks=brks, col=cols,lab.breaks=brks)
plot(ens_pred_hab85_1,breaks=brks, col=cols,lab.breaks=brks)
Create and export “difference maps”: the difference between
predicted risk by each RCP scenario and historical climate
null device
1
png
2

png
2


Output predictor data used for SDM
df4export<-cbind(occeu.full.data.df1,coordinates(occ.full.data))
write.csv(df4export, paste(genOutput,taxonkey,"_sdmdata.csv",sep=""))
Check spatial autocorrelation of residuals to assess whether
occurrence data should be thinned
derive residuals from unthinned model
predEns1<-lm_ens_hab$ens_model$pred
obs.numeric<-ifelse(predEns1$obs == "absent",0,1)
standardize residuals
stdres<-function(obs.numeric, yhat){
num<-obs.numeric-yhat
denom<-sqrt(yhat*(1-yhat))
return(num/denom)
}
hab.res<-stdres(obs.numeric,predEns1$present)
res.best.coords<-cbind(coordinates(occ.full.data),hab.res)
res.best.geo<-as.geodata(res.best.coords,coords.col=1:2,data.col = 3)
summary(res.best.geo) #note distance is in meters
Number of data points: 454
Coordinates summary
longitude latitude
min 2733429 1653675
max 5754770 5247500
Distance summary
min max
966.5814 4034634.4597
Data summary
Min. 1st Qu. Median Mean 3rd Qu. Max.
-2.60157202 -0.70462124 0.19909308 0.03617965 0.65557610 3.21251457
Check Morans I.
#If Moran's I is very low (<0.10), or not significant, do not need to thin occurrences.
library(ape)
res.best.df<-as.data.frame(res.best.coords)
occ.dists <- as.matrix(dist(cbind(res.best.df[1], res.best.df[2])))
occ.dists.inv <- 1/occ.dists
diag(occ.dists.inv) <- 0
Moran.I(res.best.df$hab.res,occ.dists.inv,scaled=TRUE,alternative="greater")
$observed
[1] -0.01447212
$expected
[1] -0.002207506
$sd
[1] 0.008547577
$p.value
[1] 0.9243372
Code for class conformal prediction function
Create confidence maps
brks <- seq(0, 1, by=0.1)
nb <- length(brks)-1
pal <- colorRampPalette(rev(brewer.pal(4, 'Spectral')))
cols<-pal(nb)
confidenceMaps<-function(x,taxonkey,taxonName,maptype){
pvals_dataframe<-get("x")
data.xyz <- pvals_dataframe[c("x","y","conf")]
rst <- rasterFromXYZ(data.xyz)
crs(rst)<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs")
plot(rst,breaks=brks, col=cols,lab.breaks=brks)
writeRaster(rst, filename=file.path(rasterOutput,paste("be_",taxonkey, "_",maptype,".tif",sep="")), format="GTiff",overwrite=TRUE)
exportPDF(rst,taxonkey,taxonName=taxonName,nameextension= paste(maptype,".pdf",sep=""))
}
hist.conf.map<-confidenceMaps(pvalsdf_hist,taxonkey,taxonName,maptype="hist_conf")

rcp26.conf.map<-confidenceMaps(pvalsdf_rcp26,taxonkey,taxonName,maptype="rcp26_conf")

rcp45.conf.map<-confidenceMaps(pvalsdf_rcp45,taxonkey,taxonName,maptype="rcp45_conf")

rcp85.conf.map<-confidenceMaps(pvalsdf_rcp85,taxonkey,taxonName,maptype="rcp85_conf")

Model evalution summary
meanResults<-summary(modelResults1)
eu_accuracy<-meanResults$statistics$Accuracy
eu_kappa<-meanResults$statistics$Kappa
write.csv(eu_accuracy,file=paste0(genOutput,taxonkey,"_accuracy.csv"))
write.csv(eu_kappa,file=paste0(genOutput,taxonkey,"_kappa.csv"))
# derive sensitivity and specificity
table(predEns1$pred,predEns1$obs)
absent present
absent 181 70
present 41 162
sensitivity(predEns1$pred,predEns1$obs)
[1] 0.8153153
specificity(predEns1$pred,predEns1$obs)
[1] 0.6982759
sens<-sensitivity(predEns1$pred,predEns1$obs)
spec<-specificity(predEns1$pred,predEns1$obs)
moransI<-Moran.I(res.best.df$hab.res,occ.dists.inv,scaled=TRUE,alternative="greater")
MoransI<-moransI$observed
ensembleEvaluation<-(lm_ens_hab$ens_model$results[2:5])
modelEvaluation<-cbind(sens,spec,MoransI,ensembleEvaluation)
write.csv(modelEvaluation,file=paste0(genOutput,taxonkey,"_ModelEval.csv"))
Generate and export response curves in order of variable
importance
topPreds <- variableImportance[with(variableImportance,order(-overall)),]
varNames<-rownames(topPreds)
# combine predictions from each model for each variable
partial_gbm<-function(x){
m.gbm<-pdp::partial(model_train_habitat$gbm$finalModel,pred.var=paste(x),train = occeu.full.data.df1,type="classification",
prob=TRUE,n.trees= model_train_habitat$gbm$finalModel$n.trees, which.class = 2,grid.resolution=nrow(occeu.full.data.df1))
}
gbm.partial.list<-lapply(varNames,partial_gbm)
partial_glm<-function(x){
m.glm<-pdp::partial(model_train_habitat$glm$finalModel,pred.var=paste(x),train = occeu.full.data.df1,type="classification",
prob=TRUE,which.class = 2,grid.resolution=nrow(occeu.full.data.df1))
}
glm.partial.list<-lapply(varNames,partial_glm)
partial_rf<-function(x){
pdp::partial(model_train_habitat$rf$finalModel,pred.var=paste(x),train = occeu.full.data.df1,type="classification",
prob=TRUE,which.class = 2,grid.resolution=nrow(occeu.full.data.df1))
}
rf.partial.list<-lapply(varNames,partial_rf)
partial_knn<-function(x){
m.knn<-pdp::partial(model_train_habitat$knn,pred.var=paste(x),train = occeu.full.data.df1,type="classification",
prob=TRUE,which.class = 2,grid.resolution=nrow(occeu.full.data.df1))
}
knn.partial.list<-lapply(varNames,partial_knn)
partial_mars<-function(x){
m.mars<-pdp::partial(model_train_habitat$earth$finalModel,pred.var=paste(x),train = occeu.full.data.df1,type="classification",
prob=TRUE,which.class = 2,grid.resolution=nrow(occeu.full.data.df1))
}
mars.partial.list<-lapply(varNames,partial_mars)
names(glm.partial.list)<-varNames
names(gbm.partial.list)<-varNames
names(rf.partial.list)<-varNames
names(knn.partial.list)<-varNames
names(mars.partial.list)<-varNames
glm.partial.df<-as.data.frame(glm.partial.list)
gbm.partial.df<-as.data.frame(gbm.partial.list)
rf.partial.df<-as.data.frame(rf.partial.list)
knn.partial.df<-as.data.frame(knn.partial.list)
mars.partial.df<-as.data.frame(mars.partial.list)
predx<-data.frame()
predy<-data.frame()
for (i in varNames){
predx <- rbind(predx, as.data.frame(paste(i,i,sep=".")))
predy<- rbind(predy,as.data.frame(paste(i,"yhat",sep=".")))
}
names(predx)<-""
names(predy)<-""
predx1<-t(predx)
predy1<-t(predy)
glm.partial.df$data<-'GLM'
gbm.partial.df$data<-'GBM'
rf.partial.df$data<-'RF'
knn.partial.df$data<-'KNN'
mars.partial.df$data<-'MARS'
all_dfs<-rbind.data.frame(glm.partial.df,gbm.partial.df,rf.partial.df,knn.partial.df,mars.partial.df)
responseCurves<-function(x,y) {
colors <- c("GLM" = "gray", "GBM"="red","RF"="blueviolet","KNN"="chartreuse", "MARS"= "hotpink")
ggplot(all_dfs,(aes(x=.data[[x]],y=.data[[y]]))) +
geom_line(aes(color = data), size =1.2, position=position_dodge(width=0.2))+
theme_bw()+
labs(y="Partial probability", x= gsub("//..*","",x),color="Legend") +
scale_color_manual(values = colors)
}
allplots<-map2(predx1,predy1, ~responseCurves(.x,.y))
#export plots as PNGs
for(i in seq_along(allplots)){
png(paste0(genOutput,taxonkey,"_",i,".png"),width = 5, height = 5, units = "in",res=300)
print(allplots[[i]])
dev.off()
}
Plot response curves
par(mfrow=c(3,3))
for(i in seq_along(allplots)){
print(allplots[[i]])
}












---
title: "Workflow to create risk and confidence maps for `r params$species`"
author: "Amy J.S Davis"
date: "`r Sys.Date()`"
output:
    html_notebook:
     toc: true
     toc_depth: 3
      
params:
  species: "Vaccinium corymbosum"
  GBIF_taxonkey: "2882849"
editor_options: 
  chunk_output_type: inline

  
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE,error = TRUE)
```



```{r load libraries,echo=FALSE,message=FALSE}
options("rgdal_show_exportToProj4_warnings"="none")
library(sdm)
library(rgdal)
library(raster)
library(spocc)
library(sp)
library(rgeos)
library(SDMPlay)
library(tidyverse)
library(caret)
library(caretEnsemble)
library(gbm)
library(trias)
library(rgbif)
library(RColorBrewer)
library(maptools)
library(dismo)
library(sf)
library(geoR)
library(pdp)
library(purrr)
library(here)
library(CoordinateCleaner)
library(humboldt)
library(knitr)
library(kableExtra)

```

### Create a global SDM for weighting pseudoabsences 

#### Read in the global occurrence data for the target species that was downloaded using "global_download.Rmd"

```{r retrieve global occurrence file, echo=FALSE}
#taxonName <- readline(prompt="Enter species name: ")
 
 readname = function()# Get the species name
 { 
   params$species
 }
 species=readname()
 species
 
 readkey = function()# Get the taxon key
 { 
   params$GBIF_taxonkey
 }
 GBIF_taxonkey=readkey()
 GBIF_taxonkey

####if not rendering html put species info here
   species<- "Vaccinium corymbosum"
   GBIF_taxonkey<- "2882849"

taxonName<-species
taxonkey<-GBIF_taxonkey
gbif_filename<- paste(taxonName,".csv",sep="") #add name of species being modeled (this should be the same name in your original species list)
                      

global <- read.csv(file = here("data", "raw", gbif_filename))
 
```
#### Specify paths for output (defaults to file structure in ReadMe)
```{r defineOutputPaths, echo=FALSE}
rasterOutput<-here("data/processed/geotiffs/")
pdfOutput<-here("data/processed/pdf/")
genOutput<-here("data/processed/general//")
```
### Filter global occurrence data 

```{r decimal places,echo=FALSE}
decimalplaces <- function(x) {
  if (abs(x - round(x)) > .Machine$double.eps^0.5) {
    nchar(strsplit(sub('0+$', '', as.character(x)), ".", fixed = TRUE)[[1]][[2]])
  } else {
    return(0)
  }
}
```

```{r filter global occurrence data}

#remove unverified records
identificationVerificationStatus_to_discard <- c("unverified", "unvalidated","not able to validate","control could not be conclusive due to insufficient knowledge")

#enter value for max coordinate uncertainty in meters.

global.occ<-global %>%
  filter(speciesKey==taxonkey) %>%   #using taxonKey filters out accepted synonyms
  filter(is.na(coordinateUncertaintyInMeters)| coordinateUncertaintyInMeters< 1000) %>%
  filter(!str_to_lower(identificationVerificationStatus) %in% identificationVerificationStatus_to_discard)

global.occ$lon_dplaces<-sapply(global.occ$decimalLongitude, function(x) decimalplaces(x))
global.occ$lat_dplaces<-sapply(global.occ$decimalLatitude, function(x) decimalplaces(x))
global.occ[global.occ$lon_dplaces < 4& global.occ$lat_dplaces < 4 , ]<-NA
global.occ<-global.occ[ which(!is.na(global.occ$lon_dplaces)),]
global.occ<-within(global.occ,rm("lon_dplaces","lat_dplaces"))
global.occ<-global.occ[which( global.occ$year > 1975 & global.occ$year < 2020),]
``` 

#### Convert global occurrences to spatial points needed for modelling

```{r occ to spatialData,echo=FALSE}
global.occ<-global.occ[c("decimalLongitude", "decimalLatitude")]
coordinates(global.occ)<- c("decimalLongitude", "decimalLatitude")
plot(global.occ)

global.occ.LL<-data.frame(global.occ)[c(1:2)] #extract long and lat 

```
#### Flag and remove centroids and invalid georeferenced points
```{r remove invalid_pts_and_centroids,echo=FALSE}
global.occ.LL$species<-rep("Vaccinium corymbosum",nrow(global.occ.LL))  

flags_report<-clean_coordinates(x = global.occ.LL, lon= "decimalLongitude", lat= "decimalLatitude",
                          tests = c("capitals", 
                          "centroids","gbif", "institutions", 
                           "seas", "zeros"))

cleaned<-clean_coordinates(x = global.occ.LL, lon= "decimalLongitude", lat= "decimalLatitude",
                          tests = c("capitals", 
                          "centroids","gbif", "institutions", 
                           "seas", "zeros"),value="clean")
global.occ.LL.cleaned<-subset(cleaned,select= -c(species))
```
#### Create global rasterstack using CHELSA data for model building

```{r importChelsaData,echo=TRUE}
globalclimrasters <- list.files((here("./data/external/climate/trias_CHELSA")),pattern='tif',full.names = T) #import CHELSA data
globalclimpreds <- stack(globalclimrasters)
```

#### Use SDMtab command from the SDMPlay package to remove duplicates per grid cell

```{r remove global duplicates,echo=TRUE}

global.SDMtable<- SDMPlay:::SDMtab(global.occ.LL.cleaned, globalclimpreds, unique.data = TRUE,background.nb= 0) #
numb.pseudoabs <-length(global.SDMtable$id) #sets the number of pseudoabsences equal to number of unique presences


global.occ.sp<-global.SDMtable[c("longitude", "latitude")]
coordinates(global.occ.sp)<- c("longitude", "latitude")
global.occ.sp$species<- rep(1,length(global.occ.sp$latitude)) #adds columns indicating species presence needed for modeling
```

### Select wwf ecoregions that contain global occurrence points

```{r select ecoregions,cache= TRUE,echo=FALSE}
wwf_eco<-shapefile(here("./data/external/GIS/official/wwf_terr_ecos.shp"))
crs(global.occ.sp)<-CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
occ_ecoIntersect <- over(wwf_eco,global.occ.sp) 
wwf_ecoSub1 <- wwf_eco[!is.na(occ_ecoIntersect$species),]
```

### Specify and import bias grids for relevant taxonomic group (e.g vascular plants) 

```{r importBiasgrid}
biasgrid<-raster(here("./data/external/bias_grids/final/trias/plants_1deg_min5.tif"))### specify appropriate bias grid here

```

### Subset bias grid by ecoregions containing occurrence points

```{r subsetBiasgrid}
ext_wwf_ecoSub<-extent(wwf_ecoSub1)
biasgrid_crop<-crop(biasgrid,ext_wwf_ecoSub)
biasgrid_sub<-mask(biasgrid_crop,wwf_ecoSub1)
 plot(biasgrid_sub)
 plot(wwf_ecoSub1,add=TRUE)
 
```

###  Use randomPoints function from dismo package to locate pseduobasences within the bias grid subset 

```{r locatePseudo_absences}
set.seed(728)
global_points<-randomPoints(biasgrid_sub, numb.pseudoabs, global.occ.sp, ext=NULL, extf=1.1, excludep=TRUE, prob=FALSE, cellnumbers=FALSE, tryf=70, warn=2, lonlatCorrection=TRUE) 
# will throw a warning if randomPoints generated is less than numb.pseudoabs. If this happens, increase the number of tryf or ignore bias grid and sample from ecoregion only.
```

### OPTIONAL: Sample from ecoregion only 
```{r locatePseudo_absences_inEcoregions}
#  wwf_grid<-raster(here("./data/external/GIS/wwf_ecoregions_v1.tif"))
#  ecoregions_raster<-mask(wwf_grid,wwf_ecoSub1)
#  set.seed(768)
#  global_points<-randomPoints(ecoregions_raster, numb.pseudoabs, global.occ.sp, ext=NULL, extf=1.1, excludep=TRUE, prob=FALSE, cellnumbers=FALSE, tryf=150, warn=2, lonlatCorrection=TRUE) 
```

### Extract generated pseudo absences and create presence-pseudobasence dataset 

```{r create_presence_absence_dataset,echo=FALSE}
global_pseudoAbs<-as.data.frame(global_points)
coordinates(global_pseudoAbs)<-c("x","y")
crs(global_pseudoAbs)<-CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
global_pseudoAbs$species<-rep(0,length(global_pseudoAbs$x))
global_presabs<- spRbind(global.occ.sp,global_pseudoAbs) # join pseudoabsences with presences (occurrences)
writeOGR(global_presabs, dsn=(here("./data/processed/sdm_occ_pts/global")), layer=paste(taxonName,"_globalSDM"), driver = "ESRI Shapefile", overwrite_layer=TRUE)
```

### Extract climate data for global scale modelling

```{r extractClimateData,message=FALSE}

global.data <- sdmData(species~.,train=global_presabs, predictors=globalclimpreds)
global.data.df<-as.data.frame(global.data)
```

### Identify highly correlated predictors

```{r identifyCorrelatedPreds,echo=FALSE}
correlationMatrix<-cor(global.data.df[,-c(1)])
highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.7,exact=TRUE,names=TRUE)
preds<-as.data.frame(highlyCorrelated)
kable(preds) %>%
kable_styling(bootstrap_options = c("striped"))
```

### Remove highly correlated predictors from dataframe 

```{r removeCorrelatedPreds,echo=FALSE,message=FALSE,warning=FALSE}
global.data.df.subset<-select (global.data.df,-c(highlyCorrelated))
global.data.df.subset<-within(global.data.df.subset,rm("rID"))
global.data.df.subset$species<-as.factor(global.data.df.subset$species) #later steps require non numeric dependent variable
levels(global.data.df.subset$species)<-c("absent","present")
global.data.df.subset$species <- relevel(global.data.df.subset$species, ref = "present")
```

### Correct global clim preds values from integer format

```{r correctPreds,echo=FALSE}
divide10<-function(x){
  value<-x/10
  return(value)
}


global.data.df.uncor<-cbind("species"=  global.data.df.subset$species,divide10(global.data.df.subset[,-c(1)]))
```

### Use caretList from Caret package to run multiple machine learning models

```{r run_globalModel,cache=TRUE,results= 'hide',message=FALSE,warning=FALSE}
control <- trainControl(method="repeatedcv",number=10, repeats=10, savePredictions="final", preProc=c("center","scale"),classProbs=TRUE)
classList1 <- c("glm","gbm","rf","knn", "earth")
set.seed(457)
hideoutput<-capture.output(
global_train <- caretList(
  species~., data= global.data.df.uncor,
  trControl=control,
  methodList=classList1
))
```
```{r print_globalModelResults,warning=FALSE}
modelResults<-resamples(global_train)
summary(modelResults)# displays accuracy of each model
modelCor(resamples(global_train))# shows correlation among models.Weakly correlated algorithms are persuasive for stacking them in ensemble.
```

### Create ensemble model (combine individual models into one) 

```{r run global_ensemble}
set.seed(478)
global_stack <- caretStack(
  global_train, 
  method="glm",
  trControl=trainControl(method="cv",
                         number=10,
                         savePredictions= "final"  ))
print(global_stack)
```

### Create rasterstack of CHELSA climate data clipped to European modeling extent for prediction

```{r prepareEU_chelsaData,echo=TRUE}
euclimrasters <- list.files((here("./data/external/climate/chelsa_eu_clips")),pattern='tif',full.names = T)
eu_climpreds<-stack(euclimrasters)
eu_climpreds.10<-divide10(eu_climpreds) # correct for integer format of Chelsa preds 
```

### Restrict global model  prediction to the extent of Europe

```{r predictGlobal, cache=TRUE,echo=FALSE}
 global_model<-raster::predict(eu_climpreds.10,global_stack,type="prob")
  
brks <- seq(0, 1, by=0.1) 
  nb <- length(brks)-1 
  pal <- colorRampPalette(rev(brewer.pal(8, 'Spectral')))
  cols<-pal(nb)
  plot(global_model, breaks=brks, col=cols,lab.breaks=brks) 
 
  writeRaster(global_model, filename=file.path(rasterOutput,paste("GlobalEnsEU_",taxonkey, ".tif",sep="")), format="GTiff",overwrite=TRUE) 
```
### Create European subset
```{r subset_eu_occurrences,echo=FALSE}
euboundary<-shapefile(here("./data/external/GIS/EUROPE.shp")) 
 ext<-extent(euboundary)
 #crs(global.occ.sp)<-CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
 occ_euIntersect <- over(global.occ.sp,euboundary,returnList=TRUE) 
 occ.eu  <- global.occ.sp[!is.na(occ_euIntersect),]
plot(euboundary)
plot(occ.eu,add=TRUE)
```

### Create RasterStack of European climate variables from RMI

```{r create euclimate_stack}  
rmiclimrasters <- list.files((here("./data/external/climate/rmi_corrected")),pattern='tif',full.names = T) 
rmiclimpreds <- stack(rmiclimrasters)

```


### Use SDMtab command from the SDMPlay package to remove duplicates per 1km grid cell 
```{r remove_eu_duplicates,message=FALSE,error=TRUE}
 # convert a single raster to LL for use in SDMtab 
LLproj<-CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
rmiclimpredLL<-projectRaster(rmiclimpreds$anntemp_eea, crs=LLproj)
rmiclimpredLL2<-projectRaster(rmiclimpreds$annprecip_eea, crs=LLproj)
rmipredstackLL<-stack(rmiclimpredLL,rmiclimpredLL2)

# Create SpatialPoints dataframe needed for SDMtab command. 
euocc<-occ.eu@coords
 euocc1<-data.frame(euocc)[c(1:2)] 
 names(euocc1)<-c("longitude", "latitude")

 
#run SDMtab command to remove duplicates
euocc.SDMtable<- SDMPlay:::SDMtab(euocc1, rmipredstackLL, unique.data = TRUE,same=TRUE)
numb.pseudoabs <- length(euocc.SDMtable$id) 

```

### Transform eu occurrence dataset with unique presences back to a SpatialPoints dataframe. 

```{r eu_occurences_to_spatialDF}
euocc<-euocc.SDMtable[c("longitude", "latitude")]
coordinates(euocc)<- c("longitude", "latitude")
euocc$occ<- rep(1,length(euocc$latitude))#adds columns indicating species presence needed for modeling
proj4string(euocc)<-CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")#specify here the existing projection of the data

rmiclipreds<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,-0,-0,-0,0 +units=m +no_defs ")
rmiproj<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,-0,-0,-0,0 +units=m +no_defs")
euocc1<-spTransform(euocc,rmiproj)
```

### Clip bias grid to European extent

```{r clip_biasgrid}
studyextent<-euboundary
ecoregions_eu<-crop(biasgrid_sub,studyextent)
biasgrid_eu<-projectRaster(ecoregions_eu,rmiclimpreds)
plot(biasgrid_eu)
plot(studyextent,add=TRUE)
```

### Mask areas of high habitat suitability from global climate model
```{r mask highSuitability}
#Read in global raster from earlier step if needed

#globalfilename<-paste(here("./data/processed/geotiffs/GlobalEnsEU_"), taxonkey, ".tif",sep="")
#global_model<-raster("C:/Users/Amy.Davis/OneDrive - Ipsos/Desktop/xps15/risk-modelling-and-mapping/data/processed/geotiffs/GlobalEnsEU_2882849.tif")


wgs84_gcs<-CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
crs(global_model)<-wgs84_gcs
m<-global_model >.5
global_mask<-mask(global_model,m,maskvalue=TRUE)
global_masked_proj<-projectRaster(global_mask,biasgrid_eu)
```

### Overlay low predicted habitat suitability on bias grid to exclude low sampled areas 

```{r create_pseudoSamplingArea}
pseudoSamplingArea<-mask(biasgrid_eu,global_masked_proj)
plot(pseudoSamplingArea)
```

### Randomly locate pseudo absences within "pseudoSamplingArea" 
```{r create_eu_pseudoAbsences}
set.seed=120
euocc_points<-randomPoints(pseudoSamplingArea, numb.pseudoabs, euocc, ext=NULL, extf=1.1, excludep=TRUE, prob=FALSE,cellnumbers=FALSE, tryf=50, warn=2, lonlatCorrection=TRUE)


euocc_pseudoAbs<-as.data.frame(euocc_points)
coordinates(euocc_pseudoAbs)<-c("x","y")
euocc_pseudoAbs$occ<-rep(0,length(euocc_pseudoAbs$x))

crs(euocc_pseudoAbs)<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,-0,-0,-0,0 +units=m +no_defs ")

```

### Join eu occurrences with pseudo absences to create eu level presence-pseudoabsence dataset

```{r create eu_presence_absence dataset}
eu_presabs<- spRbind(euocc1,euocc_pseudoAbs)

crs(eu_presabs)<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,-0,-0,-0,0 +units=m +no_defs ")
#confirm equal number of presences and absences
table(eu_presabs@data$occ)

#export occ points as shapefile 
writeOGR(eu_presabs, dsn=(here("./data/processed/sdm_occ_pts/europe")), layer=paste(taxonName,"_EUpresabs",sep=""), driver="ESRI Shapefile", overwrite_layer = TRUE) 

```

### Create SDM data object for European data 

```{r create eu_sdmdata,echo=FALSE}
occeu.sdmdata <- sdmData(occ~.,train=eu_presabs, predictors=rmiclimpreds) 
occeu.sdmdata
```

### Identify and remove highly correlated predictors from RMI rasterstack

```{r removeCorrelated_RMI_Preds}
# convert eu data to dataframe
occeu.sdmdata.df<-as.data.frame(occeu.sdmdata)

#identify highly correlated predictors drop first two columns which always be rID and occ
correlationMatrix <- cor(occeu.sdmdata.df[,-c(1:2)])

# find attributes that are highly corrected 
highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.7,exact=TRUE,names=TRUE)
eupreds<-as.data.frame(highlyCorrelated)
kable(eupreds) %>%
kable_styling(bootstrap_options = c("striped"))
rmiclimpreds_uncor<-dropLayer(rmiclimpreds,highlyCorrelated)
```

### Add habitat and anthropogenic predictors

```{r add_habitatPreds,echo=FALSE}
habitat<-list.files((here("./data/external/habitat/landcover")),pattern='tif',full.names = T)
habitat_stack<-stack(habitat)
fullstack<-stack(rmiclimpreds_uncor,habitat_stack) #combine uncorrelated climate variable selected earlier with habitat

# uncomment below for vertebrates and invertebrates to include distance to water
#dist2water<-raster(here("./data/external/habitat/distance2water_EEA_1km.tif"))
#dist2water<-extend(dist2water,habitat_stack)
#fullstack<-stack(rmiclimpreds_uncor,habitat_stack,dist2water)

# clip fullstack to belgium extent (if using another country, replace with country boundary shapefile)
country<-shapefile(here("./data/external/GIS/belgium_boundary.shp"))
fullstack_crop<-crop(fullstack,country)
fullstack_be<-mask(fullstack_crop,country)

occ.full.data <- sdmData(occ~.,train=eu_presabs, predictors=fullstack) 
occ.full.data

# convert eu data to dataframe
occeu.full.data.df<-as.data.frame(occ.full.data)
occeu.full.data.df<-occeu.full.data.df[,-1]

```

### Remove highly correlated predictors from the habitat/anthropogenic/climate stack (full stack)

```{r removeCorrelatedPreds_from_fullStack}
# first remove and identify low variance predictors
nzv_preds<-nearZeroVar(occeu.full.data.df,names=TRUE)
occeu.full.data.df1<-select (occeu.full.data.df,-c(nzv_preds))

# find attributes that are highly corrected 
correlationMatrix <- cor(occeu.full.data.df1[,-1])
highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.7,exact=TRUE,names=TRUE)
eupreds1<-as.data.frame(highlyCorrelated)
kable(eupreds1) %>%
kable_styling(bootstrap_options = c("striped"))

# print names of highly correlated attributes
print(highlyCorrelated)
occeu.full.data.df1<-select (occeu.full.data.df1,-c(highlyCorrelated))
```

### Build models with climate and habitat data


```{r run_euModel,message=FALSE,warning=FALSE}
# uncomment 2nd control options for  LOOCV (leave one out cross validation, which is aka as "jacknife" ) which should be used when occurrences are smaller than n=10 for each predictor in the model)
occeu.full.data.df1$occ<-as.factor(occeu.full.data.df1$occ)
levels(occeu.full.data.df1$occ)<-c("absent","present")
#control<-trainControl(method="LOOCV",savePredictions="final", preProc=c("center","scale"),classProbs=TRUE)
control <- trainControl(method="repeatedcv",number=10, repeats=10,savePredictions="final",classProbs=TRUE)
mylist<-list(
  glm =caretModelSpec(method = "glm",maxit=100),
  gbm= caretModelSpec(method = "gbm"),
  rf = caretModelSpec(method = "rf", importance = TRUE),
  knn = caretModelSpec(method = "knn"),
  earth= caretModelSpec(method = "earth"))

set.seed(157)
hideout<-capture.output(model_train_habitat <- caretList(
  occ~., data=occeu.full.data.df1,
  trControl=control,
   tuneList=mylist))



```
### Display model evaluation statistics

```{r show_euModel_accuracy}
modelResults1<-resamples(model_train_habitat)
summary(modelResults1)
modelCor(resamples(model_train_habitat))
```





### Create ensemble model 

```{r run eu_ensemble}

set.seed(458)
control <- trainControl(method="cv",number=10, savePredictions="final",classProbs=TRUE)
model_train_habitat1 <- caretList(
  occ~., data=occeu.full.data.df1,
  trControl=control,
   tuneList=mylist)

modelResults1<-resamples(model_train_habitat1)
summary(modelResults1)
modelCor(resamples(model_train_habitat1))

set.seed(456)
hideoutput<-capture.output(
lm_ens_hab<-caretEnsemble(model_train_habitat1, trControl=trainControl(method="cv",                                                                number=10,savePredictions= "final",classProbs = TRUE)))

lm_ens_hab


```

```{r get_var_importance,echo=TRUE}
variableImportance<-varImp(lm_ens_hab)
kable(variableImportance,digits=2,caption="Variable Importance") %>%
kable_styling(bootstrap_options = c("striped"))
write.csv(variableImportance,file = paste0(genOutput,taxonkey,"_varImp.csv"))
```


```{r export_toPDF,echo=FALSE}
exportPDF<-function(rst,taxonkey,taxonName,nameextension,is.diff="FALSE"){
  filename=file.path(pdfOutput,paste("be_",taxonkey, "_",nameextension,sep=""))
  pdf(file=filename,width=10,height=8,paper="a4r")
  par(bty="n")#to turn off box around plot
  ifelse(is.diff=="TRUE", brks<-seq(-1, 1, by=0.25), brks <- seq(0, 1, by=0.1)) 
  nb <- length(brks)-1 
  pal <- colorRampPalette(rev(brewer.pal(11, 'Spectral')))
  cols<-pal(nb)
  maintitle<-paste(taxonName,taxonkey,"_",nameextension, sep= " ")
  plot(rst, breaks=brks, col=cols,main=maintitle, lab.breaks=brks,axes=FALSE)
  dev.off() 
} 
```

```{r export_toPNG,echo=FALSE}
exportPNG<-function(rst,taxonkey,taxonName,nameextension,is.diff="FALSE"){
  filename=file.path(pdfOutput,paste("be_",taxonkey, "_",nameextension,sep=""))
  png(file=filename)
  par(bty="n")#to turn off box around plot
  ifelse(is.diff=="TRUE", brks<-seq(-1, 1, by=0.25), brks <- seq(0, 1, by=0.1)) 
  nb <- length(brks)-1 
  pal <- colorRampPalette(rev(brewer.pal(11, 'Spectral')))
  cols<-pal(nb)
  maintitle<-paste(taxonName,taxonkey,"_",nameextension, sep= " ")
  plot(rst, breaks=brks, col=cols,main=maintitle, lab.breaks=brks,axes=FALSE)
  dev.off() 
} 
```

###  Use EU level ensemble model (ensModel) to predict for Europe

```{r ensModel_predictEU,results= "hide", cache=TRUE}
 ens_pred_hab_eu<-raster::predict(fullstack,lm_ens_hab,type="prob")
 ens_pred_hab_eu1<-1-ens_pred_hab_eu
 
 writeRaster(ens_pred_hab_eu1,filename=file.path(rasterOutput,paste("eu_",taxonkey, "_hist.tif",sep="")) , format="GTiff",overwrite=TRUE)

exportPNG(ens_pred_hab_eu1,taxonkey,taxonName=taxonName,"hist.png")
  
```
```{r Plot_ensModel_eu}

  brks <- seq(0, 1, by=0.1) 
  nb <- length(brks)-1 
  pal <- colorRampPalette(rev(brewer.pal(8, 'Spectral')))
   cols<-pal(nb)
  plot(ens_pred_hab_eu1, breaks=brks, col=cols,lab.breaks=brks)
 
```

### Predict for Belgium only using EU-level ensemble model to forecast risk under historical climate conditions

```{r ensModel_predictBE,results="hide"}
laea_grs80<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,-0,-0,-0,0 +units=m +no_defs ")
ens_pred_hab<-raster::predict(fullstack_be,lm_ens_hab,type="prob")
ens_pred_hab1<-1-ens_pred_hab
crs(ens_pred_hab1)<-laea_grs80
writeRaster(ens_pred_hab1, filename=file.path(rasterOutput,paste("be_",taxonkey, "_hist.tif",sep="")),  format="GTiff",overwrite=TRUE)
exportPDF(ens_pred_hab1,taxonkey,taxonName=taxonName,"eu_hist.pdf")
exportPNG(ens_pred_hab1,taxonkey,taxonName=taxonName,"eu_hist.png")
```

```{r Plot_ensModel_be}
brks <- seq(0, 1, by=0.1) 
  nb <- length(brks)-1 
  pal <- colorRampPalette(rev(brewer.pal(11, 'Spectral')))
  cols<-pal(nb)
  plot(ens_pred_hab1, breaks=brks, col=cols,lab.breaks=brks)
   plot(euocc,pch=3,cex=0.75,add=TRUE)

```

### Clip habitat stack to Belgium 
#### use this stack for vertebrates or anything likely to be influenced by water availability
```{r create BE_habitatStack}
# habitat_stack<-stack(habitat,dist2water)
# habitat_only_stack<-crop(habitat_stack,country)
# habitat_only_stack_be<-crop(habitat_only_stack,country)
```
## use this stack for plants
```{r create BE_habitatStack_plants}
habitat_stack<-stack(habitat)
habitat_only_stack<-crop(habitat_stack,country)
habitat_only_stack_be<-crop(habitat_only_stack,country)
```

### Create individual RCP (2.6, 4.5, 8.5) climate data stacks for Belgium

```{r create BE_RCP_stacks}
be26 <- list.files((here("./data/external/climate/byEEA_finalRCP/belgium_rcps/rcp26")),pattern='tif',full.names = T)
belgium_stack26 <- stack(be26)

be45 <- list.files((here("./data/external/climate/byEEA_finalRCP/belgium_rcps/rcp45")),pattern='tif',full.names = T)
belgium_stack45 <- stack(be45)

be85 <- list.files((here("./data/external/climate/byEEA_finalRCP/belgium_rcps/rcp85")),pattern='tif',full.names = T)
belgium_stack85 <- stack(be85)
```

### Combine habitat stacks with climate stacks for each RCP scenario

```{r combine habitat_RCP_stacks}
fullstack26<-stack(be26,habitat_only_stack_be)
fullstack45<-stack(be45,habitat_only_stack_be)
fullstack85<-stack(be85,habitat_only_stack_be)
```

### Create and export RCP risk maps for each RCP scenario

```{r create_RCP_risk_maps}
ens_pred_hab26<-raster::predict(fullstack26,lm_ens_hab,type="prob")
ens_pred_hab26_1<-1-ens_pred_hab26
crs(ens_pred_hab26_1)<-laea_grs80
writeRaster(ens_pred_hab26_1, filename=file.path(rasterOutput,paste("be_",taxonkey, "_rcp26.tif",sep="")), format="GTiff",overwrite=TRUE) 
exportPDF(ens_pred_hab26_1,taxonkey,taxonName=taxonName,"rcp26.pdf")

ens_pred_hab45<-raster::predict(fullstack45,lm_ens_hab,type="prob")
ens_pred_hab45_1<-1-ens_pred_hab45
crs(ens_pred_hab45_1)<-laea_grs80
writeRaster(ens_pred_hab45_1, filename=file.path(rasterOutput,paste("be_",taxonkey, "_rcp45.tif",sep="")), format="GTiff",overwrite=TRUE) 
exportPDF(ens_pred_hab45_1,taxonkey,taxonName=taxonName,"rcp45.pdf")
ens_pred_hab85<-raster::predict(fullstack85,lm_ens_hab,type="prob")
ens_pred_hab85_1<-1-ens_pred_hab85
crs(ens_pred_hab85_1)<-laea_grs80
writeRaster(ens_pred_hab85_1, filename=file.path(rasterOutput,paste("be_",taxonkey, "_rcp85.tif",sep="")), format="GTiff",overwrite=TRUE) 
exportPDF(ens_pred_hab85_1,taxonkey,taxonName=taxonName,"rcp85.pdf")

```

### Display RCP risk maps

```{r, rcp_riskMaps}
plot(ens_pred_hab26_1,breaks=brks, col=cols,lab.breaks=brks)
plot(ens_pred_hab45_1,breaks=brks, col=cols,lab.breaks=brks)
plot(ens_pred_hab85_1,breaks=brks, col=cols,lab.breaks=brks)
```

### Create and export "difference maps": the difference between predicted risk by each RCP scenario and historical climate

```{r create_difference_maps,echo=FALSE}
hist26_diff_hab<-overlay(ens_pred_hab26_1, ens_pred_hab1, fun=function(r1,r2){return(r1-r2)})
writeRaster(hist26_diff_hab,filename=file.path(rasterOutput,paste("be_",taxonkey, "_rcp26_diff.tif",sep="")) , format="GTiff",overwrite=TRUE) 
exportPDF(hist26_diff_hab,taxonkey,taxonName=taxonName,"rcp26_diff.pdf","TRUE")
plot(hist26_diff_hab)

hist45_diff_hab<-overlay(ens_pred_hab45_1, ens_pred_hab1, fun=function(r1,r2){return(r1-r2)})
writeRaster(hist45_diff_hab,filename=file.path(rasterOutput,paste("be_",taxonkey, "_rcp45_diff.tif",sep="")), format="GTiff",overwrite=TRUE) 
exportPDF(hist45_diff_hab,taxonkey,taxonName=taxonName,"rcp45_diff.pdf","TRUE")
plot(hist45_diff_hab)

hist85_diff_hab<-overlay(ens_pred_hab85_1, ens_pred_hab1, fun=function(r1,r2){return(r1-r2)})
writeRaster(hist85_diff_hab, filename=file.path(rasterOutput,paste("be_",taxonkey, "_rcp_85_diff.tif",sep="")), format="GTiff",overwrite=TRUE) 
exportPDF(hist85_diff_hab,taxonkey,taxonName=taxonName,"rcp85_diff.pdf","TRUE")
plot(hist85_diff_hab)

```


### Output predictor data used for SDM

```{r export_SDM_predictorData}
df4export<-cbind(occeu.full.data.df1,coordinates(occ.full.data))
write.csv(df4export, paste(genOutput,taxonkey,"_sdmdata.csv",sep=""))
```

### Check spatial autocorrelation of residuals to assess whether occurrence data should be thinned
#### derive residuals from unthinned model
```{r deriveResiduals}
predEns1<-lm_ens_hab$ens_model$pred
obs.numeric<-ifelse(predEns1$obs == "absent",0,1)
```

#### standardize residuals

```{r standardize_residuals}
stdres<-function(obs.numeric, yhat){
  num<-obs.numeric-yhat
  denom<-sqrt(yhat*(1-yhat))
  return(num/denom)
}
hab.res<-stdres(obs.numeric,predEns1$present)


res.best.coords<-cbind(coordinates(occ.full.data),hab.res)
res.best.geo<-as.geodata(res.best.coords,coords.col=1:2,data.col = 3)
summary(res.best.geo) #note distance is in meters
```

### Check Morans I.

```{r residual_MoransI}
#If Moran's I is very low (<0.10), or not significant, do not need to thin occurrences.
library(ape)
res.best.df<-as.data.frame(res.best.coords)
occ.dists <- as.matrix(dist(cbind(res.best.df[1], res.best.df[2])))
occ.dists.inv <- 1/occ.dists
diag(occ.dists.inv) <- 0
Moran.I(res.best.df$hab.res,occ.dists.inv,scaled=TRUE,alternative="greater")
```

```{r conformalPredictionfunctions,echo=FALSE}
# functions needed for conformal prediction function


GetLength<-function(x,y){
length(x[which(x >= y)])
}




CPconf<-function(pA,pB,confidence){
  if(pA > confidence && pB< confidence){
    predClass<-"classA"
  }else if(pA < confidence && pB> confidence){
    predClass<-"classB"
  }else if(pA< confidence && pB< confidence){
    predClass<-"noClass"
  }else{
    predClass<-"bothClasses"
    
    return(predClass)
  }}


#function to calculate confidence of each prediction
get.confidence<-function(pvalA,pvalB){
  secondHighest<-ifelse(pvalA>pvalB,pvalB,pvalA)
  conf<-(1-secondHighest)
  return(conf)
}

forcedCp<-function(pvalA,pvalB){
  ifelse(pvalA>pvalB,"presence","absence")
}

extractVals<-function(predras){
  library(raster)
  vals <-  raster::values(predras)
  coord <-  raster::xyFromCell(predras,1:ncell(predras))
  raster_fitted <- cbind(coord,vals)
  raster_fitted.df<-as.data.frame(raster_fitted)
  raster_fitted.df1<-na.omit(raster_fitted.df)
  raster_fitted.df1$absence<-raster_fitted.df1$vals
  raster_fitted.df1$presence<- (1-raster_fitted.df1$absence)
  return(raster_fitted.df1)
}
```

### Code for class conformal prediction function

```{r ClassConformalPrediction,echo=FALSE}
classConformalPrediction<-function(x,y){
ens_results<- get("x")
ens_calib<-ens_results$ens_model$pred
calibPresence<-subset(ens_calib$present,ens_calib$obs=='present',)
calibAbsence<-subset(ens_calib$absent,ens_calib$obs=='absent',)
predicted.values<-extractVals(y)

testPresence<-predicted.values$presence
testAbsence<-predicted.values$absence

#derive p.Values for class A
smallrA<-lapply(testPresence,function(x) GetLength(calibPresence,x))
smallrA_1<- unlist (smallrA)
nCalibSet<-length(calibPresence)+1
pvalA<-smallrA_1/nCalibSet

# derive p.Values for Class B
smallrB<-lapply(testAbsence,function(x) GetLength(calibAbsence,x))
smallrB_1<- unlist (smallrB)
nCalibSetB<-length(calibAbsence)+1
pvalB<-smallrB_1/nCalibSetB

 pvalsdf<-as.data.frame(cbind(pvalA,pvalB,0.20))
# raster_cp_20<-mapply(CPconf,pvalsdf$pvalA,pvalsdf$pvalB,pvalsdf[3])
# table(raster_cp_20)

pvalsdf$conf<-get.confidence(pvalsdf$pvalA,pvalsdf$pvalB)
pvalsdf_1<-cbind(pvalsdf,predicted.values)
}
```

### Quantify confidence of predicted values using class conformal prediction

```{r quantifyConfidence}
# confidence map for Belgium for predicted risk under historical climate
pvalsdf_hist<-classConformalPrediction(lm_ens_hab,ens_pred_hab)
# Confidence maps for Belgium under RCP scenarios of climate change
pvalsdf_rcp26<-classConformalPrediction(lm_ens_hab,ens_pred_hab26)
pvalsdf_rcp45<-classConformalPrediction(lm_ens_hab,ens_pred_hab45)
pvalsdf_rcp85<-classConformalPrediction(lm_ens_hab,ens_pred_hab85)

# option to export confidence and pvals as csv 
write.csv(pvalsdf_hist,file=paste(genOutput,"confidence_",taxonkey, "_hist.csv",sep=""))
```

### Create confidence maps

```{r createConfidenceMaps,fig.show="hold"}
brks <- seq(0, 1, by=0.1) 
  nb <- length(brks)-1 
  pal <- colorRampPalette(rev(brewer.pal(4, 'Spectral')))
  cols<-pal(nb)
  

confidenceMaps<-function(x,taxonkey,taxonName,maptype){
pvals_dataframe<-get("x")
data.xyz <- pvals_dataframe[c("x","y","conf")]
rst <- rasterFromXYZ(data.xyz)
crs(rst)<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs") 
plot(rst,breaks=brks, col=cols,lab.breaks=brks)
writeRaster(rst, filename=file.path(rasterOutput,paste("be_",taxonkey, "_",maptype,".tif",sep="")), format="GTiff",overwrite=TRUE)
exportPDF(rst,taxonkey,taxonName=taxonName,nameextension= paste(maptype,".pdf",sep=""))
}

hist.conf.map<-confidenceMaps(pvalsdf_hist,taxonkey,taxonName,maptype="hist_conf")
rcp26.conf.map<-confidenceMaps(pvalsdf_rcp26,taxonkey,taxonName,maptype="rcp26_conf")
rcp45.conf.map<-confidenceMaps(pvalsdf_rcp45,taxonkey,taxonName,maptype="rcp45_conf")
rcp85.conf.map<-confidenceMaps(pvalsdf_rcp85,taxonkey,taxonName,maptype="rcp85_conf")
```




### Model evalution summary
```{r export ModelEvaluation}
meanResults<-summary(modelResults1)
eu_accuracy<-meanResults$statistics$Accuracy
eu_kappa<-meanResults$statistics$Kappa

write.csv(eu_accuracy,file=paste0(genOutput,taxonkey,"_accuracy.csv"))
write.csv(eu_kappa,file=paste0(genOutput,taxonkey,"_kappa.csv"))

# derive sensitivity and specificity
 table(predEns1$pred,predEns1$obs)
 sensitivity(predEns1$pred,predEns1$obs)
 specificity(predEns1$pred,predEns1$obs)
 sens<-sensitivity(predEns1$pred,predEns1$obs)
 spec<-specificity(predEns1$pred,predEns1$obs)

moransI<-Moran.I(res.best.df$hab.res,occ.dists.inv,scaled=TRUE,alternative="greater")
MoransI<-moransI$observed
ensembleEvaluation<-(lm_ens_hab$ens_model$results[2:5])

modelEvaluation<-cbind(sens,spec,MoransI,ensembleEvaluation)
write.csv(modelEvaluation,file=paste0(genOutput,taxonkey,"_ModelEval.csv"))
```

### Generate and export response curves in order of variable importance

```{r responseCurves}
topPreds <- variableImportance[with(variableImportance,order(-overall)),]
varNames<-rownames(topPreds)
# combine predictions from each model for each variable
partial_gbm<-function(x){
  m.gbm<-pdp::partial(model_train_habitat$gbm$finalModel,pred.var=paste(x),train = occeu.full.data.df1,type="classification",
                      prob=TRUE,n.trees= model_train_habitat$gbm$finalModel$n.trees, which.class = 2,grid.resolution=nrow(occeu.full.data.df1))
}



gbm.partial.list<-lapply(varNames,partial_gbm)

partial_glm<-function(x){
m.glm<-pdp::partial(model_train_habitat$glm$finalModel,pred.var=paste(x),train = occeu.full.data.df1,type="classification",
              prob=TRUE,which.class = 2,grid.resolution=nrow(occeu.full.data.df1))
}

glm.partial.list<-lapply(varNames,partial_glm)

partial_rf<-function(x){
  pdp::partial(model_train_habitat$rf$finalModel,pred.var=paste(x),train = occeu.full.data.df1,type="classification",
              prob=TRUE,which.class = 2,grid.resolution=nrow(occeu.full.data.df1))
}

rf.partial.list<-lapply(varNames,partial_rf)

partial_knn<-function(x){
m.knn<-pdp::partial(model_train_habitat$knn,pred.var=paste(x),train = occeu.full.data.df1,type="classification",
               prob=TRUE,which.class = 2,grid.resolution=nrow(occeu.full.data.df1))
}

knn.partial.list<-lapply(varNames,partial_knn)




partial_mars<-function(x){
m.mars<-pdp::partial(model_train_habitat$earth$finalModel,pred.var=paste(x),train = occeu.full.data.df1,type="classification",
              prob=TRUE,which.class = 2,grid.resolution=nrow(occeu.full.data.df1))
}

mars.partial.list<-lapply(varNames,partial_mars)


names(glm.partial.list)<-varNames
names(gbm.partial.list)<-varNames
names(rf.partial.list)<-varNames
names(knn.partial.list)<-varNames
names(mars.partial.list)<-varNames

glm.partial.df<-as.data.frame(glm.partial.list)
gbm.partial.df<-as.data.frame(gbm.partial.list)
rf.partial.df<-as.data.frame(rf.partial.list)
knn.partial.df<-as.data.frame(knn.partial.list)

mars.partial.df<-as.data.frame(mars.partial.list)

predx<-data.frame()
predy<-data.frame()

for (i in varNames){
  predx <- rbind(predx, as.data.frame(paste(i,i,sep=".")))
  predy<- rbind(predy,as.data.frame(paste(i,"yhat",sep=".")))
}
names(predx)<-""
names(predy)<-""

predx1<-t(predx)
predy1<-t(predy)


glm.partial.df$data<-'GLM'
gbm.partial.df$data<-'GBM'
rf.partial.df$data<-'RF'
knn.partial.df$data<-'KNN'
mars.partial.df$data<-'MARS'

all_dfs<-rbind.data.frame(glm.partial.df,gbm.partial.df,rf.partial.df,knn.partial.df,mars.partial.df)


responseCurves<-function(x,y) {
  colors <- c("GLM" = "gray", "GBM"="red","RF"="blueviolet","KNN"="chartreuse", "MARS"= "hotpink") 
  ggplot(all_dfs,(aes(x=.data[[x]],y=.data[[y]]))) +
    geom_line(aes(color = data), size =1.2, position=position_dodge(width=0.2))+
   theme_bw()+
    labs(y="Partial probability", x= gsub("//..*","",x),color="Legend") +
    scale_color_manual(values = colors)
}  

allplots<-map2(predx1,predy1, ~responseCurves(.x,.y))

#export plots as PNGs
for(i in seq_along(allplots)){
  png(paste0(genOutput,taxonkey,"_",i,".png"),width = 5, height = 5, units = "in",res=300)
  print(allplots[[i]])
  dev.off()
}
```


### Plot response curves
```{r plotResponseCurves,fig.show="hold"}

par(mfrow=c(3,3))
for(i in seq_along(allplots)){
  print(allplots[[i]])
}
```