#remove unverified records
identificationVerificationStatus_to_discard <- c("unverified", "unvalidated","not able to validate","control could not be conclusive due to insufficient knowledge")
#enter value for max coordinate uncertainty in meters.
global.occ<-global %>%
filter(speciesKey==taxonkey) %>% #using taxonKey filters out accepted synonyms
filter(is.na(coordinateUncertaintyInMeters)| coordinateUncertaintyInMeters< 1000) %>%
filter(!str_to_lower(identificationVerificationStatus) %in% identificationVerificationStatus_to_discard)
global.occ$lon_dplaces<-sapply(global.occ$decimalLongitude, function(x) decimalplaces(x))
global.occ$lat_dplaces<-sapply(global.occ$decimalLatitude, function(x) decimalplaces(x))
global.occ[global.occ$lon_dplaces < 4& global.occ$lat_dplaces < 4 , ]<-NA
global.occ<-global.occ[ which(!is.na(global.occ$lon_dplaces)),]
global.occ<-within(global.occ,rm("lon_dplaces","lat_dplaces"))
global.occ<-global.occ[which( global.occ$year > 1970 & global.occ$year < 2011),]
globalclimrasters <- list.files((here("./data/external/climate/trias_CHELSA")),pattern='tif',full.names = T) #import CHELSA data
globalclimpreds <- stack(globalclimrasters)
global.SDMtable<- SDMPlay:::SDMtab(global.occ.LL.cleaned, globalclimpreds, unique.data = TRUE,background.nb= 0) #
numb.global.pseudoabs <-length(global.SDMtable$id) #sets the number of pseudoabsences equal to number of unique presences
global.occ.sp<-global.SDMtable[c("longitude", "latitude")]
coordinates(global.occ.sp)<- c("longitude", "latitude")
global.occ.sp$species<- rep(1,length(global.occ.sp$latitude)) #adds columns indicating species presence needed for modeling
biasgrid<-raster(here("./data/external/bias_grids/final/trias/plants_1deg_min5.tif"))### specify appropriate bias grid here
ext_wwf_ecoSub<-extent(wwf_ecoSub1)
biasgrid_crop<-crop(biasgrid,ext_wwf_ecoSub)
biasgrid_sub<-mask(biasgrid_crop,wwf_ecoSub1)
plot(biasgrid_sub)
plot(wwf_ecoSub1,add=TRUE)
set.seed(728)
global_points<-randomPoints(biasgrid_sub,numb.global.pseudoabs, global.occ.sp, ext=NULL, extf=1.1, excludep=TRUE, prob=FALSE, cellnumbers=FALSE, tryf=70, warn=2, lonlatCorrection=TRUE)
# will throw a warning if randomPoints generated is less than numb.pseudoabs. If this happens, increase the number of tryf or ignore bias grid and sample from ecoregion only.
# wwf_grid<-raster(here("./data/external/GIS/wwf_ecoregions_v1.tif"))
# ecoregions_raster<-mask(wwf_grid,wwf_ecoSub1)
# set.seed(768)
# global_points<-randomPoints(ecoregions_raster, numb.pseudoabs, global.occ.sp, ext=NULL, extf=1.1, excludep=TRUE, prob=FALSE, cellnumbers=FALSE, tryf=150, warn=2, lonlatCorrection=TRUE)
global.data <- sdmData(species~.,train=global_presabs, predictors=globalclimpreds)
global.data.df<-as.data.frame(global.data)
control <- trainControl(method="cv",number=10,savePredictions="final", preProc=c("center","scale"),classProbs=TRUE)
classList1 <- c("glm","gbm","rf","earth")
set.seed(457)
global_train <- caretList(
species~., data= global.data.df.uncor,
trControl=control,
methodList=classList1)
GlobalModelResults<-resamples(global_train)
Global.Mod.Accuracy<-summary(GlobalModelResults)# displays accuracy of each model
kable(Global.Mod.Accuracy$statistics$Accuracy,digits=2) %>%
kable_styling(bootstrap_options = c("striped"))
GlobalModelResults<-resamples(global_train)
kable(Global.Mod.Accuracy$statistics$Kappa,digits=2) %>%
kable_styling(bootstrap_options = c("striped"))
Global.Mod.Cor<-modelCor(resamples(global_train))# shows correlation among models.Weakly correlated algorithms are persuasive for stacking them in ensemble.
kable(Global.Mod.Cor,digits=2)%>%
kable_styling(bootstrap_options = c("striped"))
set.seed(478)
global_stack <- caretStack(
global_train,
method="glm",
trControl=trainControl(method="cv",
number=10,
savePredictions= "final",classProbs=TRUE ))
print(global_stack)
findThresh<-function(df){ df[c(“rowIndex”,“obs”,“present”)] df<-df %>% mutate(observed= ifelse(obs == “present”,1,0)) %>% select(rowIndex,observed,predicted=present) result<-PresenceAbsence::optimal.thresholds(df,opt.methods = 2) return(result) }
#accuracy measures accuracyStats<-function(df,y){ df[c(“rowIndex”,“obs”,“present”)] df<-df %>% mutate(observed= ifelse(obs == “present”,1,0)) %>% select(rowIndex,observed,predicted=present) result<-PresenceAbsence::presence.absence.accuracy(df,threshold = y,st.dev=FALSE) return(result) }
global.ens.thresh<-findThresh(global_stack$ens_model$pred)
accuracyStats(global_stack$ens_model$pred,global.ens.thresh$predicted)
euclimrasters <- list.files((here("./data/external/climate/chelsa_eu_clips")),pattern='tif',full.names = T)
eu_climpreds<-stack(euclimrasters)
eu_climpreds.10<-divide10(eu_climpreds) # correct for integer format of Chelsa preds
rmiclimrasters <- list.files((here("./data/external/climate/rmi_corrected")),pattern='tif',full.names = T)
rmiclimrasters #shows all available climate data
rmiclimpreds <- stack(rmiclimrasters) #includes all available climate data
# convert a single raster to LL for use in SDMtab
LLproj<-CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
rmiclimpredLL<-projectRaster(rmiclimpreds$anntemp_eea, crs=LLproj)
rmiclimpredLL2<-projectRaster(rmiclimpreds$annprecip_eea, crs=LLproj)
rmipredstackLL<-stack(rmiclimpredLL,rmiclimpredLL2)
# Create SpatialPoints dataframe needed for SDMtab command.
euocc<-occ.eu@coords
euocc1<-data.frame(euocc)[c(1:2)]
names(euocc1)<-c("longitude", "latitude")
#run SDMtab command to remove duplicates
euocc.SDMtable<- SDMPlay:::SDMtab(euocc1, rmipredstackLL, unique.data = TRUE,same=TRUE)
#numb.pseudoabs <- length(euocc.SDMtable$id)
euocc<-euocc.SDMtable[c("longitude", "latitude")]
coordinates(euocc)<- c("longitude", "latitude")
euocc$occ<- rep(1,length(euocc$latitude))#adds columns indicating species presence needed for modeling
proj4string(euocc)<-CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")#specify here the existing projection of the data
rmiclipreds<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,-0,-0,-0,0 +units=m +no_defs ")
rmiproj<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,-0,-0,-0,0 +units=m +no_defs")
euocc1<-spTransform(euocc,rmiproj)
studyextent<-euboundary
ecoregions_eu<-crop(biasgrid_sub,studyextent)
biasgrid_eu<-projectRaster(ecoregions_eu,rmiclimpreds)
plot(biasgrid_eu)
plot(studyextent,add=TRUE)
wgs84_gcs<-CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
crs(global_model)<-wgs84_gcs
m<-global_model >.5
global_mask<-mask(global_model,m,maskvalue=TRUE)
global_masked_proj<-projectRaster(global_mask,biasgrid_eu)
pseudoSamplingArea<-mask(biasgrid_eu,global_masked_proj)
plot(pseudoSamplingArea)
# set number of pseudoabsences equal to the number of presences
numb.eu.pseudoabs<-nrow(euocc)
# takes 10 draws of random pseudoabsences, returns as dataframes and names them X1-X10
setlist<-seq(1,10,1)
set.seed(120)
pseudoabs_pts<-lapply(setlist,function(x) as.data.frame(randomPoints(pseudoSamplingArea, numb.eu.pseudoabs , euocc, ext=NULL, extf=1.1, excludep=TRUE, prob=FALSE,cellnumbers=FALSE, tryf=50, warn=2, lonlatCorrection=TRUE)))
names(pseudoabs_pts)<-paste0("X",setlist)
# extract data from predictors for absences
pseudoabs_pts1<-lapply(pseudoabs_pts, function(x) raster::extract(rmiclimpreds,x))
# add absence indicator
add.occ<-function(x,y){
occ<-rep(y,nrow(x))
cbind(x,occ)
}
pseudoabs_pts2<-lapply(pseudoabs_pts1, function(x) add.occ(x,0))
# extract eu presences and add presence indicator
presence<-as.data.frame(euocc1@coords)
names(presence)<- c("x","y")
presence1<-raster::extract(rmiclimpreds,presence)
occ<-rep(1,nrow(presence1))
presence1<-cbind(presence1,occ)
# join each pseudoabsence set with presences
eu_presabs.pts<-lapply(pseudoabs_pts2, function(x) rbind(x,presence1))
eu_presabs.coord<-lapply(pseudoabs_pts, function(x) rbind(x,presence))
# prepare data for modeling
occ.full.data.df<-lapply(occ.full.data, function(x) as.data.frame(x))
occ.full.data.df<- sapply(names(occ.full.data.df), function (x) cbind(occ.full.data.df[[x]],occ=eu_presabs.pts.df[[x]]$occ, deparse.level=0),simplify=FALSE)
factorVars<-function(df,var){
df[,c(var)]<-as.factor(df[,c(var)])
levels(df[,c(var)])<-c("absent","present")
df[,c(var)]<-relevel(df[,c(var)], ref = "present")
return(df)
}
occ.full.data.forCaret<-sapply(names(occ.full.data.df), function (x) factorVars(occ.full.data.df[[x]], "occ"),simplify=FALSE)
# uncomment 2nd control options for LOOCV (leave one out cross validation, which is aka as "jacknife" ) which should be used when occurrences are smaller than n=10 for each predictor in the model)
#control<-trainControl(method="LOOCV",savePredictions="final", preProc=c("center","scale"),classProbs=TRUE)
control <- trainControl(method="cv",number=10,savePredictions="final",classProbs=TRUE)
mylist<-list(
glm =caretModelSpec(method = "glm",maxit=100),
gbm= caretModelSpec(method = "gbm"),
rf = caretModelSpec(method = "rf", importance = TRUE),
earth= caretModelSpec(method = "earth"))
# set.seed(167)
eu_models<-sapply(names(occ.full.data.forCaret), function(x) model_train_habitat <- caretList(
occ~annpvarrecip_eea +dristprec+ maxtemp +temprang +varSolRad100 +wettprec +corine_perWetland+ ESM1000m_singleINT_ext, data= na.omit(occ.full.data.forCaret[[x]]),
trControl=control,
tuneList=mylist), simplify=FALSE)
# set.seed(167)
# eu_models<-sapply(names(occ.full.data.forCaret), function(x) model_train_habitat <- caretList(
# occ~., data= na.omit(occ.full.data.forCaret[[x]]),
# trControl=control,
# tuneList=mylist), simplify=FALSE)
EU_ModelResults1<-sapply(names(eu_models), function(x) resamples(eu_models[[x]]),simplify=FALSE)
Results.summary<-sapply(names(EU_ModelResults1), function(x) summary(EU_ModelResults1[[x]]),simplify=FALSE)
Results.summary
Model.cor<-sapply(names(eu_models), function(x) modelCor(resamples(eu_models[[x]])),simplify=FALSE)
Model.cor
set.seed(458)
#hideoutput<-capture.output(
set.seed(458)
lm_ens_hab<-sapply(names(eu_models), function (x) caretEnsemble(eu_models[[x]], trControl=trainControl(method="cv", number=10,savePredictions= "final",classProbs = TRUE)),simplify=FALSE)
ens_pred_hab_eu1<-sapply(names(lm_ens_hab), function(x) raster::predict(fullstack,lm_ens_hab[[x]],type="prob"),simplify=FALSE)
# creates country level rasters using the European level models
ens_pred_hab_be<-sapply(names(lm_ens_hab), function(x) raster::predict(fullstack_be,lm_ens_hab[[x]],type="prob"),simplify=FALSE)
# assign proj to rasters
laea_grs80<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,-0,-0,-0,0 +units=m +no_defs ")
sapply(names(ens_pred_hab_be), function (x) crs(ens_pred_hab_be[[x]])<-laea_grs80)
# export rasters as GeoTiffs
#lapply(names(ens_pred_hab_be), function(x) writeRaster(ens_pred_hab_be[[x]], filename=file.path(rasterOutput,paste(x,"_",taxonkey,"_hist.tif",sep="")), format="GTiff",overwrite=TRUE))
#2011-2021
# specify best model below
bestModel<-lm_ens_hab$X1
brks <- seq(0, 1, by=0.1)
nb <- length(brks)-1
pal <- colorRampPalette(rev(brewer.pal(8, 'Spectral')))
cols<-pal(nb)
plot(ens_pred_hab_eu1$X1, breaks=brks, col=cols,lab.breaks=brks)
plot(euocc1,pch=1,cex=0.40,add=TRUE)#plots species presences in 10 fold cv comment this line to hide
brks <- seq(0, 1, by=0.1)
nb <- length(brks)-1
pal <- colorRampPalette(rev(brewer.pal(11, 'Spectral')))
cols<-pal(nb)
plot(ens_pred_hab_be$X9, breaks=brks, col=cols,lab.breaks=brks)
#plot(euocc1,pch=1,cex=0.75,add=TRUE)
#plot(euocc_pseudoAbs,pch=2,cex=0.75,add=TRUE)
habitat_stack<-stack(habitat)
habitat_only_stack<-crop(habitat_stack,country)
habitat_only_stack_be<-crop(habitat_only_stack,country)
be26 <- list.files((here("./data/external/climate/byEEA_finalRCP/belgium_rcps/rcp26")),pattern='tif',full.names = T)
belgium_stack26 <- stack(be26)
be45 <- list.files((here("./data/external/climate/byEEA_finalRCP/belgium_rcps/rcp45")),pattern='tif',full.names = T)
belgium_stack45 <- stack(be45)
be85 <- list.files((here("./data/external/climate/byEEA_finalRCP/belgium_rcps/rcp85")),pattern='tif',full.names = T)
belgium_stack85 <- stack(be85)
fullstack26<-stack(be26,habitat_only_stack_be)
fullstack45<-stack(be45,habitat_only_stack_be)
fullstack85<-stack(be85,habitat_only_stack_be)
ens_pred_hist<-raster::predict(fullstack_be,bestModel,type="prob")
ens_pred_hab26<-raster::predict(fullstack26,bestModel,type="prob")
crs(ens_pred_hab26)<-laea_grs80
writeRaster(ens_pred_hab26, filename=file.path(rasterOutput,paste("be_",taxonkey, "_rcp26.tif",sep="")), format="GTiff",overwrite=TRUE)
exportPDF(ens_pred_hab26,taxonkey,taxonName=taxonName,"rcp26.pdf")
ens_pred_hab45<-raster::predict(fullstack45,bestModel,type="prob")
crs(ens_pred_hab45)<-laea_grs80
writeRaster(ens_pred_hab45, filename=file.path(rasterOutput,paste("be_",taxonkey, "_rcp45.tif",sep="")), format="GTiff",overwrite=TRUE)
exportPDF(ens_pred_hab45,taxonkey,taxonName=taxonName,"rcp45.pdf")
ens_pred_hab85<-raster::predict(fullstack85,bestModel,type="prob")
crs(ens_pred_hab85)<-laea_grs80
writeRaster(ens_pred_hab85, filename=file.path(rasterOutput,paste("be_",taxonkey, "_rcp85.tif",sep="")), format="GTiff",overwrite=TRUE)
exportPDF(ens_pred_hab85,taxonkey,taxonName=taxonName,"rcp85.pdf")
par(mfrow=c(2,2), mar= c(2,2,0.8,0.8))
plot(ens_pred_hist,breaks=brks, col=cols,lab.breaks=brks)
plot(ens_pred_hab26,breaks=brks, col=cols,lab.breaks=brks)
plot(ens_pred_hab45,breaks=brks, col=cols,lab.breaks=brks)
plot(ens_pred_hab85,breaks=brks, col=cols,lab.breaks=brks)
predEns1<-bestModel$ens_model$pred
obs.numeric<-ifelse(predEns1$obs == "absent",0,1)
stdres<-function(obs.numeric, yhat){
num<-obs.numeric-yhat
denom<-sqrt(yhat*(1-yhat))
return(num/denom)
}
hab.res<-stdres(obs.numeric,predEns1$present)
# specify corresponding model number from eu_presabs.coord datafile to join data with xy locations. If best model is "X1", join with eu_presabs.coord$X1
res.best.coords1<-cbind(coordinates(eu_presabs.coord$X1),occ.full.data.forCaret$X1)
removedNAs.coords<-na.omit(res.best.coords1)
res.best.coords<-cbind(removedNAs.coords,hab.res)
res.best.geo<-as.geodata(res.best.coords,coords.col=1:2,data.col = 3)
summary(res.best.geo) #note distance is in meters
#If Moran's I is very low (<0.10), or not significant, do not need to thin occurrences.
library(ape)
res.best.df<-as.data.frame(res.best.coords)
occ.dists <- as.matrix(dist(cbind(res.best.df[1], res.best.df[2])))
occ.dists.inv <- 1/occ.dists
diag(occ.dists.inv) <- 0
Moran.I(res.best.df$hab.res,occ.dists.inv,scaled=TRUE,alternative="greater")
# quantify confidence for country level predictions based on historical climate and under RCP scenarios of climate change
set.seed(1609)
pvalsdf_hist<-classConformalPrediction(bestModel,ens_pred_hist)
set.seed(447)
pvalsdf_rcp26<-classConformalPrediction(bestModel,ens_pred_hab26)
set.seed(568)
pvalsdf_rcp45<-classConformalPrediction(bestModel,ens_pred_hab45)
set.seed(988)
pvalsdf_rcp85<-classConformalPrediction(bestModel,ens_pred_hab85)
# option to export confidence and pvals as csv
# write.csv(pvalsdf_hist,file=paste(genOutput,"confidence_",taxonkey, "_hist.csv",sep=""))
brks <- seq(0, 1, by=0.1)
nb <- length(brks)-1
pal <- colorRampPalette(rev(brewer.pal(4, 'Spectral')))
cols<-pal(nb)
confidenceMaps<-function(x,taxonkey,taxonName,maptype){
pvals_dataframe<-get("x")
data.xyz <- pvals_dataframe[c("x","y","conf")]
rst <- rasterFromXYZ(data.xyz)
crs(rst)<-CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs")
plot(rst,breaks=brks, col=cols,lab.breaks=brks)
writeRaster(rst, filename=file.path(rasterOutput,paste("be_",taxonkey, "_",maptype,".tif",sep="")), format="GTiff",overwrite=TRUE)
exportPDF(rst,taxonkey,taxonName=taxonName,nameextension= paste(maptype,".pdf",sep=""))
return(rst)
}
par(mfrow=c(2,2), mar= c(2,2,0.8,0.8))
hist.conf.map<-confidenceMaps(pvalsdf_hist,taxonkey,taxonName,maptype="hist_conf")
rcp26.conf.map<-confidenceMaps(pvalsdf_rcp26,taxonkey,taxonName,maptype="rcp26_conf")
rcp45.conf.map<-confidenceMaps(pvalsdf_rcp45,taxonkey,taxonName,maptype="rcp45_conf")
rcp85.conf.map<-confidenceMaps(pvalsdf_rcp85,taxonkey,taxonName,maptype="rcp85_conf")
# Cutoff for "high" confidence can be modified below. Cutoff should be a value between 0 and 1. Values that are less than the cutoff are shown in gray.
cutoff<-0.70
brks <- seq(0,1, by=0.1)
nb <- length(brks)-1
pal <- colorRampPalette(rev(brewer.pal(4, 'Spectral')))
cols<-pal(nb)
par(mfrow=c(2,2), mar= c(2,2,0.9,0.8))
m1<-hist.conf.map < cutoff
hist_masked<-mask(ens_pred_hist,m1,maskvalue=TRUE)
plot(hist_masked,breaks=brks, col=cols,lab.breaks=brks)
plot(country,add=TRUE,border="dark gray")
m2<-rcp26.conf.map < cutoff
rcp26_masked<-mask(ens_pred_hab26,m2,maskvalue=TRUE)
plot(rcp26_masked,breaks=brks, col=cols,lab.breaks=brks)
plot(country,add=TRUE,border="dark gray")
m3<-rcp45.conf.map < cutoff
rcp45_masked<-mask(ens_pred_hab45,m3,maskvalue=TRUE)
plot(rcp45_masked,breaks=brks, col=cols,lab.breaks=brks)
plot(country,add=TRUE,border="dark gray")
m4<-rcp85.conf.map < cutoff
rcp85_masked<-mask(ens_pred_hab85,m4,maskvalue=TRUE)
plot(rcp85_masked,breaks=brks, col=cols,lab.breaks=brks)
plot(country,add=TRUE,border="dark gray")
brks <- seq(0, 1, by=0.1)
nb <- length(brks)-1
pal <- colorRampPalette(rev(brewer.pal(4, 'Spectral')))
set.seed(792)
pvalsdf_hist_eu<-classConformalPrediction(bestModel,ens_pred_hab_eu1$X1)
hist.conf.map.eu<-confidenceMaps(pvalsdf_hist_eu,taxonkey,taxonName,maptype="hist_conf_eu")
variableImportance<-varImp(bestModel)
kable(variableImportance,digits=2,caption="Variable Importance") %>%
kable_styling(bootstrap_options = c("striped"))
write.csv(variableImportance,file = paste0(genOutput,taxonkey,"_varImp_EU_model.csv"))
topPreds <- variableImportance[with(variableImportance,order(-overall)),]
varNames<-rownames(topPreds)
## combine predictions from each model for each variable
## train data needs to be the training data used in the individual models used to build the ensemble model. This info can be extracted from the best ensemble model (ie. bestModel)
bestModel.train<-bestModel$models[[1]]$trainingData
partial_gbm<-function(x){
m.gbm<-pdp::partial(bestModel$models$gbm$finalModel,pred.var=paste(x),train = bestModel.train,type="classification",
prob=TRUE,n.trees= bestModel$models$gbm$finalModel$n.trees, which.class = 1,grid.resolution=nrow(bestModel.train))
}
gbm.partial.list<-lapply(varNames,partial_gbm)
partial_glm<-function(x){
m.glm<-pdp::partial(bestModel$models$glm$finalModel,pred.var=paste(x),train = bestModel.train,type="classification",
prob=TRUE,which.class = 1,grid.resolution=nrow(bestModel.train))
}
glm.partial.list<-lapply(varNames,partial_glm)
partial_rf<-function(x){
pdp::partial(bestModel$models$rf$finalModel,pred.var=paste(x),train = bestModel.train,type="classification",
prob=TRUE,which.class = 1,grid.resolution=nrow(bestModel.train))
}
rf.partial.list<-lapply(varNames,partial_rf)
partial_mars<-function(x){
m.mars<-pdp::partial(bestModel$models$earth$finalModel,pred.var=paste(x),train = bestModel.train,type="classification",
prob=TRUE,which.class = 2,grid.resolution=nrow(bestModel.train)) # class=2 because in earth pkg, absense is the first class
}
mars.partial.list<-lapply(varNames,partial_mars)
names(glm.partial.list)<-varNames
names(gbm.partial.list)<-varNames
names(rf.partial.list)<-varNames
names(mars.partial.list)<-varNames
glm.partial.df<-as.data.frame(glm.partial.list)
gbm.partial.df<-as.data.frame(gbm.partial.list)
rf.partial.df<-as.data.frame(rf.partial.list)
mars.partial.df<-as.data.frame(mars.partial.list)
predx<-data.frame()
predy<-data.frame()
for (i in varNames){
predx <- rbind(predx, as.data.frame(paste(i,i,sep=".")))
predy<- rbind(predy,as.data.frame(paste(i,"yhat",sep=".")))
}
names(predx)<-""
names(predy)<-""
predx1<-t(predx)
predy1<-t(predy)
glm.partial.df$data<-'GLM'
gbm.partial.df$data<-'GBM'
rf.partial.df$data<-'RF'
mars.partial.df$data<-'MARS'
all_dfs<-rbind.data.frame(glm.partial.df,gbm.partial.df,rf.partial.df,mars.partial.df)
responseCurves<-function(x,y) {
colors <- c("GLM" = "gray", "GBM"="red","RF"="blueviolet","MARS"= "hotpink")
ggplot(all_dfs,(aes(x=.data[[x]],y=.data[[y]]))) +
geom_line(aes(color = data), size =1.2, position=position_dodge(width=0.2))+
theme_bw()+
labs(y="Partial probability", x= gsub("//..*","",x),color="Legend") +
scale_color_manual(values = colors)
}
allplots<-map2(predx1,predy1, ~responseCurves(.x,.y))
#export plots as PNGs
for(i in seq_along(allplots)){
png(paste0(genOutput,taxonkey,"_",i,".png"),width = 5, height = 5, units = "in",res=300)
print(allplots[[i]])
dev.off()
}
par(mfrow=c(3,4))
for(i in seq_along(allplots)){
print(allplots[[i]])
}