Lovell — Oct 15, 2014, 9:48 AM
rm(list=ls())
setwd("~/Desktop/Adaptomics_Project/niche_modeling")
library(vegan)
Loading required package: permute
Loading required package: lattice
This is vegan 2.0-10
library(fields)
Loading required package: spam
Warning: package 'spam' was built under R version 3.1.1
Loading required package: grid
Spam version 1.0-1 (2014-09-09) is loaded.
Type 'help( Spam)' or 'demo( spam)' for a short introduction
and overview of this package.
Help for individual functions is also obtained by adding the
suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
Attaching package: 'spam'
The following objects are masked from 'package:base':
backsolve, forwardsolve
Loading required package: maps
Warning: package 'maps' was built under R version 3.1.1
library(varSelRF)
Loading required package: randomForest
Warning: package 'randomForest' was built under R version 3.1.1
randomForest 4.6-10
Type rfNews() to see new features/changes/bug fixes.
#read in the data
acc.data<-read.csv("mr2014_accessioninfo2.csv",header=T)
acc.data<-acc.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),"apollo","taxon","ploidy"),]
sel.vars<-list()
#read in the functions..
###########
quantnorm<-function(x) { n=sum(!is.na(x),na.rm=T); x=rank(x)/(n+1); x=qnorm(x); x[is.infinite(x)]=NA; x}
###########
full.nichemodel<-function(all.data, cat, species){
stats.out.all<-data.frame()
#1 process data
for ( i in species){
if (i=="all"){
all.data.in<-all.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}else{
all.in<-all.data[all.data$taxon==i,]
all.data.in<-all.in[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}
geo.dist<-rdist.earth(geo.in, miles=F)
#2 select variables through random forests
rfsel.in<-varSelRF(xdata=env.in, Class=as.factor(cat.in),vars.drop.frac = 0.2)
sel.vars.in<-rfsel.in$selected.vars
best.env.in1<-env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
#3 run cca w/o spatial covariate
cca1<-cca(best.env.in1~cat.in)
cca1.aov<-anova(cca1,by="terms",permu=10000)
f.raw<-data.frame(cca1.aov)$F[1]
pval.raw<-data.frame(cca1.aov)$Pr..F.[1]
#4 run cca w/ spatial partial
cca2<-cca(best.env.in1~as.factor(cat.in),z=geo.dist)
cca2.aov<-anova(cca2,by="terms",permu=10000)
f.part<-data.frame(cca2.aov)$F[1]
pval.part<-data.frame(cca2.aov)$Pr..F.[1]
#5 run cca for space
cca3<-cca(abs(geo.in)~cat.in)
cca3.aov<-anova(cca3,by="terms",permu=10000)
f.space<-data.frame(cca3.aov)$F[1]
pval.space<-data.frame(cca3.aov)$Pr..F.[1]
#6 output stats
stats.out<-data.frame(cat,i,as.numeric(table(cat.in)[1]),as.numeric(table(cat.in)[2]),f.raw,pval.raw,pval.part,f.space,pval.space)
colnames(stats.out)<-c("test","subset","n.grp1","n.grp1","f","p.raw","p.partial","f.space","p.space")
stats.out.all<-rbind(stats.out.all,stats.out)
}
return(stats.out.all)
}
#run the function for:
###########
#1 full sample apollo
#data processing
all.data=acc.data;cat="apollo";species="all"
stats.out.all<-data.frame()
for ( i in species){
if (i=="all"){
all.data.in<-all.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}else{
all.in<-all.data[all.data$taxon==i,]
all.data.in<-all.in[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}
geo.dist<-rdist.earth(geo.in, miles=F)
#2 select variables through random forests
rfsel.in<-varSelRF(xdata=env.in, Class=as.factor(cat.in),vars.drop.frac = 0.2)
sel.vars.in<-rfsel.in$selected.vars
best.env.in1<-env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
#3 run cca w/o spatial covariate
cca1<-cca(best.env.in1~cat.in)
cca1.aov<-anova(cca1,by="terms",permu=10000)
f.raw<-data.frame(cca1.aov)$F[1]
pval.raw<-data.frame(cca1.aov)$Pr..F.[1]
#4 run cca w/ spatial partial
cca2<-cca(best.env.in1~as.factor(cat.in),z=geo.dist)
cca2.aov<-anova(cca2,by="terms",permu=10000)
f.part<-data.frame(cca2.aov)$F[1]
pval.part<-data.frame(cca2.aov)$Pr..F.[1]
#5 run cca for space
cca3<-cca(abs(geo.in)~cat.in)
cca3.aov<-anova(cca3,by="terms",permu=10000)
f.space<-data.frame(cca3.aov)$F[1]
pval.space<-data.frame(cca3.aov)$Pr..F.[1]
#6 output stats
stats.out<-data.frame(cat,i,as.numeric(table(cat.in)[1]),as.numeric(table(cat.in)[2]),f.raw,pval.raw,pval.part,f.space,pval.space)
colnames(stats.out)<-c("test","subset","n.grp1","n.grp1","f","p.raw","p.partial","f.space","p.space")
stats.out.all<-rbind(stats.out.all,stats.out)
}
for all the selected environmental variables are: bio1 bio10 bio15 elev
full.apollo<-stats.out.all
sel.vars[["full.apollo"]]<-sel.vars.in
full.apollo$id<-"full.apollo"
###########
#2 full sample ploidy
#niche model
all.data=acc.data;cat="ploidy";species="all"
stats.out.all<-data.frame()
for ( i in species){
if (i=="all"){
all.data.in<-all.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}else{
all.in<-all.data[all.data$taxon==i,]
all.data.in<-all.in[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}
geo.dist<-rdist.earth(geo.in, miles=F)
#2 select variables through random forests
rfsel.in<-varSelRF(xdata=env.in, Class=as.factor(cat.in),vars.drop.frac = 0.2)
sel.vars.in<-rfsel.in$selected.vars
best.env.in1<-env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
#3 run cca w/o spatial covariate
cca1<-cca(best.env.in1~cat.in)
cca1.aov<-anova(cca1,by="terms",permu=10000)
f.raw<-data.frame(cca1.aov)$F[1]
pval.raw<-data.frame(cca1.aov)$Pr..F.[1]
#4 run cca w/ spatial partial
cca2<-cca(best.env.in1~as.factor(cat.in),z=geo.dist)
cca2.aov<-anova(cca2,by="terms",permu=10000)
f.part<-data.frame(cca2.aov)$F[1]
pval.part<-data.frame(cca2.aov)$Pr..F.[1]
#5 run cca for space
cca3<-cca(abs(geo.in)~cat.in)
cca3.aov<-anova(cca3,by="terms",permu=10000)
f.space<-data.frame(cca3.aov)$F[1]
pval.space<-data.frame(cca3.aov)$Pr..F.[1]
#6 output stats
stats.out<-data.frame(cat,i,as.numeric(table(cat.in)[1]),as.numeric(table(cat.in)[2]),f.raw,pval.raw,pval.part,f.space,pval.space)
colnames(stats.out)<-c("test","subset","n.grp1","n.grp1","f","p.raw","p.partial","f.space","p.space")
stats.out.all<-rbind(stats.out.all,stats.out)
}
for all the selected environmental variables are: bio1 bio10 bio11 bio13 bio15 bio16 bio18 bio3 bio4 bio5 bio6 bio7 elev
full.ploidy<-stats.out.all
sel.vars[["full.ploidy"]]<-sel.vars.in
full.ploidy$id<-"full.ploidy"
###########
#3 diploid sample apollo
diploid.data<-acc.data[acc.data$ploidy==2,]
all.data=diploid.data;cat="apollo";species="all"
stats.out.all<-data.frame()
for ( i in species){
if (i=="all"){
all.data.in<-all.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}else{
all.in<-all.data[all.data$taxon==i,]
all.data.in<-all.in[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}
geo.dist<-rdist.earth(geo.in, miles=F)
#2 select variables through random forests
rfsel.in<-varSelRF(xdata=env.in, Class=as.factor(cat.in),vars.drop.frac = 0.2)
sel.vars.in<-rfsel.in$selected.vars
best.env.in1<-env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
#3 run cca w/o spatial covariate
cca1<-cca(best.env.in1~cat.in)
cca1.aov<-anova(cca1,by="terms",permu=10000)
f.raw<-data.frame(cca1.aov)$F[1]
pval.raw<-data.frame(cca1.aov)$Pr..F.[1]
#4 run cca w/ spatial partial
cca2<-cca(best.env.in1~as.factor(cat.in),z=geo.dist)
cca2.aov<-anova(cca2,by="terms",permu=10000)
f.part<-data.frame(cca2.aov)$F[1]
pval.part<-data.frame(cca2.aov)$Pr..F.[1]
#5 run cca for space
cca3<-cca(abs(geo.in)~cat.in)
cca3.aov<-anova(cca3,by="terms",permu=10000)
f.space<-data.frame(cca3.aov)$F[1]
pval.space<-data.frame(cca3.aov)$Pr..F.[1]
#6 output stats
stats.out<-data.frame(cat,i,as.numeric(table(cat.in)[1]),as.numeric(table(cat.in)[2]),f.raw,pval.raw,pval.part,f.space,pval.space)
colnames(stats.out)<-c("test","subset","n.grp1","n.grp1","f","p.raw","p.partial","f.space","p.space")
stats.out.all<-rbind(stats.out.all,stats.out)
}
for all the selected environmental variables are: bio11 bio19 elev
apollo.2x<-stats.out.all
sel.vars[["apollo.2x"]]<-sel.vars.in
apollo.2x$id<-"apollo.2x"
###########
#4 triploid sample apollo
triploid.data<-acc.data[acc.data$ploidy==3,]
all.data=triploid.data;cat="apollo";species="all"
stats.out.all<-data.frame()
for ( i in species){
if (i=="all"){
all.data.in<-all.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}else{
all.in<-all.data[all.data$taxon==i,]
all.data.in<-all.in[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}
geo.dist<-rdist.earth(geo.in, miles=F)
#2 select variables through random forests
rfsel.in<-varSelRF(xdata=env.in, Class=as.factor(cat.in),vars.drop.frac = 0.2)
sel.vars.in<-rfsel.in$selected.vars
best.env.in1<-env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
#3 run cca w/o spatial covariate
cca1<-cca(best.env.in1~cat.in)
cca1.aov<-anova(cca1,by="terms",permu=10000)
f.raw<-data.frame(cca1.aov)$F[1]
pval.raw<-data.frame(cca1.aov)$Pr..F.[1]
#4 run cca w/ spatial partial
cca2<-cca(best.env.in1~as.factor(cat.in),z=geo.dist)
cca2.aov<-anova(cca2,by="terms",permu=10000)
f.part<-data.frame(cca2.aov)$F[1]
pval.part<-data.frame(cca2.aov)$Pr..F.[1]
#5 run cca for space
cca3<-cca(abs(geo.in)~cat.in)
cca3.aov<-anova(cca3,by="terms",permu=10000)
f.space<-data.frame(cca3.aov)$F[1]
pval.space<-data.frame(cca3.aov)$Pr..F.[1]
#6 output stats
stats.out<-data.frame(cat,i,as.numeric(table(cat.in)[1]),as.numeric(table(cat.in)[2]),f.raw,pval.raw,pval.part,f.space,pval.space)
colnames(stats.out)<-c("test","subset","n.grp1","n.grp1","f","p.raw","p.partial","f.space","p.space")
stats.out.all<-rbind(stats.out.all,stats.out)
}
for all the selected environmental variables are: bio14 bio18
apollo.3x<-stats.out.all
sel.vars[["apollo.3x"]]<-sel.vars.in
apollo.3x$id<-"apollo.3x"
###########
#5 apollo 0 sample ploidy
apollo0.data<-acc.data[acc.data$apollo==0,]
all.data=apollo0.data;cat="ploidy";species="all"
stats.out.all<-data.frame()
for ( i in species){
if (i=="all"){
all.data.in<-all.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}else{
all.in<-all.data[all.data$taxon==i,]
all.data.in<-all.in[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}
geo.dist<-rdist.earth(geo.in, miles=F)
#2 select variables through random forests
rfsel.in<-varSelRF(xdata=env.in, Class=as.factor(cat.in),vars.drop.frac = 0.2)
sel.vars.in<-rfsel.in$selected.vars
best.env.in1<-env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
#3 run cca w/o spatial covariate
cca1<-cca(best.env.in1~cat.in)
cca1.aov<-anova(cca1,by="terms",permu=10000)
f.raw<-data.frame(cca1.aov)$F[1]
pval.raw<-data.frame(cca1.aov)$Pr..F.[1]
#4 run cca w/ spatial partial
cca2<-cca(best.env.in1~as.factor(cat.in),z=geo.dist)
cca2.aov<-anova(cca2,by="terms",permu=10000)
f.part<-data.frame(cca2.aov)$F[1]
pval.part<-data.frame(cca2.aov)$Pr..F.[1]
#5 run cca for space
cca3<-cca(abs(geo.in)~cat.in)
cca3.aov<-anova(cca3,by="terms",permu=10000)
f.space<-data.frame(cca3.aov)$F[1]
pval.space<-data.frame(cca3.aov)$Pr..F.[1]
#6 output stats
stats.out<-data.frame(cat,i,as.numeric(table(cat.in)[1]),as.numeric(table(cat.in)[2]),f.raw,pval.raw,pval.part,f.space,pval.space)
colnames(stats.out)<-c("test","subset","n.grp1","n.grp1","f","p.raw","p.partial","f.space","p.space")
stats.out.all<-rbind(stats.out.all,stats.out)
}
for all the selected environmental variables are: bio1 bio14
ploidy.apollo0<-stats.out.all
sel.vars[["ploidy.apollo0"]]<-sel.vars.in
ploidy.apollo0$id<-"ploidy.apollo0"
###########
#6 apollo 1 sample ploidy
apollo1.data<-acc.data[acc.data$apollo==1,]
all.data=apollo1.data;cat="ploidy";species="all"
stats.out.all<-data.frame()
for ( i in species){
if (i=="all"){
all.data.in<-all.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}else{
all.in<-all.data[all.data$taxon==i,]
all.data.in<-all.in[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}
geo.dist<-rdist.earth(geo.in, miles=F)
#2 select variables through random forests
rfsel.in<-varSelRF(xdata=env.in, Class=as.factor(cat.in),vars.drop.frac = 0.2)
sel.vars.in<-rfsel.in$selected.vars
best.env.in1<-env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
#3 run cca w/o spatial covariate
cca1<-cca(best.env.in1~cat.in)
cca1.aov<-anova(cca1,by="terms",permu=10000)
f.raw<-data.frame(cca1.aov)$F[1]
pval.raw<-data.frame(cca1.aov)$Pr..F.[1]
#4 run cca w/ spatial partial
cca2<-cca(best.env.in1~as.factor(cat.in),z=geo.dist)
cca2.aov<-anova(cca2,by="terms",permu=10000)
f.part<-data.frame(cca2.aov)$F[1]
pval.part<-data.frame(cca2.aov)$Pr..F.[1]
#5 run cca for space
cca3<-cca(abs(geo.in)~cat.in)
cca3.aov<-anova(cca3,by="terms",permu=10000)
f.space<-data.frame(cca3.aov)$F[1]
pval.space<-data.frame(cca3.aov)$Pr..F.[1]
#6 output stats
stats.out<-data.frame(cat,i,as.numeric(table(cat.in)[1]),as.numeric(table(cat.in)[2]),f.raw,pval.raw,pval.part,f.space,pval.space)
colnames(stats.out)<-c("test","subset","n.grp1","n.grp1","f","p.raw","p.partial","f.space","p.space")
stats.out.all<-rbind(stats.out.all,stats.out)
}
for all the selected environmental variables are: bio15 bio18 bio3 bio4 bio7 bio8
ploidy.apollo1<-stats.out.all
sel.vars[["ploidy.apollo1"]]<-sel.vars.in
ploidy.apollo1$id<-"ploidy.apollo1"
###########
#7 each species apollo regardless of ploidy
species.for.apollo<-vector()
apollo.data<-acc.data[,-which(names(acc.data)=="ploidy")]
apollo.data<-apollo.data[complete.cases(apollo.data[,-which(names(apollo.data) %in% c("taxon")),]),]
for (i in unique(apollo.data$taxon)){
data.in<-apollo.data[apollo.data$taxon==i,]
tab.out<-as.numeric(table(data.in$apollo))
sex<-tab.out[1]
apo<-tab.out[2]
if(is.na(sex) | is.na(apo)){next
}else{
if(sex>=5 & apo>=5) {species.for.apollo<-c(species.for.apollo,i)
}else{next}
}
}
print(species.for.apollo)
[1] "Boechera stricta" "Boechera pinetorum"
[3] "Boechera retrofracta" "Boechera divaricarpa"
[5] "Boechera species" "Boechera collinsii"
[7] "Boechera pendulocarpa" "Boechera crandallii"
[9] "Boechera lemmonii" "Boechera pauciflora"
[11] "Boechera sparsiflora" "Boechera microphylla"
[13] "Boechera pallidifolia" "Boechera perennans"
[15] "Boechera fendleri" "Boechera puberula"
[17] "Boechera williamsii" "Boechera lyallii"
[19] "Boechera spatifolia"
all.data=acc.data;cat="apollo";species=species.for.apollo
stats.out.all<-data.frame()
for ( i in species){
if (i=="all"){
all.data.in<-all.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}else{
all.in<-all.data[all.data$taxon==i,]
all.data.in<-all.in[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}
geo.dist<-rdist.earth(geo.in, miles=F)
#2 select variables through random forests
rfsel.in<-varSelRF(xdata=env.in, Class=as.factor(cat.in),vars.drop.frac = 0.2)
sel.vars.in<-rfsel.in$selected.vars
best.env.in1<-env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
#3 run cca w/o spatial covariate
cca1<-cca(best.env.in1~cat.in)
cca1.aov<-anova(cca1,by="terms",permu=10000)
f.raw<-data.frame(cca1.aov)$F[1]
pval.raw<-data.frame(cca1.aov)$Pr..F.[1]
#4 run cca w/ spatial partial
cca2<-cca(best.env.in1~as.factor(cat.in),z=geo.dist)
cca2.aov<-anova(cca2,by="terms",permu=10000)
f.part<-data.frame(cca2.aov)$F[1]
pval.part<-data.frame(cca2.aov)$Pr..F.[1]
#5 run cca for space
cca3<-cca(abs(geo.in)~cat.in)
cca3.aov<-anova(cca3,by="terms",permu=10000)
f.space<-data.frame(cca3.aov)$F[1]
pval.space<-data.frame(cca3.aov)$Pr..F.[1]
#6 output stats
stats.out<-data.frame(cat,i,as.numeric(table(cat.in)[1]),as.numeric(table(cat.in)[2]),f.raw,pval.raw,pval.part,f.space,pval.space)
colnames(stats.out)<-c("test","subset","n.grp1","n.grp1","f","p.raw","p.partial","f.space","p.space")
stats.out.all<-rbind(stats.out.all,stats.out)
sel.vars[[paste("apollo.byspecies",i,sep="_")]]<-sel.vars.in
}
for Boechera stricta the selected environmental variables are: bio16 bio18
for Boechera pinetorum the selected environmental variables are: bio14 bio17
for Boechera retrofracta the selected environmental variables are: bio3 elev
for Boechera divaricarpa the selected environmental variables are: bio14 bio17 bio18 bio4
for Boechera species the selected environmental variables are: bio10 bio5
for Boechera collinsii the selected environmental variables are: bio3 elev
for Boechera pendulocarpa the selected environmental variables are: bio16 bio19 elev
for Boechera crandallii the selected environmental variables are: bio16 bio3
for Boechera lemmonii the selected environmental variables are: bio2 bio5 bio7 elev
for Boechera pauciflora the selected environmental variables are: bio1 bio9
for Boechera sparsiflora the selected environmental variables are: bio12 bio8
for Boechera microphylla the selected environmental variables are: bio11 bio6
for Boechera pallidifolia the selected environmental variables are: bio5 bio8
for Boechera perennans the selected environmental variables are: bio16 bio3
for Boechera fendleri the selected environmental variables are: bio3 bio9
for Boechera puberula the selected environmental variables are: bio5 bio9
for Boechera williamsii the selected environmental variables are: bio2 elev
for Boechera lyallii the selected environmental variables are: bio18 bio8
for Boechera spatifolia the selected environmental variables are: bio15 bio19
apollo.byspecies<-stats.out.all
apollo.byspecies$id<-"apollo.byspecies"
###########
#8 each species ploidy regardless of apollo
species.for.ploidy<-vector()
ploidy.data<-acc.data[,-which(names(acc.data)=="apollo")]
ploidy.data<-ploidy.data[complete.cases(ploidy.data[,-which(names(ploidy.data) %in% c("taxon")),]),]
for (i in unique(ploidy.data$taxon)){
data.in<-acc.data[acc.data$taxon==i,]
tab.out<-as.numeric(table(data.in$ploidy))
sex<-tab.out[1]
apo<-tab.out[2]
if(is.na(sex) | is.na(apo)){next
}else{
if(sex>=3 & apo>=3) {species.for.ploidy<-c(species.for.ploidy,i)
}else{next}
}
}
print(species.for.ploidy)
[1] "Boechera retrofracta"
[2] "Boechera collinsii"
[3] "Boechera divaricarpa"
[4] "Boechera lignifera"
[5] "Boechera pallidifolia"
[6] "Boechera stricta x Boechera spatifolia"
[7] "Boechera spatifolia"
all.data=acc.data;cat="ploidy";species=species.for.ploidy
stats.out.all<-data.frame()
for ( i in species){
if (i=="all"){
all.data.in<-all.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}else{
all.in<-all.data[all.data$taxon==i,]
all.data.in<-all.in[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}
geo.dist<-rdist.earth(geo.in, miles=F)
#2 select variables through random forests
rfsel.in<-varSelRF(xdata=env.in, Class=as.factor(cat.in),vars.drop.frac = 0.2)
sel.vars.in<-rfsel.in$selected.vars
best.env.in1<-env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
#3 run cca w/o spatial covariate
cca1<-cca(best.env.in1~cat.in)
cca1.aov<-anova(cca1,by="terms",permu=10000)
f.raw<-data.frame(cca1.aov)$F[1]
pval.raw<-data.frame(cca1.aov)$Pr..F.[1]
#4 run cca w/ spatial partial
cca2<-cca(best.env.in1~as.factor(cat.in),z=geo.dist)
cca2.aov<-anova(cca2,by="terms",permu=10000)
f.part<-data.frame(cca2.aov)$F[1]
pval.part<-data.frame(cca2.aov)$Pr..F.[1]
#5 run cca for space
cca3<-cca(abs(geo.in)~cat.in)
cca3.aov<-anova(cca3,by="terms",permu=10000)
f.space<-data.frame(cca3.aov)$F[1]
pval.space<-data.frame(cca3.aov)$Pr..F.[1]
#6 output stats
stats.out<-data.frame(cat,i,as.numeric(table(cat.in)[1]),as.numeric(table(cat.in)[2]),f.raw,pval.raw,pval.part,f.space,pval.space)
colnames(stats.out)<-c("test","subset","n.grp1","n.grp1","f","p.raw","p.partial","f.space","p.space")
stats.out.all<-rbind(stats.out.all,stats.out)
sel.vars[[paste("ploidy.byspecies",i,sep="_")]]<-sel.vars.in
}
for Boechera retrofracta the selected environmental variables are: bio3 elev
for Boechera collinsii the selected environmental variables are: bio11 bio6
for Boechera divaricarpa the selected environmental variables are: bio19 bio9
for Boechera lignifera the selected environmental variables are: bio13 bio7
for Boechera pallidifolia the selected environmental variables are: bio17 bio2
for Boechera stricta x Boechera spatifolia the selected environmental variables are: bio4 bio7
for Boechera spatifolia the selected environmental variables are: bio16 bio8
ploidy.byspecies<-stats.out.all
ploidy.byspecies$id<-"ploidy.byspecies"
###########
#9 retrofracta diploid across apollo
all.data=diploid.data;cat="apollo";species="Boechera retrofracta"
stats.out.all<-data.frame()
for ( i in species){
if (i=="all"){
all.data.in<-all.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}else{
all.in<-all.data[all.data$taxon==i,]
all.data.in<-all.in[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}
geo.dist<-rdist.earth(geo.in, miles=F)
#2 select variables through random forests
rfsel.in<-varSelRF(xdata=env.in, Class=as.factor(cat.in),vars.drop.frac = 0.2)
sel.vars.in<-rfsel.in$selected.vars
best.env.in1<-env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
#3 run cca w/o spatial covariate
cca1<-cca(best.env.in1~cat.in)
cca1.aov<-anova(cca1,by="terms",permu=10000)
f.raw<-data.frame(cca1.aov)$F[1]
pval.raw<-data.frame(cca1.aov)$Pr..F.[1]
#4 run cca w/ spatial partial
cca2<-cca(best.env.in1~as.factor(cat.in),z=geo.dist)
cca2.aov<-anova(cca2,by="terms",permu=10000)
f.part<-data.frame(cca2.aov)$F[1]
pval.part<-data.frame(cca2.aov)$Pr..F.[1]
#5 run cca for space
cca3<-cca(abs(geo.in)~cat.in)
cca3.aov<-anova(cca3,by="terms",permu=10000)
f.space<-data.frame(cca3.aov)$F[1]
pval.space<-data.frame(cca3.aov)$Pr..F.[1]
#6 output stats
stats.out<-data.frame(cat,i,as.numeric(table(cat.in)[1]),as.numeric(table(cat.in)[2]),f.raw,pval.raw,pval.part,f.space,pval.space)
colnames(stats.out)<-c("test","subset","n.grp1","n.grp1","f","p.raw","p.partial","f.space","p.space")
stats.out.all<-rbind(stats.out.all,stats.out)
}
for Boechera retrofracta the selected environmental variables are: bio2 bio3 elev
retro.2x.apollo<-stats.out.all
sel.vars[["retro.2x.apollo"]]<-sel.vars.in
retro.2x.apollo$id<-"retro.2x.apollo"
###########
#10 retrofract apollo 1 across ploidy
all.data=apollo1.data;cat="ploidy";species="Boechera retrofracta"
stats.out.all<-data.frame()
for ( i in species){
if (i=="all"){
all.data.in<-all.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}else{
all.in<-all.data[all.data$taxon==i,]
all.data.in<-all.in[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),cat,"taxon")]
all.data.in<-all.data.in[complete.cases(all.data.in),]
geo.in<-all.data.in[,c("lat","lon")]
env.in<-all.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
env.in<-apply(env.in,2, function (x) x+1+abs(min(x)))
cat.in<-all.data.in[,cat]
}
geo.dist<-rdist.earth(geo.in, miles=F)
#2 select variables through random forests
rfsel.in<-varSelRF(xdata=env.in, Class=as.factor(cat.in),vars.drop.frac = 0.2)
sel.vars.in<-rfsel.in$selected.vars
best.env.in1<-env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
#3 run cca w/o spatial covariate
cca1<-cca(best.env.in1~cat.in)
cca1.aov<-anova(cca1,by="terms",permu=10000)
f.raw<-data.frame(cca1.aov)$F[1]
pval.raw<-data.frame(cca1.aov)$Pr..F.[1]
#4 run cca w/ spatial partial
cca2<-cca(best.env.in1~as.factor(cat.in),z=geo.dist)
cca2.aov<-anova(cca2,by="terms",permu=10000)
f.part<-data.frame(cca2.aov)$F[1]
pval.part<-data.frame(cca2.aov)$Pr..F.[1]
#5 run cca for space
cca3<-cca(abs(geo.in)~cat.in)
cca3.aov<-anova(cca3,by="terms",permu=10000)
f.space<-data.frame(cca3.aov)$F[1]
pval.space<-data.frame(cca3.aov)$Pr..F.[1]
#6 output stats
stats.out<-data.frame(cat,i,as.numeric(table(cat.in)[1]),as.numeric(table(cat.in)[2]),f.raw,pval.raw,pval.part,f.space,pval.space)
colnames(stats.out)<-c("test","subset","n.grp1","n.grp1","f","p.raw","p.partial","f.space","p.space")
stats.out.all<-rbind(stats.out.all,stats.out)
}
for Boechera retrofracta the selected environmental variables are: bio3 elev
retro.apollo1.ploidy<-stats.out.all
sel.vars[["retro.apollo1.ploidy"]]<-sel.vars.in
retro.apollo1.ploidy$id<-"retro.apollo1.ploidy"
###########
#11 compile and output all stats
all.out<-rbind(full.apollo,
full.ploidy,
apollo.2x,
apollo.3x,
ploidy.apollo0,
ploidy.apollo1,
apollo.byspecies,
ploidy.byspecies,
retro.2x.apollo,
retro.apollo1.ploidy)
print(all.out)
test subset n.grp1 n.grp1 f
1 apollo all 869 726 0.59094
2 ploidy all 414 123 2.90041
3 apollo all 269 125 2.93710
4 apollo all 22 101 5.30264
5 ploidy all 269 22 6.97020
6 ploidy all 125 101 0.55337
7 apollo Boechera stricta 182 32 0.15997
8 apollo Boechera pinetorum 20 57 1.27102
9 apollo Boechera retrofracta 97 158 23.10333
10 apollo Boechera divaricarpa 46 158 3.11539
11 apollo Boechera species 8 19 2.15091
12 apollo Boechera collinsii 8 9 0.51375
13 apollo Boechera pendulocarpa 32 8 2.92401
14 apollo Boechera crandallii 26 7 6.34111
15 apollo Boechera lemmonii 23 7 3.40347
16 apollo Boechera pauciflora 6 18 0.29920
17 apollo Boechera sparsiflora 25 20 2.04086
18 apollo Boechera microphylla 8 27 5.82456
19 apollo Boechera pallidifolia 16 19 1.93826
20 apollo Boechera perennans 20 11 0.53108
21 apollo Boechera fendleri 8 8 2.45145
22 apollo Boechera puberula 30 17 1.81842
23 apollo Boechera williamsii 8 15 14.67060
24 apollo Boechera lyallii 14 5 0.05188
25 apollo Boechera spatifolia 8 11 8.13989
26 ploidy Boechera retrofracta 57 35 17.85254
27 ploidy Boechera collinsii 3 3 0.82491
28 ploidy Boechera divaricarpa 3 35 0.95323
29 ploidy Boechera lignifera 4 3 25.08239
30 ploidy Boechera pallidifolia 22 3 18.53509
31 ploidy Boechera stricta x Boechera spatifolia 10 5 47.97439
32 ploidy Boechera spatifolia 16 3 14.16778
33 apollo Boechera retrofracta 30 27 14.29599
34 ploidy Boechera retrofracta 27 35 5.12686
p.raw p.partial f.space p.space id
1 0.1643 0.1662 0.31880 0.5504 full.apollo
2 0.0358 0.0001 0.15209 0.6879 full.ploidy
3 0.0166 0.0165 2.86335 0.0831 apollo.2x
4 0.0357 0.0366 0.02411 0.8756 apollo.3x
5 0.0066 0.0147 0.48927 0.4644 ploidy.apollo0
6 0.5925 0.0086 0.08026 0.7759 ploidy.apollo1
7 0.8312 0.8300 14.05289 0.0002 apollo.byspecies
8 0.2676 0.2588 0.37789 0.5562 apollo.byspecies
9 0.0001 0.0001 34.21821 0.0001 apollo.byspecies
10 0.0659 0.0692 0.38695 0.5182 apollo.byspecies
11 0.1554 0.1527 3.24957 0.0897 apollo.byspecies
12 0.3129 0.3082 1.94378 0.1572 apollo.byspecies
13 0.0764 0.0763 8.56870 0.0073 apollo.byspecies
14 0.0166 0.0159 17.74086 0.0003 apollo.byspecies
15 0.0114 0.0114 2.03564 0.1686 apollo.byspecies
16 0.5739 0.5829 1.31592 0.2730 apollo.byspecies
17 0.1206 0.1209 0.67285 0.4107 apollo.byspecies
18 0.0125 0.0102 1.08164 0.3114 apollo.byspecies
19 0.1669 0.1605 0.09985 0.7488 apollo.byspecies
20 0.5467 0.5404 0.10706 0.7423 apollo.byspecies
21 0.1322 0.1360 0.26726 0.5917 apollo.byspecies
22 0.1573 0.1618 0.48657 0.4928 apollo.byspecies
23 0.0015 0.0017 5.17655 0.0197 apollo.byspecies
24 0.8194 0.8223 0.66788 0.4282 apollo.byspecies
25 0.0141 0.0128 1.97363 0.1374 apollo.byspecies
26 0.0001 0.0001 16.81754 0.0002 ploidy.byspecies
27 0.3054 0.2987 7.83813 0.1016 ploidy.byspecies
28 0.3830 0.3178 3.10301 0.0733 ploidy.byspecies
29 0.0006 0.0290 0.07358 0.8532 ploidy.byspecies
30 0.0003 0.0008 0.43017 0.4915 ploidy.byspecies
31 0.0002 0.0003 0.62011 0.6808 ploidy.byspecies
32 0.0010 0.0011 1.20783 0.3271 ploidy.byspecies
33 0.0003 0.0001 7.25425 0.0086 retro.2x.apollo
34 0.0056 0.0050 5.70543 0.0222 retro.apollo1.ploidy
write.csv(all.out, file="mr2014_allnichemodelstats2.csv", row.names=F)
print(sel.vars)
$full.apollo
[1] "bio1" "bio10" "bio15" "elev"
$full.ploidy
[1] "bio1" "bio10" "bio11" "bio13" "bio15" "bio16" "bio18" "bio3"
[9] "bio4" "bio5" "bio6" "bio7" "elev"
$apollo.2x
[1] "bio11" "bio19" "elev"
$apollo.3x
[1] "bio14" "bio18"
$ploidy.apollo0
[1] "bio1" "bio14"
$ploidy.apollo1
[1] "bio15" "bio18" "bio3" "bio4" "bio7" "bio8"
$`apollo.byspecies_Boechera stricta`
[1] "bio16" "bio18"
$`apollo.byspecies_Boechera pinetorum`
[1] "bio14" "bio17"
$`apollo.byspecies_Boechera retrofracta`
[1] "bio3" "elev"
$`apollo.byspecies_Boechera divaricarpa`
[1] "bio14" "bio17" "bio18" "bio4"
$`apollo.byspecies_Boechera species`
[1] "bio10" "bio5"
$`apollo.byspecies_Boechera collinsii`
[1] "bio3" "elev"
$`apollo.byspecies_Boechera pendulocarpa`
[1] "bio16" "bio19" "elev"
$`apollo.byspecies_Boechera crandallii`
[1] "bio16" "bio3"
$`apollo.byspecies_Boechera lemmonii`
[1] "bio2" "bio5" "bio7" "elev"
$`apollo.byspecies_Boechera pauciflora`
[1] "bio1" "bio9"
$`apollo.byspecies_Boechera sparsiflora`
[1] "bio12" "bio8"
$`apollo.byspecies_Boechera microphylla`
[1] "bio11" "bio6"
$`apollo.byspecies_Boechera pallidifolia`
[1] "bio5" "bio8"
$`apollo.byspecies_Boechera perennans`
[1] "bio16" "bio3"
$`apollo.byspecies_Boechera fendleri`
[1] "bio3" "bio9"
$`apollo.byspecies_Boechera puberula`
[1] "bio5" "bio9"
$`apollo.byspecies_Boechera williamsii`
[1] "bio2" "elev"
$`apollo.byspecies_Boechera lyallii`
[1] "bio18" "bio8"
$`apollo.byspecies_Boechera spatifolia`
[1] "bio15" "bio19"
$`ploidy.byspecies_Boechera retrofracta`
[1] "bio3" "elev"
$`ploidy.byspecies_Boechera collinsii`
[1] "bio11" "bio6"
$`ploidy.byspecies_Boechera divaricarpa`
[1] "bio19" "bio9"
$`ploidy.byspecies_Boechera lignifera`
[1] "bio13" "bio7"
$`ploidy.byspecies_Boechera pallidifolia`
[1] "bio17" "bio2"
$`ploidy.byspecies_Boechera stricta x Boechera spatifolia`
[1] "bio4" "bio7"
$`ploidy.byspecies_Boechera spatifolia`
[1] "bio16" "bio8"
$retro.2x.apollo
[1] "bio2" "bio3" "elev"
$retro.apollo1.ploidy
[1] "bio3" "elev"