Lovell — Oct 8, 2014, 6:37 PM
rm(list=ls())
setwd("~/Desktop/Adaptomics_Project/niche_modeling")
library(vegan)
Loading required package: permute
Loading required package: lattice
This is vegan 2.0-10
library(fields)
Loading required package: spam
Warning: package 'spam' was built under R version 3.1.1
Loading required package: grid
Spam version 1.0-1 (2014-09-09) is loaded.
Type 'help( Spam)' or 'demo( spam)' for a short introduction
and overview of this package.
Help for individual functions is also obtained by adding the
suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
Attaching package: 'spam'
The following objects are masked from 'package:base':
backsolve, forwardsolve
Loading required package: maps
Warning: package 'maps' was built under R version 3.1.1
library(varSelRF)
Loading required package: randomForest
Warning: package 'randomForest' was built under R version 3.1.1
randomForest 4.6-10
Type rfNews() to see new features/changes/bug fixes.
#read in the data
acc.data<-read.csv("mr2014_accessioninfo2.csv",header=T)
acc.data<-acc.data[,c("lat","lon","elev",paste("bio",c(1:19), sep=""),"apollo","taxon","ploidy"),]
for (i in unique(acc.data$taxon)){
data.in<-acc.data[acc.data$taxon==i,]
tab.in<-table(data.in$apollo, data.in$ploidy)
if(dim(tab.in)[2]>0){
print(i)
print(tab.in)
}
}
[1] "Boechera stricta"
2 3
0 106 0
1 14 2
[1] "Boechera pinetorum"
2 3
0 1 3
1 0 13
[1] "Boechera retrofracta"
2 3
0 30 0
1 27 35
[1] "Boechera divaricarpa"
2 3 4
0 0 9 2
1 3 26 3
[1] "Boechera divaricarpa x Boechera retrofracta"
3
1 1
[1] "Boechera species"
2 3
0 4 1
1 3 1
[1] "Boechera collinsii"
2 3
0 3 2
1 0 1
[1] "Boechera pendulocarpa"
2 3
0 4 1
1 1 0
[1] "Boechera stricta x Boechera retrofracta"
2
0 1
1 9
[1] "Boechera fernaldiana ssp. fernaldiana"
2 3
0 6 0
1 1 1
[1] "Boechera breweri"
2
0 2
1 0
[1] "Boechera cobrensis"
2
0 2
1 0
[1] "Boechera crandallii"
2
0 20
1 4
[1] "Boechera lemmonii"
2 3
0 3 0
1 0 1
[1] "Boechera lignifera"
2 3
0 1 0
1 3 3
[1] "Boechera sparsiflora"
2 3
0 0 2
1 1 0
[1] "Boechera microphylla"
2
0 0
1 4
[1] "Boechera pallidifolia"
2 3
0 10 0
1 12 3
[1] "Boechera pendulina"
2
0 4
1 1
[1] "Boechera perennans"
2
0 3
1 1
[1] "Boechera fendleri"
2 3
0 1 1
1 3 0
[1] "Boechera puberula"
2 3
0 3 0
1 0 1
[1] "Boechera williamsii"
2
0 5
1 13
[1] "Boechera laevigata"
2
0 6
1 0
[1] "Boechera missouriensis"
2
0 1
[1] "Boechera lyallii"
2
0 2
1 2
[1] "Boechera canadensis"
2
0 7
1 0
[1] "Boechera platysperma"
2
0 2
1 0
[1] "Boechera rectissima"
2
0 1
1 1
[1] "Boechera schistacea"
2
0 1
[1] "Boechera shockleyi"
2
0 2
[1] "Boechera shortii"
2
0 1
[1] "Boechera suffrutescens"
2
0 0
1 0
[1] "Polyctenium fremontii var. confertum"
3
0 1
[1] "Boechera inyoensis"
3
0 0
1 1
[1] "Boechera lincolnensis"
3
0 1
1 1
[1] "Boechera formosa"
2
0 5
1 1
[1] "Boechera xylopoda"
2
0 1
[1] "Boechera glaucovalvula"
3
0 1
[1] "Boechera gracilipes"
2
0 1
1 1
[1] "Boechera patens"
2
0 1
[1] "B. oxylobula"
2
0 1
[1] "Boechera falcifructa"
3
1 1
[1] "Boechera glareosa"
2
1 1
[1] "Boechera lasiocarpa"
2
0 9
[1] "Boechera johnstonii"
2
0 1
[1] "Boechera nevadensis"
2
0 1
[1] "Boechera oxylobula"
2
0 10
[1] "Boechera polyantha"
2
0 5
1 2
[1] "Boechera polyantha x Boechera retrofracta"
3
1 2
[1] "Boechera stricta x Boechera spatifolia"
2 3
1 10 5
[1] "Boechera spatifolia"
2 3
0 8 0
1 8 3
#first for apollo-
#make sure data is complete for apollo
apollo.data<-acc.data[complete.cases(acc.data[,-which(names(acc.data) %in% c("ploidy")),]),]
#do for all data:
apollo.space<-apollo.data[,c("lat","lon")]
apollo.env<-apollo.data[,c("elev",paste("bio",c(1:19), sep=""))]
apollo.gen<-apollo.data[,c("apollo")]
# run random forest, backwards elimination
full.forest<-randomForest(x=apollo.env, y=as.factor(apollo.gen))
varImpPlot(full.forest, main="Full Sample Random Forest Variable Importance")
apollo.rfsel<-varSelRF(xdata=apollo.env, Class=as.factor(apollo.gen),vars.drop.frac = 0.2)
apollo.rfsel
Backwards elimination on random forest; ntree = 5000 ; mtryFactor = 1
Selected variables:
[1] "bio1" "bio10" "elev"
Number of selected variables: 3
# output best selected variables
sel.vars<-apollo.rfsel$selected.vars
print(sel.vars)
[1] "bio1" "bio10" "elev"
#subset by these columns
best.env<-apollo.env[,sel.vars]
#run cca- no partiapolloing, so geography may drive these patterns
cca1<-cca(best.env~apollo.gen)
Error: All row sums must be >0 in the community data matrix
#significance using permutations
aov.out.apollo<-anova(cca1,by="terms",permu=10000)
Error: object 'cca1' not found
print(aov.out.apollo)
Error: error in evaluating the argument 'x' in selecting a method for function 'print': Error: object 'aov.out.apollo' not found
#get geographic distance matrix
geo.dist<-rdist.earth(apollo.space[,c("lat","lon")], miles=F)
#run cca on residuals after partiapolloing geographic distance
cca2<-cca(best.env~as.factor(apollo.gen),z=geo.dist)
Error: All row sums must be >0 in the community data matrix
#permutation for significance
aov.out.apollo.partial<-anova(cca2,by="terms",permu=10000)
Error: object 'cca2' not found
print(aov.out.apollo.partial)
Error: error in evaluating the argument 'x' in selecting a method for function 'print': Error: object 'aov.out.apollo.partial' not found
#get the species to loop through
#criteria here is we need 10 observations of each apollo 1 and apollo 2
species.to.test<-vector()
for (i in unique(apollo.data$taxon)){
data.in<-apollo.data[apollo.data$taxon==i,]
tab.out<-table(data.in$apollo,data.in$ploidy)
if(dim(tab.out)[1]>1){
tab.out<-data.frame(tab.out)
sex<-sum(tab.out[tab.out$Var1==0,3])
apo<-sum(tab.out[tab.out$Var1==1,3])
cat(i, "apollo.0=",sex, "apollo.1=",apo,"\n")
if(sex>=10 & apo>=10){species.to.test<-c(species.to.test,i)
}else{next}
}else{next}
}
Boechera stricta apollo.0= 104 apollo.1= 16
Boechera pinetorum apollo.0= 4 apollo.1= 13
Boechera retrofracta apollo.0= 30 apollo.1= 62
Boechera divaricarpa apollo.0= 11 apollo.1= 32
Boechera species apollo.0= 5 apollo.1= 4
Boechera collinsii apollo.0= 5 apollo.1= 1
Boechera pendulocarpa apollo.0= 5 apollo.1= 1
Boechera stricta x Boechera retrofracta apollo.0= 1 apollo.1= 9
Boechera fernaldiana ssp. fernaldiana apollo.0= 6 apollo.1= 2
Boechera breweri apollo.0= 2 apollo.1= 0
Boechera cobrensis apollo.0= 2 apollo.1= 0
Boechera crandallii apollo.0= 20 apollo.1= 4
Boechera davidsonii apollo.0= 0 apollo.1= 0
Boechera lemmonii apollo.0= 3 apollo.1= 1
Boechera lignifera apollo.0= 1 apollo.1= 6
Boechera pulchra apollo.0= 0 apollo.1= 0
Boechera pauciflora apollo.0= 0 apollo.1= 0
Boechera sparsiflora apollo.0= 2 apollo.1= 1
Boechera arcuata apollo.0= 0 apollo.1= 0
Boechera microphylla apollo.0= 0 apollo.1= 4
Boechera pallidifolia apollo.0= 10 apollo.1= 15
Boechera pendulina apollo.0= 4 apollo.1= 1
Boechera perennans apollo.0= 3 apollo.1= 1
Boechera fendleri apollo.0= 2 apollo.1= 3
Boechera puberula apollo.0= 3 apollo.1= 1
Boechera williamsii apollo.0= 5 apollo.1= 13
Boechera laevigata apollo.0= 3 apollo.1= 0
Boechera macounii apollo.0= 0 apollo.1= 0
Boechera lyallii apollo.0= 2 apollo.1= 1
Boechera cusickii apollo.0= 0 apollo.1= 0
Boechera canadensis apollo.0= 6 apollo.1= 0
Boechera platysperma apollo.0= 2 apollo.1= 0
Boechera suffrutescens apollo.0= 0 apollo.1= 0
Cusickiella quadricostata apollo.0= 0 apollo.1= 0
Boechera subpinnatifida apollo.0= 0 apollo.1= 0
Boechera inyoensis apollo.0= 0 apollo.1= 1
Boechera rectissima apollo.0= 1 apollo.1= 1
Boechera rigidissima apollo.0= 0 apollo.1= 0
Boechera lincolnensis apollo.0= 1 apollo.1= 1
Boechera formosa apollo.0= 5 apollo.1= 1
Boechera gracilipes apollo.0= 1 apollo.1= 1
Boechera holboellii apollo.0= 0 apollo.1= 0
Boechera howellii apollo.0= 0 apollo.1= 0
Boechera polyantha apollo.0= 5 apollo.1= 2
Boechera spatifolia apollo.0= 8 apollo.1= 11
for (i in species.to.test){
apollo.data.in<-apollo.data[apollo.data$taxon==i,]
apollo.space.in<-apollo.data.in[,c("lat","lon")]
apollo.env.in<-apollo.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
apollo.gen.in<-apollo.data.in[,c("apollo")]
apollo.rfsel.in<-varSelRF(xdata=apollo.env.in, Class=as.factor(apollo.gen.in),vars.drop.frac = 0.2)
full.forest.in<-randomForest(x=apollo.env, y=as.factor(apollo.gen))
varImpPlot(full.forest.in, main=paste(i,"Random Forest Variable Importance"))
sel.vars.in<-apollo.rfsel.in$selected.vars
best.env.in<-apollo.env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
cca1<-cca(best.env.in~apollo.gen.in)
aov.out.apollo<-anova(cca1,by="terms",permu=10000)
print(aov.out.apollo)
geo.dist<-rdist.earth(apollo.space.in[,c("lat","lon")], miles=F)
cca2<-cca(best.env.in~as.factor(apollo.gen.in),z=geo.dist)
aov.out.apollo.partial<-anova(cca2,by="terms",permu=10000)
print(aov.out.apollo.partial)
cat("\n*********\n")
}
for Boechera stricta the selected environmental variables are: bio16 bio18
Permutation test for cca under reduced model
Terms added sequentially (first to last)
Model: cca(formula = best.env.in ~ apollo.gen.in)
Df Chisq F N.Perm Pr(>F)
apollo.gen.in 1 0.00 0.11 9999 0.87
Residual 212 0.03
Permutation test for cca under reduced model
Terms added sequentially (first to last)
Model: cca(formula = best.env.in ~ as.factor(apollo.gen.in), z = geo.dist)
Df Chisq F N.Perm Pr(>F)
as.factor(apollo.gen.in) 1 0.00 0.11 9999 0.87
Residual 212 0.03
*********
for Boechera retrofracta the selected environmental variables are: bio2 bio3 elev
Permutation test for cca under reduced model
Terms added sequentially (first to last)
Model: cca(formula = best.env.in ~ apollo.gen.in)
Df Chisq F N.Perm Pr(>F)
apollo.gen.in 1 0.00 20.94 9999 1e-04 ***
Residual 253 0.01
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Permutation test for cca under reduced model
Terms added sequentially (first to last)
Model: cca(formula = best.env.in ~ as.factor(apollo.gen.in), z = geo.dist)
Df Chisq F N.Perm Pr(>F)
as.factor(apollo.gen.in) 1 0.00 20.94 9999 1e-04 ***
Residual 253 0.01
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
*********
for Boechera divaricarpa the selected environmental variables are: bio14 bio18 bio4
Permutation test for cca under reduced model
Terms added sequentially (first to last)
Model: cca(formula = best.env.in ~ apollo.gen.in)
Df Chisq F N.Perm Pr(>F)
apollo.gen.in 1 0.00 3.95 9999 0.047 *
Residual 202 0.02
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Permutation test for cca under reduced model
Terms added sequentially (first to last)
Model: cca(formula = best.env.in ~ as.factor(apollo.gen.in), z = geo.dist)
Df Chisq F N.Perm Pr(>F)
as.factor(apollo.gen.in) 1 0.00 3.95 9999 0.045 *
Residual 202 0.02
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
*********
for Boechera pallidifolia the selected environmental variables are: bio5 bio8
Permutation test for cca under reduced model
Terms added sequentially (first to last)
Model: cca(formula = best.env.in ~ apollo.gen.in)
Df Chisq F N.Perm Pr(>F)
apollo.gen.in 1 0.00 2.08 9999 0.14
Residual 33 0.02
Permutation test for cca under reduced model
Terms added sequentially (first to last)
Model: cca(formula = best.env.in ~ as.factor(apollo.gen.in), z = geo.dist)
Df Chisq F N.Perm Pr(>F)
as.factor(apollo.gen.in) 1 0.00 2.08 9999 0.14
Residual 33 0.02
*********
#make sure data is complete for ploidy
ploidy.data<-acc.data[complete.cases(acc.data[,-which(names(acc.data) %in% c("apollo")),]),]
#do for all data:
ploidy.space<-ploidy.data[,c("lat","lon")]
ploidy.env<-ploidy.data[,c("elev",paste("bio",c(1:19), sep=""))]
ploidy.gen<-ploidy.data[,c("ploidy")]
# run random forest, backwards elimination
full.forest<-randomForest(x=ploidy.env, y=as.factor(ploidy.gen))
varImpPlot(full.forest, main="Full Sample Random Forest Variable Importance")
ploidy.rfsel<-varSelRF(xdata=ploidy.env, Class=as.factor(ploidy.gen),vars.drop.frac = 0.2)
ploidy.rfsel
Backwards elimination on random forest; ntree = 5000 ; mtryFactor = 1
Selected variables:
[1] "bio18" "bio4"
Number of selected variables: 2
# output best selected variables
sel.vars<-ploidy.rfsel$selected.vars
print(sel.vars)
[1] "bio18" "bio4"
#subset by these columns
best.env<-ploidy.env[,sel.vars]
#run cca- no partiploidying, so geography may drive these patterns
cca1<-cca(best.env~ploidy.gen)
#significance using permutations
aov.out.ploidy<-anova(cca1,by="terms",permu=10000)
print(aov.out.ploidy)
Permutation test for cca under reduced model
Terms added sequentially (first to last)
Model: cca(formula = best.env ~ ploidy.gen)
Df Chisq F N.Perm Pr(>F)
ploidy.gen 1 0.00 0.39 9999 0.54
Residual 540 0.02
#get geographic distance matrix
geo.dist<-rdist.earth(ploidy.space[,c("lat","lon")], miles=F)
#run cca on residuals after partiploidying geographic distance
cca2<-cca(best.env~as.factor(ploidy.gen),z=geo.dist)
#permutation for significance
aov.out.ploidy.partial<-anova(cca2,by="terms",permu=10000)
print(aov.out.ploidy.partial)
Permutation test for cca under reduced model
Terms added sequentially (first to last)
Model: cca(formula = best.env ~ as.factor(ploidy.gen), z = geo.dist)
Df Chisq F N.Perm Pr(>F)
as.factor(ploidy.gen) 2 0.00 2.10 9999 0.13
Residual 539 0.02
#get the species to loop through
#criteria here is we need 10 observations of each ploidy 1 and ploidy 2
species.to.test<-vector()
for (i in unique(ploidy.data$taxon)){
data.in<-ploidy.data[ploidy.data$taxon==i,]
tab.out<-table(data.in$ploidy,data.in$ploidy)
if(dim(tab.out)[1]>1){
tab.out<-data.frame(tab.out)
dip<-sum(tab.out[tab.out$Var2==2,3])
trip<-sum(tab.out[tab.out$Var2==3,3])
cat(i, "2x=",dip, "3x=",trip,"\n")
if(dip>=10 & trip>=10){species.to.test<-c(species.to.test,i)
}else{next}
}else{next}
}
Boechera stricta 2x= 122 3x= 2
Boechera species 2x= 7 3x= 2
Boechera retrofracta 2x= 57 3x= 35
Boechera collinsii 2x= 3 3x= 3
Boechera pinetorum 2x= 1 3x= 16
Boechera divaricarpa 2x= 3 3x= 35
Boechera pendulocarpa 2x= 5 3x= 1
Boechera lignifera 2x= 4 3x= 3
Boechera lemmonii 2x= 3 3x= 1
Boechera sparsiflora 2x= 1 3x= 2
Boechera puberula 2x= 3 3x= 1
Boechera fendleri 2x= 4 3x= 1
Boechera fernaldiana ssp. fernaldiana 2x= 8 3x= 1
Boechera pallidifolia 2x= 22 3x= 3
Boechera stricta x Boechera spatifolia 2x= 10 3x= 5
Boechera spatifolia 2x= 16 3x= 3
#just retrofracta
for (i in species.to.test){
ploidy.data.in<-ploidy.data[ploidy.data$taxon==i,]
ploidy.space.in<-ploidy.data.in[,c("lat","lon")]
ploidy.env.in<-ploidy.data.in[,c("elev",paste("bio",c(1:19), sep=""))]
ploidy.gen.in<-ploidy.data.in[,c("ploidy")]
ploidy.rfsel.in<-varSelRF(xdata=ploidy.env.in, Class=as.factor(ploidy.gen.in),vars.drop.frac = 0.2)
full.forest.in<-randomForest(x=ploidy.env, y=as.factor(ploidy.gen))
varImpPlot(full.forest.in, main=paste(i,"Random Forest Variable Importance"))
sel.vars.in<-ploidy.rfsel.in$selected.vars
best.env.in<-ploidy.env.in[,sel.vars.in]
cat("for", i , "the selected environmental variables are:",sel.vars.in, "\n")
cca1<-cca(best.env.in~ploidy.gen.in)
aov.out.ploidy<-anova(cca1,by="terms",permu=10000)
print(aov.out.ploidy)
geo.dist<-rdist.earth(ploidy.space.in[,c("lat","lon")], miles=F)
cca2<-cca(best.env.in~as.factor(ploidy.gen.in),z=geo.dist)
aov.out.ploidy.partial<-anova(cca2,by="terms",permu=10000)
print(aov.out.ploidy.partial)
cat("\n*********\n")
}
for Boechera retrofracta the selected environmental variables are: bio3 elev
Permutation test for cca under reduced model
Terms added sequentially (first to last)
Model: cca(formula = best.env.in ~ ploidy.gen.in)
Df Chisq F N.Perm Pr(>F)
ploidy.gen.in 1 0.00 13.58 9999 1e-04 ***
Residual 90 0.01
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Permutation test for cca under reduced model
Terms added sequentially (first to last)
Model: cca(formula = best.env.in ~ as.factor(ploidy.gen.in), z = geo.dist)
Df Chisq F N.Perm Pr(>F)
as.factor(ploidy.gen.in) 1 0.00 13.58 9999 1e-04 ***
Residual 90 0.01
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
*********
#full analysis for just retrofracta
data.retro<-acc.data[acc.data$taxon=="Boechera retrofracta",]
data.retro<-data.retro[complete.cases(data.retro),]
all.space.in<-data.retro[,c("lat","lon")]
all.env.in<-data.retro[,c("elev",paste("bio",c(1:19), sep=""))]
retro.ploidy<-data.retro[,c("ploidy")]
all.rfsel.in<-varSelRF(xdata=all.env.in, Class=as.factor(retro.ploidy),vars.drop.frac = 0.2)
full.forest.in<-randomForest(x=all.env.in, y=as.factor(retro.ploidy))
sel.vars.ploidy<-all.rfsel.in$selected.vars
retro.apollo<-data.retro[,c("apollo")]
all.rfsel.in<-varSelRF(xdata=all.env.in, Class=as.factor(retro.apollo),vars.drop.frac = 0.2)
full.forest.in<-randomForest(x=all.env.in, y=as.factor(retro.apollo))
sel.vars.apollo<-all.rfsel.in$selected.vars
retro.vars<-unique(c(sel.vars.apollo,sel.vars.ploidy))
retro.vars.data <- cbind(data.retro[,sel.vars.apollo])
ret.unique<-as.character(paste(data.retro$ploidy,data.retro$apollo, sep="."))
data.retro$ploidy.apollo<-ret.unique
retro.toplot<-data.retro[,c("elev","bio2","bio3","ploidy.apollo")]
library(GGally)
Warning: package 'GGally' was built under R version 3.1.1
ggpairs(retro.toplot, colour="ploidy.apollo",columns=c("elev","bio2","bio3"))
#manova
fit <- with(data.retro, manova(cbind(elev,bio2,bio3) ~ retro.ploidy*retro.apollo))
summary(fit, test="Pillai")
Df Pillai approx F num Df den Df Pr(>F)
retro.ploidy 1 0.402 19.5 3 87 9.2e-10 ***
retro.apollo 1 0.288 11.8 3 87 1.6e-06 ***
Residuals 89
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#just for diploids:
dip.retro<-data.retro[data.retro$ploidy==2,]
dip.retro<-dip.retro[complete.cases(dip.retro),]
all.space.in<-dip.retro[,c("lat","lon")]
all.env.in<-dip.retro[,c("elev",paste("bio",c(1:19), sep=""))]
retro.apollo<-dip.retro[,c("apollo")]
all.rfsel.in<-varSelRF(xdata=all.env.in, Class=as.factor(retro.apollo),vars.drop.frac = 0.2)
full.forest.in<-randomForest(x=all.env.in, y=as.factor(retro.apollo))
sel.vars.apollo<-all.rfsel.in$selected.vars
retro.toplot <- cbind(dip.retro[,sel.vars.apollo])
ret.apollo<-as.character(dip.retro$apollo)
retro.toplot$apollo<-ret.apollo
library(GGally)
ggpairs(retro.toplot, colour=as.character("apollo"),columns=sel.vars.apollo)
#manova
fit <- with(retro.toplot, manova(cbind(elev,bio2,bio3) ~ apollo))
summary(fit, test="Pillai")
Df Pillai approx F num Df den Df Pr(>F)
apollo 1 0.321 8.36 3 53 0.00012 ***
Residuals 55
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1