Lovell — Aug 30, 2014, 4:11 PM
rm(list=ls())
setwd("~/Desktop/Adaptomics_Project/niche_modeling")
library(vegan)
Loading required package: permute
Loading required package: lattice
This is vegan 2.0-10
library(fields)
Loading required package: spam
Loading required package: grid
Spam version 0.41-0 (2014-02-26) is loaded.
Type 'help( Spam)' or 'demo( spam)' for a short introduction
and overview of this package.
Help for individual functions is also obtained by adding the
suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
Attaching package: 'spam'
The following objects are masked from 'package:base':
backsolve, forwardsolve
Loading required package: maps
#read in the data
acc.data<-read.csv("mr2014_accessioninfo.csv",header=T)
acc.data$wwf.num<-as.numeric(acc.data$wwf)
#just for retro
retro.data<-acc.data[acc.data$taxon=="Boechera holboellii var. retrofracta",]
retro.env<-retro.data[,c(10:12,17:35,38)]
retro.gen<-retro.data[,c("apollo","upgrade2")]
#mantel tests
#bio vs apo constrained by distance
retro.dist<-rdist.earth(retro.env[,c("lat","lon")], miles=F)
retro.bioclim.dist<-dist(retro.env[,4:22])
retro.apollo.dist<-dist(retro.gen$apollo)
mantel.out<-mantel.partial(retro.apollo.dist,retro.bioclim.dist,retro.dist)
mantel.out
Partial Mantel statistic based on Pearson's product-moment correlation
Call:
mantel.partial(xdis = retro.apollo.dist, ydis = retro.bioclim.dist, zdis = retro.dist)
Mantel statistic r: 0.0691
Significance: 0.001
Upper quantiles of permutations (null model):
90% 95% 97.5% 99%
0.0235 0.0307 0.0363 0.0491
Based on 999 permutations
#rda/cca
retro.pcnm<-pcnm(retro.dist)
top.retro.pcnm<-retro.pcnm$vectors[,1:2]
rda1<-rda(retro.gen~wwf.num+bio1+bio2+bio3+bio4+bio5+bio6+bio7+bio8+bio9+bio10
+bio11+bio12+bio13+bio14+bio15+bio16+bio17+bio18+bio19,
data=retro.env)
rda1.cond<-rda(retro.gen~wwf.num+bio1+bio2+bio3+bio4+bio5+bio6+bio7+bio8+bio9+bio10
+bio11+bio12+bio13+bio14+bio15+bio16+bio17+bio18+bio19 +
Condition(top.retro.pcnm),
data=retro.env)
plot(rda1)
plot(rda1.cond)
#dfa- discriminant function analysis- this is my prefered analysis
library(MASS)
pdf("mr2014_retro.env.comps.pdf")
pairs(retro.env)
dev.off()
pdf
2
retro.apollo<-retro.gen$apollo
lda1<-lda(retro.apollo~wwf.num+bio1+bio2+bio3+bio4+bio5+bio6+bio7+bio8+bio9+bio10
+bio11+bio12+bio13+bio14+bio15+bio16+bio17+bio18+bio19, data=retro.env)
Warning: variables are collinear
# The first part of the output displays the formula that was fitted.
# This is followed by the prior probabilities of the groups,
# which reflects the proportion of each group within the dataset.
# In other words, if you had no measurements and the number of measured samples
# represented the actual relative abundances of the groups,
# the prior probabilities would describe the probability that any unknown
# sample would belong to each of the groups.
# Next comes the group means,
# which is a table of the average value of each of the variables for each of your groups.
# Scanning this table can help you to see if the groups are distinctive in
# terms of one or more of the variables.
# Next are the coefficients of the discriminant function (a, b, and c).
# Because there are three groups, there are 3-1 linear discriminants
# (if you had only two groups, you would need only 1 [2-1] linear discriminants).
# For each linear discriminant (LD1 and LD2), there is one coefficient corresponding,
# in order, to each of the variables. Last in the list is the proportion of the trace,
# which gives the variance explained by each discriminant function.
# Here, discriminant 1 explains 75% of the variance, with the remainder explained by
# discriminant 2.
lda1
Call:
lda(retro.apollo ~ wwf.num + bio1 + bio2 + bio3 + bio4 + bio5 +
bio6 + bio7 + bio8 + bio9 + bio10 + bio11 + bio12 + bio13 +
bio14 + bio15 + bio16 + bio17 + bio18 + bio19, data = retro.env)
Prior probabilities of groups:
0 1
0.3862 0.6138
Group means:
wwf.num bio1 bio2 bio3 bio4 bio5 bio6 bio7 bio8 bio9 bio10
0 37.67 3.235 13.52 34.56 942.7 24.65 -15.21 39.85 5.631 3.084 14.77
1 31.40 3.502 15.19 39.25 847.4 24.69 -14.20 38.89 4.542 4.562 14.09
bio11 bio12 bio13 bio14 bio15 bio16 bio17 bio18 bio19
0 -8.310 528.3 74.61 20.49 38.33 203.8 72.95 118.4 157.7
1 -6.593 461.6 61.38 21.13 32.57 166.5 74.39 103.2 131.6
Coefficients of linear discriminants:
LD1
wwf.num -0.003657
bio1 -0.042013
bio2 0.251539
bio3 0.237760
bio4 0.046297
bio5 -0.618541
bio6 -0.156234
bio7 -0.150302
bio8 -0.005844
bio9 0.011043
bio10 -1.033718
bio11 1.799699
bio12 0.007112
bio13 0.053122
bio14 -0.002699
bio15 0.010814
bio16 -0.013525
bio17 0.025599
bio18 -0.032747
bio19 -0.027189
lda1.predict<-predict(lda1)
plot(lda1)
tab1 <- table(retro.apollo, lda1.predict$class)
tab1
retro.apollo 0 1
0 60 35
1 24 127
#This command will calculate the overall predictive accuracy,
#that is, the proportion of cases that lie along the diagonal:
sum(tab1[row(tab1) == col(tab1)]) / sum(tab1)
[1] 0.7602
lda2 <- lda(retro.apollo~wwf.num+bio1+bio2+bio3+bio4+bio5+bio6+bio7+bio8+bio9+bio10
+bio11+bio12+bio13+bio14+bio15+bio16+bio17+bio18+bio19,
data=retro.env,
CV=TRUE)
Warning: variables are collinear
#The success of the discrimination can be measured similarly:
tab2 <- table(retro.apollo, lda2$class)
tab2
retro.apollo 0 1
0 53 42
1 31 120
sum(tab2[row(tab2) == col(tab2)]) / sum(tab2)
[1] 0.7033
#another way to look at it
ct <- table(retro.apollo, lda2$class)
diag(prop.table(ct, 1))
0 1
0.5579 0.7947
# total percent correct
sum(diag(prop.table(ct)))
[1] 0.7033
#get significance
retro.env.df<-cbind(retro.env$wwf.num,retro.env$bio1,retro.env$bio2,retro.env$bio3,retro.env$bio4,retro.env$bio5,retro.env$bio6,retro.env$bio7,retro.env$bio8,retro.env$bio9,retro.env$bio10
,retro.env$bio11,retro.env$bio12,retro.env$bio13,retro.env$bio14,retro.env$bio15,retro.env$bio16,retro.env$bio17,retro.env$bio18,retro.env$bio19)
retro.gen1<-cbind(retro.gen$apollo, retro.gen$upgrade)
Warning: Name partially matched in data frame
retro.env.df<-cbind(retro.env$wwf.num,bio1)
Error: object 'bio1' not found
fit <- manova(retro.env.df ~ retro.gen1)
summary(fit, test="Pillai")
Error: residuals have rank 19 < 20
summary(manova(retro.env.df~retro.apollo),test="Wilks")
Error: residuals have rank 19 < 20
#random forests
library(ipred)
bt1 = bagging(retro.apollo~wwf.num+bio1+bio2+bio3+bio4+bio5+bio6+bio7+bio8+bio9+bio10
+bio11+bio12+bio13+bio14+bio15+bio16+bio17+bio18+bio19,
data=retro.env,nbagg=30,coob=T)
bt1$err
[1] 0.4541
library(randomForest)
Warning: package 'randomForest' was built under R version 3.1.1
randomForest 4.6-10
Type rfNews() to see new features/changes/bug fixes.
rf1 = randomForest(retro.apollo~wwf.num+bio1+bio2+bio3+bio4+bio5+bio6+bio7+bio8+bio9+bio10
+bio11+bio12+bio13+bio14+bio15+bio16+bio17+bio18+bio19,
data=retro.env,importance=T,na.action=na.exclude)
Warning: The response has five or fewer unique values. Are you sure you
want to do regression?
rf1
Call:
randomForest(formula = retro.apollo ~ wwf.num + bio1 + bio2 + bio3 + bio4 + bio5 + bio6 + bio7 + bio8 + bio9 + bio10 + bio11 + bio12 + bio13 + bio14 + bio15 + bio16 + bio17 + bio18 + bio19, data = retro.env, importance = T, na.action = na.exclude)
Type of random forest: regression
Number of trees: 500
No. of variables tried at each split: 6
Mean of squared residuals: 0.1911
% Var explained: 19.39
varImpPlot(rf1)
plot(rf1)
bt2 = bagging(retro.apollo~wwf.num+bio1+bio2+bio3+bio4+bio5+bio6+bio7+bio8+bio9+bio10
+bio11+bio12+bio13+bio14+bio15+bio16+bio17+bio18+bio19,
data=retro.env,nbagg=30,coob=T)
bt2.resid = retro.apollo-predict(bt2)
SSE = sum(sapply(bt2.resid,function(x)(x-mean(bt2.resid))^2))
SSY = sum(sapply(retro.apollo,function(x)(x-mean(retro.apollo))^2))
#compare this to the variance explained in rf1
rf1
Call:
randomForest(formula = retro.apollo ~ wwf.num + bio1 + bio2 + bio3 + bio4 + bio5 + bio6 + bio7 + bio8 + bio9 + bio10 + bio11 + bio12 + bio13 + bio14 + bio15 + bio16 + bio17 + bio18 + bio19, data = retro.env, importance = T, na.action = na.exclude)
Type of random forest: regression
Number of trees: 500
No. of variables tried at each split: 6
Mean of squared residuals: 0.1911
% Var explained: 19.39
1-SSE/SSY
[1] 0.1835
#rf is better
library(tree)
rt = tree(retro.apollo~wwf.num+bio1+bio2+bio3+bio4+bio5+bio6+bio7+bio8+bio9+bio10
+bio11+bio12+bio13+bio14+bio15+bio16+bio17+bio18+bio19,
data=retro.env)
summary(rt)
Regression tree:
tree(formula = retro.apollo ~ wwf.num + bio1 + bio2 + bio3 +
bio4 + bio5 + bio6 + bio7 + bio8 + bio9 + bio10 + bio11 +
bio12 + bio13 + bio14 + bio15 + bio16 + bio17 + bio18 + bio19,
data = retro.env)
Variables actually used in tree construction:
[1] "bio3" "bio7" "bio15" "bio1" "bio14" "bio5" "bio8" "bio17" "bio18"
Number of terminal nodes: 13
Residual mean deviance: 0.121 = 28.1 / 233
Distribution of residuals:
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.9000 -0.0907 0.1010 0.0000 0.1010 0.9710
#just random trees
rf1 = randomForest(retro.apollo~wwf.num+bio1+bio2+bio3+bio4+bio5+bio6+bio7+bio8+bio9+bio10
+bio11+bio12+bio13+bio14+bio15+bio16+bio17+bio18+bio19,
data=retro.env,importance=T,na.action=na.exclude)
Warning: The response has five or fewer unique values. Are you sure you
want to do regression?
rf1
Call:
randomForest(formula = retro.apollo ~ wwf.num + bio1 + bio2 + bio3 + bio4 + bio5 + bio6 + bio7 + bio8 + bio9 + bio10 + bio11 + bio12 + bio13 + bio14 + bio15 + bio16 + bio17 + bio18 + bio19, data = retro.env, importance = T, na.action = na.exclude)
Type of random forest: regression
Number of trees: 500
No. of variables tried at each split: 6
Mean of squared residuals: 0.1878
% Var explained: 20.78
varImpPlot(rf1)
plot(rf1)