Title: Assessing collinearity
Name: Tammy L. Elliott
Date: March 19, 2016
R version 3.1
Show characteristics of environmental variables
# Quick inspection of the environmental data frame `env`.
# Column names
names(env)
## [1] "Elevation..m." "Sky.visible...." "Slope..Degrees." "site.soil"
## [5] "site.moist"
# Number of rows and columns
dim(env)
## [1] 176 5
# Compact structure: type and first values of every column
str(env)
## 'data.frame': 176 obs. of 5 variables:
## $ Elevation..m. : int 523 522 524 532 522 531 533 528 530 530 ...
## $ Sky.visible....: num 95 100 100 80 100 80 100 100 100 100 ...
## $ Slope..Degrees.: num 0 0 0 0 0 0 1 0 0 0 ...
## $ site.soil : num 22.2 22.5 14.5 18.1 26.4 ...
## $ site.moist : num 2 2 2 2 1 2 2 2 1 2 ...
# First six rows
head(env)
## Elevation..m. Sky.visible.... Slope..Degrees. site.soil site.moist
## EA1 523 95 0 22.250 2
## EB1 522 100 0 22.500 2
## EC1 524 100 0 14.500 2
## ED1 532 80 0 18.125 2
## EE1 522 100 0 26.375 1
## EF1 531 80 0 30.000 2
# Per-column summary statistics (min, quartiles, mean, max)
summary(env)
## Elevation..m. Sky.visible.... Slope..Degrees. site.soil
## Min. :522.0 Min. : 9.50 Min. : 0.000 Min. : 0.00
## 1st Qu.:550.8 1st Qu.: 90.00 1st Qu.: 0.000 1st Qu.: 7.75
## Median :600.0 Median :100.00 Median : 4.000 Median :14.94
## Mean :625.3 Mean : 93.97 Mean : 6.733 Mean :16.17
## 3rd Qu.:686.8 3rd Qu.:100.00 3rd Qu.:12.000 3rd Qu.:23.75
## Max. :806.0 Max. :100.00 Max. :27.000 Max. :30.00
## site.moist
## Min. :1.000
## 1st Qu.:3.000
## Median :3.000
## Mean :3.062
## 3rd Qu.:3.000
## Max. :5.000
Show pairwise relationships between variables
# Scatterplot matrix of every pair of environmental variables
pairs(
  x = env,
  main = "Bivariate Plots of the Environmental Data"
)

Correlation coefficients between environmental variables
# Pearson product-moment (linear) correlations, rounded to two decimals
env.pearson <- cor(env, method = "pearson")
round(env.pearson, digits = 2)
## Elevation..m. Sky.visible.... Slope..Degrees. site.soil
## Elevation..m. 1.00 0.11 0.63 -0.73
## Sky.visible.... 0.11 1.00 -0.04 -0.06
## Slope..Degrees. 0.63 -0.04 1.00 -0.66
## site.soil -0.73 -0.06 -0.66 1.00
## site.moist 0.79 0.12 0.50 -0.66
## site.moist
## Elevation..m. 0.79
## Sky.visible.... 0.12
## Slope..Degrees. 0.50
## site.soil -0.66
## site.moist 1.00
# Kendall's tau (rank-based) correlations, rounded to two decimals
env.ken <- cor(env, method = "kendall")
round(env.ken, digits = 2)
## Elevation..m. Sky.visible.... Slope..Degrees. site.soil
## Elevation..m. 1.00 0.12 0.56 -0.50
## Sky.visible.... 0.12 1.00 -0.01 -0.17
## Slope..Degrees. 0.56 -0.01 1.00 -0.48
## site.soil -0.50 -0.17 -0.48 1.00
## site.moist 0.64 0.17 0.45 -0.54
## site.moist
## Elevation..m. 0.64
## Sky.visible.... 0.17
## Slope..Degrees. 0.45
## site.soil -0.54
## site.moist 1.00
Principal components analysis on standardized environmental variables
### Construct a biplot
#### Biplot of the PCA on transformed species data (scaling 1)
#dev.new(width=11.8, height=8)
# Default plot of the species PCA object.
# NOTE(review): spe.h.pca is not created in the visible part of this document;
# presumably it is fitted in an earlier step that is not shown -- confirm.
plot(spe.h.pca)

# The customised scaling-1 biplot below is kept commented out for reference.
#?biplot
#biplot(spe.h.pca)
#plot(spe.h.pca, scaling=1, type="none", # scaling 1 = distance biplot :
# distances among objects in the biplot approximate their Euclidean distances
# but angles among descriptor vectors DO NOT reflect their correlation
#xlab=c("PC1 (%)", round((spe.h.pca$CA$eig[1]/sum(spe.h.pca$CA$eig))*100,2)),
#ylab=c("PC2 (%)", round((spe.h.pca$CA$eig[2]/sum(spe.h.pca$CA$eig))*100,2)))
#points(scores(spe.h.pca, display="sites", choices=c(1,2), scaling=1),
# pch=21, col="black", bg="steelblue", cex=1.2)
#text(scores(spe.h.pca, display="species", choices=c(1), scaling=1),
# scores(spe.h.pca, display="species", choices=c(2), scaling=1),
#labels=rownames(scores(spe.h.pca, display="species", scaling=1)),
#col="red", cex=0.8)
Principal components analysis with scaling=2 and standardized data
#### Biplot on the environmental variables (scaling 2)
#dev.new(width=11.8, height=8)
# Default plot of the PCA object, for a quick first look.
plot(env.pca)

# Customised biplot, scaling 2 = correlation biplot:
# distances among objects in the biplot DO NOT approximate their Euclidean
# distances, but angles among descriptor vectors reflect their correlation.

# Percentage of the total variance carried by each axis, rounded to 2 decimals.
env.pca.pct <- round((env.pca$CA$eig / sum(env.pca$CA$eig)) * 100, 2)

# Empty frame with fixed limits. Each axis label is built as ONE string
# (e.g. "PC1 (45.3%)"); c("PC1 (%)", value) would instead pass a two-element
# label, which separates the number from its "(%)" unit.
plot(env.pca, scaling = 2, type = "none",
     xlab = paste0("PC1 (", env.pca.pct[1], "%)"),
     ylab = paste0("PC2 (", env.pca.pct[2], "%)"),
     xlim = c(-3, 3), ylim = c(-3, 3))

# Site scores drawn as filled grey circles.
points(scores(env.pca, display = "sites", choices = c(1, 2), scaling = 2),
       pch = 21, col = "black", bg = "gray50", cex = 1.2)

# Variable ("species") scores drawn as text labels; the scores are extracted
# once and reused for the coordinates and for the label names.
env.pca.var <- scores(env.pca, display = "species", choices = c(1, 2),
                      scaling = 2)
text(env.pca.var[, 1], env.pca.var[, 2],
     labels = rownames(env.pca.var),
     col = "black", cex = 0.8)

Variance Inflation Factors
#Can be used to detect collinearity. Based on the square of the multiple correlation coefficient (R^2) resulting from regressing
#a predictor variable against all other predictor variables: VIF = 1 / (1 - R^2). If a variable has a strong linear relationship with at least one
#other variable, the correlation coefficient will be close to 1, and the VIF for that variable will be
#large. A VIF greater than 10 is a signal that the model has a collinearity problem.
# I use a VIF threshold of 3 to eliminate variables
# Create the environmental data frame without site.moist.
# Columns are selected by their full names. The previous `env$Sky.visible...`
# only resolved through partial matching of `$` (the column is
# "Sky.visible...."), which is fragile and fails on tibbles.
# Subsetting with `[` keeps the row names of `env` and the column types,
# so no cbind()/as.data.frame() round trip through a matrix is needed.
env.4.var <- env[, c("Elevation..m.", "Sky.visible....",
                     "Slope..Degrees.", "site.soil")]
colnames(env.4.var) <- c("Elevation", "Sky.visible", "Slope", "Soil")
# VIF computed on all five environmental variables
env.vif <- vif(env)
env.vif
## Variables VIF
## 1 Elevation..m. 3.668521
## 2 Sky.visible.... 1.038912
## 3 Slope..Degrees. 1.995642
## 4 site.soil 2.603554
## 5 site.moist 2.839035
# VIF recomputed after dropping soil moisture
env.vif.4 <- vif(env.4.var)
env.vif.4
## Variables VIF
## 1 Elevation 2.354400
## 2 Sky.visible 1.036713
## 3 Slope 1.981368
## 4 Soil 2.475757