Title: Assessing collinearity

Name: Tammy L. Elliott

Date: March 19, 2016

R version 3.1

Show characteristics of environmental variables

# Quick structural overview of the environmental data frame `env`.
# Lines beginning with `##` are the console output recorded for each call.

# Column names of the environmental variables
names(env)
## [1] "Elevation..m."   "Sky.visible...." "Slope..Degrees." "site.soil"      
## [5] "site.moist"
# Dimensions: rows (176) x columns (5)
dim(env)
## [1] 176   5
# Storage type of each column (one integer column, four numeric columns)
str(env)
## 'data.frame':    176 obs. of  5 variables:
##  $ Elevation..m.  : int  523 522 524 532 522 531 533 528 530 530 ...
##  $ Sky.visible....: num  95 100 100 80 100 80 100 100 100 100 ...
##  $ Slope..Degrees.: num  0 0 0 0 0 0 1 0 0 0 ...
##  $ site.soil      : num  22.2 22.5 14.5 18.1 26.4 ...
##  $ site.moist     : num  2 2 2 2 1 2 2 2 1 2 ...
# First six rows; the row names (EA1, EB1, ...) identify the rows
head(env)
##     Elevation..m. Sky.visible.... Slope..Degrees. site.soil site.moist
## EA1           523              95               0    22.250          2
## EB1           522             100               0    22.500          2
## EC1           524             100               0    14.500          2
## ED1           532              80               0    18.125          2
## EE1           522             100               0    26.375          1
## EF1           531              80               0    30.000          2
# Five-number summary plus mean for every variable
summary(env)
##  Elevation..m.   Sky.visible....  Slope..Degrees.    site.soil    
##  Min.   :522.0   Min.   :  9.50   Min.   : 0.000   Min.   : 0.00  
##  1st Qu.:550.8   1st Qu.: 90.00   1st Qu.: 0.000   1st Qu.: 7.75  
##  Median :600.0   Median :100.00   Median : 4.000   Median :14.94  
##  Mean   :625.3   Mean   : 93.97   Mean   : 6.733   Mean   :16.17  
##  3rd Qu.:686.8   3rd Qu.:100.00   3rd Qu.:12.000   3rd Qu.:23.75  
##  Max.   :806.0   Max.   :100.00   Max.   :27.000   Max.   :30.00  
##    site.moist   
##  Min.   :1.000  
##  1st Qu.:3.000  
##  Median :3.000  
##  Mean   :3.062  
##  3rd Qu.:3.000  
##  Max.   :5.000

Show pairwise relationships between variables

# Scatterplot matrix: one panel for every pair of columns in `env`
pairs(x = env, main = "Bivariate Plots of the Environmental Data")

Correlation coefficients between environmental variables

# Pearson's r: linear correlation between every pair of variables
env.pearson <- cor(x = env, method = "pearson")
round(env.pearson, digits = 2)
##                 Elevation..m. Sky.visible.... Slope..Degrees. site.soil
## Elevation..m.            1.00            0.11            0.63     -0.73
## Sky.visible....          0.11            1.00           -0.04     -0.06
## Slope..Degrees.          0.63           -0.04            1.00     -0.66
## site.soil               -0.73           -0.06           -0.66      1.00
## site.moist               0.79            0.12            0.50     -0.66
##                 site.moist
## Elevation..m.         0.79
## Sky.visible....       0.12
## Slope..Degrees.       0.50
## site.soil            -0.66
## site.moist            1.00
# Kendall's tau: rank-based correlation for the same pairs of variables
env.ken <- cor(x = env, method = "kendall")
round(env.ken, digits = 2)
##                 Elevation..m. Sky.visible.... Slope..Degrees. site.soil
## Elevation..m.            1.00            0.12            0.56     -0.50
## Sky.visible....          0.12            1.00           -0.01     -0.17
## Slope..Degrees.          0.56           -0.01            1.00     -0.48
## site.soil               -0.50           -0.17           -0.48      1.00
## site.moist               0.64            0.17            0.45     -0.54
##                 site.moist
## Elevation..m.         0.64
## Sky.visible....       0.17
## Slope..Degrees.       0.45
## site.soil            -0.54
## site.moist            1.00

Principal components analysis on standardized environmental variables

### Construct a biplot
#### Biplot of the PCA on transformed species data (scaling 1)
# NOTE(review): the section heading above refers to environmental variables,
# but the object plotted here is spe.h.pca (species PCA) - confirm this is the
# intended object. spe.h.pca is not created in this part of the file.
#dev.new(width=11.8, height=8)
plot(spe.h.pca)

# Everything below is disabled code for a hand-built scaling-1 biplot,
# kept for reference only.
#?biplot 
#biplot(spe.h.pca)
#plot(spe.h.pca, scaling=1, type="none", # scaling 1 = distance biplot : 
     # distances among objects in the biplot approximate their Euclidean distances
     # but angles among descriptor vectors DO NOT reflect their correlation
     #xlab=c("PC1 (%)", round((spe.h.pca$CA$eig[1]/sum(spe.h.pca$CA$eig))*100,2)),
     #ylab=c("PC2 (%)", round((spe.h.pca$CA$eig[2]/sum(spe.h.pca$CA$eig))*100,2)))
#points(scores(spe.h.pca, display="sites", choices=c(1,2), scaling=1),
      # pch=21, col="black", bg="steelblue", cex=1.2)
#text(scores(spe.h.pca, display="species", choices=c(1), scaling=1),
    # scores(spe.h.pca, display="species", choices=c(2), scaling=1),
     #labels=rownames(scores(spe.h.pca, display="species", scaling=1)),
     #col="red", cex=0.8)       

Principal components analysis with scaling=2 and standardized data

#### Biplot on the environmental variables (scaling 2)
# Scaling 2 = correlation biplot: distances among objects in the biplot
# DO NOT approximate their Euclidean distances, but angles among descriptor
# vectors reflect their correlations.
# env.pca is not created in this part of the file; it must already exist.
#dev.new(width=11.8, height=8)

# Default plot of the ordination object, for a quick first look
plot(env.pca)

# Percentage of the total variance carried by each of the first two axes
env.pca.pct <- round(100 * env.pca$CA$eig[1:2] / sum(env.pca$CA$eig), 2)

# Empty frame for the custom biplot. Each axis label is built as ONE string so
# that the percentage appears inside the parentheses, e.g. "PC1 (54.3%)".
plot(env.pca, scaling = 2, type = "none",
     xlab = paste0("PC1 (", env.pca.pct[1], "%)"),
     ylab = paste0("PC2 (", env.pca.pct[2], "%)"),
     xlim = c(-3, 3), ylim = c(-3, 3))

# Sites drawn as filled grey circles
points(scores(env.pca, display = "sites", choices = c(1, 2), scaling = 2),
       pch = 21, col = "black", bg = "gray50", cex = 1.2)

# Environmental variables ("species" scores) drawn as text labels; the scores
# are extracted once and reused for the coordinates and the label names
env.var.scores <- scores(env.pca, display = "species", choices = c(1, 2),
                         scaling = 2)
text(env.var.scores[, 1], env.var.scores[, 2],
     labels = rownames(env.var.scores),
     col = "black", cex = 0.8)

Variance Inflation Factors

# Variance inflation factors (VIF) can be used to detect collinearity. They
# are based on the square of the multiple correlation coefficient resulting
# from regressing a predictor variable against all other predictor variables.
# If a variable has a strong linear relationship with at least one other
# variable, the correlation coefficient would be close to 1, and the VIF for
# that variable would be large. A VIF greater than 10 is a signal that the
# model has a collinearity problem.

# I use a VIF of 3 to eliminate variables

# Create the environmental data frame without site.moist.
# Columns are selected by their exact names: `env$Sky.visible...` (three dots)
# only resolves to "Sky.visible...." through partial matching of `$`, which
# breaks silently if another column with the same prefix is ever added.
# Subsetting the data frame directly also keeps the row names and the column
# types, so no cbind()/as.data.frame() round trip is needed.
env.4.var <- env[, c("Elevation..m.", "Sky.visible....",
                     "Slope..Degrees.", "site.soil")]
colnames(env.4.var) <- c("Elevation", "Sky.visible", "Slope", "Soil")

# VIF with all variables
# NOTE(review): vif() is not defined or loaded in this part of the file; the
# output layout (Variables / VIF columns) looks like usdm::vif - confirm.
(env.vif <- vif(env))
##         Variables      VIF
## 1   Elevation..m. 3.668521
## 2 Sky.visible.... 1.038912
## 3 Slope..Degrees. 1.995642
## 4       site.soil 2.603554
## 5      site.moist 2.839035
# VIF without soil moisture (site.moist): every value is now below 3
(env.vif.4 <- vif(env.4.var))
##     Variables      VIF
## 1   Elevation 2.354400
## 2 Sky.visible 1.036713
## 3       Slope 1.981368
## 4        Soil 2.475757