1. Visualizing the data

# Load the two groundwater tables; the first CSV column supplies the row names.
pred <- read.csv(file = "gwd_pred.csv", row.names = 1)
allraw <- read.csv(file = "gwd_all_raw.csv", row.names = 1)
# Per-column summary of `pred` (quartiles, mean and NA counts).
summary(object = pred)
##       Temp          Conduct           Salinity           DO_sat      
##  Min.   : 5.07   Min.   :  132.7   Min.   : 0.0540   Min.   : 16.70  
##  1st Qu.:21.11   1st Qu.:  729.0   1st Qu.: 0.3425   1st Qu.: 79.75  
##  Median :23.40   Median : 7927.0   Median : 4.4175   Median : 95.40  
##  Mean   :23.06   Mean   :13512.0   Mean   : 7.9780   Mean   : 91.12  
##  3rd Qu.:25.35   3rd Qu.:22921.2   3rd Qu.:13.7550   3rd Qu.:106.15  
##  Max.   :30.20   Max.   :61602.5   Max.   :40.9000   Max.   :162.70  
##  NA's   :1       NA's   :1         NA's   :1         NA's   :5       
##     DO_diss      
##  Min.   : 1.450  
##  1st Qu.: 6.230  
##  Median : 7.670  
##  Mean   : 7.229  
##  3rd Qu.: 8.650  
##  Max.   :12.160  
##  NA's   :2
summary(allraw)
##        pH            Chloride           Sulfate         Sodium        
##  Min.   : 6.630   Min.   :    0.09   Min.   :   0   Min.   :    0.05  
##  1st Qu.: 7.685   1st Qu.:  116.00   1st Qu.:  38   1st Qu.:   83.20  
##  Median : 7.850   Median : 1705.00   Median : 314   Median : 1018.65  
##  Mean   : 7.810   Mean   : 4192.39   Mean   : 610   Mean   : 2432.66  
##  3rd Qu.: 7.974   3rd Qu.: 7459.00   3rd Qu.:1033   3rd Qu.: 4329.50  
##  Max.   :10.770   Max.   :20055.00   Max.   :2865   Max.   :11385.00  
##  NA's   :1        NA's   :2          NA's   :2      NA's   :2         
##    Potassium        Magnesium          Calcium         Alkalinity   
##  Min.   :  0.01   Min.   :   0.10   Min.   :  0.20   Min.   : 37.0  
##  1st Qu.:  6.02   1st Qu.:  22.95   1st Qu.: 16.70   1st Qu.: 72.0  
##  Median : 39.19   Median : 156.98   Median : 48.30   Median : 82.0  
##  Mean   : 84.68   Mean   : 343.14   Mean   : 82.96   Mean   :101.5  
##  3rd Qu.:146.00   3rd Qu.: 560.65   3rd Qu.:124.20   3rd Qu.: 93.5  
##  Max.   :391.10   Max.   :1597.97   Max.   :319.14   Max.   :486.0  
##  NA's   :2        NA's   :2         NA's   :2        NA's   :8      
##      Barium            Chromium           Silicon        Strontium     
##  Min.   :0.000000   Min.   :0.000000   Min.   : 0.00   Min.   :0.0000  
##  1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:12.92   1st Qu.:0.0700  
##  Median :0.003450   Median :0.000000   Median :18.60   Median :0.4509  
##  Mean   :0.005217   Mean   :0.001219   Mean   :17.70   Mean   :1.3778  
##  3rd Qu.:0.006450   3rd Qu.:0.002394   3rd Qu.:22.92   3rd Qu.:2.4867  
##  Max.   :0.050900   Max.   :0.004800   Max.   :35.59   Max.   :7.3560  
##  NA's   :1          NA's   :1          NA's   :1       NA's   :1       
##     Vanadium            Zinc          
##  Min.   :0.00000   Min.   :0.0000000  
##  1st Qu.:0.02887   1st Qu.:0.0001875  
##  Median :0.05197   Median :0.0031875  
##  Mean   :0.05351   Mean   :0.0097468  
##  3rd Qu.:0.07182   3rd Qu.:0.0079500  
##  Max.   :0.16570   Max.   :0.1606000  
##  NA's   :1         NA's   :1

View the data sets (to identify the missing values)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)
library(knitr)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ stringr 1.4.0
## ✓ tidyr   1.0.0     ✓ forcats 0.4.0
## ✓ readr   1.3.1
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::extract()   masks magrittr::extract()
## x dplyr::filter()    masks stats::filter()
## x dplyr::lag()       masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
library(tidyr)
# Turn the row names of `pred` and `allraw` (the sampling locations) into a
# regular column called "Site".
pred_colname <- tibble::rownames_to_column(pred, var = "Site")
allraw_colname <- tibble::rownames_to_column(allraw, var = "Site")
# View(pred_colname)
# View(allraw_colname)

Visualize the missing data in the ‘allraw’ and ‘pred’ data sets

library(mice)
## Loading required package: lattice
## 
## Attaching package: 'mice'
## The following object is masked from 'package:tidyr':
## 
##     complete
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:purrr':
## 
##     transpose
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4
## VIM is ready to use. 
##  Since version 4.0.0 the GUI is in its own package VIMGUI.
## 
##           Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
#--- Allraw data - chemical parameters of the water
# Plot the proportion of missing values per variable and the missingness
# patterns. `labels` is taken from the plotted data itself (Site plus the
# chemical variables) so that every bar carries the name of its own column.
mice_plot <- aggr(allraw_colname, col = c("navyblue", "red"),
                  numbers = TRUE, sortVars = TRUE,
                  labels = names(allraw_colname), cex.axis = .7,
                  gap = 3, ylab = c("Missing data", "Pattern"))

## 
##  Variables sorted by number of missings: 
##  Variable       Count
##    DO_sat 0.077669903
##  Salinity 0.019417476
##    DO_sat 0.019417476
##   DO_diss 0.019417476
##      Temp 0.019417476
##   Conduct 0.019417476
##  Salinity 0.019417476
##   Conduct 0.009708738
##   DO_diss 0.009708738
##      Temp 0.009708738
##   Conduct 0.009708738
##  Salinity 0.009708738
##    DO_sat 0.009708738
##   DO_diss 0.009708738
##      Temp 0.000000000
md.pattern(allraw_colname, rotate.names = TRUE)

##    Site pH Barium Chromium Silicon Strontium Vanadium Zinc Chloride Sulfate
## 95    1  1      1        1       1         1        1    1        1       1
## 5     1  1      1        1       1         1        1    1        1       1
## 1     1  1      1        1       1         1        1    1        0       0
## 1     1  1      0        0       0         0        0    0        0       0
## 1     1  0      1        1       1         1        1    1        1       1
##       0  1      1        1       1         1        1    1        2       2
##    Sodium Potassium Magnesium Calcium Alkalinity   
## 95      1         1         1       1          1  0
## 5       1         1         1       1          0  1
## 1       0         0         0       0          0  7
## 1       0         0         0       0          0 13
## 1       1         1         1       1          0  2
##         2         2         2       2          8 27
#--- pred data - physical parameters of the water
# Plot the proportion of missing values per variable and the missingness
# patterns. `labels` is taken from the plotted data itself, which has the
# extra "Site" column, so the label vector matches the columns one-to-one.
mice_plot <- aggr(pred_colname, col = c("navyblue", "red"),
                  numbers = TRUE, sortVars = TRUE,
                  labels = names(pred_colname), cex.axis = .7,
                  gap = 3, ylab = c("Missing data", "Pattern"))

## 
##  Variables sorted by number of missings: 
##  Variable       Count
##   DO_diss 0.048543689
##      Temp 0.019417476
##   Conduct 0.009708738
##  Salinity 0.009708738
##    DO_sat 0.009708738
##      Temp 0.000000000
md.pattern(pred_colname, rotate.names = TRUE)

##    Site Temp Conduct Salinity DO_diss DO_sat   
## 98    1    1       1        1       1      1  0
## 3     1    1       1        1       1      0  1
## 1     1    1       1        1       0      0  2
## 1     1    0       0        0       0      0  5
##       0    1       1        1       2      5 10

Imputing the missing observations in the chemical parameters (allraw data) and the physical parameters (pred data)

# Impute the missing physical measurements by predictive mean matching (pmm):
# 5 imputed data sets, 50 iterations, fixed seed for reproducibility.
# NOTE(review): the character column Site is passed to mice() as well, which
# is presumably what the logged event refers to - confirm with
# pred_imput$loggedEvents.
pred_imput <- mice(pred_colname, m = 5, maxit = 50, method = "pmm", seed = 500)
## Warning: Number of logged events: 1

# Take the first completed data set. complete() is called through the mice
# namespace because tidyr exports a function of the same name; this avoids
# detaching tidyverse/tidyr in the middle of the script just to unmask it.
pred_final <- mice::complete(pred_imput, 1)

head(pred_final)
##                                           Site  Temp Conduct Salinity DO_sat
## 1                              Kalaoa Deepwell 23.30  304.00    0.121  84.70
## 2                            Holualoa Deepwell 20.55  927.50    0.430 100.65
## 3 Queen Liliuokalani Trust (Keahuolu) Deepwell 20.90  159.80    0.075  99.50
## 4                           Honokohau Deepwell 21.90  233.90    0.120  93.55
## 5                                     Puu Lani 23.55  420.90    0.195  94.20
## 6                                   Puu Waawaa 23.65  280.45    0.125  90.85
##   DO_diss
## 1   7.300
## 2   8.940
## 3   8.610
## 4   8.200
## 5   7.970
## 6   7.705
str(allraw_colname)
# Same imputation settings for the chemical measurements.
allraw_imp <- mice(allraw_colname, m = 5, maxit = 50, method = "pmm",
                   seed = 500)
## Warning: Number of logged events: 3001
allraw_final <- mice::complete(allraw_imp, 1)

head(allraw_final)
##                                           Site    pH Chloride Sulfate Sodium
## 1                              Kalaoa Deepwell 7.780      9.8  28.530  31.45
## 2                            Holualoa Deepwell 7.850    224.0  36.445 130.00
## 3 Queen Liliuokalani Trust (Keahuolu) Deepwell 7.930      6.5  10.050  16.70
## 4                           Honokohau Deepwell 7.825      7.8  20.020  25.70
## 5                                     Puu Lani 8.145     41.0  48.800  53.60
## 6                                   Puu Waawaa 8.150     21.0  25.400  34.50
##   Potassium Magnesium Calcium Alkalinity  Barium Chromium Silicon Strontium
## 1     5.270    11.400  12.375       93.0 0.00000  0.00260 23.4375  0.025875
## 2     7.650    24.045  26.000       54.0 0.01055  0.00170 19.2900  0.132625
## 3     3.300     6.000  10.130       56.0 0.00360  0.00215 20.2200  0.024300
## 4     4.165     7.755  10.100       76.0 0.00205  0.00090 23.0325  0.012300
## 5     3.860    10.530  17.390       79.0 0.00090  0.00310 18.9300  0.045850
## 6     2.920     7.800  11.410       68.5 0.00000  0.00240 19.8550  0.030000
##     Vanadium     Zinc
## 1 0.08758333 0.003225
## 2 0.03870000 0.008550
## 3 0.05600000 0.002050
## 4 0.03790000 0.003375
## 5 0.13486667 0.002000
## 6 0.11253333 0.038950

#_______________________________________________________#

2. Ecoinformatics

Finally- Starting the Ecoinformatics

#Creating a unique id using the site.

library(dplyr)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tidyr 1.0.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x data.table::between()   masks dplyr::between()
## x tidyr::complete()       masks mice::complete()
## x tidyr::extract()        masks magrittr::extract()
## x dplyr::filter()         masks stats::filter()
## x data.table::first()     masks dplyr::first()
## x dplyr::lag()            masks stats::lag()
## x data.table::last()      masks dplyr::last()
## x purrr::set_names()      masks magrittr::set_names()
## x data.table::transpose() masks purrr::transpose()
# Add an integer `id` column to each imputed table. dplyr::row_number(Site)
# numbers the rows by the rank of their Site value.
pred_uniqid <- pred_final %>%
  dplyr::mutate(id = dplyr::row_number(Site))
# View(pred_uniqid)

allraw_uniqid <- allraw_final %>%
  dplyr::mutate(id = dplyr::row_number(Site))
# View(allraw_uniqid)

# Drop the first column (the non-numeric Site labels) from both tables; the
# steps that follow did not run while that column was present. The remaining
# columns (the measurements plus `id`) are kept.
pred_final1 <- pred_uniqid[-1]
# View(pred_final1)

allraw_final1 <- allraw_uniqid[-1]
# View(allraw_final1)

Analysis

(i) -RDA

Packages

library(ade4)

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(vegan)
## Loading required package: permute
## This is vegan 2.5-6
library(ellipse)
## 
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
## 
##     pairs
library(FactoMineR)
## 
## Attaching package: 'FactoMineR'
## The following object is masked from 'package:ade4':
## 
##     reconst
#library(gclus) # According to the book, these packages are also required to do the Hellinger transformation.#
#library(cluster)
#library(FD)
#Load additional functions:

source("evplot.R")

source("hcoplot.R")

Summary

# `id` is only a row identifier (the rank of the site name), not a measurement,
# so it is left out of both the response and the explanatory matrix. Otherwise
# it enters the RDA as a "species" and as a constraint at the same time.
phys_vars <- setdiff(names(pred_final1), "id")
chem_vars <- setdiff(names(allraw_final1), "id")

# Hellinger-transform the physical parameters (response matrix).
pr.hel <- decostand(pred_final1[, phys_vars, drop = FALSE], "hellinger")
# RDA of the transformed physical data constrained by all chemical parameters.
pr.rda <- rda(pr.hel ~ ., data = allraw_final1[, chem_vars, drop = FALSE])

summary(pr.rda)
## 
## Call:
## rda(formula = pr.hel ~ pH + Chloride + Sulfate + Sodium + Potassium +      Magnesium + Calcium + Alkalinity + Barium + Chromium + Silicon +      Strontium + Vanadium + Zinc + id, data = allraw_final1) 
## 
## Partitioning of variance:
##               Inertia Proportion
## Total         0.05671     1.0000
## Constrained   0.03871     0.6826
## Unconstrained 0.01800     0.3174
## 
## Eigenvalues, and their contribution to the variance 
## 
## Importance of components:
##                          RDA1     RDA2      RDA3      RDA4      RDA5      RDA6
## Eigenvalue            0.03575 0.002767 0.0001486 4.673e-05 7.646e-07 1.159e-07
## Proportion Explained  0.63040 0.048786 0.0026198 8.241e-04 1.348e-05 2.044e-06
## Cumulative Proportion 0.63040 0.679183 0.6818024 6.826e-01 6.826e-01 6.826e-01
##                           PC1      PC2       PC3       PC4       PC5       PC6
## Eigenvalue            0.01665 0.001054 0.0001909 9.639e-05 1.866e-06 1.261e-06
## Proportion Explained  0.29365 0.018588 0.0033664 1.700e-03 3.291e-05 2.223e-05
## Cumulative Proportion 0.97629 0.994879 0.9982451 9.999e-01 1.000e+00 1.000e+00
## 
## Accumulated constrained eigenvalues
## Importance of components:
##                          RDA1     RDA2      RDA3      RDA4      RDA5      RDA6
## Eigenvalue            0.03575 0.002767 0.0001486 4.673e-05 7.646e-07 1.159e-07
## Proportion Explained  0.92347 0.071466 0.0038377 1.207e-03 1.975e-05 2.994e-06
## Cumulative Proportion 0.92347 0.994932 0.9987700 1.000e+00 1.000e+00 1.000e+00
## 
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores:  1.550823 
## 
## 
## Species scores
## 
##             RDA1      RDA2       RDA3       RDA4       RDA5       RDA6
## Temp     -0.4293  0.055798 -0.0373621 -0.0352930 -1.773e-04  6.692e-05
## Conduct   0.4263  0.009767 -0.0685378  0.0162702  4.644e-05 -2.691e-05
## Salinity  0.0177 -0.001241  0.0001043  0.0004974  2.294e-03  2.029e-03
## DO_sat   -0.8323  0.174645 -0.0054760  0.0190234  1.628e-03 -2.624e-04
## DO_diss  -0.2412  0.057300 -0.0009500  0.0078526 -4.948e-03  8.514e-04
## id       -0.6315 -0.283439 -0.0132836  0.0069197 -4.028e-05  1.383e-05
## 
## 
## Site scores (weighted sums of species scores)
## 
##             RDA1       RDA2       RDA3      RDA4       RDA5      RDA6
## row1   -0.256545  2.511e-01 -5.137e-02 -0.249878 -0.1636724  0.115872
## row2   -0.064332  3.494e-01 -2.084e-01  0.311772 -0.0103604 -0.065569
## row3   -0.468049 -3.217e-01  1.295e+00 -0.098415 -0.6412323  0.764030
## row4   -0.276640  5.601e-01  2.911e-01 -0.305652  0.0970671  0.952848
## row5   -0.279881 -2.071e-01  5.755e-02  0.237646 -0.2164526  0.469846
## row6   -0.352784 -2.524e-01  3.829e-01 -0.054819 -0.3840775  0.547083
## row7    0.154680 -3.118e-02  9.864e-02  0.013203 -0.4770715 -4.908368
## row8    0.117174 -4.711e-02 -5.112e-02  0.093145  0.1454819 -0.066369
## row9    0.165864 -6.702e-02  1.074e-01 -0.177348 -0.1782917  0.423300
## row10   0.135426 -3.988e-02  3.913e-02  0.137816  0.5120784 -0.173267
## row11   0.144827 -3.439e-02  8.143e-02  0.130645  0.5937124 -0.376143
## row12   0.148400 -3.299e-02  9.794e-02  0.128348  0.5176122 -0.282768
## row13   0.137555 -2.959e-02  3.742e-02  0.088084  0.0301938  0.169938
## row14   0.129494 -2.666e-02  6.017e-03  0.104588  0.0604904  0.070211
## row15   0.126564 -4.642e-02 -2.082e-02  0.055870  0.1027788  0.019271
## row16   0.163496 -7.215e-02  8.661e-02 -0.212543 -0.1390724  0.451808
## row17   0.171012 -7.697e-02  1.120e-01 -0.270496 -0.1001087  0.553013
## row18   0.139100 -2.945e-02  3.219e-02  0.036634  0.0783235  0.120011
## row19   0.137171 -1.107e-02  1.796e-02  0.002398 -0.0643471  0.147042
## row20   0.128777 -8.091e-03 -6.635e-04  0.077506  0.0009028  0.062667
## row21   0.124890  3.053e-03 -5.729e-03  0.117694 -0.0684324 -0.002693
## row22   0.129392  2.196e-03  4.030e-03  0.078039 -0.0287038  0.125302
## row23   0.127946 -5.006e-03 -8.645e-05  0.094272 -0.2230903 -0.150541
## row24   0.128354 -3.084e-05  6.054e-04  0.084483 -0.0579002  0.149627
## row25   0.123443  4.879e-03  2.240e-03  0.174385 -0.0016158 -0.190663
## row26   0.133986 -2.154e-03  2.645e-02  0.091227  0.0451048 -0.624963
## row27   0.136995 -1.540e-02  1.721e-02  0.004590  0.1149296  0.078227
## row28   0.138100 -1.719e-02  2.568e-02  0.020603  0.0709779  0.130944
## row29   0.132886 -1.245e-02  1.473e-02  0.069066  0.1158467  0.088988
## row30   0.141639 -2.666e-02  2.898e-02 -0.027175  0.0239680  0.152498
## row31   0.129898  2.323e-03  2.014e-02  0.133727  0.3116826 -0.056099
## row32   0.136789 -4.035e-03  1.999e-02  0.011689  0.1777199 -0.132565
## row33   0.135901  2.352e-04  2.295e-02  0.039171  0.0764680 -0.971318
## row34   0.136248  1.238e-02  4.498e-02  0.111260  0.3241575 -0.024615
## row35   0.129068 -6.899e-02 -2.247e-02  0.012989  0.0369681  0.145369
## row36   0.148388 -7.459e-02  3.670e-02 -0.107479  0.1521616  0.088711
## row37   0.138390 -7.279e-02  1.053e-02 -0.021060  0.0346343  0.162028
## row38   0.149019 -9.472e-02  1.330e-02 -0.215026 -0.0825295  0.308506
## row39   0.146482 -8.426e-02  2.219e-02 -0.127185 -0.0469437  0.223229
## row40   0.141165 -7.868e-02  5.026e-03 -0.095908 -0.0738215  0.241026
## row41   0.153603 -1.135e-01  1.782e-02 -0.289066 -0.2008041  0.372020
## row42   0.129284 -6.066e-02  8.877e-03  0.137277  0.0003422  0.188850
## row43  -0.461315 -2.809e-01  1.116e+00 -0.493546 -0.7688397  0.469476
## row44  -0.183553 -1.558e-01 -2.979e-01  0.129977  0.2243258  0.091444
## row45  -0.211157 -8.271e-02 -2.993e-01 -0.185594  0.3130500 -0.015505
## row46  -0.226750 -6.754e-02 -2.465e-01 -0.167549  0.3706817 -0.334223
## row47  -0.171774 -1.775e-01 -3.417e-01  0.045476  0.1365081  0.073504
## row48  -0.208106 -2.942e-02 -2.265e-01  0.144805  0.2448708 -0.051889
## row49  -0.084628  2.742e-01 -1.943e-01  0.580147 -0.0648679  0.015064
## row50  -0.079824  2.254e-01 -2.613e-01  0.428959  0.0749499 -0.150435
## row51  -0.242070  3.701e-01  1.040e-01  0.275497  0.3716444 -0.210498
## row52  -0.129637  2.946e-01 -2.238e-01  0.272902  0.0193110 -0.030379
## row53  -0.382678 -2.239e-01  5.738e-01 -0.055774 -0.5701038  0.408196
## row54   0.162331 -3.789e-02  1.215e-01 -0.056532  0.2203428  0.367292
## row55   0.178161 -7.743e-02  1.946e-01 -0.082790 -0.1549013  0.525742
## row56   0.172427 -6.332e-02  1.925e-01  0.033570 -0.0175688 -0.401599
## row57  -0.061595  1.345e-01 -2.864e-01  0.502480 -0.2857049  0.194900
## row58  -0.445344  3.728e-01  1.354e+00  0.220813 -0.4982794  0.448877
## row59  -0.373821  7.510e-01  1.054e+00 -0.542799  0.2233950 -0.283712
## row60  -0.022189  1.357e-01 -2.989e-01  0.342893 -0.1629700  0.026461
## row61  -0.022598  1.289e-01 -2.654e-01  0.495587 -0.7245821  0.900400
## row62  -0.078864  1.536e-02 -3.007e-01  0.595127  0.2296108 -0.295328
## row63   0.025541  1.407e-01 -3.451e-01 -0.184163 -0.0148357 -0.140492
## row64   0.009895  1.792e-01 -3.729e-01 -0.251292 -0.2784313  0.145675
## row65   0.047996  8.832e-02 -3.876e-01 -0.517637 -1.0472429  0.876619
## row66   0.078973  1.639e-01 -1.874e-01 -0.152844  0.1067048 -0.300408
## row67  -0.037996 -1.603e-01 -3.405e-01  0.431993  0.1169616 -0.206728
## row68  -0.208838  5.935e-01 -7.440e-02 -1.010557  0.9368877 -0.823376
## row69  -0.181423 -2.867e-01 -3.406e-01 -0.130413  0.2681520 -0.300037
## row70  -0.197517 -2.845e-01 -3.086e-01 -0.159700  0.3753927 -0.373810
## row71  -0.235249 -3.368e-01 -2.228e-01 -0.354695  0.0555965  0.063711
## row72  -0.307044 -2.228e-01  7.686e-02 -0.236005  0.2966610 -0.311223
## row73  -0.319403 -2.900e-01  8.776e-02 -0.537859  0.0888279 -0.121271
## row74   0.132676 -4.403e-02  7.943e-03  0.063009  0.2672868 -0.163921
## row75  -0.205296  1.242e-01 -1.981e-01  0.128461 -0.3021643  0.723118
## row76   0.069930 -1.214e-01 -4.182e-01 -0.789965 -0.6413581  0.336420
## row77   0.106802 -6.237e-02 -2.827e-01 -0.745617 -0.5341033  0.208696
## row78   0.178266 -3.495e-02  1.357e-01 -0.354765 -0.0901657  0.389380
## row79  -0.275968 -1.663e-01 -1.307e-02  0.034070  0.3284004 -0.078307
## row80  -0.349619 -2.897e-01  3.497e-01 -0.154983  0.1921735 -0.087639
## row81  -0.016329 -1.492e-01 -3.446e-01  0.332663 -0.0188272 -0.038709
## row82  -0.003555 -1.304e-01 -3.138e-01  0.393981 -0.0364721 -0.003413
## row83   0.029489 -6.800e-02 -2.662e-01  0.337965 -0.0270104 -0.025368
## row84  -0.412442  2.200e-01  8.463e-01 -0.209441 -0.3813299  0.286415
## row85   0.136015 -4.063e-02 -1.023e-02 -0.076379 -0.0358848  0.126634
## row86   0.136368 -1.872e-02  3.027e-02  0.073275  0.1273709  0.089397
## row87   0.133363  1.535e-02  4.351e-02  0.156518  0.2217556  0.038705
## row88  -0.042710  1.492e-01 -3.332e-01  0.245867  0.3035387 -0.274366
## row89   0.139976  1.162e-02  2.024e-02 -0.061388  0.1764929 -0.174445
## row90   0.148832  5.615e-03  5.955e-02 -0.065331  0.1440192 -0.013954
## row91   0.123363  3.090e-02 -1.106e-02  0.095977  0.0907814 -0.075565
## row92   0.013066  1.487e-01 -4.011e-01 -0.342704 -0.1246592 -0.047356
## row93   0.130961 -5.154e-02 -8.369e-03  0.030273  0.1253501  0.192888
## row94   0.024408 -1.175e-01 -2.501e-01  0.455812 -0.1032895  0.080236
## row95   0.140420 -4.295e-02 -1.425e-03 -0.119132 -0.3035882 -1.431470
## row96   0.083108  6.365e-02 -1.861e-01 -0.041812 -0.1677947  0.051753
## row97   0.143238 -2.290e-02  5.501e-02  0.047866  0.1174209  0.120610
## row98   0.110209  7.992e-02 -5.422e-02  0.081438  0.0300138 -0.118891
## row99   0.097898  1.583e-02 -1.050e-01  0.135924 -0.0382025 -0.018946
## row100  0.114222  1.407e-01 -3.936e-02 -0.018342  0.0054434 -0.140544
## row101  0.098269  1.910e-01 -5.184e-02  0.088175 -0.0049013 -0.141629
## row102 -0.223257 -2.231e-04 -7.387e-02  0.564026 -0.0047822  0.435864
## row103  0.143170 -4.688e-02  3.723e-02 -0.011997  0.1338640  0.201634
## 
## 
## Site constraints (linear combinations of constraining variables)
## 
##             RDA1      RDA2       RDA3      RDA4       RDA5      RDA6
## row1   -0.110071  0.184305 -0.0109715 -0.015131  0.0223561 -0.086323
## row2   -0.098088  0.268754  0.0253877 -0.052804  0.0647245  0.067816
## row3   -0.182527 -0.206686  0.1074275  0.020302 -0.0649426  0.017989
## row4   -0.151594  0.269490  0.1667599 -0.157919 -0.0183513 -0.014080
## row5   -0.045721 -0.205593 -0.0654692  0.250329 -0.1556964 -0.205472
## row6   -0.097341 -0.194454 -0.0337566  0.310247 -0.2115688 -0.020637
## row7    0.029780 -0.109324  0.1138173  0.050230 -0.1964831 -0.282048
## row8    0.023369 -0.093238  0.1413563 -0.015886 -0.1362032 -0.163267
## row9    0.123660 -0.040829  0.1875750 -0.023691  0.1143518 -0.121247
## row10   0.129855 -0.071719  0.1355404  0.174112  0.0222404 -0.232750
## row11   0.066474 -0.044498  0.2161534 -0.053023  0.0998023 -0.067726
## row12   0.123276 -0.051134  0.1527548 -0.041863  0.1341324 -0.080865
## row13   0.101160  0.017873  0.2190992  0.193348  0.1148067 -0.056811
## row14   0.114366 -0.004103  0.1358112  0.145395  0.0386221 -0.105989
## row15   0.012473 -0.081851 -0.0551858 -0.045908  0.0753825 -0.114513
## row16   0.075396 -0.028852  0.2030014 -0.188085  0.1519673  0.099970
## row17   0.178369 -0.053690  0.0757374 -0.233061  0.0896904  0.122224
## row18   0.247635 -0.061050 -0.0620870  0.140216  0.1131174 -0.014726
## row19   0.088812  0.087903  0.0406643 -0.058635 -0.0048800 -0.143304
## row20   0.039126  0.096822  0.1593608 -0.080220 -0.0517281 -0.057008
## row21   0.010893  0.087311  0.1577932 -0.026292 -0.1237535 -0.223190
## row22   0.100588  0.073111  0.1059219 -0.111373 -0.1121808  0.158727
## row23   0.064401  0.079282  0.1523409  0.035411 -0.1063437 -0.176833
## row24   0.075611  0.085983  0.0873480 -0.015204 -0.0289247 -0.150014
## row25   0.061880  0.085756  0.1423115  0.007178 -0.0875659 -0.188624
## row26   0.155166  0.041449 -0.1144882 -0.026134 -0.0283835 -0.144037
## row27   0.093383 -0.017940 -0.0792738 -0.236176 -0.1933310 -0.079044
## row28   0.157277 -0.026734 -0.1081242  0.079687  0.1238933  0.039216
## row29   0.103151 -0.006376  0.0423123 -0.065344  0.0016978 -0.050752
## row30   0.092452 -0.042329  0.0224028 -0.006755 -0.1324964 -0.037799
## row31   0.103986  0.023357  0.0466216 -0.004950 -0.0821838  0.218835
## row32   0.039261  0.031201  0.0989958 -0.054714  0.0121253 -0.124952
## row33   0.139290  0.043204  0.0323325 -0.066142  0.0546499  0.060804
## row34   0.263987 -0.025117 -0.1845580  0.189763  0.2904283  0.145517
## row35   0.092262 -0.144483  0.1044243  0.014172  0.0620388  0.018847
## row36   0.137401 -0.202215  0.1052245  0.016186 -0.0451359  0.090855
## row37   0.134262 -0.186817  0.0741176  0.047806 -0.0527280  0.041138
## row38   0.153646 -0.141420  0.1002259  0.063090 -0.0002969  0.117511
## row39   0.124123 -0.142741  0.1363792  0.026764 -0.0508279  0.106488
## row40   0.081700 -0.125107  0.0907394 -0.055981 -0.0738809  0.117445
## row41   0.107465 -0.112204  0.0633397 -0.055105  0.0644119  0.131311
## row42   0.089626 -0.202059  0.1294095  0.044517 -0.0785223 -0.027902
## row43  -0.238722 -0.136111  0.2370244 -0.128889 -0.1856850  0.088326
## row44  -0.248321 -0.100680 -0.0879113 -0.053066  0.2047308  0.074509
## row45  -0.238170  0.031112 -0.0448222 -0.088753  0.2033509  0.048603
## row46  -0.262732  0.030394 -0.0416819 -0.095189  0.1860559  0.071642
## row47  -0.255699 -0.114096 -0.0764727 -0.068142  0.1972843  0.068945
## row48  -0.244720  0.019944 -0.0418975 -0.105570  0.2010426  0.053004
## row49  -0.119212  0.226997  0.0512502 -0.012612  0.0751284 -0.020240
## row50  -0.094029  0.239264  0.0664576  0.012693  0.0510117 -0.016469
## row51  -0.154324  0.250642  0.1187177 -0.029276  0.0402923 -0.003522
## row52  -0.151156  0.234208  0.1198409 -0.056761 -0.0657347 -0.022086
## row53  -0.144313 -0.146107  0.1265232  0.002364 -0.1800982 -0.096670
## row54   0.367321 -0.082541 -0.0009113 -0.041931  0.2689061  0.298403
## row55   0.344929 -0.093318 -0.0498780 -0.093375  0.1991810  0.065807
## row56   0.330450 -0.087045 -0.0050981 -0.020922  0.1762734 -0.048278
## row57  -0.174843  0.187877 -0.0763937  0.485605 -0.3993533  0.612004
## row58  -0.134932  0.197343  0.1958001  0.007314 -0.0397556 -0.031689
## row59  -0.138090  0.301555  0.1684470 -0.011340 -0.0063676 -0.032472
## row60  -0.114586  0.189378 -0.0105928 -0.048775  0.0664263 -0.035707
## row61  -0.157334  0.198453 -0.0296353  0.280587 -0.1775697  0.354926
## row62  -0.001829 -0.093627 -0.3398906 -0.183852 -0.0626479 -0.227856
## row63   0.045392  0.060538 -0.4356560  0.034064 -0.4368693  0.196682
## row64   0.049722  0.171668 -0.3760226 -0.196563  0.0484524 -0.314526
## row65   0.007595  0.156107 -0.0741761 -0.126297 -0.2124626 -0.135315
## row66  -0.015935  0.219732 -0.0264575  0.005053 -0.1169555 -0.153104
## row67  -0.139413 -0.284709  0.0004099  0.124237 -0.0921475 -0.086717
## row68  -0.228084  0.318491 -0.0339045 -0.121165  0.3150172  0.007801
## row69  -0.246426 -0.254971 -0.0883144 -0.077390  0.1270322  0.052593
## row70  -0.265599 -0.234654 -0.1033186 -0.063586  0.1693874  0.032935
## row71  -0.246183 -0.239471 -0.0406988 -0.061666  0.0697445  0.003113
## row72  -0.278438 -0.111440 -0.0343707 -0.078438  0.1921779  0.036563
## row73  -0.259525 -0.184542  0.2492717 -0.209817 -0.1914887 -0.227041
## row74   0.059441 -0.032678  0.1290904  0.054495 -0.0555742 -0.096101
## row75  -0.151909  0.121522  0.0624881  0.105553 -0.1037817  0.192894
## row76   0.065143 -0.208896 -0.4743637 -0.550550 -0.3620158 -0.028364
## row77   0.093128  0.035253 -0.0841221 -0.333885 -0.2977779  0.099774
## row78   0.199972  0.013995  0.0044459 -0.229467 -0.0556882  0.259196
## row79  -0.263384 -0.095977 -0.0666450 -0.062703  0.2103125  0.065085
## row80  -0.291846 -0.210358 -0.0721494 -0.027392  0.1903646  0.081254
## row81  -0.153387 -0.278700 -0.2540435  0.280332  0.0676527 -0.236839
## row82  -0.124402 -0.242447 -0.0445192  0.156219 -0.0403896 -0.056744
## row83  -0.101167 -0.109648 -0.0372401  0.116499  0.0013333 -0.020893
## row84  -0.147902  0.153830  0.0743398 -0.007525  0.0408102  0.031141
## row85   0.177347  0.030811 -0.0849171  0.190549  0.0214501  0.197737
## row86   0.057300  0.001096 -0.1352166 -0.078191  0.0468826 -0.097196
## row87   0.131957 -0.055637  0.1139356 -0.255813 -0.1850574  0.487515
## row88  -0.001030  0.087311 -0.4547254  0.112915 -0.1122471  0.142877
## row89   0.131160  0.122371 -0.0228725 -0.102186 -0.0754751 -0.053326
## row90   0.221004  0.082256 -0.1261360 -0.026215  0.0294424 -0.086061
## row91   0.010811  0.084691 -0.0964236 -0.138652  0.2859490  0.394555
## row92   0.017455  0.157277 -0.4611185 -0.205334 -0.0545976 -0.085641
## row93   0.119830 -0.119381 -0.0455834  0.163766  0.4413634  0.001452
## row94  -0.065484 -0.298468 -0.1519609  0.245940 -0.0714849  0.034477
## row95   0.064380  0.021151  0.1405079 -0.014053 -0.2181471  0.016535
## row96  -0.014704  0.144714  0.0378699 -0.077712 -0.0047362 -0.014833
## row97   0.089285  0.011295  0.0138594  0.153465  0.1491118  0.097924
## row98   0.085378  0.227584 -0.1129784  0.238574  0.0608323 -0.148196
## row99   0.053626  0.123196 -0.1194199  0.283072 -0.0130235 -0.136104
## row100  0.068737  0.295389 -0.1769090  0.276548  0.1339802 -0.126549
## row101  0.069907  0.284362 -0.1175878  0.234859  0.0597800 -0.156810
## row102 -0.030424 -0.052929 -0.1370680  0.320614 -0.1415056 -0.104123
## row103 -0.013234 -0.016616  0.3250526  0.079382 -0.1861536  0.146422
## 
## 
## Biplot scores for constraining variables
## 
##                RDA1     RDA2     RDA3     RDA4     RDA5      RDA6
## pH         -0.15140 -0.15481  0.03821 -0.25856 -0.11278  0.032068
## Chloride    0.80245 -0.20806  0.24759 -0.12681  0.27186  0.104549
## Sulfate     0.80653 -0.20156  0.22478 -0.12818  0.26287  0.107474
## Sodium      0.81316 -0.20947  0.24564 -0.11441  0.28435  0.103545
## Potassium   0.80590 -0.21032  0.23587 -0.12544  0.27511  0.100883
## Magnesium   0.80228 -0.19755  0.21767 -0.12333  0.28021  0.119668
## Calcium     0.83202 -0.18911  0.11503 -0.20739  0.19987  0.144821
## Alkalinity  0.13729  0.07169 -0.54279 -0.20966 -0.04514 -0.036314
## Barium      0.18713 -0.02258 -0.34247  0.15732  0.24133  0.043297
## Chromium   -0.68949 -0.07639 -0.32350  0.25046  0.39375 -0.050150
## Silicon    -0.68883  0.10822 -0.36847  0.16097  0.05384 -0.292006
## Strontium   0.73783 -0.19784  0.23746 -0.03776  0.29456 -0.024901
## Vanadium   -0.27891  0.04415 -0.38946  0.53801 -0.09629 -0.425261
## Zinc       -0.14546  0.18665 -0.34142  0.39761 -0.37605  0.480112
## id         -0.07549 -0.93113  0.20511  0.13575  0.17849  0.002239
R2adj <- RsquareAdj(pr.rda)$adj.r.squared

R2adj
## [1] 0.6279252

The adjusted R-squared value is 0.628 (62.8%), which is a really high value given that there are so many response variables and a large number of dimensions that we are working with.

Scaling 1

# Retrieving results and plotting the RDA (scaling 1)

# Plot the fitted model `pr.rda`; the title string contains an embedded
# line break, so it spans two lines in the source.
plot(pr.rda, scaling = 1, main = "Triplot- 
RDA of physical parameters vs chemical parameters- scaling 1")

# Scores requested with display = "sp" on the first two axes, using the same
# scaling as the plot above.
spe.sc <- scores(pr.rda, choices = 1:2, scaling = 1, display = "sp")

# Red segments from the origin to each score; length = 0 sets the
# arrowhead edge length to zero.
arrows(0, 0, spe.sc[,1], spe.sc[,2], length = 0, lty = 1, col = "red")

Scaling 2

# Plot the fitted model `pr.rda` in scaling 2; the title string contains an
# embedded line break plus the leading spaces of its second source line.
plot(pr.rda, scaling = 2, main = "Triplot- RDA of 
     physical(dependent) vs chemical(explanatory)parameters- scaling 2")
# Scores requested with display = "sp" on the first two axes, in scaling 2.
spe2.sc <- scores(pr.rda, choices = 1:2, scaling = 2, display = "sp")

# Red segments from the origin to each score; length = 0 sets the
# arrowhead edge length to zero.
arrows(0, 0, spe2.sc[,1], spe2.sc[,2], length = 0, lty = 1, col = "red")

#gwd.sc <- scores(pr.rda, choices = 1:2, scaling = 3, display = "sp")
#bg <- c("#ff7f00","#1f78b4","#ffff33","#a6cee3") # 4 nice colors for groups

#plot(pr.rda, type="n", scaling=3)
#text(pr.rda, display="species", pch=20, cex=0.7, col="gray32", scaling=3)
#points(gwd.rda, display="sites", pch=21, cex=1.3, col="gray32", scaling=3,bg=bg[group])
#text(gwd.rda, display="sites", pch=21, cex=0.5, col="gray32", scaling=3, bg=bg[group])
#text(gwd.rda, scaling=3, display="bp", col="#0868ac", cex=.8)
#arrows(0, 0, gwd.sc[,1], gwd.sc[,2], length = 0, lty = 1, col = "#e31a1c")

(ii) -Creating Bivariate plots

Creating a bivariate plot

source("panelutils.R")

We can use the following code to create the bivariate plots with histograms and smooth fitted curves

# Remember the current graphics settings, then use a single square
# plotting region for the scatterplot matrices.
op <- par(mfrow = c(1, 1), pty = "s")

# Scatterplot matrix of `pred_final1`: smoothed fits in the off-diagonal
# panels, histograms on the diagonal.
# panel.hist is presumably defined in panelutils.R -- confirm.
pairs(
  pred_final1,
  panel = panel.smooth,
  diag.panel = panel.hist,
  main = "Bivariate Plot with Histograms and Smooth Curves"
)

# Same layout for `allraw_final1`.
pairs(
  allraw_final1,
  panel = panel.smooth,
  diag.panel = panel.hist,
  main = "Bivariate Plot with Histograms and Smooth Curves"
)

# Restore the graphics settings saved in `op`.
par(op)

Transformation and standardization

Transformations

Simple transformations, such as the log transformation, can be used to improve the distributions of some variables (make it closer to the normal distribution).

Furthermore, because environmental variables are dimensionally heterogeneous (expressed in different units and scales), **many statistical analyses require their standardization to zero mean and unit variance. These centred and scaled variables are called z-scores.**

Let’s standardize!!

Use the following code to standardize our environmental variables. We will center and scale our variables using z-scores:

library(vegan)
# Standardize every column of `pred_final1` (z-scores).
env.z <- decostand(pred_final1, method = "standardize")
# Column-wise check of the result.
apply(env.z, MARGIN = 2, FUN = mean) # means = 0
##          Temp       Conduct      Salinity        DO_sat       DO_diss 
## -2.639217e-16  5.839376e-17  9.829819e-17  2.077979e-16 -1.065192e-16 
##            id 
##  0.000000e+00
apply(env.z, MARGIN = 2, FUN = sd) # standard deviations = 1
##     Temp  Conduct Salinity   DO_sat  DO_diss       id 
##        1        1        1        1        1        1
#-----
# Standardize every column of `allraw_final1` (z-scores).
env.x <- decostand(allraw_final1, method = "standardize")
# Column-wise check of the result.
apply(env.x, MARGIN = 2, FUN = mean) # means = 0
##            pH      Chloride       Sulfate        Sodium     Potassium 
##  9.698452e-17 -4.755227e-17 -6.499424e-17  3.380079e-17 -7.040893e-17 
##     Magnesium       Calcium    Alkalinity        Barium      Chromium 
##  2.376719e-17 -1.075255e-17 -1.591619e-17  2.479612e-17 -1.016055e-16 
##       Silicon     Strontium      Vanadium          Zinc            id 
##  2.963556e-17  1.540093e-17  4.094390e-17  1.632119e-17  0.000000e+00
apply(env.x, MARGIN = 2, FUN = sd) # standard deviations = 1
##         pH   Chloride    Sulfate     Sodium  Potassium  Magnesium    Calcium 
##          1          1          1          1          1          1          1 
## Alkalinity     Barium   Chromium    Silicon  Strontium   Vanadium       Zinc 
##          1          1          1          1          1          1          1 
##         id 
##          1

Plots

# Scatterplot matrix of the standardized `env.z` table.
pairs(
  env.z,
  panel = panel.smooth,
  diag.panel = panel.hist,
  main = "Bivariate Plot with Histograms and Smooth Curves(Physical data)"
)

# Scatterplot matrix of the standardized `env.x` table.
pairs(
  env.x,
  panel = panel.smooth,
  diag.panel = panel.hist,
  main = "Bivariate Plot with Histograms and Smooth Curves (Chemical data)"
)

(iii) -PCA

library("FactoMineR")
library("factoextra")
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

Physical & chemical data

# Request a 3 x 3 grid for base-graphics output.
# NOTE(review): the previous par() settings are not saved here, so they
# cannot be restored afterwards -- confirm this is intended.
par(mfrow = c(3, 3))
# PCA of `pred_final1` on unit-variance-scaled variables, with graphs drawn.
# NOTE(review): ncp = 7 exceeds the 6 variables reported in the printed
# output of this call -- confirm the intended number of dimensions.
PCA(
  X = pred_final1,
  scale.unit = TRUE,
  ncp = 7,
  graph = TRUE
)

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 103 individuals, described by 6 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
# Same PCA settings for `allraw_final1`, keeping up to 14 dimensions.
PCA(
  X = allraw_final1,
  scale.unit = TRUE,
  ncp = 14,
  graph = TRUE
)

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 103 individuals, described by 15 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
#X: a data frame. Rows are individuals and columns are numeric variables
#scale.unit: a logical value. If TRUE, the data are scaled to unit variance before the analysis. This standardization to the same scale prevents some variables from becoming dominant just because of their large measurement units. It makes the variables comparable.
#ncp: number of dimensions kept in the final results.
#graph: a logical value. If TRUE a graph is displayed.

Eigenvalues

# Store the PCA results for later use; graph = FALSE means no plot is drawn.
# Physical parameters (pred data)
res.pca <- PCA(X = pred_final1, graph = FALSE)
# Chemical parameters (raw data)
res.pca1 <- PCA(X = allraw_final1, graph = FALSE)
# The two calls above compute a principal component analysis on the active
# individuals/variables of each table.

Eigenvalues / Variances: the eigenvalues measure the amount of variation retained by each principal component. Eigenvalues are large for the first PCs and small for the subsequent PCs. That is, the first PCs correspond to the directions with the maximum amount of variation in the data set.

We examine the eigenvalues to determine the number of principal components to be considered.

The eigenvalues and the proportion of variances (i.e., information) retained by the principal components (PCs) can be extracted using the function get_eigenvalue() [factoextra package].

# Eigenvalue table of the physical-data PCA (`res.pca`): eigenvalue,
# percentage of variance and cumulative percentage per dimension.
eig.val <- get_eigenvalue(res.pca)
# Print the table.
eig.val
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.48736152       41.4560253                    41.45603
## Dim.2 1.63826368       27.3043947                    68.76042
## Dim.3 1.22670329       20.4450548                    89.20547
## Dim.4 0.58230819        9.7051365                    98.91061
## Dim.5 0.04389793        0.7316322                    99.64224
## Dim.6 0.02146539        0.3577564                   100.00000

The sum of all the eigenvalues gives a total variance of 6 (one unit of variance for each of the six standardized variables).

Explanation -The proportion of variation explained by each eigenvalue is given in the second column.
-41.46% of the variation is explained by the first dimension (PC1). -The cumulative percentage explained is obtained by adding the successive proportions of variation explained to obtain the running total. -68.76% of the variation is explained by the first two eigenvalues (PC1 and PC2) together. -And 89.21% of the variation is explained by the first 3 eigenvalues. After the 4th eigenvalue, the variation explained drops drastically.


Extracted content: Eigenvalues can be used to determine the number of principal components to retain after PCA (Kaiser 1961):

An eigenvalue > 1 indicates that PCs account for more variance than accounted by one of the original variables in standardized data. This is commonly used as a cutoff point for which PCs are retained. This holds true only when the data are standardized.

You can also limit the number of component to that number that accounts for a certain fraction of the total variance. For example, if you are satisfied with 70% of the total variance explained then use the number of components to achieve that.

Unfortunately, there is no well-accepted objective way to decide how many principal components are enough. This will depend on the specific field of application and the specific data set. In practice, we tend to look at the first few principal components in order to find interesting patterns in the data.

In our analysis, the first three principal components explain ~89% of the variation (the first two alone explain ~69%). This is an acceptably large percentage.

library(ade4)
library(vegan)
library(gclus)
## Loading required package: cluster
## Registered S3 method overwritten by 'gclus':
##   method         from 
##   reorder.hclust vegan
library(ape)

###  __Physical variables/data__

# rda() called on a single table, i.e. without constraining variables
# (the printed result reports only unconstrained inertia).
env.pca <- rda(pred_final1, scale = TRUE) # The argument `scale = TRUE` calls for a standardization of the variables.

env.pca # Printing the object gives us the output
## Call: rda(X = pred_final1, scale = TRUE)
## 
##               Inertia Rank
## Total               6     
## Unconstrained       6    6
## Inertia is correlations 
## 
## Eigenvalues for unconstrained axes:
##    PC1    PC2    PC3    PC4    PC5    PC6 
## 2.4874 1.6383 1.2267 0.5823 0.0439 0.0215
#____________________________________________________#

#  __Chemical variables/data__

env.pca1 <- rda(allraw_final1, scale = TRUE) # The result is stored but not printed here
# Eigenvalues of the physical-data PCA (`env.pca`, not `env.pca1`).
ev <- env.pca$CA$eig
source("evplot.R") # load the special evplot() function, which is available in Laulima (saves you tons of code!)

# Plot the eigenvalues stored in `ev`.
evplot(ev)

- One criterion consists of computing the mean of all eigenvalues and interpreting only the axes whose eigenvalues are larger than that mean. Therefore PC1, PC2 and PC3 are retained, since their eigenvalues are larger than the mean eigenvalue.

Another is to compute a broken stick model, which randomly divides a stick of unit length into the same number of pieces as there are PCA axes. The theoretical equation for the broken stick model is known. The pieces are then put in order of decreasing length and compared to the eigenvalues. One interprets only the axes whose eigenvalues are larger than the length of the corresponding piece of the stick, or, alternately, one may compare the sum of eigenvalues, from 1 to k, to the sum of the values from 1 to k predicted by the broken stick model.

Percentages vs dimensions

An alternative method to determine the number of principal components is to look at a scree plot, which is the plot of eigenvalues ordered from largest to smallest. The number of components is determined at the point beyond which the remaining eigenvalues are all relatively small and of comparable size (Jolliffe 2002; Peres-Neto, Jackson, and Somers 2005).

The scree plot can be produced using the function fviz_eig() or fviz_screeplot() [factoextra package].

# Scree plot for the physical-data PCA (`res.pca`) with value labels on the bars.
fviz_eig(res.pca, addlabels = TRUE, ylim = c(0, 60)) # ylim sets the y-axis range (0 to 60), not a number of rows

# Same scree plot for the chemical-data PCA (`res.pca1`).
fviz_eig(res.pca1, addlabels = TRUE, ylim = c(0, 60)) 

Colored PCA Plots

# NOTE(review): fviz_pca_var() comes from factoextra, which draws with
# ggplot2, so this base-graphics layout presumably has no effect on the two
# plots below; it is also never restored -- confirm it is needed.
par(mfrow = c(2, 2))

###____Physical variables_____
# Variable plot for `res.pca`, coloured by cos2 along a three-colour gradient.
fviz_pca_var(
  res.pca,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # avoid overlapping text labels
)

###____Chemical variables (nutrients)____

# Same plot for `res.pca1`.
fviz_pca_var(
  res.pca1,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # avoid overlapping text labels
)

#fviz_pca_var(res.pca, col.var = "red") # This code works

# The plots above are also known as variable correlation plots. They show the
# relationships between all variables and can be interpreted as follows:

# Positively correlated variables are grouped together.
# Negatively correlated variables are positioned on opposite sides of the
# plot origin (opposed quadrants).
# The distance between a variable and the origin measures the quality of that
# variable on the factor map. Variables that are far from the origin are well
# represented on the factor map.

Biplots

Physical variables

## PCA biplots of sites and variables - __Physical variables__

# Load the file that presumably defines cleanplot.pca() -- confirm.
source("cleanplot.pca.R")
# Biplots for the physical-data PCA object `env.pca`.
cleanplot.pca(env.pca, point = TRUE)

Chemical variables

## PCA biplots of sites and variables - __Chemical variables__

# Load the file that presumably defines cleanplot.pca() -- confirm.
source("cleanplot.pca.R")
# Biplots for the chemical-data PCA object `env.pca1`.
cleanplot.pca(env.pca1, point = TRUE)

# Refer to Ecoinformatics_PCA.rmd and do the interpretations.

Physical Variables

##___Physical Variables____
# Biplot of individuals and variables for `res.pca`.
fviz_pca_biplot(
  res.pca,
  repel = TRUE,
  col.var = "#2E9FDF", # colour of the variables
  col.ind = "#696969"  # colour of the individuals
)

The scaling 1 biplot shows a gradient from left to right (clockwise),

starting with a group formed by sites 1, 5, 44, 48, 50, 63, 66, 72, 92 and 100, which display the highest values of temperature (Temp) and the lowest values of salinity and conductivity.

Chemical Variables

##___Chemical Variables____
# Biplot of individuals and variables for `res.pca1`.
# NOTE(review): the variable colour "#2E9FDD" differs slightly from the
# "#2E9FDF" used for the physical-variables biplot -- possibly a typo, confirm.
fviz_pca_biplot(
  res.pca1,
  repel = TRUE,
  col.var = "#2E9FDD", # colour of the variables
  col.ind = "#696969"  # colour of the individuals
)