# Packages: FactoMineR runs the PCA, factoextra extracts and plots its results.
# install.packages("FactoMineR")
# install.packages("factoextra")
library(FactoMineR)
library(factoextra)

# Keep the machine-specific folder in one place and build the file path from
# it, rather than calling setwd(), which changes global state for everything
# that runs afterwards and repeats the same directory in two places.
data_dir <- "C:/Users/Sam/Documents/DSCI_605/Module_6"

# Read the sample data; the first line of the CSV holds the column names.
df <- read.csv(file.path(data_dir, "Samples-1.csv"), header = TRUE)
This dataset was specified by the assignment: “Please use the sample data provided to make your own PCA analysis.” It appears to come from a scientific experiment in which several species of an organism were measured in samples collected from multiple sites.
# Retain columns 2 through 8 only; per the original note these are all of the
# variables once the non-numeric ones are excluded.
df <- df[, seq(2, 8)]
Each species is assumed to be equally important, so all seven were included; dimension reduction will help clarify which of them drive the variation in the data.
# Fit the PCA on unit-scaled variables, requesting up to seven components and
# turning off the plots PCA() would otherwise draw; then list the result
# objects that are available.
df.pca <- PCA(
  df,
  ncp = 7,
  scale.unit = TRUE,
  graph = FALSE
)
print(df.pca)
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 6 individuals, described by 7 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# Eigenvalue, percentage of variance and cumulative percentage per dimension.
eig.val <- get_eigenvalue(df.pca)
print(eig.val)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 3.964859352 56.6408479 56.64085
## Dim.2 2.048397556 29.2628222 85.90367
## Dim.3 0.599987080 8.5712440 94.47491
## Dim.4 0.377301937 5.3900277 99.86494
## Dim.5 0.009454076 0.1350582 100.00000
# Plot the eigenvalues per dimension with value labels; the y-axis is fixed
# to the range 0-60.
fviz_eig(
  df.pca,
  ylim = c(0, 60),
  addlabels = TRUE
)

# Collect the variable-level results (coord, cor, cos2, contrib).
# NOTE(review): the name `var` shadows stats::var as a variable; it is kept
# because later statements refer to it.
var <- get_pca_var(df.pca)
print(var)
## Principal Component Analysis Results for variables
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the variables"
## 2 "$cor" "Correlations between variables and dimensions"
## 3 "$cos2" "Cos2 for the variables"
## 4 "$contrib" "contributions of the variables"
# Collect the individual-level results (coord, cos2, contrib).
ind <- get_pca_ind(df.pca)
print(ind)
## Principal Component Analysis Results for individuals
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the individuals"
## 2 "$cos2" "Cos2 for the individuals"
## 3 "$contrib" "contributions of the individuals"
# Coordinates of the variables on each dimension (first rows only)
head(var[["coord"]])
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## speciesA -0.85923096 -0.01075314 -0.169120077 0.48205532 0.025051742
## speciesB 0.92561001 -0.36725561 -0.003195971 0.08104185 -0.042325249
## speciesC -0.04812317 0.85415477 -0.470162856 -0.21657288 0.012118959
## speciesD -0.60591717 0.54074830 0.576607174 -0.08881910 0.009538841
## speciesE 0.96101189 -0.25397146 0.068339245 -0.02478600 0.081670367
## speciesF 0.73812254 0.63668651 0.074816996 0.21016789 -0.006105393
# Cos2 of the variables: quality of representation on the factor map
head(var[["cos2"]])
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## speciesA 0.738277847 0.00011563 2.860160e-02 0.2323773331 6.275898e-04
## speciesB 0.856753892 0.13487669 1.021423e-05 0.0065677818 1.791427e-03
## speciesC 0.002315839 0.72958037 2.210531e-01 0.0469038128 1.468692e-04
## speciesD 0.367135619 0.29240873 3.324758e-01 0.0078888318 9.098949e-05
## speciesE 0.923543849 0.06450150 4.670252e-03 0.0006143457 6.670049e-03
## speciesF 0.544824887 0.40536971 5.597583e-03 0.0441705417 3.727583e-05
# Contribution of each variable to each principal component
head(var[["contrib"]])
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## speciesA 18.62053055 0.005644899 4.767036054 61.589223 6.6382988
## speciesB 21.60868309 6.584497463 0.001702408 1.740723 18.9487241
## speciesC 0.05840912 35.617127419 36.842978567 12.431373 1.5535012
## speciesD 9.25973878 14.274998802 55.413832109 2.090854 0.9624366
## speciesE 23.29323103 3.148876258 0.778392161 0.162826 70.5520999
## speciesF 13.74134208 19.789601479 0.932950561 11.706948 0.3942832
# head(var$coord, 3)
# Plot the variables on the factor map, all drawn in black.
fviz_pca_var(
  df.pca,
  col.var = "black"
)
As suspected, every species contributes to the principal components. The species showing the highest importance are B, E, F, and G.
library("corrplot")
## corrplot 0.90 loaded
# Visualise the variables' cos2 matrix; is.corr = FALSE because these values
# are not correlation coefficients.
corrplot(var[["cos2"]], is.corr = FALSE)

# Bar chart of each variable's cos2 over dimensions 1 and 2.
fviz_cos2(
  df.pca,
  axes = seq_len(2),
  choice = "var"
)

# Coordinates of the individuals on each dimension (first rows only).
head(ind[["coord"]])
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## 1 1.1951413 -1.320730 -1.2905781 -0.002962978 0.09781942
## 2 1.2876107 -1.254958 0.1164269 0.136953784 -0.18805503
## 3 2.9904266 1.051379 0.9465344 0.147367970 0.07863657
## 4 -2.8184606 -1.609609 0.8631057 0.133677885 0.06469264
## 5 -0.9447908 1.140238 -0.1202938 -1.236919052 -0.02401635
## 6 -1.7099272 1.993680 -0.5151951 0.821882392 -0.02907725
# Cos2 of the individuals: quality of their representation per dimension
head(ind[["cos2"]])
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## 1 0.2946378 0.3598139 0.343572684 1.810952e-06 0.0019737866
## 2 0.5023248 0.4771706 0.004106977 5.682814e-03 0.0107148307
## 3 0.8150521 0.1007482 0.081656707 1.979362e-03 0.0005635964
## 4 0.7028866 0.2292461 0.065915766 1.581177e-03 0.0003703148
## 5 0.2388122 0.3478376 0.003871432 4.093245e-01 0.0001543118
## 6 0.3729225 0.5069604 0.033853748 8.615545e-02 0.0001078376
# Contribution of each individual to each dimension
head(ind[["contrib"]])
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## 1 6.004259 14.192615 46.2674395 3.878079e-04 16.868632
## 2 6.969316 12.814238 0.3765423 8.285291e-01 62.344708
## 3 37.591292 8.993997 24.8874054 9.593253e-01 10.901313
## 4 33.392190 21.080222 20.6935414 7.893668e-01 7.378013
## 5 3.752255 10.578542 0.4019697 6.758375e+01 1.016819
## 6 12.290688 32.340386 7.3731017 2.983864e+01 1.490515
# Describe dimensions 1 and 2 by the variables associated with them, using a
# 0.05 threshold, then show the description of the first dimension.
df.desc <- dimdesc(
  df.pca,
  proba = 0.05,
  axes = c(1, 2)
)
df.desc[["Dim.1"]]
## $quanti
## correlation p.value
## speciesE 0.9610119 0.002250477
## speciesB 0.9256100 0.008094973
## speciesA -0.8592310 0.028329149
##
## attr(,"class")
## [1] "condes" "list"
# Description of the second dimension.
df.desc[["Dim.2"]]
## $quanti
## correlation p.value
## speciesC 0.8541548 0.03035512
##
## attr(,"class")
## [1] "condes" "list"
# Individuals on the factor map with default styling.
fviz_pca_ind(df.pca)

# Same map, coloured by each individual's cos2 along a red-blue-green
# gradient; repel = TRUE keeps the point labels from overlapping.
fviz_pca_ind(
  df.pca,
  repel = TRUE,
  col.ind = "cos2",
  gradient.cols = c("red", "blue", "green")
)

# Bar charts: cos2 of the individuals, then their contribution to
# dimensions 1 and 2.
fviz_cos2(df.pca, choice = "ind")
fviz_contrib(
  df.pca,
  axes = seq_len(2),
  choice = "ind"
)
# Colour the individuals by an arbitrary continuous variable (random normal
# draws, seeded so the figure is reproducible).
set.seed(123)

# One value is needed per individual, so take the count from the PCA result
# instead of hard-coding it; this stays correct if the input data changes.
n_ind <- nrow(df.pca$ind$coord)
my.cont.var <- rnorm(n_ind)

fviz_pca_ind(
  df.pca,
  col.ind = my.cont.var,
  gradient.cols = c("red", "blue", "green"),
  legend.title = "Cont.Var"
)
# Biplot of individuals (green) and variables (red) on the same map, with
# overlapping labels pushed apart.
fviz_pca_biplot(
  df.pca,
  col.ind = "green",
  col.var = "red",
  repel = TRUE
)