Principal Component Analysis (Principal Component Methods in R)

Nguyen Chi Dung

Đây là bản Draft nên không có giải thích gì

# Load the data and standardize it.
# Deliberately no `rm(list = ls())` here: clearing the global environment
# destroys whatever the caller had in their session, and it does not reset
# loaded packages or options, so it gives no real "clean start" either.
data("USArrests")
library(tidyverse)
library(magrittr)
# scale() centers every column and divides it by its standard deviation.
# The result is a numeric matrix (not a data frame) that keeps the row names.
df <- USArrests %>%
  scale()

# Principal Component Analysis ----
library("FactoMineR")
library("factoextra")

# Fit the PCA on the standardized matrix; graph = FALSE suppresses the plots
# that PCA() would otherwise draw itself.
res.pca <- PCA(X = df, graph = FALSE)

# Eigenvalues, method 1: extract them with the factoextra helper.
eig.val <- get_eigenvalue(X = res.pca)
eig.val
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  2.4802416        62.006039                    62.00604
## Dim.2  0.9897652        24.744129                    86.75017
## Dim.3  0.3565632         8.914080                    95.66425
## Dim.4  0.1734301         4.335752                   100.00000
# Eigenvalues, method 2: read the same table straight from the PCA result.
res.pca[["eig"]]
##        eigenvalue percentage of variance cumulative percentage of variance
## comp 1  2.4802416              62.006039                          62.00604
## comp 2  0.9897652              24.744129                          86.75017
## comp 3  0.3565632               8.914080                          95.66425
## comp 4  0.1734301               4.335752                         100.00000
# Scree plot of the eigenvalues, with value labels on the bars.
fviz_eig(X = res.pca, addlabels = TRUE)

# Variable plot with every variable drawn in black.
fviz_pca_var(X = res.pca, col.var = "black")

library("corrplot")
# Per-variable PCA results; `var$cos2` is plotted right below.
var <- get_pca_var(res.pca = res.pca)
# The cos2 table is not a correlation matrix, hence is.corr = FALSE.
corrplot(corr = var$cos2, is.corr = FALSE)

# Bar plot of the cos2 of the variables.
fviz_cos2(X = res.pca, choice = "var")

# Variable plot colored by cos2 along a three-color gradient;
# repel = TRUE asks for non-overlapping text labels.
fviz_pca_var(
  X = res.pca,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Variable plot where transparency, instead of color, follows cos2.
fviz_pca_var(X = res.pca, alpha.var = "cos2")

# Matrix plot of the contribution table (not a correlation matrix,
# hence is.corr = FALSE).
corrplot(corr = var$contrib, is.corr = FALSE)

# Contributions of variables to PC1
fviz_contrib(X = res.pca, choice = "var", axes = 1)

# Contributions of variables to PC2
fviz_contrib(X = res.pca, choice = "var", axes = 2)

# Variable plot colored by contribution along a three-color gradient.
fviz_pca_var(
  X = res.pca,
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)

# Create a random continuous variable with one value per PCA variable
# (4 for USArrests). The length is taken from the PCA result instead of being
# hard-coded, so it stays correct if the input data changes.
set.seed(123)
my.cont.var <- rnorm(nrow(res.pca$var$coord))
# Color variables by the continuous variable
fviz_pca_var(res.pca, col.var = my.cont.var,
             gradient.cols = c("blue", "yellow", "red"),
             legend.title = "Cont.Var")

# Create a grouping variable using kmeans
# Create 2 groups of variables (centers = 2), clustering on the variable
# coordinates; the seed makes the random starts (nstart = 25) reproducible.
set.seed(123)
res.km <- kmeans(var$coord, centers = 2, nstart = 25)
grp <- as.factor(res.km$cluster)
# Color variables by groups (one palette color per cluster)
fviz_pca_var(res.pca, col.var = grp,
             palette = c("#0073C2FF", "#EFC000FF"),
             legend.title = "Cluster")

# Describe dimensions 1 and 2, using proba = 0.05 as the significance
# threshold passed to dimdesc().
res.desc <- dimdesc(res = res.pca, axes = c(1, 2), proba = 0.05)
# Description of dimension 1
res.desc[["Dim.1"]]
## $quanti
##          correlation      p.value
## Assault    0.9184432 5.757351e-21
## Rape       0.8558394 2.403444e-15
## Murder     0.8439764 1.393084e-14
## UrbanPop   0.4381168 1.461945e-03
# Plot of individuals colored by cos2 along a three-color gradient;
# repel = TRUE asks for non-overlapping text labels.
fviz_pca_ind(
  X = res.pca,
  col.ind = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Plot of individuals where point size follows cos2; shape 21 is a filled
# circle, so `fill` sets its interior color.
fviz_pca_ind(
  X = res.pca,
  pointsize = "cos2",
  pointshape = 21,
  fill = "#E7B800",
  repel = TRUE
)