# NOTE: this is a draft script, so it carries no detailed explanations.
#
# The global environment is deliberately not cleared here: wiping it would
# destroy the caller's objects without resetting attached packages or
# options. Restart the R session if a clean state is required.

# Load the data and standardise it ----
data("USArrests")
library(tidyverse)
library(magrittr)
# scale() centres every column and divides it by its standard deviation.
df <- USArrests %>%
  scale()
# Principal component analysis ----
library(FactoMineR)
library(factoextra)

# Fit the PCA without drawing FactoMineR's default plots
res.pca <- PCA(X = df, graph = FALSE)

# Eigenvalues, method 1: factoextra helper
eig.val <- get_eigenvalue(X = res.pca)
eig.val
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  2.4802416        62.006039                    62.00604
## Dim.2  0.9897652        24.744129                    86.75017
## Dim.3  0.3565632         8.914080                    95.66425
## Dim.4  0.1734301         4.335752                   100.00000

# Eigenvalues, method 2: read them straight from the PCA result
res.pca$eig
##        eigenvalue percentage of variance cumulative percentage of variance
## comp 1  2.4802416              62.006039                          62.00604
## comp 2  0.9897652              24.744129                          86.75017
## comp 3  0.3565632               8.914080                          95.66425
## comp 4  0.1734301               4.335752                         100.00000
# Scree plot with the percentage written on each bar
fviz_eig(X = res.pca, addlabels = TRUE)

# Variable plot, all arrows in black
fviz_pca_var(X = res.pca, col.var = "black")

# Extract the per-variable results (coord, cos2, contrib, ...)
library(corrplot)
var <- get_pca_var(res.pca)

# cos2 of each variable on each dimension, as a matrix plot
# (is.corr = FALSE because the values are not correlations)
corrplot(corr = var$cos2, is.corr = FALSE)

# cos2 of the variables as a bar plot
fviz_cos2(X = res.pca, choice = "var")

# Variable plot coloured by cos2
fviz_pca_var(
  X = res.pca,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Variable plot with transparency driven by cos2
fviz_pca_var(X = res.pca, alpha.var = "cos2")

# Contribution of each variable to each dimension, as a matrix plot
corrplot(corr = var$contrib, is.corr = FALSE)

# Contributions of variables to PC1
fviz_contrib(X = res.pca, choice = "var", axes = 1)

# Contributions of variables to PC2
fviz_contrib(X = res.pca, choice = "var", axes = 2)

# Variable plot coloured by contribution
fviz_pca_var(
  X = res.pca,
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)

# Colour the variables by an arbitrary continuous variable.
# One random value is drawn per variable in the PCA result, so the length is
# read from res.pca instead of being hard-coded.
set.seed(123) # fixed seed so the random values are reproducible
my.cont.var <- rnorm(nrow(res.pca$var$coord))
# Color variables by the continuous variable
fviz_pca_var(res.pca,
  col.var = my.cont.var,
  gradient.cols = c("blue", "yellow", "red"),
  legend.title = "Cont.Var"
)

# Build a grouping of the variables with k-means on their PCA coordinates.
# Two groups are requested (centers = 2).
set.seed(123)
res.km <- kmeans(x = var$coord, centers = 2, nstart = 25)
grp <- as.factor(res.km$cluster)

# Variable plot coloured by k-means group
fviz_pca_var(
  X = res.pca,
  col.var = grp,
  palette = c("#0073C2FF", "#EFC000FF", "#868686FF"),
  legend.title = "Cluster"
)

# Describe the first two dimensions (proba = 0.05)
res.desc <- dimdesc(res = res.pca, axes = c(1, 2), proba = 0.05)

# Description of dimension 1
res.desc$Dim.1
## $quanti
##          correlation      p.value
## Assault    0.9184432 5.757351e-21
## Rape       0.8558394 2.403444e-15
## Murder     0.8439764 1.393084e-14
## UrbanPop   0.4381168 1.461945e-03
# Individuals plot coloured by cos2
fviz_pca_ind(
  X = res.pca,
  col.ind = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Individuals plot with point size driven by cos2
fviz_pca_ind(
  X = res.pca,
  pointsize = "cos2",
  pointshape = 21,
  fill = "#E7B800",
  repel = TRUE
)

# Individuals plot with both colour and point size driven by cos2
fviz_pca_ind(
  X = res.pca,
  col.ind = "cos2",
  pointsize = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# cos2 of the individuals as a bar plot with flipped axes
fviz_cos2(X = res.pca, choice = "ind") +
  coord_flip()

# Total contribution on PC1 and PC2
fviz_contrib(X = res.pca, choice = "ind", axes = 1:2) +
  coord_flip()

# Colour the individuals by an arbitrary continuous variable.
# One random value is drawn per individual in the PCA result, so the length
# is read from res.pca instead of being hard-coded.
set.seed(123) # fixed seed so the random values are reproducible
my.cont.var <- rnorm(nrow(res.pca$ind$coord))
# Color individuals by the continuous variable
fviz_pca_ind(res.pca,
  col.ind = my.cont.var,
  gradient.cols = c("blue", "yellow", "red"),
  legend.title = "Cont.Var"
)

# PCA on iris: column 5 (Species) is dropped before the analysis
iris.pca <- PCA(X = iris[, -5], graph = FALSE)

# Individuals coloured by species, with concentration ellipses
fviz_pca_ind(
  X = iris.pca,
  geom.ind = "point",       # show points only (but not "text")
  col.ind = iris$Species,   # color by groups
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  addEllipses = TRUE,       # concentration ellipses
  legend.title = "Groups"
)

# Same plot, with ellipse.type = "confidence"
fviz_pca_ind(
  X = iris.pca,
  geom.ind = "point",
  col.ind = iris$Species,
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  addEllipses = TRUE,
  ellipse.type = "confidence",
  legend.title = "Groups"
)

# Grouping passed through habillage, using the "jco" palette
fviz_pca_ind(
  X = iris.pca,
  label = "none",             # hide individual labels
  habillage = iris$Species,   # color by groups
  addEllipses = TRUE,         # concentration ellipses
  palette = "jco"
)

# Biplot: individuals coloured by species, variables drawn in black
fviz_pca_biplot(
  X = iris.pca,
  col.ind = iris$Species,
  palette = "jco",
  addEllipses = TRUE,
  label = "var",
  col.var = "black",
  repel = TRUE,
  legend.title = "Species"
)

# Biplot: points filled by species, variables coloured by sepal/petal group
fviz_pca_biplot(
  X = iris.pca,
  # Individuals: filled points with a black outline
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2.5,
  fill.ind = iris$Species,
  col.ind = "black",
  # Variables: coloured by the sepal/petal factor
  col.var = factor(c("sepal", "sepal", "petal", "petal")),
  legend.title = list(fill = "Species", color = "Clusters"),
  repel = TRUE
) +
  ggpubr::fill_palette("jco")

# Biplot: points filled by species, variables shaded by contribution
fviz_pca_biplot(
  X = iris.pca,
  # Individuals
  geom.ind = "point",
  fill.ind = iris$Species,
  col.ind = "black",
  pointshape = 21,
  pointsize = 2,
  palette = "jco",
  addEllipses = TRUE,
  # Variables
  alpha.var = "contrib",
  col.var = "contrib",
  gradient.cols = "RdYlBu",
  legend.title = list(
    fill = "Species",
    color = "Contrib",
    alpha = "Contrib"
  )
)
