Capítulo 3. Componentes principales del análisis

#install.packages("FactoMineR")
library("FactoMineR")
## Warning: package 'FactoMineR' was built under R version 4.2.3
library("factoextra")
## Warning: package 'factoextra' was built under R version 4.2.3
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
#Se descargan los datos 
data(decathlon2)
head(decathlon2)
##           X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus
## SEBRLE    11.04      7.58    14.83      2.07 49.81        14.69  43.75
## CLAY      10.76      7.40    14.26      1.86 49.37        14.05  50.72
## BERNARD   11.02      7.23    14.25      1.92 48.93        14.99  40.87
## YURKOV    11.34      7.09    15.19      2.10 50.42        15.31  46.26
## ZSIVOCZKY 11.13      7.30    13.48      2.01 48.62        14.17  45.67
## McMULLEN  10.83      7.31    13.76      2.13 49.91        14.38  44.41
##           Pole.vault Javeline X1500m Rank Points Competition
## SEBRLE          5.02    63.19  291.7    1   8217    Decastar
## CLAY            4.92    60.15  301.5    2   8122    Decastar
## BERNARD         5.32    62.77  280.1    4   8067    Decastar
## YURKOV          4.72    63.44  276.4    5   8036    Decastar
## ZSIVOCZKY       4.42    55.37  268.0    7   8004    Decastar
## McMULLEN        4.42    56.37  285.1    8   7995    Decastar
decathlon2.active <- decathlon2[1:23, 1:10]
head(decathlon2.active[, 1:6], 4)
##         X100m Long.jump Shot.put High.jump X400m X110m.hurdle
## SEBRLE  11.04      7.58    14.83      2.07 49.81        14.69
## CLAY    10.76      7.40    14.26      1.86 49.37        14.05
## BERNARD 11.02      7.23    14.25      1.92 48.93        14.99
## YURKOV  11.34      7.09    15.19      2.10 50.42        15.31
res.pca <- PCA(decathlon2.active, graph = FALSE)

#output
print(res.pca)
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 23 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
eig.val <- get_eigenvalue(res.pca)

eig.val
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1   4.1242133        41.242133                    41.24213
## Dim.2   1.8385309        18.385309                    59.62744
## Dim.3   1.2391403        12.391403                    72.01885
## Dim.4   0.8194402         8.194402                    80.21325
## Dim.5   0.7015528         7.015528                    87.22878
## Dim.6   0.4228828         4.228828                    91.45760
## Dim.7   0.3025817         3.025817                    94.48342
## Dim.8   0.2744700         2.744700                    97.22812
## Dim.9   0.1552169         1.552169                    98.78029
## Dim.10  0.1219710         1.219710                   100.00000
fviz_eig(res.pca, addlabels = TRUE, ylim = c(0, 50))

var <- get_pca_var(res.pca)

var
## Principal Component Analysis Results for variables
##  ===================================================
##   Name       Description                                    
## 1 "$coord"   "Coordinates for the variables"                
## 2 "$cor"     "Correlations between variables and dimensions"
## 3 "$cos2"    "Cos2 for the variables"                       
## 4 "$contrib" "contributions of the variables"
# Coordinates
head(var$coord)
##                   Dim.1       Dim.2      Dim.3       Dim.4      Dim.5
## X100m        -0.8506257 -0.17939806  0.3015564  0.03357320 -0.1944440
## Long.jump     0.7941806  0.28085695 -0.1905465 -0.11538956  0.2331567
## Shot.put      0.7339127  0.08540412  0.5175978  0.12846837 -0.2488129
## High.jump     0.6100840 -0.46521415  0.3300852  0.14455012  0.4027002
## X400m        -0.7016034  0.29017826  0.2835329  0.43082552  0.1039085
## X110m.hurdle -0.7641252 -0.02474081  0.4488873 -0.01689589  0.2242200
# Cos2: quality on the factore map
head(var$cos2)
##                  Dim.1        Dim.2      Dim.3        Dim.4      Dim.5
## X100m        0.7235641 0.0321836641 0.09093628 0.0011271597 0.03780845
## Long.jump    0.6307229 0.0788806285 0.03630798 0.0133147506 0.05436203
## Shot.put     0.5386279 0.0072938636 0.26790749 0.0165041211 0.06190783
## High.jump    0.3722025 0.2164242070 0.10895622 0.0208947375 0.16216747
## X400m        0.4922473 0.0842034209 0.08039091 0.1856106269 0.01079698
## X110m.hurdle 0.5838873 0.0006121077 0.20149984 0.0002854712 0.05027463
# Contributions to the principal components
head(var$contrib)
##                  Dim.1      Dim.2     Dim.3       Dim.4     Dim.5
## X100m        17.544293  1.7505098  7.338659  0.13755240  5.389252
## Long.jump    15.293168  4.2904162  2.930094  1.62485936  7.748815
## Shot.put     13.060137  0.3967224 21.620432  2.01407269  8.824401
## High.jump     9.024811 11.7715838  8.792888  2.54987951 23.115504
## X400m        11.935544  4.5799296  6.487636 22.65090599  1.539012
## X110m.hurdle 14.157544  0.0332933 16.261261  0.03483735  7.166193
# Coordinates of variables
head(var$coord, 4)
##                Dim.1       Dim.2      Dim.3      Dim.4      Dim.5
## X100m     -0.8506257 -0.17939806  0.3015564  0.0335732 -0.1944440
## Long.jump  0.7941806  0.28085695 -0.1905465 -0.1153896  0.2331567
## Shot.put   0.7339127  0.08540412  0.5175978  0.1284684 -0.2488129
## High.jump  0.6100840 -0.46521415  0.3300852  0.1445501  0.4027002
#plot
fviz_pca_var(res.pca, col.var = "black")

head(var$cos2, 4)
##               Dim.1       Dim.2      Dim.3      Dim.4      Dim.5
## X100m     0.7235641 0.032183664 0.09093628 0.00112716 0.03780845
## Long.jump 0.6307229 0.078880629 0.03630798 0.01331475 0.05436203
## Shot.put  0.5386279 0.007293864 0.26790749 0.01650412 0.06190783
## High.jump 0.3722025 0.216424207 0.10895622 0.02089474 0.16216747
corrplot::corrplot(var$cos2, is.corr=FALSE)

# Total cos2 of variables on Dim.1 and Dim.2
fviz_cos2(res.pca, choice = "var", axes = 1:2)

# Color by cos2 values: quality on the factor map

fviz_pca_var(res.pca, col.var = "cos2",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), 
             repel = TRUE) # Avoid text overlapping

# Indicate cos2 by transparency
fviz_pca_var(res.pca, alpha.var = "cos2")

head(var$contrib, 4)
##               Dim.1      Dim.2     Dim.3     Dim.4     Dim.5
## X100m     17.544293  1.7505098  7.338659 0.1375524  5.389252
## Long.jump 15.293168  4.2904162  2.930094 1.6248594  7.748815
## Shot.put  13.060137  0.3967224 21.620432 2.0140727  8.824401
## High.jump  9.024811 11.7715838  8.792888 2.5498795 23.115504
corrplot::corrplot(var$contrib, is.corr=FALSE)

# Top 10 Contributions of variables to PC1
fviz_contrib(res.pca, choice = "var", axes = 1, top = 10)

# Top 10 Contributions of variables to PC2
fviz_contrib(res.pca, choice = "var", axes = 2, top = 10)

#Total contribution of PC1 and PC2
fviz_contrib(res.pca, choice = "var", axes = 1:2, top = 10)

fviz_pca_var(res.pca, col.var = "contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))

# Change the transparency by contrib values
fviz_pca_var(res.pca, alpha.var = "contrib")

# Create a random continuous variable of length 10
set.seed(123)
my.cont.var <- rnorm(10)

# Color variables by the continuous variable
fviz_pca_var(res.pca, col.var = my.cont.var,
             gradient.cols = c("blue", "yellow", "red"),
             legend.title = "Cont.Var")

res.desc <- dimdesc(res.pca, axes = c(1,2), proba = 0.05)

# Description of dimension 1
res.desc$Dim.1
## 
## Link between the variable and the continuous variables (R-square)
## =================================================================================
##              correlation      p.value
## Long.jump      0.7941806 6.059893e-06
## Discus         0.7432090 4.842563e-05
## Shot.put       0.7339127 6.723102e-05
## High.jump      0.6100840 1.993677e-03
## Javeline       0.4282266 4.149192e-02
## X400m         -0.7016034 1.910387e-04
## X110m.hurdle  -0.7641252 2.195812e-05
## X100m         -0.8506257 2.727129e-07
# Description of dimension 2
res.desc$Dim.2
## 
## Link between the variable and the continuous variables (R-square)
## =================================================================================
##            correlation      p.value
## Pole.vault   0.8074511 3.205016e-06
## X1500m       0.7844802 9.384747e-06
## High.jump   -0.4652142 2.529390e-02
ind <- get_pca_ind(res.pca)
ind
## Principal Component Analysis Results for individuals
##  ===================================================
##   Name       Description                       
## 1 "$coord"   "Coordinates for the individuals" 
## 2 "$cos2"    "Cos2 for the individuals"        
## 3 "$contrib" "contributions of the individuals"
# Coordinates of individuals
head(ind$coord)
##                Dim.1      Dim.2      Dim.3       Dim.4       Dim.5
## SEBRLE     0.1955047  1.5890567  0.6424912  0.08389652  1.16829387
## CLAY       0.8078795  2.4748137 -1.3873827  1.29838232 -0.82498206
## BERNARD   -1.3591340  1.6480950  0.2005584 -1.96409420  0.08419345
## YURKOV    -0.8889532 -0.4426067  2.5295843  0.71290837  0.40782264
## ZSIVOCZKY -0.1081216 -2.0688377 -1.3342591 -0.10152796 -0.20145217
## McMULLEN   0.1212195 -1.0139102 -0.8625170  1.34164291  1.62151286
# Quality of individuals
head(ind$cos2)
##                 Dim.1      Dim.2       Dim.3       Dim.4        Dim.5
## SEBRLE    0.007530179 0.49747323 0.081325232 0.001386688 0.2689026575
## CLAY      0.048701249 0.45701660 0.143628117 0.125791741 0.0507850580
## BERNARD   0.197199804 0.28996555 0.004294015 0.411819183 0.0007567259
## YURKOV    0.096109800 0.02382571 0.778230322 0.061812637 0.0202279796
## ZSIVOCZKY 0.001574385 0.57641944 0.239754152 0.001388216 0.0054654972
## McMULLEN  0.002175437 0.15219499 0.110137872 0.266486530 0.3892621478
# Contributions of individuals
head(ind$contrib)
##                Dim.1      Dim.2      Dim.3       Dim.4       Dim.5
## SEBRLE    0.04029447  5.9714533  1.4483919  0.03734589  8.45894063
## CLAY      0.68805664 14.4839248  6.7537381  8.94458283  4.21794385
## BERNARD   1.94740183  6.4234107  0.1411345 20.46819433  0.04393073
## YURKOV    0.83308415  0.4632733 22.4517396  2.69663605  1.03075263
## ZSIVOCZKY 0.01232413 10.1217143  6.2464325  0.05469230  0.25151025
## McMULLEN  0.01549089  2.4310854  2.6102794  9.55055888 16.29493304
fviz_pca_ind(res.pca)

#Color by cos2 value

fviz_pca_ind(res.pca, col.ind = "cos2", 
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE) # Avoid text overlapping (slow if many points)

#Point size by cos2 value

fviz_pca_ind(res.pca, pointsize = "cos2", 
             pointshape = 21, fill = "#E7B800",
             repel = TRUE) # Avoid text overlapping (slow if many points)

#Both size and color by cos2

fviz_pca_ind(res.pca, col.ind = "cos2", pointsize = "cos2",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE) # Avoid text overlapping (slow if many points)

fviz_cos2(res.pca, choice = "ind")

# Total contribution on PC1 and PC2
fviz_contrib(res.pca, choice = "ind", axes = 1:2)

# Create a random continuous variable of length 23,
# Same length as the number of active individuals in the PCA

set.seed(123)
my.cont.var <- rnorm(23)

# Color individuals by the continuous variable

fviz_pca_ind(res.pca, col.ind = my.cont.var,
             gradient.cols = c("blue", "yellow", "red"),
             legend.title = "Cont.Var")

res.pca_2 <- PCA(decathlon2, ind.sup = 24:27, 
               quanti.sup = 11:12, quali.sup = 13, graph=FALSE)
res.pca_2$quanti.sup
## $coord
##             Dim.1       Dim.2      Dim.3       Dim.4       Dim.5
## Rank   -0.7014777 -0.24519443 -0.1834294  0.05575186 -0.07382647
## Points  0.9637075  0.07768262  0.1580225 -0.16623092 -0.03114711
## 
## $cor
##             Dim.1       Dim.2      Dim.3       Dim.4       Dim.5
## Rank   -0.7014777 -0.24519443 -0.1834294  0.05575186 -0.07382647
## Points  0.9637075  0.07768262  0.1580225 -0.16623092 -0.03114711
## 
## $cos2
##            Dim.1       Dim.2      Dim.3      Dim.4        Dim.5
## Rank   0.4920710 0.060120310 0.03364635 0.00310827 0.0054503477
## Points 0.9287322 0.006034589 0.02497110 0.02763272 0.0009701427
#visualize
fviz_pca_var(res.pca_2)

# Change color of variables
fviz_pca_var(res.pca_2,
             col.var = "black",     # Active variables
             col.quanti.sup = "red" # Suppl. quantitative variables
)

fviz_pca_var(res.pca_2, invisible = "var")

fviz_pca_var(res.pca_2, invisible = "quanti.sup")

res.pca_2$ind.sup
## $coord
##              Dim.1       Dim.2      Dim.3      Dim.4       Dim.5
## KARPOV   0.7947206  0.77951227 -1.6330203  1.7242283 -0.75070396
## WARNERS -0.3864645 -0.12159237 -1.7387332 -0.7063341 -0.03230011
## Nool    -0.5591306  1.97748871 -0.4830358 -2.2784526 -0.25461493
## Drews   -1.1092038  0.01741477 -3.0488182 -1.5343468 -0.32642192
## 
## $cos2
##              Dim.1        Dim.2      Dim.3      Dim.4        Dim.5
## KARPOV  0.05104677 4.911173e-02 0.21553730 0.24028620 0.0455487744
## WARNERS 0.02422707 2.398250e-03 0.49039677 0.08092862 0.0001692349
## Nool    0.02897149 3.623868e-01 0.02162236 0.48108780 0.0060077529
## Drews   0.09207094 2.269527e-05 0.69560547 0.17617609 0.0079736753
## 
## $dist
##   KARPOV  WARNERS     Nool    Drews 
## 3.517470 2.482899 3.284943 3.655527
p_2 <- fviz_pca_ind(res.pca_2, col.ind.sup = "blue", repel = TRUE)
p_2 <- fviz_add(p_2, res.pca_2$quali.sup$coord, color = "red")
p_2

res.pca_2$quali
## $coord
##              Dim.1      Dim.2       Dim.3      Dim.4      Dim.5
## Decastar -1.343451  0.1218097 -0.03789524  0.1808357  0.1343364
## OlympicG  1.231497 -0.1116589  0.03473730 -0.1657661 -0.1231417
## 
## $cos2
##              Dim.1       Dim.2        Dim.3      Dim.4       Dim.5
## Decastar 0.9051233 0.007440939 0.0007201669 0.01639956 0.009050062
## OlympicG 0.9051233 0.007440939 0.0007201669 0.01639956 0.009050062
## 
## $v.test
##              Dim.1      Dim.2      Dim.3      Dim.4      Dim.5
## Decastar -2.970766  0.4034256 -0.1528767  0.8971036  0.7202457
## OlympicG  2.970766 -0.4034256  0.1528767 -0.8971036 -0.7202457
## 
## $dist
## Decastar OlympicG 
## 1.412108 1.294433 
## 
## $eta2
##                 Dim.1      Dim.2       Dim.3      Dim.4      Dim.5
## Competition 0.4011568 0.00739783 0.001062332 0.03658159 0.02357972
fviz_pca_ind(res.pca_2, habillage = 13,
             addEllipses =TRUE, ellipse.type = "confidence",
             palette = "jco", repel = TRUE) 

Cápitulo 4: análisis de correspondencias en R

#install.packages(c("FactoMineR", "factoextra"))
library("FactoMineR")
library("factoextra")
data(housetasks)
head(housetasks)
##            Wife Alternating Husband Jointly
## Laundry     156          14       2       4
## Main_meal   124          20       5       4
## Dinner       77          11       7      13
## Breakfeast   82          36      15       7
## Tidying      53          11       1      57
## Dishes       32          24       4      53
library("gplots")
## Warning: package 'gplots' was built under R version 4.2.3
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# 1. convert the data as a table
dt <- as.table(as.matrix(housetasks))
# 2. Graph
balloonplot(t(dt), main ="housetasks", xlab ="", ylab="",
            label = FALSE, show.margins = FALSE)

chisq <- chisq.test(housetasks)
chisq
## 
##  Pearson's Chi-squared test
## 
## data:  housetasks
## X-squared = 1944.5, df = 36, p-value < 2.2e-16
library("FactoMineR")
res.ca <- CA(housetasks, graph = FALSE)

print(res.ca)
## **Results of the Correspondence Analysis (CA)**
## The row variable has  13  categories; the column variable has 4 categories
## The chi square of independence between the two variables is equal to 1944.456 (p-value =  0 ).
## *The results are available in the following objects:
## 
##    name              description                   
## 1  "$eig"            "eigenvalues"                 
## 2  "$col"            "results for the columns"     
## 3  "$col$coord"      "coord. for the columns"      
## 4  "$col$cos2"       "cos2 for the columns"        
## 5  "$col$contrib"    "contributions of the columns"
## 6  "$row"            "results for the rows"        
## 7  "$row$coord"      "coord. for the rows"         
## 8  "$row$cos2"       "cos2 for the rows"           
## 9  "$row$contrib"    "contributions of the rows"   
## 10 "$call"           "summary called parameters"   
## 11 "$call$marge.col" "weights of the columns"      
## 12 "$call$marge.row" "weights of the rows"
# Chi-square statistics
chi2 <- 1944.456
# Degree of freedom
df <- (nrow(housetasks) - 1) * (ncol(housetasks) - 1)
# P-value
pval <- pchisq(chi2, df = df, lower.tail = FALSE)
pval
## [1] 0
library("factoextra")
eig.val <- get_eigenvalue(res.ca)
eig.val
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  0.5428893         48.69222                    48.69222
## Dim.2  0.4450028         39.91269                    88.60491
## Dim.3  0.1270484         11.39509                   100.00000
fviz_screeplot(res.ca, addlabels = TRUE, ylim = c(0, 50))

fviz_screeplot(res.ca) +
 geom_hline(yintercept=33.33, linetype=2, color="red")

# repel= TRUE to avoid text overlapping (slow if many point)
fviz_ca_biplot(res.ca, repel = TRUE)

row <- get_ca_row(res.ca)
row
## Correspondence Analysis - Results for rows
##  ===================================================
##   Name       Description                
## 1 "$coord"   "Coordinates for the rows" 
## 2 "$cos2"    "Cos2 for the rows"        
## 3 "$contrib" "contributions of the rows"
## 4 "$inertia" "Inertia of the rows"
# Coordinates
head(row$coord)
##                 Dim 1      Dim 2       Dim 3
## Laundry    -0.9918368  0.4953220 -0.31672897
## Main_meal  -0.8755855  0.4901092 -0.16406487
## Dinner     -0.6925740  0.3081043 -0.20741377
## Breakfeast -0.5086002  0.4528038  0.22040453
## Tidying    -0.3938084 -0.4343444 -0.09421375
## Dishes     -0.1889641 -0.4419662  0.26694926
# Cos2: quality on the factore map
head(row$cos2)
##                Dim 1     Dim 2      Dim 3
## Laundry    0.7399874 0.1845521 0.07546047
## Main_meal  0.7416028 0.2323593 0.02603787
## Dinner     0.7766401 0.1537032 0.06965666
## Breakfeast 0.5049433 0.4002300 0.09482670
## Tidying    0.4398124 0.5350151 0.02517249
## Dishes     0.1181178 0.6461525 0.23572969
# Contributions to the principal components
head(row$contrib)
##                 Dim 1    Dim 2    Dim 3
## Laundry    18.2867003 5.563891 7.968424
## Main_meal  12.3888433 4.735523 1.858689
## Dinner      5.4713982 1.321022 2.096926
## Breakfeast  3.8249284 3.698613 3.069399
## Tidying     1.9983518 2.965644 0.488734
## Dishes      0.4261663 2.844117 3.634294
head(row$coord)
##                 Dim 1      Dim 2       Dim 3
## Laundry    -0.9918368  0.4953220 -0.31672897
## Main_meal  -0.8755855  0.4901092 -0.16406487
## Dinner     -0.6925740  0.3081043 -0.20741377
## Breakfeast -0.5086002  0.4528038  0.22040453
## Tidying    -0.3938084 -0.4343444 -0.09421375
## Dishes     -0.1889641 -0.4419662  0.26694926
fviz_ca_row(res.ca, repel = TRUE)

fviz_ca_row(res.ca, col.row="steelblue", shape.row = 15)

head(row$cos2, 4)
##                Dim 1     Dim 2      Dim 3
## Laundry    0.7399874 0.1845521 0.07546047
## Main_meal  0.7416028 0.2323593 0.02603787
## Dinner     0.7766401 0.1537032 0.06965666
## Breakfeast 0.5049433 0.4002300 0.09482670
# Color by cos2 values: quality on the factor map
fviz_ca_row(res.ca, col.row = "cos2",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), 
             repel = TRUE)

# Change the transparency by cos2 values
fviz_ca_row(res.ca, alpha.row="cos2")

library("corrplot")
## Warning: package 'corrplot' was built under R version 4.2.3
## corrplot 0.92 loaded
corrplot(row$cos2, is.corr=FALSE)

# Cos2 of rows on Dim.1 and Dim.2
fviz_cos2(res.ca, choice = "row", axes = 1:2)

head(row$contrib)
##                 Dim 1    Dim 2    Dim 3
## Laundry    18.2867003 5.563891 7.968424
## Main_meal  12.3888433 4.735523 1.858689
## Dinner      5.4713982 1.321022 2.096926
## Breakfeast  3.8249284 3.698613 3.069399
## Tidying     1.9983518 2.965644 0.488734
## Dishes      0.4261663 2.844117 3.634294
library("corrplot")
corrplot(row$contrib, is.corr=FALSE) 

# Contributions of rows to dimension 1
fviz_contrib(res.ca, choice = "row", axes = 1, top = 10)

# Contributions of rows to dimension 2
fviz_contrib(res.ca, choice = "row", axes = 2, top = 10)

# Total contribution to dimension 1 and 2
fviz_contrib(res.ca, choice = "row", axes = 1:2, top = 10)

fviz_ca_row(res.ca, col.row = "contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), 
             repel = TRUE)

# Change the transparency by contrib values
fviz_ca_row(res.ca, alpha.row="contrib",
             repel = TRUE)

col <- get_ca_col(res.ca)
col
## Correspondence Analysis - Results for columns
##  ===================================================
##   Name       Description                   
## 1 "$coord"   "Coordinates for the columns" 
## 2 "$cos2"    "Cos2 for the columns"        
## 3 "$contrib" "contributions of the columns"
## 4 "$inertia" "Inertia of the columns"
# Coordinates of column points
head(col$coord)
##                   Dim 1      Dim 2       Dim 3
## Wife        -0.83762154  0.3652207 -0.19991139
## Alternating -0.06218462  0.2915938  0.84858939
## Husband      1.16091847  0.6019199 -0.18885924
## Jointly      0.14942609 -1.0265791 -0.04644302
# Quality of representation
head(col$cos2)
##                   Dim 1     Dim 2       Dim 3
## Wife        0.801875947 0.1524482 0.045675847
## Alternating 0.004779897 0.1051016 0.890118521
## Husband     0.772026244 0.2075420 0.020431728
## Jointly     0.020705858 0.9772939 0.002000236
# Contributions
head(col$contrib)
##                 Dim 1     Dim 2      Dim 3
## Wife        44.462018 10.312237 10.8220753
## Alternating  0.103739  2.782794 82.5492464
## Husband     54.233879 17.786612  6.1331792
## Jointly      1.200364 69.118357  0.4954991
fviz_ca_col(res.ca)

fviz_ca_col(res.ca, col.col = "cos2", 
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE)

fviz_cos2(res.ca, choice = "col", axes = 1:2)

fviz_contrib(res.ca, choice = "col", axes = 1:2)

fviz_ca_biplot(res.ca, repel = TRUE)

fviz_ca_biplot(res.ca, 
               map ="rowprincipal", arrow = c(TRUE, TRUE),
               repel = TRUE)

fviz_ca_biplot(res.ca, map ="colgreen", arrow = c(TRUE, FALSE),
               repel = TRUE)

# Dimension description
res.desc <- dimdesc(res.ca, axes = c(1,2))
# Description of dimension 1 by row points
head(res.desc[[1]]$row, 4)
##                 coord
## Laundry    -0.9918368
## Main_meal  -0.8755855
## Dinner     -0.6925740
## Breakfeast -0.5086002
# Description of dimension 1 by column points
head(res.desc[[1]]$col, 4)
##                   coord
## Wife        -0.83762154
## Alternating -0.06218462
## Jointly      0.14942609
## Husband      1.16091847
# Description of dimension 2 by row points
res.desc[[2]]$row
##                 coord
## Holidays   -1.4350066
## Finances   -0.6178684
## Insurance  -0.4737832
## Dishes     -0.4419662
## Tidying    -0.4343444
## Shopping   -0.4033171
## Official    0.2536132
## Dinner      0.3081043
## Breakfeast  0.4528038
## Main_meal   0.4901092
## Laundry     0.4953220
## Driving     0.6534143
## Repairs     0.8642647
# Description of dimension 1 by column points
res.desc[[2]]$col
##                  coord
## Jointly     -1.0265791
## Alternating  0.2915938
## Wife         0.3652207
## Husband      0.6019199
data(children)
head(children)
##               unqualified cep bepc high_school_diploma university thirty fifty
## money                  51  64   32                  29         17     59    66
## future                 53  90   78                  75         22    115   117
## unemployment           71 111   50                  40         11     79    88
## circumstances           1   7    5                   5          4      9     8
## hard                    7  11    4                   3          2      2    17
## economic                7  13   12                  11         11     18    19
##               more_fifty
## money                 70
## future                86
## unemployment         177
## circumstances          5
## hard                  18
## economic              17
res.ca <- CA (children, row.sup = 15:18, col.sup = 6:8,
              graph = FALSE)
fviz_ca_biplot(res.ca, repel = TRUE)

fviz_ca_biplot(res.ca, repel = TRUE,
               invisible = c("row.sup", "col.sup"))

res.ca$row.sup
## $coord
##                  Dim 1     Dim 2      Dim 3      Dim 4
## comfort      0.2096705 0.7031677 0.07111168  0.3071354
## disagreement 0.1462777 0.1190106 0.17108916 -0.3132169
## world        0.5233045 0.1429707 0.08399269 -0.1063597
## to_live      0.3083067 0.5020193 0.52093397  0.2557357
## 
## $cos2
##                   Dim 1      Dim 2       Dim 3      Dim 4
## comfort      0.06892759 0.77524032 0.007928672 0.14790342
## disagreement 0.13132177 0.08692632 0.179649183 0.60210272
## world        0.87587685 0.06537746 0.022564054 0.03618163
## to_live      0.13899699 0.36853645 0.396830367 0.09563620
fviz_ca_row(res.ca, repel = TRUE) 

res.ca$col.sup
## $coord
##                  Dim 1       Dim 2       Dim 3       Dim 4
## thirty      0.10541339 -0.05969594 -0.10322613  0.06977996
## fifty      -0.01706444  0.04907657 -0.01568923 -0.01306117
## more_fifty -0.17706810 -0.04813788  0.10077299 -0.08517528
## 
## $cos2
##                Dim 1      Dim 2       Dim 3       Dim 4
## thirty     0.1375601 0.04411543 0.131910759 0.060278490
## fifty      0.0108695 0.08990298 0.009188167 0.006367804
## more_fifty 0.2860989 0.02114509 0.092666735 0.066200714
fviz_ca_col(res.ca, repel = TRUE) 

# Visualize rows with cos2 >= 0.8
fviz_ca_row(res.ca, select.row = list(cos2 = 0.8))

# Top 5 active rows and 5 suppl. rows with the highest cos2
fviz_ca_row(res.ca, select.row = list(cos2 = 5))

# Select by names
name <- list(name = c("employment", "fear", "future"))
fviz_ca_row(res.ca, select.row = name)

# Top 5 contributing rows and columns
fviz_ca_biplot(res.ca, select.row = list(contrib = 5), 
               select.col = list(contrib = 5)) +
  theme_minimal()

Capítulo 5: Análisis de corrsepondencia múltiple

library("FactoMineR")
library("factoextra")
#se cargan los datos 
data(poison)
head(poison[, 1:7], 3)
##   Age Time   Sick Sex   Nausea Vomiting Abdominals
## 1   9   22 Sick_y   F Nausea_y  Vomit_n     Abdo_y
## 2   5    0 Sick_n   F Nausea_n  Vomit_n     Abdo_n
## 3   6   16 Sick_y   F Nausea_n  Vomit_y     Abdo_y
poison.active <- poison[1:55, 5:15]
head(poison.active[, 1:6], 3)
##     Nausea Vomiting Abdominals   Fever   Diarrhae   Potato
## 1 Nausea_y  Vomit_n     Abdo_y Fever_y Diarrhea_y Potato_y
## 2 Nausea_n  Vomit_n     Abdo_n Fever_n Diarrhea_n Potato_y
## 3 Nausea_n  Vomit_y     Abdo_y Fever_y Diarrhea_y Potato_y
# Summary of the 4 first variables
summary(poison.active)[, 1:4]
##       Nausea      Vomiting   Abdominals     Fever   
##  Nausea_n:43   Vomit_n:33   Abdo_n:18   Fever_n:20  
##  Nausea_y:12   Vomit_y:22   Abdo_y:37   Fever_y:35
for (i in 1:4) {
plot(poison.active[,i], main=colnames(poison.active)[i],
ylab = "Count", col="steelblue", las = 2)
}

res.mca <- MCA(poison.active, graph = FALSE)
print(res.mca)
## **Results of the Multiple Correspondence Analysis (MCA)**
## The analysis was performed on 55 individuals, described by 11 variables
## *The results are available in the following objects:
## 
##    name              description                       
## 1  "$eig"            "eigenvalues"                     
## 2  "$var"            "results for the variables"       
## 3  "$var$coord"      "coord. of the categories"        
## 4  "$var$cos2"       "cos2 for the categories"         
## 5  "$var$contrib"    "contributions of the categories" 
## 6  "$var$v.test"     "v-test for the categories"       
## 7  "$ind"            "results for the individuals"     
## 8  "$ind$coord"      "coord. for the individuals"      
## 9  "$ind$cos2"       "cos2 for the individuals"        
## 10 "$ind$contrib"    "contributions of the individuals"
## 11 "$call"           "intermediate results"            
## 12 "$call$marge.col" "weights of columns"              
## 13 "$call$marge.li"  "weights of rows"
library("factoextra")
eig.val <- get_eigenvalue(res.mca)
head(eig.val)
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1 0.33523140        33.523140                    33.52314
## Dim.2 0.12913979        12.913979                    46.43712
## Dim.3 0.10734849        10.734849                    57.17197
## Dim.4 0.09587950         9.587950                    66.75992
## Dim.5 0.07883277         7.883277                    74.64319
## Dim.6 0.07108981         7.108981                    81.75217
fviz_screeplot(res.mca, addlabels = TRUE, ylim = c(0, 45))

fviz_mca_biplot(res.mca,
repel = TRUE, # Avoid text overlapping (slow if many point)
ggtheme = theme_minimal())

var <- get_mca_var(res.mca)
var
## Multiple Correspondence Analysis Results for variables
##  ===================================================
##   Name       Description                  
## 1 "$coord"   "Coordinates for categories" 
## 2 "$cos2"    "Cos2 for categories"        
## 3 "$contrib" "contributions of categories"
# Coordinates
head(var$coord)
##               Dim 1       Dim 2        Dim 3       Dim 4       Dim 5
## Nausea_n  0.2673909  0.12139029 -0.265583253  0.03376130  0.07370500
## Nausea_y -0.9581506 -0.43498187  0.951673323 -0.12097801 -0.26410958
## Vomit_n   0.4790279 -0.40919465  0.084492799  0.27361142  0.05245250
## Vomit_y  -0.7185419  0.61379197 -0.126739198 -0.41041713 -0.07867876
## Abdo_n    1.3180221 -0.03574501 -0.005094243 -0.15360951 -0.06986987
## Abdo_y   -0.6411999  0.01738946  0.002478280  0.07472895  0.03399075
# Cos2: quality on the factore map
head(var$cos2)
##              Dim 1        Dim 2        Dim 3       Dim 4       Dim 5
## Nausea_n 0.2562007 0.0528025759 2.527485e-01 0.004084375 0.019466197
## Nausea_y 0.2562007 0.0528025759 2.527485e-01 0.004084375 0.019466197
## Vomit_n  0.3442016 0.2511603912 1.070855e-02 0.112294813 0.004126898
## Vomit_y  0.3442016 0.2511603912 1.070855e-02 0.112294813 0.004126898
## Abdo_n   0.8451157 0.0006215864 1.262496e-05 0.011479077 0.002374929
## Abdo_y   0.8451157 0.0006215864 1.262496e-05 0.011479077 0.002374929
# Contributions to the principal components
head(var$contrib)
##              Dim 1       Dim 2        Dim 3      Dim 4      Dim 5
## Nausea_n  1.515869  0.81100008 4.670018e+00 0.08449397 0.48977906
## Nausea_y  5.431862  2.90608363 1.673423e+01 0.30277007 1.75504164
## Vomit_n   3.733667  7.07226253 3.627455e-01 4.25893721 0.19036376
## Vomit_y   5.600500 10.60839380 5.441183e-01 6.38840581 0.28554563
## Abdo_n   15.417637  0.02943661 7.192511e-04 0.73219636 0.18424268
## Abdo_y    7.500472  0.01432051 3.499060e-04 0.35620363 0.08963157
fviz_mca_var(res.mca, choice = "mca.cor",
             repel = TRUE, # Avoid text overlapping (slow)
             ggtheme = theme_minimal())

head(round(var$coord, 2), 4)
##          Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Nausea_n  0.27  0.12 -0.27  0.03  0.07
## Nausea_y -0.96 -0.43  0.95 -0.12 -0.26
## Vomit_n   0.48 -0.41  0.08  0.27  0.05
## Vomit_y  -0.72  0.61 -0.13 -0.41 -0.08
fviz_mca_var(res.mca,
repel = TRUE, # Avoid text overlapping (slow)
ggtheme = theme_minimal())

fviz_mca_var(res.mca, col.var="black", shape.var = 15,
repel = TRUE)

head(var$cos2, 4)
##              Dim 1      Dim 2      Dim 3       Dim 4       Dim 5
## Nausea_n 0.2562007 0.05280258 0.25274850 0.004084375 0.019466197
## Nausea_y 0.2562007 0.05280258 0.25274850 0.004084375 0.019466197
## Vomit_n  0.3442016 0.25116039 0.01070855 0.112294813 0.004126898
## Vomit_y  0.3442016 0.25116039 0.01070855 0.112294813 0.004126898
# Color by cos2 values: quality on the factor map
fviz_mca_var(res.mca, col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE, # Avoid text overlapping
ggtheme = theme_minimal())

# Change the transparency by cos2 values
fviz_mca_var(res.mca, alpha.var="cos2",
repel = TRUE,
ggtheme = theme_minimal())

library("corrplot")
corrplot(var$cos2, is.corr=FALSE)

# Cos2 of variable categories on Dim.1 and Dim.2
fviz_cos2(res.mca, choice = "var", axes = 1:2)

head(round(var$contrib,2), 4)
##          Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Nausea_n  1.52  0.81  4.67  0.08  0.49
## Nausea_y  5.43  2.91 16.73  0.30  1.76
## Vomit_n   3.73  7.07  0.36  4.26  0.19
## Vomit_y   5.60 10.61  0.54  6.39  0.29
# Contributions of rows to dimension 1
fviz_contrib(res.mca, choice = "var", axes = 1, top = 15)

# Contributions of rows to dimension 2
fviz_contrib(res.mca, choice = "var", axes = 2, top = 15)

# Total contribution to dimension 1 and 2
fviz_contrib(res.mca, choice = "var", axes = 1:2, top = 15)

fviz_mca_var(res.mca, col.var = "contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE, # avoid text overlapping (slow)
             ggtheme = theme_minimal()
)

# Change the transparency by contrib values
fviz_mca_var(res.mca, alpha.var="contrib",
repel = TRUE, ggtheme = theme_minimal())

ind <- get_mca_ind(res.mca)
ind
## Multiple Correspondence Analysis Results for individuals
##  ===================================================
##   Name       Description                       
## 1 "$coord"   "Coordinates for the individuals" 
## 2 "$cos2"    "Cos2 for the individuals"        
## 3 "$contrib" "contributions of the individuals"
# Coordinates of column points
head(ind$coord)
##        Dim 1       Dim 2       Dim 3       Dim 4       Dim 5
## 1 -0.4525811 -0.26415072  0.17151614  0.01369348 -0.11696806
## 2  0.8361700 -0.03193457 -0.07208249 -0.08550351  0.51978710
## 3 -0.4481892  0.13538726 -0.22484048 -0.14170168 -0.05004753
## 4  0.8803694 -0.08536230 -0.02052044 -0.07275873 -0.22935022
## 5 -0.4481892  0.13538726 -0.22484048 -0.14170168 -0.05004753
## 6 -0.3594324 -0.43604390 -1.20932223  1.72464616  0.04348157
# Quality of representation
head(ind$cos2)
##        Dim 1        Dim 2        Dim 3        Dim 4        Dim 5
## 1 0.34652591 0.1180447167 0.0497683175 0.0003172275 0.0231460846
## 2 0.55589562 0.0008108236 0.0041310808 0.0058126211 0.2148103098
## 3 0.54813888 0.0500176790 0.1379484860 0.0547920948 0.0068349171
## 4 0.74773962 0.0070299584 0.0004062504 0.0051072923 0.0507479873
## 5 0.54813888 0.0500176790 0.1379484860 0.0547920948 0.0068349171
## 6 0.02485357 0.0365775483 0.2813443706 0.5722083217 0.0003637178
# Contributions
head(ind$contrib)
##      Dim 1      Dim 2        Dim 3        Dim 4      Dim 5
## 1 1.110927 0.98238297  0.498254685  0.003555817 0.31554778
## 2 3.792117 0.01435818  0.088003703  0.138637089 6.23134138
## 3 1.089470 0.25806722  0.856229950  0.380768961 0.05776914
## 4 4.203611 0.10259105  0.007132055  0.100387990 1.21319013
## 5 1.089470 0.25806722  0.856229950  0.380768961 0.05776914
## 6 0.700692 2.67693398 24.769968729 56.404214518 0.04360547
fviz_mca_ind(res.mca, col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE, # Avoid text overlapping (slow if many points)
ggtheme = theme_minimal())

# Cos2 of individuals
fviz_cos2(res.mca, choice = "ind", axes = 1:2, top = 20)

# Contribution of individuals to the dimensions
fviz_contrib(res.mca, choice = "ind", axes = 1:2, top = 20)

fviz_mca_ind(res.mca,
label = "none", # hide individual labels
habillage = "Vomiting", # color by groups
palette = c("#00AFBB", "#E7B800"),
addEllipses = TRUE, ellipse.type = "confidence",
ggtheme = theme_minimal())

# habillage = index of the column to be used as grouping variable
fviz_mca_ind(res.mca, habillage = 2, addEllipses = TRUE)

fviz_mca_ind(res.mca, habillage = poison$Vomiting, addEllipses = TRUE)

fviz_ellipses(res.mca, c("Vomiting", "Fever"),
geom = "point")
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <]8;;https://github.com/kassambara/factoextra/issueshttps://github.com/kassambara/factoextra/issues]8;;>.

fviz_ellipses(res.mca, 1:4, geom = "point")

res.desc <- dimdesc(res.mca, axes = c(1,2))
# Description of dimension 1
res.desc[[1]]
## 
## Link between the variable and the categorical variable (1-way anova)
## =============================================
##                   R2      p.value
## Abdominals 0.8451157 4.055640e-23
## Diarrhae   0.7994680 3.910776e-20
## Fever      0.7846788 2.600566e-19
## Mayo       0.3829749 4.756234e-07
## Vomiting   0.3442016 2.510738e-06
## Nausea     0.2562007 8.062777e-05
## Cheese     0.1944181 7.534834e-04
## 
## Link between variable and the categories of the categorical variables
## ================================================================
##                       Estimate      p.value
## Abdominals=Abdo_n    0.5671866 4.055640e-23
## Diarrhae=Diarrhea_n  0.5380920 3.910776e-20
## Fever=Fever_n        0.5330918 2.600566e-19
## Mayo=Mayo_n          0.4644981 4.756234e-07
## Vomiting=Vomit_n     0.3466915 2.510738e-06
## Nausea=Nausea_n      0.3547892 8.062777e-05
## Cheese=Cheese_n      0.3830043 7.534834e-04
## Cheese=Cheese_y     -0.3830043 7.534834e-04
## Nausea=Nausea_y     -0.3547892 8.062777e-05
## Vomiting=Vomit_y    -0.3466915 2.510738e-06
## Mayo=Mayo_y         -0.4644981 4.756234e-07
## Fever=Fever_y       -0.5330918 2.600566e-19
## Diarrhae=Diarrhea_y -0.5380920 3.910776e-20
## Abdominals=Abdo_y   -0.5671866 4.055640e-23
# Description of dimension 2
res.desc[[2]]
## 
## Link between the variable and the categorical variable (1-way anova)
## =============================================
##                  R2      p.value
## Courgette 0.4464145 2.500166e-08
## Potato    0.3957543 2.690662e-07
## Vomiting  0.2511604 9.728027e-05
## Icecream  0.1409011 4.743927e-03
## 
## Link between variable and the categories of the categorical variables
## ================================================================
##                       Estimate      p.value
## Courgette=Courg_n    0.4176013 2.500166e-08
## Potato=Potato_y      0.4977523 2.690662e-07
## Vomiting=Vomit_y     0.1838104 9.728027e-05
## Icecream=Icecream_n  0.2597197 4.743927e-03
## Icecream=Icecream_y -0.2597197 4.743927e-03
## Vomiting=Vomit_n    -0.1838104 9.728027e-05
## Potato=Potato_n     -0.4977523 2.690662e-07
## Courgette=Courg_y   -0.4176013 2.500166e-08
# Biplot of individuals and variable categories
fviz_mca_biplot(res.mca, repel = TRUE,
ggtheme = theme_minimal())

fviz_mca_var(res.mca, choice = "mca.cor",
repel = TRUE)

fviz_mca_var(res.mca, repel = TRUE,
ggtheme= theme_minimal())

fviz_mca_ind(res.mca,
label = "ind.sup", #Show the label of ind.sup only
ggtheme = theme_minimal())

# Visualize variable categories with cos2 >= 0.4
fviz_mca_var(res.mca, select.var = list(cos2 = 0.4))

# Top 10 active variables with the highest cos2
fviz_mca_var(res.mca, select.var= list(cos2 = 10))

# Select by names
name <- list(name = c("Fever_n", "Abdo_y", "Diarrhea_n",
"Fever_Y", "Vomit_y", "Vomit_n"))
fviz_mca_var(res.mca, select.var = name)

# top 5 contributing individuals and variable categories
fviz_mca_biplot(res.mca, select.ind = list(contrib = 5),
select.var = list(contrib = 5),
ggtheme = theme_minimal())

fviz_eig(res.mca) # Scree plot

fviz_mca_biplot(res.mca) # Biplot of rows and columns