Capítulo 6: Análisis factorial de datos mixtos en R

#install.packages(c("FactoMineR", "factoextra"))
library("FactoMineR")
## Warning: package 'FactoMineR' was built under R version 4.2.3
library("factoextra")
## Warning: package 'factoextra' was built under R version 4.2.3
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Load the `wine` data set that ships with FactoMineR.
library(FactoMineR)
data("wine")
# Keep the two factors (Label, Soil) and six numeric scores. Selecting by
# name picks the same columns, in the same order, as positions
# 1, 2, 16, 22, 29, 28, 30 and 31 of `wine`.
df <- wine[, c(
  "Label", "Soil", "Plante", "Acidity",
  "Harmony", "Intensity", "Overall.quality", "Typical"
)]
# Preview the first four wines on the first seven retained columns.
head(df[, seq_len(7)], n = 4)
##           Label Soil Plante Acidity Harmony Intensity Overall.quality
## 2EL      Saumur Env1  2.000   2.107   3.143     2.857           3.393
## 1CHA     Saumur Env1  2.000   2.107   2.964     2.893           3.214
## 1FON Bourgueuil Env1  1.750   2.179   3.143     3.074           3.536
## 1VAU     Chinon Env2  2.304   3.179   2.038     2.462           2.464
str(df)
## 'data.frame':    21 obs. of  8 variables:
##  $ Label          : Factor w/ 3 levels "Saumur","Bourgueuil",..: 1 1 2 3 1 2 2 1 3 1 ...
##  $ Soil           : Factor w/ 4 levels "Reference","Env1",..: 2 2 2 3 1 1 1 2 2 3 ...
##  $ Plante         : num  2 2 1.75 2.3 1.76 ...
##  $ Acidity        : num  2.11 2.11 2.18 3.18 2.57 ...
##  $ Harmony        : num  3.14 2.96 3.14 2.04 3.64 ...
##  $ Intensity      : num  2.86 2.89 3.07 2.46 3.64 ...
##  $ Overall.quality: num  3.39 3.21 3.54 2.46 3.74 ...
##  $ Typical        : num  3.25 3.04 3.18 2.25 3.44 ...
# Factor analysis of mixed data on the selected wine columns.
# graph = FALSE: the figures are drawn further down with factoextra.
library("FactoMineR")
res.famd <- FAMD(
  df,
  graph = FALSE
)
# Show which result components are available.
print(res.famd)
## *The results are available in the following objects:
## 
##   name          description                             
## 1 "$eig"        "eigenvalues and inertia"               
## 2 "$var"        "Results for the variables"             
## 3 "$ind"        "results for the individuals"           
## 4 "$quali.var"  "Results for the qualitative variables" 
## 5 "$quanti.var" "Results for the quantitative variables"
library("factoextra")
eig.val <- get_eigenvalue(res.famd)
head(eig.val) 
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  4.8315174        43.922886                    43.92289
## Dim.2  1.8568797        16.880724                    60.80361
## Dim.3  1.5824794        14.386176                    75.18979
## Dim.4  1.1491200        10.446546                    85.63633
## Dim.5  0.6518053         5.925503                    91.56183
fviz_screeplot(res.famd)

var <- get_famd_var(res.famd)
var
## FAMD results for variables 
##  ===================================================
##   Name       Description                      
## 1 "$coord"   "Coordinates"                    
## 2 "$cos2"    "Cos2, quality of representation"
## 3 "$contrib" "Contributions"
# Coordinates of variables
head(var$coord)
##                     Dim.1       Dim.2       Dim.3       Dim.4        Dim.5
## Plante          0.7344160 0.060551966 0.105902048 0.004011299 0.0010340559
## Acidity         0.1732738 0.491118153 0.126394029 0.115376784 0.0045862935
## Harmony         0.8943968 0.023628146 0.040124469 0.003653813 0.0086624633
## Intensity       0.6991811 0.134639254 0.065382234 0.023214984 0.0064730431
## Overall.quality 0.9115699 0.005246728 0.009336677 0.005445276 0.0007961880
## Typical         0.7808611 0.027094327 0.001549791 0.083446627 0.0005912942
# Cos2: quality of representation on the factor map
head(var$cos2)
##                      Dim.1        Dim.2        Dim.3        Dim.4        Dim.5
## Plante          0.53936692 3.666541e-03 1.121524e-02 1.609052e-05 1.069272e-06
## Acidity         0.03002381 2.411970e-01 1.597545e-02 1.331180e-02 2.103409e-05
## Harmony         0.79994566 5.582893e-04 1.609973e-03 1.335035e-05 7.503827e-05
## Intensity       0.48885427 1.812773e-02 4.274836e-03 5.389355e-04 4.190029e-05
## Overall.quality 0.83095973 2.752815e-05 8.717353e-05 2.965103e-05 6.339153e-07
## Typical         0.60974400 7.341026e-04 2.401853e-06 6.963340e-03 3.496288e-07
# Contributions to the  dimensions
head(var$contrib)
##                     Dim.1      Dim.2      Dim.3      Dim.4      Dim.5
## Plante          15.200526  3.2609526 6.69215972  0.3490757 0.15864490
## Acidity          3.586323 26.4485720 7.98708850 10.0404466 0.70362936
## Harmony         18.511716  1.2724651 2.53554453  0.3179662 1.32899551
## Intensity       14.471254  7.2508336 4.13163258  2.0202401 0.99309457
## Overall.quality 18.867156  0.2825562 0.59000304  0.4738648 0.12215119
## Typical         16.161818  1.4591321 0.09793437  7.2617850 0.09071638
# Map of the variables; repel = TRUE avoids overlapping labels.
fviz_famd_var(
  res.famd,
  repel = TRUE
)

# Contributions of the variables to dimension 1
fviz_contrib(res.famd, choice = "var", axes = 1)

# Contributions of the variables to dimension 2
fviz_contrib(res.famd, choice = "var", axes = 2)

quanti.var <- get_famd_var(res.famd, "quanti.var")
quanti.var 
## FAMD results for quantitative variables 
##  ===================================================
##   Name       Description                      
## 1 "$coord"   "Coordinates"                    
## 2 "$cos2"    "Cos2, quality of representation"
## 3 "$contrib" "Contributions"
# Quantitative variables, all drawn in black.
fviz_famd_var(
  res.famd, "quanti.var",
  col.var = "black",
  repel = TRUE
)

# Quantitative variables coloured by contribution, on a three-colour gradient.
fviz_famd_var(
  res.famd, "quanti.var",
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Quantitative variables coloured by cos2 (quality on the factor map).
fviz_famd_var(
  res.famd, "quanti.var",
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

quali.var <- get_famd_var(res.famd, "quali.var")
quali.var 
## FAMD results for qualitative variable categories 
##  ===================================================
##   Name       Description                      
## 1 "$coord"   "Coordinates"                    
## 2 "$cos2"    "Cos2, quality of representation"
## 3 "$contrib" "Contributions"
# Categories of the qualitative variables, coloured by contribution.
fviz_famd_var(
  res.famd, "quali.var",
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)

ind <- get_famd_ind(res.famd)
ind
## FAMD results for individuals 
##  ===================================================
##   Name       Description                      
## 1 "$coord"   "Coordinates"                    
## 2 "$cos2"    "Cos2, quality of representation"
## 3 "$contrib" "Contributions"
# Individuals coloured by cos2 on a three-colour gradient.
fviz_famd_ind(
  res.famd,
  col.ind = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Individuals coloured by the "Label" factor, with one confidence ellipse
# per group.
# NOTE(review): this applies the MFA-named helper to a FAMD result. It is
# kept as written; confirm whether fviz_famd_ind() was intended.
fviz_mfa_ind(
  res.famd,
  habillage = "Label",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  addEllipses = TRUE,
  ellipse.type = "confidence",
  repel = TRUE
)

# Ellipses for the groups defined by "Label" and by "Soil".
fviz_ellipses(
  res.famd,
  c("Label", "Soil"),
  repel = TRUE
)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.

fviz_ellipses(res.famd, 1:2, geom = "point")

Capítulo 7: Análisis de factores múltiples

#install.packages(c("FactoMineR", "factoextra"))

library("FactoMineR")
library("factoextra")
library("FactoMineR")
data(wine)
colnames(wine)
##  [1] "Label"                         "Soil"                         
##  [3] "Odor.Intensity.before.shaking" "Aroma.quality.before.shaking" 
##  [5] "Fruity.before.shaking"         "Flower.before.shaking"        
##  [7] "Spice.before.shaking"          "Visual.intensity"             
##  [9] "Nuance"                        "Surface.feeling"              
## [11] "Odor.Intensity"                "Quality.of.odour"             
## [13] "Fruity"                        "Flower"                       
## [15] "Spice"                         "Plante"                       
## [17] "Phenolic"                      "Aroma.intensity"              
## [19] "Aroma.persistency"             "Aroma.quality"                
## [21] "Attack.intensity"              "Acidity"                      
## [23] "Astringency"                   "Alcohol"                      
## [25] "Balance"                       "Smooth"                       
## [27] "Bitterness"                    "Intensity"                    
## [29] "Harmony"                       "Overall.quality"              
## [31] "Typical"
library("FactoMineR")
data("wine")
# Multiple factor analysis of the 31 wine columns, split into six
# consecutive groups (2 + 5 + 3 + 10 + 9 + 2 columns).
res.mfa <- MFA(
  wine,
  # Number of columns in each group, in column order.
  group = c(2, 5, 3, 10, 9, 2),
  # One type code per group: "n" for the first group (Label, Soil),
  # "s" for the five numeric groups.
  type = c("n", rep("s", 5)),
  name.group = c(
    "origin", "odor", "visual",
    "odor.after.shaking", "taste", "overall"
  ),
  # Groups 1 ("origin") and 6 ("overall") are supplementary.
  num.group.sup = c(1, 6),
  graph = FALSE
)

# Show which result components are available.
print(res.mfa)
## **Results of the Multiple Factor Analysis (MFA)**
## The analysis was performed on 21 individuals, described by 31 variables
## *Results are available in the following objects :
## 
##    name                 description                                           
## 1  "$eig"               "eigenvalues"                                         
## 2  "$separate.analyses" "separate analyses for each group of variables"       
## 3  "$group"             "results for all the groups"                          
## 4  "$partial.axes"      "results for the partial axes"                        
## 5  "$inertia.ratio"     "inertia ratio"                                       
## 6  "$ind"               "results for the individuals"                         
## 7  "$quanti.var"        "results for the quantitative variables"              
## 8  "$quanti.var.sup"    "results for the quantitative supplementary variables"
## 9  "$quali.var.sup"     "results for the categorical supplementary variables" 
## 10 "$summary.quanti"    "summary for the quantitative variables"              
## 11 "$summary.quali"     "summary for the categorical variables"               
## 12 "$global.pca"        "results for the global PCA"
library("factoextra")
eig.val <- get_eigenvalue(res.mfa)
head(eig.val)
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  3.4619504        49.378382                    49.37838
## Dim.2  1.3667683        19.494446                    68.87283
## Dim.3  0.6154291         8.777969                    77.65080
## Dim.4  0.3721997         5.308747                    82.95954
## Dim.5  0.2703825         3.856511                    86.81605
## Dim.6  0.2024033         2.886912                    89.70297
fviz_screeplot(res.mfa)

group <- get_mfa_var(res.mfa, "group")
group
## Multiple Factor Analysis results for variable groups 
##  ===================================================
##   Name           Description                                          
## 1 "$coord"       "Coordinates"                                        
## 2 "$cos2"        "Cos2, quality of representation"                    
## 3 "$contrib"     "Contributions"                                      
## 4 "$correlation" "Correlation between groups and principal dimensions"
# Coordinates of groups
head(group$coord)
##                        Dim.1      Dim.2      Dim.3      Dim.4      Dim.5
## odor               0.7820738 0.61977283 0.37353451 0.17260092 0.08553276
## visual             0.8546846 0.04014481 0.01438360 0.04550736 0.02966750
## odor.after.shaking 0.9247734 0.46892047 0.18009116 0.10139051 0.11589439
## taste              0.9004187 0.23793016 0.04741982 0.05270088 0.03928784
# Cos2: quality of representation on the factor map
head(group$cos2)
##                        Dim.1       Dim.2        Dim.3       Dim.4        Dim.5
## odor               0.3799491 0.238613517 0.0866745169 0.018506155 0.0045445922
## visual             0.7284016 0.001607007 0.0002062976 0.002065011 0.0008776492
## odor.after.shaking 0.6245535 0.160582210 0.0236855692 0.007507471 0.0098089810
## taste              0.7222292 0.050429542 0.0020031144 0.002474125 0.0013749986
# Contributions to the dimensions
head(group$contrib)
##                       Dim.1     Dim.2     Dim.3    Dim.4    Dim.5
## odor               22.59055 45.345861 60.694972 46.37321 31.63399
## visual             24.68795  2.937207  2.337166 12.22660 10.97242
## odor.after.shaking 26.71250 34.308703 29.262699 27.24089 42.86313
## taste              26.00900 17.408230  7.705163 14.15930 14.53047
# Map of the groups of variables
fviz_mfa_var(res.mfa, "group")

# Contributions of the groups to dimension 1
fviz_contrib(res.mfa, choice = "group", axes = 1)

# Contributions of the groups to dimension 2
fviz_contrib(res.mfa, choice = "group", axes = 2)

quanti.var <- get_mfa_var(res.mfa, "quanti.var")
quanti.var
## Multiple Factor Analysis results for quantitative variables 
##  ===================================================
##   Name       Description                      
## 1 "$coord"   "Coordinates"                    
## 2 "$cos2"    "Cos2, quality of representation"
## 3 "$contrib" "Contributions"
# Coordinates
head(quanti.var$coord)
##                                   Dim.1       Dim.2       Dim.3      Dim.4
## Odor.Intensity.before.shaking 0.5908036  0.66723783 -0.02326175  0.3287015
## Aroma.quality.before.shaking  0.8352510 -0.07539908 -0.35417877  0.1414425
## Fruity.before.shaking         0.7160259 -0.15069626 -0.53748761  0.2517063
## Flower.before.shaking         0.4387181 -0.40937751  0.63731284  0.4029075
## Spice.before.shaking          0.0380525  0.86501993  0.12795122 -0.1822298
## Visual.intensity              0.8811873  0.23833245  0.14099033 -0.2128871
##                                     Dim.5
## Odor.Intensity.before.shaking  0.05786231
## Aroma.quality.before.shaking   0.04992114
## Fruity.before.shaking          0.18981578
## Flower.before.shaking          0.12200773
## Spice.before.shaking           0.36741971
## Visual.intensity              -0.17676282
# Cos2: quality on the factor map
head(quanti.var$cos2)
##                                     Dim.1       Dim.2       Dim.3      Dim.4
## Odor.Intensity.before.shaking 0.349048863 0.445206325 0.000541109 0.10804466
## Aroma.quality.before.shaking  0.697644264 0.005685021 0.125442602 0.02000597
## Fruity.before.shaking         0.512693037 0.022709361 0.288892928 0.06335608
## Flower.before.shaking         0.192473567 0.167589944 0.406167661 0.16233443
## Spice.before.shaking          0.001447992 0.748259477 0.016371514 0.03320769
## Visual.intensity              0.776491025 0.056802358 0.019878273 0.04532093
##                                     Dim.5
## Odor.Intensity.before.shaking 0.003348047
## Aroma.quality.before.shaking  0.002492121
## Fruity.before.shaking         0.036030029
## Flower.before.shaking         0.014885886
## Spice.before.shaking          0.134997242
## Visual.intensity              0.031245096
# Contributions to the dimensions
head(quanti.var$contrib)
##                                    Dim.1      Dim.2       Dim.3     Dim.4
## Odor.Intensity.before.shaking 4.49733206 14.5296787  0.03921898 12.948424
## Aroma.quality.before.shaking  8.98882147  0.1855354  9.09194110  2.397581
## Fruity.before.shaking         6.60581103  0.7411389 20.93864000  7.592798
## Flower.before.shaking         2.47993227  5.4694372 29.43858302 19.454686
## Spice.before.shaking          0.01865671 24.4200703  1.18658923  3.979718
## Visual.intensity              7.91221841  1.4660681  1.13941864  4.295418
##                                    Dim.5
## Odor.Intensity.before.shaking  0.5523351
## Aroma.quality.before.shaking   0.4111309
## Fruity.before.shaking          5.9439566
## Flower.before.shaking          2.4557588
## Spice.before.shaking          22.2708049
## Visual.intensity               4.0764862
# Quantitative variables with the "jco" palette; supplementary variables
# are drawn in violet.
fviz_mfa_var(
  res.mfa, "quanti.var",
  palette = "jco",
  col.var.sup = "violet",
  repel = TRUE
)

# Same map drawn with points and text labels, legend at the bottom.
fviz_mfa_var(
  res.mfa, "quanti.var",
  palette = "jco",
  col.var.sup = "violet",
  repel = TRUE,
  geom = c("point", "text"),
  legend = "bottom"
)

# Contributions to dimension 1
fviz_contrib(
  res.mfa,
  choice = "quanti.var",
  axes = 1,
  top = 20,
  palette = "jco"
)

# Contributions to dimension 2
fviz_contrib(
  res.mfa,
  choice = "quanti.var",
  axes = 2,
  top = 20,
  palette = "jco"
)

# Quantitative variables coloured by contribution
fviz_mfa_var(
  res.mfa, "quanti.var",
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.var.sup = "violet",
  repel = TRUE,
  geom = c("point", "text")
)

# Color by cos2 values: quality on the factor map
fviz_mfa_var(
  res.mfa,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.var.sup = "violet",
  repel = TRUE
)

# Cos2 of the quantitative variables on dimension 1
fviz_cos2(res.mfa, choice = "quanti.var", axes = 1)

ind <- get_mfa_ind(res.mfa)
ind
## Multiple Factor Analysis results for individuals 
##  ===================================================
##   Name                      Description                      
## 1 "$coord"                  "Coordinates"                    
## 2 "$cos2"                   "Cos2, quality of representation"
## 3 "$contrib"                "Contributions"                  
## 4 "$coord.partiel"          "Partial coordinates"            
## 5 "$within.inertia"         "Within inertia"                 
## 6 "$within.partial.inertia" "Within partial inertia"
# Individuals coloured by cos2 on a three-colour gradient.
fviz_mfa_ind(
  res.mfa,
  col.ind = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Individuals coloured by the "Label" factor, with one confidence ellipse
# per group.
fviz_mfa_ind(
  res.mfa,
  habillage = "Label",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  addEllipses = TRUE,
  ellipse.type = "confidence",
  repel = TRUE
)

# Ellipses for the groups defined by "Label" and by "Soil".
fviz_ellipses(
  res.mfa,
  c("Label", "Soil"),
  repel = TRUE
)

# Partial points for every individual
fviz_mfa_ind(res.mfa, partial = "all")

# Partial points for three selected wines only
fviz_mfa_ind(
  res.mfa,
  partial = c("1DAM", "1VAU", "2ING")
)

# Partial axes
fviz_mfa_axes(res.mfa)

Capítulo 8: Agrupación jerárquica en componentes principales

Caso 1: Variables continuas

Caso 2: Agrupación en datos categóricos

Caso 3: Agrupación en clústeres en datos mixtos

#install.packages(c("FactoMineR", "factoextra"))
library(factoextra)
library(FactoMineR)
library(FactoMineR)
# PCA of USArrests, keeping three components (ncp = 3).
res.pca <- PCA(
  USArrests,
  ncp = 3,
  graph = FALSE
)
# Hierarchical clustering on the principal components of that PCA.
res.hcpc <- HCPC(
  res.pca,
  graph = FALSE
)
# Dendrogram of the clustering.
fviz_dend(
  res.hcpc,
  cex = 0.7,                  # Label size
  palette = "jco",            # Color palette, see ?ggpubr::ggpar
  rect = TRUE,                # Add rectangle around groups
  rect_fill = TRUE,
  rect_border = "jco",        # Rectangle color
  labels_track_height = 0.8   # Augment the room for labels
)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.

# Clusters drawn on the factor map.
fviz_cluster(
  res.hcpc,
  main = "Factor map",
  palette = "jco",          # Color palette, see ?ggpubr::ggpar
  ggtheme = theme_minimal(),
  show.clust.cent = TRUE,   # Show cluster centers
  repel = TRUE              # Avoid label overlapping
)

# Principal components + tree
plot(res.hcpc, choice = "3D.map")

# First ten rows of the original data with the assigned cluster (`clust`).
head(res.hcpc$data.clust, n = 10)
##             Murder Assault UrbanPop Rape clust
## Alabama       13.2     236       58 21.2     3
## Alaska        10.0     263       48 44.5     4
## Arizona        8.1     294       80 31.0     4
## Arkansas       8.8     190       50 19.5     3
## California     9.0     276       91 40.6     4
## Colorado       7.9     204       78 38.7     4
## Connecticut    3.3     110       77 11.1     2
## Delaware       5.9     238       72 15.8     2
## Florida       15.4     335       80 31.9     4
## Georgia       17.4     211       60 25.8     3
res.hcpc$desc.var$quanti
## $`1`
##             v.test Mean in category Overall mean sd in category Overall sd
## UrbanPop -3.898420         52.07692       65.540       9.691087  14.329285
## Murder   -4.030171          3.60000        7.788       2.269870   4.311735
## Rape     -4.052061         12.17692       21.232       3.130779   9.272248
## Assault  -4.638172         78.53846      170.760      24.700095  82.500075
##               p.value
## UrbanPop 9.682222e-05
## Murder   5.573624e-05
## Rape     5.076842e-05
## Assault  3.515038e-06
## 
## $`2`
##             v.test Mean in category Overall mean sd in category Overall sd
## UrbanPop  2.793185         73.87500       65.540       8.652131  14.329285
## Murder   -2.374121          5.65625        7.788       1.594902   4.311735
##              p.value
## UrbanPop 0.005219187
## Murder   0.017590794
## 
## $`3`
##             v.test Mean in category Overall mean sd in category Overall sd
## Murder    4.357187          13.9375        7.788       2.433587   4.311735
## Assault   2.698255         243.6250      170.760      46.540137  82.500075
## UrbanPop -2.513667          53.7500       65.540       7.529110  14.329285
##               p.value
## Murder   1.317449e-05
## Assault  6.970399e-03
## UrbanPop 1.194833e-02
## 
## $`4`
##            v.test Mean in category Overall mean sd in category Overall sd
## Rape     5.352124         33.19231       21.232       6.996643   9.272248
## Assault  4.356682        257.38462      170.760      41.850537  82.500075
## UrbanPop 3.028838         76.00000       65.540      10.347798  14.329285
## Murder   2.913295         10.81538        7.788       2.001863   4.311735
##               p.value
## Rape     8.692769e-08
## Assault  1.320491e-05
## UrbanPop 2.454964e-03
## Murder   3.576369e-03
res.hcpc$desc.axes$quanti
## $`1`
##          v.test Mean in category  Overall mean sd in category Overall sd
## Dim.1 -5.175764        -1.964502 -5.639933e-16      0.6192556   1.574878
##            p.value
## Dim.1 2.269806e-07
## 
## $`2`
##         v.test Mean in category  Overall mean sd in category Overall sd
## Dim.2 3.585635        0.7428712 -5.369316e-16      0.6137936  0.9948694
##            p.value
## Dim.2 0.0003362596
## 
## $`3`
##          v.test Mean in category  Overall mean sd in category Overall sd
## Dim.1  2.058338        1.0610731 -5.639933e-16      0.5146613  1.5748783
## Dim.3  2.028887        0.3965588  3.535366e-17      0.3714503  0.5971291
## Dim.2 -4.536594       -1.4773302 -5.369316e-16      0.5750284  0.9948694
##            p.value
## Dim.1 3.955769e-02
## Dim.3 4.246985e-02
## Dim.2 5.717010e-06
## 
## $`4`
##         v.test Mean in category  Overall mean sd in category Overall sd
## Dim.1 4.986474         1.892656 -5.639933e-16      0.6126035   1.574878
##            p.value
## Dim.1 6.149115e-07
res.hcpc$desc.ind$para
## Cluster: 1
##         Idaho  South Dakota         Maine          Iowa New Hampshire 
##     0.3674381     0.4993032     0.5012072     0.5533105     0.5891145 
## ------------------------------------------------------------ 
## Cluster: 2
##         Ohio     Oklahoma Pennsylvania       Kansas      Indiana 
##    0.2796100    0.5047549    0.5088363    0.6039091    0.7100820 
## ------------------------------------------------------------ 
## Cluster: 3
##        Alabama South Carolina        Georgia      Tennessee      Louisiana 
##      0.3553460      0.5335189      0.6136865      0.8522640      0.8780872 
## ------------------------------------------------------------ 
## Cluster: 4
##   Michigan    Arizona New Mexico   Maryland      Texas 
##  0.3246254  0.4532480  0.5176322  0.9013514  0.9239792

Caso de variables categóricas

# Loading data
library(FactoMineR)
data(tea)
# Performing MCA
res.mca <- MCA(
  tea,
  ncp = 20,           # Number of components kept
  quanti.sup = 19,    # Quantitative supplementary variables
  quali.sup = 20:36,  # Qualitative supplementary variables
  graph = FALSE
)
# Hierarchical clustering on the MCA components. Without this step
# `res.hcpc` would still hold the clustering of the USArrests PCA, and the
# plots and descriptions below would not describe the tea data.
# max = 3 caps the suggested partition at three clusters, the number seen in
# the recorded output below.
# NOTE(review): confirm that max = 3 matches the original run.
res.hcpc <- HCPC(res.mca, graph = FALSE, max = 3)
# Dendrogram
fviz_dend(res.hcpc, show_labels = FALSE)

# Individuals factor map
fviz_cluster(res.hcpc, geom = "point", main = "Factor map")

# Description by variables
res.hcpc$desc.var$test.chi2
##                    p.value df
## where         8.465616e-79  4
## how           3.144675e-47  4
## price         1.862462e-28 10
## tearoom       9.624188e-19  2
## pub           8.539893e-10  2
## friends       6.137618e-08  2
## resto         3.537876e-07  2
## How           3.616532e-06  6
## Tea           1.778330e-03  4
## sex           1.789593e-03  2
## frequency     1.973274e-03  6
## work          3.052988e-03  2
## tea.time      3.679599e-03  2
## lunch         1.052478e-02  2
## dinner        2.234313e-02  2
## always        3.600913e-02  2
## sugar         3.685785e-02  2
## sophisticated 4.077297e-02  2
# Description by variable categories
res.hcpc$desc.var$category
## $`1`
##                               Cla/Mod   Mod/Cla    Global      p.value
## where=chain store           85.937500 93.750000 64.000000 2.094419e-40
## how=tea bag                 84.117647 81.250000 56.666667 1.478564e-25
## tearoom=Not.tearoom         70.661157 97.159091 80.666667 1.082077e-18
## price=p_branded             83.157895 44.886364 31.666667 1.631861e-09
## pub=Not.pub                 67.088608 90.340909 79.000000 1.249296e-08
## friends=Not.friends         76.923077 45.454545 34.666667 2.177180e-06
## resto=Not.resto             64.705882 81.250000 73.666667 4.546462e-04
## price=p_private label       90.476190 10.795455  7.000000 1.343844e-03
## tea.time=Not.tea time       67.938931 50.568182 43.666667 4.174032e-03
## How=alone                   64.102564 71.022727 65.000000 9.868387e-03
## work=Not.work               63.380282 76.704545 71.000000 1.036429e-02
## sugar=sugar                 66.206897 54.545455 48.333333 1.066744e-02
## always=Not.always           63.959391 71.590909 65.666667 1.079912e-02
## price=p_unknown             91.666667  6.250000  4.000000 1.559798e-02
## frequency=1 to 2/week       75.000000 18.750000 14.666667 1.649092e-02
## frequency=1/day             68.421053 36.931818 31.666667 1.958790e-02
## age_Q=15-24                 68.478261 35.795455 30.666667 2.179803e-02
## price=p_cheap              100.000000  3.977273  2.333333 2.274539e-02
## lunch=Not.lunch             61.328125 89.204545 85.333333 2.681490e-02
## SPC=senior                  42.857143  8.522727 11.666667 4.813710e-02
## lunch=lunch                 43.181818 10.795455 14.666667 2.681490e-02
## always=always               48.543689 28.409091 34.333333 1.079912e-02
## sugar=No.sugar              51.612903 45.454545 51.666667 1.066744e-02
## work=work                   47.126437 23.295455 29.000000 1.036429e-02
## tea.time=tea time           51.479290 49.431818 56.333333 4.174032e-03
## How=lemon                   30.303030  5.681818 11.000000 5.943089e-04
## resto=resto                 41.772152 18.750000 26.333333 4.546462e-04
## How=other                    0.000000  0.000000  3.000000 2.952904e-04
## price=p_variable            44.642857 28.409091 37.333333 1.595638e-04
## frequency=+2/day            45.669291 32.954545 42.333333 9.872288e-05
## friends=friends             48.979592 54.545455 65.333333 2.177180e-06
## how=unpackaged              19.444444  3.977273 12.000000 4.328211e-07
## pub=pub                     26.984127  9.659091 21.000000 1.249296e-08
## where=tea shop               6.666667  1.136364 10.000000 4.770573e-10
## price=p_upscale             18.867925  5.681818 17.666667 9.472539e-11
## how=tea bag+unpackaged      27.659574 14.772727 31.333333 1.927326e-13
## tearoom=tearoom              8.620690  2.840909 19.333333 1.082077e-18
## where=chain store+tea shop  11.538462  5.113636 26.000000 1.133459e-23
##                                v.test
## where=chain store           13.307475
## how=tea bag                 10.449142
## tearoom=Not.tearoom          8.826287
## price=p_branded              6.030764
## pub=Not.pub                  5.692859
## friends=Not.friends          4.736242
## resto=Not.resto              3.506146
## price=p_private label        3.206448
## tea.time=Not.tea time        2.864701
## How=alone                    2.580407
## work=Not.work                2.563432
## sugar=sugar                  2.553408
## always=Not.always            2.549133
## price=p_unknown              2.418189
## frequency=1 to 2/week        2.397866
## frequency=1/day              2.334149
## age_Q=15-24                  2.293869
## price=p_cheap                2.277684
## lunch=Not.lunch              2.214202
## SPC=senior                  -1.976156
## lunch=lunch                 -2.214202
## always=always               -2.549133
## sugar=No.sugar              -2.553408
## work=work                   -2.563432
## tea.time=tea time           -2.864701
## How=lemon                   -3.434198
## resto=resto                 -3.506146
## How=other                   -3.619397
## price=p_variable            -3.775692
## frequency=+2/day            -3.893709
## friends=friends             -4.736242
## how=unpackaged              -5.053925
## pub=pub                     -5.692859
## where=tea shop              -6.226471
## price=p_upscale             -6.475138
## how=tea bag+unpackaged      -7.353743
## tearoom=tearoom             -8.826287
## where=chain store+tea shop -10.029275
## 
## $`2`
##                                         Cla/Mod Mod/Cla   Global      p.value
## where=tea shop                        90.000000  84.375 10.00000 3.703402e-30
## how=unpackaged                        66.666667  75.000 12.00000 5.346850e-20
## price=p_upscale                       49.056604  81.250 17.66667 2.392655e-17
## Tea=green                             27.272727  28.125 11.00000 4.436713e-03
## sophisticated=sophisticated           13.488372  90.625 71.66667 8.080918e-03
## sex=M                                 16.393443  62.500 40.66667 9.511848e-03
## resto=Not.resto                       13.122172  90.625 73.66667 1.587879e-02
## dinner=dinner                         28.571429  18.750  7.00000 1.874042e-02
## escape.exoticism=Not.escape-exoticism 14.556962  71.875 52.66667 2.177458e-02
## how=tea bag+unpackaged                 5.319149  15.625 31.33333 3.876799e-02
## escape.exoticism=escape-exoticism      6.338028  28.125 47.33333 2.177458e-02
## dinner=Not.dinner                      9.318996  81.250 93.00000 1.874042e-02
## resto=resto                            3.797468   9.375 26.33333 1.587879e-02
## Tea=Earl Grey                          7.253886  43.750 64.33333 1.314753e-02
## sex=F                                  6.741573  37.500 59.33333 9.511848e-03
## sophisticated=Not.sophisticated        3.529412   9.375 28.33333 8.080918e-03
## where=chain store+tea shop             2.564103   6.250 26.00000 3.794134e-03
## price=p_variable                       3.571429  12.500 37.33333 1.349384e-03
## age_Q=15-24                            2.173913   6.250 30.66667 6.100227e-04
## price=p_branded                        2.105263   6.250 31.66667 4.024289e-04
## how=tea bag                            1.764706   9.375 56.66667 5.537403e-09
## where=chain store                      1.562500   9.375 64.00000 1.664577e-11
##                                          v.test
## where=tea shop                        11.410559
## how=unpackaged                         9.156781
## price=p_upscale                        8.472945
## Tea=green                              2.845318
## sophisticated=sophisticated            2.648670
## sex=M                                  2.593088
## resto=Not.resto                        2.411690
## dinner=dinner                          2.350655
## escape.exoticism=Not.escape-exoticism  2.294277
## how=tea bag+unpackaged                -2.066641
## escape.exoticism=escape-exoticism     -2.294277
## dinner=Not.dinner                     -2.350655
## resto=resto                           -2.411690
## Tea=Earl Grey                         -2.479748
## sex=F                                 -2.593088
## sophisticated=Not.sophisticated       -2.648670
## where=chain store+tea shop            -2.894789
## price=p_variable                      -3.205264
## age_Q=15-24                           -3.427119
## price=p_branded                       -3.538486
## how=tea bag                           -5.830161
## where=chain store                     -6.732775
## 
## $`3`
##                               Cla/Mod    Mod/Cla   Global      p.value
## where=chain store+tea shop  85.897436  72.826087 26.00000 5.730651e-34
## how=tea bag+unpackaged      67.021277  68.478261 31.33333 1.382641e-19
## tearoom=tearoom             77.586207  48.913043 19.33333 1.252051e-16
## pub=pub                     63.492063  43.478261 21.00000 1.126679e-09
## friends=friends             41.836735  89.130435 65.33333 1.429181e-09
## price=p_variable            51.785714  63.043478 37.33333 1.572243e-09
## resto=resto                 54.430380  46.739130 26.33333 2.406386e-07
## How=other                  100.000000   9.782609  3.00000 1.807938e-05
## frequency=+2/day            41.732283  57.608696 42.33333 4.237330e-04
## tea.time=tea time           38.461538  70.652174 56.33333 8.453564e-04
## work=work                   44.827586  42.391304 29.00000 9.079377e-04
## sex=F                       37.078652  71.739130 59.33333 3.494245e-03
## lunch=lunch                 50.000000  23.913043 14.66667 3.917102e-03
## How=lemon                   51.515152  18.478261 11.00000 8.747530e-03
## sugar=No.sugar              36.129032  60.869565 51.66667 3.484061e-02
## home=home                   31.615120 100.000000 97.00000 3.506563e-02
## home=Not.home                0.000000   0.000000  3.00000 3.506563e-02
## sugar=sugar                 24.827586  39.130435 48.33333 3.484061e-02
## price=p_private label        9.523810   2.173913  7.00000 2.370629e-02
## how=unpackaged              13.888889   5.434783 12.00000 1.645107e-02
## How=alone                   25.128205  53.260870 65.00000 5.300881e-03
## lunch=Not.lunch             27.343750  76.086957 85.33333 3.917102e-03
## sex=M                       21.311475  28.260870 40.66667 3.494245e-03
## Tea=green                    9.090909   3.260870 11.00000 2.545816e-03
## frequency=1 to 2/week       11.363636   5.434783 14.66667 1.604219e-03
## work=Not.work               24.882629  57.608696 71.00000 9.079377e-04
## tea.time=Not.tea time       20.610687  29.347826 43.66667 8.453564e-04
## where=tea shop               3.333333   1.086957 10.00000 1.466234e-04
## price=p_branded             14.736842  15.217391 31.66667 2.746948e-05
## resto=Not.resto             22.171946  53.260870 73.66667 2.406386e-07
## friends=Not.friends          9.615385  10.869565 34.66667 1.429181e-09
## pub=Not.pub                 21.940928  56.521739 79.00000 1.126679e-09
## how=tea bag                 14.117647  26.086957 56.66667 1.082059e-12
## tearoom=Not.tearoom         19.421488  51.086957 80.66667 1.252051e-16
## where=chain store           12.500000  26.086957 64.00000 1.711522e-19
##                               v.test
## where=chain store+tea shop 12.150084
## how=tea bag+unpackaged      9.053653
## tearoom=tearoom             8.278053
## pub=pub                     6.090345
## friends=friends             6.052158
## price=p_variable            6.036775
## resto=resto                 5.164845
## How=other                   4.287379
## frequency=+2/day            3.524844
## tea.time=tea time           3.337500
## work=work                   3.317602
## sex=F                       2.920541
## lunch=lunch                 2.884762
## How=lemon                   2.621767
## sugar=No.sugar              2.110206
## home=home                   2.107600
## home=Not.home              -2.107600
## sugar=sugar                -2.110206
## price=p_private label      -2.261856
## how=unpackaged             -2.398752
## How=alone                  -2.788157
## lunch=Not.lunch            -2.884762
## sex=M                      -2.920541
## Tea=green                  -3.017842
## frequency=1 to 2/week      -3.155139
## work=Not.work              -3.317602
## tea.time=Not.tea time      -3.337500
## where=tea shop             -3.796720
## price=p_branded            -4.193490
## resto=Not.resto            -5.164845
## friends=Not.friends        -6.052158
## pub=Not.pub                -6.090345
## how=tea bag                -7.119644
## tearoom=Not.tearoom        -8.278053
## where=chain store          -9.030332
# Display the `desc.axes` component of the clustering result: the link
# between the cluster variable and the dimensions (Eta2, p-value), then a
# per-cluster table of the dimensions with their v.test and means.
# `[[ ]]` is used for exact-name extraction (no partial matching).
res.hcpc[["desc.axes"]]
## 
## Link between the cluster variable and the quantitative variables
## ================================================================
##              Eta2      P-value
## Dim.2  0.66509105 2.828937e-71
## Dim.1  0.63497903 1.009707e-65
## Dim.4  0.11231020 2.073924e-08
## Dim.14 0.03141943 8.732913e-03
## Dim.6  0.02358138 2.890373e-02
## 
## Description of each cluster by quantitative variables
## =====================================================
## $`1`
##           v.test Mean in category  Overall mean sd in category Overall sd
## Dim.6   2.647552       0.03433626 -2.721637e-17      0.2655618  0.2671712
## Dim.2  -7.796641      -0.13194656 -6.207419e-17      0.1813156  0.3486355
## Dim.1 -12.409741      -0.23196088 -1.678981e-16      0.2143767  0.3850642
##            p.value
## Dim.6 8.107689e-03
## Dim.2 6.357699e-15
## Dim.1 2.314001e-35
## 
## $`2`
##           v.test Mean in category  Overall mean sd in category Overall sd
## Dim.2  13.918285       0.81210870 -6.207419e-17      0.2340345  0.3486355
## Dim.4   4.350620       0.20342610  7.249699e-19      0.3700048  0.2793822
## Dim.14  2.909073       0.10749165  4.512196e-17      0.2161509  0.2207818
## Dim.13  2.341566       0.08930402  1.266782e-17      0.1606616  0.2278809
## Dim.3   2.208179       0.11087544 -4.161891e-17      0.2449710  0.3000159
## Dim.11 -2.234447      -0.08934293  6.504924e-17      0.2066708  0.2389094
##             p.value
## Dim.2  4.905356e-44
## Dim.4  1.357531e-05
## Dim.14 3.625025e-03
## Dim.13 1.920305e-02
## Dim.3  2.723180e-02
## Dim.11 2.545367e-02
## 
## $`3`
##          v.test Mean in category  Overall mean sd in category Overall sd
## Dim.1 13.485906       0.45155993 -1.678981e-16      0.2516544  0.3850642
## Dim.6 -2.221728      -0.05161581 -2.721637e-17      0.2488566  0.2671712
## Dim.4 -4.725270      -0.11479621  7.249699e-19      0.2924881  0.2793822
##            p.value
## Dim.1 1.893256e-41
## Dim.6 2.630166e-02
## Dim.4 2.298093e-06
# Display the `para` element of the `desc.ind` component: for each cluster,
# five individuals (by row id) with an associated numeric value.
# NOTE(review): per the FactoMineR HCPC docs these should be the individuals
# closest to each cluster's centre, with their distance to it; confirm.
res.hcpc[["desc.ind"]][["para"]]
## Cluster: 1
##       285       152       166       143        71 
## 0.5884476 0.6242123 0.6242123 0.6244176 0.6478185 
## ------------------------------------------------------------ 
## Cluster: 2
##        31        95        53       182       202 
## 0.6620553 0.7442013 0.7610437 0.7948663 0.8154826 
## ------------------------------------------------------------ 
## Cluster: 3
##       172        33       233        18        67 
## 0.7380497 0.7407711 0.7503006 0.7572188 0.7701598