library(FactoMineR) ## ACP, ACM, MFA y clustering
library(factoextra) ## visualizacion de graficos multivariados
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(Factoshiny) ## tableros dinamicos con graficos multivariados
## Warning: package 'Factoshiny' was built under R version 4.2.3
## Loading required package: shiny
## Loading required package: FactoInvestigate
## Warning: package 'FactoInvestigate' was built under R version 4.2.3

Conjunto de datos poison

# Load the `poison` example data set into the workspace and print a
# per-column summary (quartiles for numeric columns, counts for factors).
data("poison")
summary(object = poison)
##       Age             Time           Sick    Sex         Nausea      Vomiting 
##  Min.   : 4.00   Min.   : 0.00   Sick_n:17   F:28   Nausea_n:43   Vomit_n:33  
##  1st Qu.: 6.00   1st Qu.: 0.00   Sick_y:38   M:27   Nausea_y:12   Vomit_y:22  
##  Median : 8.00   Median :12.00                                                
##  Mean   :16.93   Mean   :10.16                                                
##  3rd Qu.:10.00   3rd Qu.:16.50                                                
##  Max.   :88.00   Max.   :22.00                                                
##   Abdominals     Fever          Diarrhae       Potato       Fish        Mayo   
##  Abdo_n:18   Fever_n:20   Diarrhea_n:20   Potato_n: 3   Fish_n: 1   Mayo_n:10  
##  Abdo_y:37   Fever_y:35   Diarrhea_y:35   Potato_y:52   Fish_y:54   Mayo_y:45  
##                                                                                
##                                                                                
##                                                                                
##                                                                                
##    Courgette       Cheese         Icecream 
##  Courg_n: 5   Cheese_n: 7   Icecream_n: 4  
##  Courg_y:50   Cheese_y:48   Icecream_y:51  
##                                            
##                                            
##                                            
## 

Diagramas de barras para las variables Sick y Sex

# Bar chart of the category counts of Sick.
barplot(height = table(poison[["Sick"]]))

# Bar chart of the category counts of Sex.
barplot(height = table(poison[["Sex"]]))

Análisis Factorial Múltiple (AFM)

# Multiple factor analysis (MFA) on `poison`. The columns are split, in
# order, into four groups of 2, 2, 5 and 6 variables: the first group is
# treated as standardized continuous ("s"), the other three as categorical
# ("n"). Groups 1 and 2 ("desc", "desc2") are supplementary, so only
# "symptom" and "eat" are active in building the axes.
AFM.Cualitativo <- MFA(
  base = poison,
  group = c(2, 2, 5, 6),
  type = c("s", "n", "n", "n"),
  name.group = c("desc", "desc2", "symptom", "eat"),
  num.group.sup = 1:2
)
## Warning: ggrepel: 15 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Warning: ggrepel: 33 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Warning: ggrepel: 32 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ANÁLISIS CLUSTER

En el análisis de cluster jerárquico, realizado con el método de Ward sobre los resultados encontrados en el AFM, se observa que se generan 7 grupos.

# Hierarchical clustering on the MFA result. nb.clust = -1 lets HCPC cut the
# tree automatically at its suggested level; graph = FALSE suppresses the
# built-in plots because the factoextra plots below are used instead.
cluster <- HCPC(res = AFM.Cualitativo, nb.clust = -1, graph = FALSE)
fviz_cluster(object = cluster, repel = TRUE) ## cluster visualization

fviz_dend(x = cluster) ## dendrogram
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Interpretación

Cluster 3: Con base en los valores test se observa que el cluster 3 presenta una mayor frecuencia de las categorías Sick_y, Abdo_y, Fever_y, Diarrhea_y, Vomit_y, Mayo_y, Cheese_y, Courg_y y Nausea_y, debido a que, al comparar la frecuencia de cada categoría dentro del cluster (Mod/Cla), ésta es significativamente mayor que la frecuencia de la categoría en todo el conjunto de datos (Global).

Cluster 5: Se observa que este grupo se caracteriza por frecuencias más grandes que el global en las categorías Sick_n, Abdo_n, Diarrhea_n, Fever_n, Vomit_n, Mayo_n y Nausea_n.

cluster[["desc.var"]] ## description of the clusters by the variables
## 
## Link between the cluster variable and the categorical variables (chi-square test)
## =================================================================================
##                 p.value df
## Potato     4.635492e-10  6
## Fish       4.635492e-10  6
## Cheese     7.462770e-09  6
## Courgette  4.862085e-08  6
## Icecream   7.813089e-08  6
## Sick       2.479158e-07  6
## Abdominals 1.481272e-06  6
## Fever      2.001496e-05  6
## Diarrhae   2.617095e-05  6
## Vomiting   1.442746e-04  6
## Mayo       9.629628e-03  6
## 
## Description of each cluster by the categories
## =============================================
## $`1`
##                 Cla/Mod Mod/Cla    Global      p.value    v.test
## Potato=Potato_n     100     100  5.454545 3.811702e-05  4.118607
## Potato=Potato_y       0       0 94.545455 3.811702e-05 -4.118607
## 
## $`2`
##             Cla/Mod Mod/Cla    Global    p.value    v.test
## Fish=Fish_n     100     100  1.818182 0.01818182  2.361894
## Fish=Fish_y       0       0 98.181818 0.01818182 -2.361894
## 
## $`3`
##                       Cla/Mod    Mod/Cla    Global      p.value    v.test
## Sick=Sick_y         68.421053 100.000000 69.090909 7.603991e-07  4.945266
## Abdominals=Abdo_y   67.567568  96.153846 67.272727 9.845168e-06  4.420546
## Fever=Fever_y       68.571429  92.307692 63.636364 2.436594e-05  4.220594
## Diarrhae=Diarrhea_y 65.714286  88.461538 63.636364 3.137971e-04  3.603635
## Vomiting=Vomit_y    77.272727  65.384615 40.000000 3.461305e-04  3.578078
## Mayo=Mayo_y         57.777778 100.000000 81.818182 6.848183e-04  3.395586
## Cheese=Cheese_y     54.166667 100.000000 87.272727 7.691310e-03  2.665329
## Courgette=Courg_y   52.000000 100.000000 90.909091 3.413715e-02  2.118448
## Nausea=Nausea_y     75.000000  34.615385 21.818182 3.691946e-02  2.086654
## Nausea=Nausea_n     39.534884  65.384615 78.181818 3.691946e-02 -2.086654
## Courgette=Courg_n    0.000000   0.000000  9.090909 3.413715e-02 -2.118448
## Cheese=Cheese_n      0.000000   0.000000 12.727273 7.691310e-03 -2.665329
## Mayo=Mayo_n          0.000000   0.000000 18.181818 6.848183e-04 -3.395586
## Vomiting=Vomit_n    27.272727  34.615385 60.000000 3.461305e-04 -3.578078
## Diarrhae=Diarrhea_n 15.000000  11.538462 36.363636 3.137971e-04 -3.603635
## Fever=Fever_n       10.000000   7.692308 36.363636 2.436594e-05 -4.220594
## Abdominals=Abdo_n    5.555556   3.846154 32.727273 9.845168e-06 -4.420546
## Sick=Sick_n          0.000000   0.000000 30.909091 7.603991e-07 -4.945266
## 
## $`4`
##                    Cla/Mod Mod/Cla    Global      p.value    v.test
## Courgette=Courg_n 80.00000     100  9.090909 1.466039e-05  4.333730
## Vomiting=Vomit_y  18.18182     100 40.000000 2.144815e-02  2.300001
## Vomiting=Vomit_n   0.00000       0 60.000000 2.144815e-02 -2.300001
## Courgette=Courg_y  0.00000       0 90.909091 1.466039e-05 -4.333730
## 
## $`5`
##                       Cla/Mod    Mod/Cla   Global      p.value    v.test
## Sick=Sick_n         64.705882  91.666667 30.90909 1.100140e-06  4.872824
## Abdominals=Abdo_n   61.111111  91.666667 32.72727 2.768483e-06  4.687289
## Diarrhae=Diarrhea_n 55.000000  91.666667 36.36364 1.397339e-05  4.344279
## Fever=Fever_n       55.000000  91.666667 36.36364 1.397339e-05  4.344279
## Vomiting=Vomit_n    36.363636 100.000000 60.00000 8.087378e-04  3.349787
## Mayo=Mayo_n         50.000000  41.666667 18.18182 3.456230e-02  2.113450
## Nausea=Nausea_n     27.906977 100.000000 78.18182 3.496156e-02  2.108803
## Nausea=Nausea_y      0.000000   0.000000 21.81818 3.496156e-02 -2.108803
## Mayo=Mayo_y         15.555556  58.333333 81.81818 3.456230e-02 -2.113450
## Vomiting=Vomit_y     0.000000   0.000000 40.00000 8.087378e-04 -3.349787
## Diarrhae=Diarrhea_y  2.857143   8.333333 63.63636 1.397339e-05 -4.344279
## Fever=Fever_y        2.857143   8.333333 63.63636 1.397339e-05 -4.344279
## Abdominals=Abdo_y    2.702703   8.333333 67.27273 2.768483e-06 -4.687289
## Sick=Sick_y          2.631579   8.333333 69.09091 1.100140e-06 -4.872824
## 
## $`6`
##                  Cla/Mod Mod/Cla   Global      p.value    v.test
## Cheese=Cheese_n 85.71429     100 12.72727 2.414653e-07  5.164203
## Cheese=Cheese_y  0.00000       0 87.27273 2.414653e-07 -5.164203
## 
## $`7`
##                     Cla/Mod Mod/Cla    Global      p.value    v.test
## Icecream=Icecream_n      75     100  7.272727 0.0001524681  3.787015
## Icecream=Icecream_y       0       0 92.727273 0.0001524681 -3.787015
## 
## 
## Link between the cluster variable and the quantitative variables
## ================================================================
##           Eta2     P-value
## Time 0.7149523 1.42004e-11
## 
## Description of each cluster by quantitative variables
## =====================================================
## $`1`
## NULL
## 
## $`2`
##       v.test Mean in category Overall mean sd in category Overall sd    p.value
## Age 2.337297               72     16.92727              0   23.56257 0.01942374
## 
## $`3`
##        v.test Mean in category Overall mean sd in category Overall sd
## Time 5.046439         15.76923     10.16364       3.775505   7.728962
##           p.value
## Time 4.501204e-07
## 
## $`4`
## NULL
## 
## $`5`
##         v.test Mean in category Overall mean sd in category Overall sd
## Time -4.728131             0.75     10.16364       2.487469   7.728962
##          p.value
## Time 2.26596e-06
## 
## $`6`
##         v.test Mean in category Overall mean sd in category Overall sd
## Time -2.272444         3.333333     10.16364       4.714045   7.728962
##         p.value
## Time 0.02305969
## 
## $`7`
## NULL