library(FactoMineR) ## ACP, ACM, MFA y clustering
library(factoextra) ## visualizacion de graficos multivariados
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(Factoshiny) ## tableros dinamicos con graficos multivariados
## Warning: package 'Factoshiny' was built under R version 4.2.3
## Loading required package: shiny
## Loading required package: FactoInvestigate
## Warning: package 'FactoInvestigate' was built under R version 4.2.3
# Load the `poison` example data set into the workspace and print a
# per-column summary of it.
data(list = "poison")
summary(poison)
## Age Time Sick Sex Nausea Vomiting
## Min. : 4.00 Min. : 0.00 Sick_n:17 F:28 Nausea_n:43 Vomit_n:33
## 1st Qu.: 6.00 1st Qu.: 0.00 Sick_y:38 M:27 Nausea_y:12 Vomit_y:22
## Median : 8.00 Median :12.00
## Mean :16.93 Mean :10.16
## 3rd Qu.:10.00 3rd Qu.:16.50
## Max. :88.00 Max. :22.00
## Abdominals Fever Diarrhae Potato Fish Mayo
## Abdo_n:18 Fever_n:20 Diarrhea_n:20 Potato_n: 3 Fish_n: 1 Mayo_n:10
## Abdo_y:37 Fever_y:35 Diarrhea_y:35 Potato_y:52 Fish_y:54 Mayo_y:45
##
##
##
##
## Courgette Cheese Icecream
## Courg_n: 5 Cheese_n: 7 Icecream_n: 4
## Courg_y:50 Cheese_y:48 Icecream_y:51
##
##
##
##
# Draw one bar chart of category counts for each of the two factors,
# in the same order as before: Sick first, then Sex.
for (factor_name in c("Sick", "Sex")) {
  barplot(table(poison[[factor_name]]))
}
# Multiple factor analysis on `poison`.
# Columns are split into four consecutive groups of sizes 2, 2, 5 and 6;
# the first group is declared as type "s" and the other three as type "n".
# Groups 1 and 2 are passed as supplementary groups (num.group.sup).
AFM.Cualitativo <- MFA(
  base = poison,
  group = c(2, 2, 5, 6),
  type = c("s", "n", "n", "n"),
  name.group = c("desc", "desc2", "symptom", "eat"),
  num.group.sup = 1:2
)
## Warning: ggrepel: 15 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 33 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 32 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
En el análisis de cluster jerárquico se observa que se generan 7 grupos, con base en el método de Ward y en los resultados encontrados en el AFM.
# Hierarchical clustering on the MFA result. nb.clust = -1 is passed so the
# number of clusters is not fixed by hand; HCPC's own plots are disabled.
cluster <- HCPC(
  res = AFM.Cualitativo,
  nb.clust = -1,
  graph = FALSE
)

# Cluster plot, with repel = TRUE for the point labels.
fviz_cluster(object = cluster, repel = TRUE)

# Dendrogram of the hierarchical clustering.
fviz_dend(x = cluster)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Cluster 3: Con base en los valores test se observa que el cluster 3 presenta mayor frecuencia de las categorías Sick_y, Abdo_y, Fever_y, Diarrhea_y, Vomit_y, Mayo_y, Cheese_y, Courg_y y Nausea_y. Esto se debe a que, al comparar la frecuencia de cada categoría dentro del cluster (Mod/Cla), esta es mayor que la frecuencia de la categoría en todo el conjunto de datos (Global).
Cluster 5: Se observa que este grupo se caracteriza por frecuencias mayores que las globales en las categorías Sick_n, Abdo_n, Diarrhea_n, Fever_n, Vomit_n, Mayo_n y Nausea_n.
# Print the description of the clusters by the variables.
cluster[["desc.var"]]
##
## Link between the cluster variable and the categorical variables (chi-square test)
## =================================================================================
## p.value df
## Potato 4.635492e-10 6
## Fish 4.635492e-10 6
## Cheese 7.462770e-09 6
## Courgette 4.862085e-08 6
## Icecream 7.813089e-08 6
## Sick 2.479158e-07 6
## Abdominals 1.481272e-06 6
## Fever 2.001496e-05 6
## Diarrhae 2.617095e-05 6
## Vomiting 1.442746e-04 6
## Mayo 9.629628e-03 6
##
## Description of each cluster by the categories
## =============================================
## $`1`
## Cla/Mod Mod/Cla Global p.value v.test
## Potato=Potato_n 100 100 5.454545 3.811702e-05 4.118607
## Potato=Potato_y 0 0 94.545455 3.811702e-05 -4.118607
##
## $`2`
## Cla/Mod Mod/Cla Global p.value v.test
## Fish=Fish_n 100 100 1.818182 0.01818182 2.361894
## Fish=Fish_y 0 0 98.181818 0.01818182 -2.361894
##
## $`3`
## Cla/Mod Mod/Cla Global p.value v.test
## Sick=Sick_y 68.421053 100.000000 69.090909 7.603991e-07 4.945266
## Abdominals=Abdo_y 67.567568 96.153846 67.272727 9.845168e-06 4.420546
## Fever=Fever_y 68.571429 92.307692 63.636364 2.436594e-05 4.220594
## Diarrhae=Diarrhea_y 65.714286 88.461538 63.636364 3.137971e-04 3.603635
## Vomiting=Vomit_y 77.272727 65.384615 40.000000 3.461305e-04 3.578078
## Mayo=Mayo_y 57.777778 100.000000 81.818182 6.848183e-04 3.395586
## Cheese=Cheese_y 54.166667 100.000000 87.272727 7.691310e-03 2.665329
## Courgette=Courg_y 52.000000 100.000000 90.909091 3.413715e-02 2.118448
## Nausea=Nausea_y 75.000000 34.615385 21.818182 3.691946e-02 2.086654
## Nausea=Nausea_n 39.534884 65.384615 78.181818 3.691946e-02 -2.086654
## Courgette=Courg_n 0.000000 0.000000 9.090909 3.413715e-02 -2.118448
## Cheese=Cheese_n 0.000000 0.000000 12.727273 7.691310e-03 -2.665329
## Mayo=Mayo_n 0.000000 0.000000 18.181818 6.848183e-04 -3.395586
## Vomiting=Vomit_n 27.272727 34.615385 60.000000 3.461305e-04 -3.578078
## Diarrhae=Diarrhea_n 15.000000 11.538462 36.363636 3.137971e-04 -3.603635
## Fever=Fever_n 10.000000 7.692308 36.363636 2.436594e-05 -4.220594
## Abdominals=Abdo_n 5.555556 3.846154 32.727273 9.845168e-06 -4.420546
## Sick=Sick_n 0.000000 0.000000 30.909091 7.603991e-07 -4.945266
##
## $`4`
## Cla/Mod Mod/Cla Global p.value v.test
## Courgette=Courg_n 80.00000 100 9.090909 1.466039e-05 4.333730
## Vomiting=Vomit_y 18.18182 100 40.000000 2.144815e-02 2.300001
## Vomiting=Vomit_n 0.00000 0 60.000000 2.144815e-02 -2.300001
## Courgette=Courg_y 0.00000 0 90.909091 1.466039e-05 -4.333730
##
## $`5`
## Cla/Mod Mod/Cla Global p.value v.test
## Sick=Sick_n 64.705882 91.666667 30.90909 1.100140e-06 4.872824
## Abdominals=Abdo_n 61.111111 91.666667 32.72727 2.768483e-06 4.687289
## Diarrhae=Diarrhea_n 55.000000 91.666667 36.36364 1.397339e-05 4.344279
## Fever=Fever_n 55.000000 91.666667 36.36364 1.397339e-05 4.344279
## Vomiting=Vomit_n 36.363636 100.000000 60.00000 8.087378e-04 3.349787
## Mayo=Mayo_n 50.000000 41.666667 18.18182 3.456230e-02 2.113450
## Nausea=Nausea_n 27.906977 100.000000 78.18182 3.496156e-02 2.108803
## Nausea=Nausea_y 0.000000 0.000000 21.81818 3.496156e-02 -2.108803
## Mayo=Mayo_y 15.555556 58.333333 81.81818 3.456230e-02 -2.113450
## Vomiting=Vomit_y 0.000000 0.000000 40.00000 8.087378e-04 -3.349787
## Diarrhae=Diarrhea_y 2.857143 8.333333 63.63636 1.397339e-05 -4.344279
## Fever=Fever_y 2.857143 8.333333 63.63636 1.397339e-05 -4.344279
## Abdominals=Abdo_y 2.702703 8.333333 67.27273 2.768483e-06 -4.687289
## Sick=Sick_y 2.631579 8.333333 69.09091 1.100140e-06 -4.872824
##
## $`6`
## Cla/Mod Mod/Cla Global p.value v.test
## Cheese=Cheese_n 85.71429 100 12.72727 2.414653e-07 5.164203
## Cheese=Cheese_y 0.00000 0 87.27273 2.414653e-07 -5.164203
##
## $`7`
## Cla/Mod Mod/Cla Global p.value v.test
## Icecream=Icecream_n 75 100 7.272727 0.0001524681 3.787015
## Icecream=Icecream_y 0 0 92.727273 0.0001524681 -3.787015
##
##
## Link between the cluster variable and the quantitative variables
## ================================================================
## Eta2 P-value
## Time 0.7149523 1.42004e-11
##
## Description of each cluster by quantitative variables
## =====================================================
## $`1`
## NULL
##
## $`2`
## v.test Mean in category Overall mean sd in category Overall sd p.value
## Age 2.337297 72 16.92727 0 23.56257 0.01942374
##
## $`3`
## v.test Mean in category Overall mean sd in category Overall sd
## Time 5.046439 15.76923 10.16364 3.775505 7.728962
## p.value
## Time 4.501204e-07
##
## $`4`
## NULL
##
## $`5`
## v.test Mean in category Overall mean sd in category Overall sd
## Time -4.728131 0.75 10.16364 2.487469 7.728962
## p.value
## Time 2.26596e-06
##
## $`6`
## v.test Mean in category Overall mean sd in category Overall sd
## Time -2.272444 3.333333 10.16364 4.714045 7.728962
## p.value
## Time 0.02305969
##
## $`7`
## NULL