Con este método se describen tablas de “individuos” por variables categóricas, ya sean nominales u ordinales. Con el ACM se abordan los siguientes objetivos:
El ACM es una extensión del ACS.
La tabla de datos se denomina de código condensado, (denotada por \(Y\)) y no tiene significado numérico. Las \(n\) filas representan a los individuos y las \(s\) columnas a las variables categóricas. En el lenguaje de diseño de experimentos las columnas son factores y las categorías los niveles de los factores.
La Tabla Disyuntiva Completa es una tabla de n individuos por p categorías, en donde se utiliza un código indicador para cada categoría (1 si el individuo la asume, 0 si no), denominado disyuntivo completo, por sus propiedades.
Con los datos del ejercicio desempeño municipal de DNP realice el analisis de correspondencias simples de una de las tablas de contingencia posible de las siguientes variables:
library("FactoMineR")
library("factoextra")## Loading required package: ggplot2
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
desempeno<-read.table("Desempeno municipal.csv",header=T,sep=",")
row.names(desempeno)<-desempeno$Codigo
desempeno$desempeno_cuartil<-cut(desempeno$Desempeno.Municipal,breaks = quantile(desempeno$Desempeno.Municipal, prob=seq(0,1,0.25)),labels = paste("Desempeno.cuartil", 1:4),include.lowest = TRUE)
desempeno$Ingresos_cuartil<-cut(desempeno$Ingresos.tributarios.y.no.tributarios.per.cápita,breaks = quantile(desempeno$Ingresos.tributarios.y.no.tributarios.per.cápita, prob=seq(0,1,0.25)),labels = paste("Ingresos.Cuartil", 1:4),include.lowest = TRUE)
desempeno.act<-desempeno[,c(
#"Departamento",
"Categoria.de.ruralidad", "Grupo.dotaciones" , "desempeno_cuartil", "Ingresos_cuartil")]
res.mca <- MCA(desempeno.act, graph = FALSE)eig.val <- get_eigenvalue(res.mca)
eig.val## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 6.997544e-01 1.999298e+01 19.99298
## Dim.2 5.250081e-01 1.500023e+01 34.99321
## Dim.3 5.011071e-01 1.431734e+01 49.31056
## Dim.4 3.012861e-01 8.608176e+00 57.91873
## Dim.5 2.723505e-01 7.781444e+00 65.70018
## Dim.6 2.675671e-01 7.644775e+00 73.34495
## Dim.7 2.318819e-01 6.625197e+00 79.97015
## Dim.8 2.254985e-01 6.442815e+00 86.41297
## Dim.9 2.061514e-01 5.890041e+00 92.30301
## Dim.10 1.511127e-01 4.317505e+00 96.62051
## Dim.11 1.182821e-01 3.379488e+00 100.00000
## Dim.12 5.613447e-30 1.603842e-28 100.00000
## Dim.13 5.774596e-31 1.649884e-29 100.00000
## Dim.14 3.023779e-31 8.639369e-30 100.00000
fviz_screeplot(res.mca, addlabels = TRUE)fviz_mca_biplot(res.mca,
#repel = TRUE, # Avoid text overlapping (slow if many point)
ggtheme = theme_minimal())fviz_mca_var(res.mca, choice = "mca.cor",
repel = TRUE, # Avoid text overlapping (slow)
ggtheme = theme_minimal())fviz_mca_var(res.mca,
repel = TRUE, # Avoid text overlapping (slow)
ggtheme = theme_minimal())fviz_mca_var(res.mca, axes = c(2, 3), col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE, # Avoid text overlapping
ggtheme = theme_minimal())Usando los datos de estadisticas vitales
# load("E:/2019 (1)/Sabana/Estadistica aplicada Salud Publica/00. Datos/Datos 2015/avpp13.Rdata")
load("avpp13.Rdata")
# names(defun_avpp13)
defun.act<-defun_avpp13[,c(
"ops_desc_un_digito",
"sexo", "est_civil","gru_ed2","nivel_edu","sit_defun","seg_social")]
defun.act$sexo<-factor(defun.act$sexo, levels=c(1,2,3), labels=c("Masculino", "Femenino", "Indeterminado"))
# table(defun.act$sexo)
defun.act$est_civil<-factor(defun.act$est_civil, levels=c(1,2,3,4,5,6,9), labels=c("No casado(a) más 2 años viviendo","No casado(a) menos 2 años viviendo", "separado, divorciado",
"Viudo(a)" ,
"Soltero",
"Casado" ,
"Sin información" ))
# table(defun.act$est_civil)
defun.act$gru_ed2<-factor(defun.act$gru_ed2, levels=c(1,2,3,4,5,6,7), labels=c("Menor de 1", "De 1 a 4", "De 5 a 14", "De 15 a 44", "De 45 a 64" , "De 65 y mas", "Edad desconocida"))
# table(defun.act$gru_ed2)
defun.act$sit_defun<-factor(defun.act$sit_defun, levels=c(1,2,3,4,5,6,9), labels=c("Hospital clínica","Centro puesto de salud", "Casa domicilio","Lugar de trabajo","Vía pública","Otro","Sin información"))
# table(defun.act$sit_defun)
defun.act$seg_social<-factor(defun.act$seg_social, levels=c(1,2,3,4,5,9), labels=c("Contributivo", "Subsidiado","Excepción" ,"Especial","No asegurado","Sin información"))
# table(defun.act$seg_social)
# str(defun.act)library(FactoMineR)
res.mca <- MCA(defun.act, graph = FALSE)
res.mca## **Results of the Multiple Correspondence Analysis (MCA)**
## The analysis was performed on 203071 individuals, described by 7 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. of the categories"
## 4 "$var$cos2" "cos2 for the categories"
## 5 "$var$contrib" "contributions of the categories"
## 6 "$var$v.test" "v-test for the categories"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "intermediate results"
## 12 "$call$marge.col" "weights of columns"
## 13 "$call$marge.li" "weights of rows"
eig.val <- get_eigenvalue(res.mca)
100/44## [1] 2.272727
eig.val## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 0.41083372 6.5359910 6.535991
## Dim.2 0.31899468 5.0749154 11.610906
## Dim.3 0.24083146 3.8314096 15.442316
## Dim.4 0.20437190 3.2513711 18.693687
## Dim.5 0.17665734 2.8104576 21.504145
## Dim.6 0.17098799 2.7202635 24.224408
## Dim.7 0.16809517 2.6742413 26.898650
## Dim.8 0.15899611 2.5294836 29.428133
## Dim.9 0.15448734 2.4577531 31.885886
## Dim.10 0.15222629 2.4217820 34.307668
## Dim.11 0.15132948 2.4075144 36.715183
## Dim.12 0.15037432 2.3923187 39.107501
## Dim.13 0.14680190 2.3354848 41.442986
## Dim.14 0.14523083 2.3104905 43.753477
## Dim.15 0.14376561 2.2871801 46.040657
## Dim.16 0.14336059 2.2807366 48.321393
## Dim.17 0.14322585 2.2785930 50.599986
## Dim.18 0.14268038 2.2699151 52.869902
## Dim.19 0.14257668 2.2682653 55.138167
## Dim.20 0.14240715 2.2655683 57.403735
## Dim.21 0.14189385 2.2574021 59.661137
## Dim.22 0.14127092 2.2474919 61.908629
## Dim.23 0.14107394 2.2443581 64.152987
## Dim.24 0.14079402 2.2399048 66.392892
## Dim.25 0.13973222 2.2230126 68.615905
## Dim.26 0.13860297 2.2050473 70.820952
## Dim.27 0.13696095 2.1789242 72.999876
## Dim.28 0.13623006 2.1672964 75.167173
## Dim.29 0.13487945 2.1458094 77.312982
## Dim.30 0.13416377 2.1344237 79.447406
## Dim.31 0.13095866 2.0834333 81.530839
## Dim.32 0.12530866 1.9935469 83.524386
## Dim.33 0.12275397 1.9529041 85.477290
## Dim.34 0.12208269 1.9422247 87.419515
## Dim.35 0.11520957 1.8328795 89.252394
## Dim.36 0.10952293 1.7424103 90.994804
## Dim.37 0.09896290 1.5744098 92.569214
## Dim.38 0.09272215 1.4751251 94.044339
## Dim.39 0.08975372 1.4279001 95.472239
## Dim.40 0.07435198 1.1828725 96.655112
## Dim.41 0.06822490 1.0853961 97.740508
## Dim.42 0.05983811 0.9519699 98.692478
## Dim.43 0.04411701 0.7018615 99.394339
## Dim.44 0.03807010 0.6056606 100.000000
fviz_screeplot(res.mca, addlabels = TRUE)fviz_mca_biplot(res.mca,
#repel = TRUE, # Avoid text overlapping (slow if many point)
ggtheme = theme_minimal())fviz_mca_var(res.mca, choice = "mca.cor",
repel = TRUE, # Avoid text overlapping (slow)
ggtheme = theme_minimal())fviz_mca_var(res.mca,
repel = TRUE, # Avoid text overlapping (slow)
ggtheme = theme_minimal())fviz_mca_var(res.mca, axes = c(2, 3), col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE, # Avoid text overlapping
ggtheme = theme_minimal())