library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 4.4.3
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Cargando paquete requerido: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggplot2)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Cargando paquete requerido: carData
## Warning: package 'carData' was built under R version 4.4.2
##
## Adjuntando el paquete: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# 1. Cargar y preparar los datos
datos <- read_excel("C:/Users/ANA/Downloads/Heart disease Env.xlsx", sheet = "Data")
# Ver la estructura de los datos
str(datos)
## tibble [303 × 10] (S3: tbl_df/tbl/data.frame)
## $ age : num [1:303] 63 37 41 56 57 57 56 44 52 57 ...
## $ sex : num [1:303] 1 1 0 1 0 1 0 1 1 1 ...
## $ cp : num [1:303] 3 2 1 1 0 0 1 1 2 2 ...
## $ fbs : num [1:303] 1 0 0 0 0 0 0 0 1 0 ...
## $ restecg: num [1:303] 0 1 0 1 1 1 0 1 1 1 ...
## $ exang : num [1:303] 0 0 0 0 1 0 0 0 0 0 ...
## $ slope : num [1:303] 0 0 2 2 2 1 1 2 2 2 ...
## $ ca : num [1:303] 0 0 0 0 0 0 0 0 0 0 ...
## $ thal : num [1:303] 1 2 2 2 2 1 2 3 3 2 ...
## $ target : num [1:303] 1 1 1 1 1 1 1 1 1 1 ...
# 2. Recodificar las variables con correcciones para ca y thal
datos_recod <- datos %>%
# Filtrar observaciones problemáticas primero
filter(ca != 4, thal != 0) %>%
# Luego recodificar las variables
mutate(
sex = factor(sex, levels = c(0, 1), labels = c("Mujer", "Hombre")),
cp = factor(cp, levels = 0:3,
labels = c("Angina típica", "Angina atípica",
"Dolor no anginal", "Asintomático")),
fbs = factor(fbs, levels = c(0, 1),
labels = c("Glucosa ≤ 120 mg/dl", "Glucosa > 120 mg/dl")),
restecg = factor(restecg, levels = 0:2,
labels = c("Normal", "Anormalidad ST-T",
"Hipertrofia ventricular")),
exang = factor(exang, levels = c(0, 1),
labels = c("No", "Sí")),
slope = factor(slope, levels = 0:2,
labels = c("Ascendente", "Plano", "Descendente")),
thal = factor(thal, levels = 1:3, # Solo niveles 1-3 después de filtrar
labels = c("Normal", "Defecto fijo", "Defecto reversible")),
target = factor(target, levels = c(0, 1),
labels = c("Sin enfermedad", "Con enfermedad")),
ca = factor(ca, levels = 0:3) # ca ahora solo con niveles 0-3
)
# Verificar las distribuciones corregidas
cat("\nDistribución de ca después de filtrar:\n")
##
## Distribución de ca después de filtrar:
table(datos_recod$ca)
##
## 0 1 2 3
## 173 65 38 20
cat("\nDistribución de thal después de filtrar:\n")
##
## Distribución de thal después de filtrar:
table(datos_recod$thal)
##
## Normal Defecto fijo Defecto reversible
## 18 163 115
# El resto del código permanece exactamente igual desde aquí...
# 3. Preparar variables para MCA
vars_activas <- c("cp", "fbs", "restecg", "exang", "slope", "ca", "thal")
vars_supl <- c("age", "sex")
var_ilust <- "target"
# 4. Realizar el Análisis de Correspondencias Múltiples (MCA)
res.mca <- MCA(datos_recod,
quanti.sup = which(colnames(datos_recod) == "age"),
quali.sup = c(which(colnames(datos_recod) %in% c("sex", "target"))),
graph = FALSE)
# 5. Visualización de los resultados
fviz_mca_var(res.mca,
repel = TRUE,
col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
ggtheme = theme_minimal(),
title = "MCA - Gráfico de categorías variables")

fviz_mca_ind(res.mca,
habillage = which(colnames(datos_recod) == "target"),
addEllipses = TRUE,
palette = c("#00AFBB", "#FC4E07"),
ggtheme = theme_minimal(),
title = "MCA - Gráfico de individuos por condición cardíaca")

fviz_contrib(res.mca, choice = "var", axes = 1, top = 15)

fviz_contrib(res.mca, choice = "var", axes = 2, top = 15)

fviz_mca_var(res.mca,
choice = "mca.cor",
repel = TRUE,
ggtheme = theme_minimal(),
title = "Correlación entre variables y dimensiones MCA")

# 6. Interpretación de resultados
eig.val <- get_eigenvalue(res.mca)
fviz_screeplot(res.mca, addlabels = TRUE, ylim = c(0, 45))

summary(res.mca)
##
## Call:
## MCA(X = datos_recod, quanti.sup = which(colnames(datos_recod) ==
## "age"), quali.sup = c(which(colnames(datos_recod) %in% c("sex",
## "target"))), graph = FALSE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 0.319 0.187 0.170 0.162 0.147 0.144 0.141
## % of var. 15.961 9.356 8.510 8.102 7.369 7.223 7.046
## Cumulative % of var. 15.961 25.316 33.826 41.929 49.297 56.521 63.567
## Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14
## Variance 0.135 0.123 0.120 0.101 0.097 0.084 0.067
## % of var. 6.762 6.175 6.024 5.054 4.843 4.211 3.364
## Cumulative % of var. 70.329 76.503 82.527 87.582 92.425 96.636 100.000
##
## Individuals (the 10 first)
## Dim.1 ctr cos2 Dim.2 ctr cos2
## 1 | 0.197 0.041 0.006 | 2.273 9.326 0.745 |
## 2 | -0.510 0.275 0.096 | 0.572 0.591 0.122 |
## 3 | -0.755 0.603 0.424 | -0.053 0.005 0.002 |
## 4 | -0.895 0.847 0.597 | -0.245 0.109 0.045 |
## 5 | -0.071 0.005 0.005 | -0.401 0.291 0.161 |
## 6 | 0.269 0.077 0.025 | 0.318 0.183 0.035 |
## 7 | -0.444 0.209 0.146 | -0.035 0.002 0.001 |
## 8 | -0.558 0.330 0.215 | -0.338 0.206 0.079 |
## 9 | -0.391 0.162 0.080 | 0.357 0.229 0.066 |
## 10 | -0.783 0.649 0.621 | -0.098 0.018 0.010 |
## Dim.3 ctr cos2
## 1 -0.246 0.120 0.009 |
## 2 0.455 0.412 0.077 |
## 3 -0.151 0.045 0.017 |
## 4 -0.141 0.039 0.015 |
## 5 -0.072 0.010 0.005 |
## 6 0.204 0.083 0.015 |
## 7 -0.016 0.001 0.000 |
## 8 -0.204 0.082 0.029 |
## 9 -0.039 0.003 0.001 |
## 10 0.205 0.084 0.043 |
##
## Categories (the 10 first)
## Dim.1 ctr cos2 v.test Dim.2
## Angina típica | 0.747 11.911 0.508 12.245 | -0.246
## Angina atípica | -1.049 8.149 0.218 -8.023 | -0.270
## Dolor no anginal | -0.606 4.608 0.143 -6.497 | 0.175
## Asintomático | -0.161 0.091 0.002 -0.805 | 1.452
## Glucosa ≤ 120 mg/dl | -0.032 0.039 0.006 -1.323 | -0.241
## Glucosa > 120 mg/dl | 0.187 0.227 0.006 1.323 | 1.418
## restecg_Normal | 0.263 1.522 0.067 4.435 | 0.279
## restecg_Anormalidad ST-T | -0.291 1.879 0.083 -4.961 | -0.304
## restecg_Hipertrofia ventricular | 1.134 0.778 0.018 2.279 | 1.062
## No | -0.480 6.921 0.472 -11.799 | 0.163
## ctr cos2 v.test Dim.3 ctr
## Angina típica 2.198 0.055 -4.027 | -0.069 0.190
## Angina atípica 0.923 0.014 -2.067 | -0.355 1.750
## Dolor no anginal 0.653 0.012 1.872 | 0.645 9.778
## Asintomático 12.516 0.178 7.241 | -1.147 8.576
## Glucosa ≤ 120 mg/dl 3.788 0.342 -10.038 | 0.076 0.417
## Glucosa > 120 mg/dl 22.288 0.342 10.038 | -0.449 2.454
## restecg_Normal 2.919 0.075 4.702 | -0.085 0.298
## restecg_Anormalidad ST-T 3.515 0.091 -5.194 | -0.055 0.125
## restecg_Hipertrofia ventricular 1.163 0.015 2.134 | 5.095 29.448
## No 1.358 0.054 4.002 | 0.029 0.048
## cos2 v.test
## Angina típica 0.004 -1.131 |
## Angina atípica 0.025 -2.715 |
## Dolor no anginal 0.162 6.911 |
## Asintomático 0.111 -5.717 |
## Glucosa ≤ 120 mg/dl 0.034 3.177 |
## Glucosa > 120 mg/dl 0.034 -3.177 |
## restecg_Normal 0.007 -1.432 |
## restecg_Anormalidad ST-T 0.003 -0.933 |
## restecg_Hipertrofia ventricular 0.356 10.243 |
## No 0.002 0.719 |
##
## Categorical variables (eta2)
## Dim.1 Dim.2 Dim.3
## cp | 0.553 0.213 0.242 |
## fbs | 0.006 0.342 0.034 |
## restecg | 0.093 0.099 0.356 |
## exang | 0.472 0.054 0.002 |
## slope | 0.365 0.265 0.053 |
## ca | 0.281 0.144 0.488 |
## thal | 0.464 0.192 0.017 |
##
## Supplementary categories
## Dim.1 cos2 v.test Dim.2 cos2
## Mujer | -0.323 0.049 -3.816 | -0.027 0.000
## Hombre | 0.153 0.049 3.816 | 0.013 0.000
## Sin enfermedad | 0.763 0.495 12.084 | -0.103 0.009
## Con enfermedad | -0.649 0.495 -12.084 | 0.087 0.009
## v.test Dim.3 cos2 v.test
## Mujer -0.321 | 0.131 0.008 1.542 |
## Hombre 0.321 | -0.062 0.008 -1.542 |
## Sin enfermedad -1.629 | -0.037 0.001 -0.579 |
## Con enfermedad 1.629 | 0.031 0.001 0.579 |
##
## Supplementary categorical variables (eta2)
## Dim.1 Dim.2 Dim.3
## sex | 0.049 0.000 0.008 |
## target | 0.495 0.009 0.001 |
##
## Supplementary continuous variable
## Dim.1 Dim.2 Dim.3
## age | 0.297 | 0.113 | -0.008 |
# 7. Análisis adicional con GLM
modelo_glm <- glm(target ~ cp + fbs + restecg + exang + slope + ca + thal,
data = datos_recod, family = binomial())
summary(modelo_glm)
##
## Call:
## glm(formula = target ~ cp + fbs + restecg + exang + slope + ca +
## thal, family = binomial(), data = datos_recod)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.28822 0.95863 -0.301 0.763677
## cpAngina atípica 1.07176 0.54950 1.950 0.051124 .
## cpDolor no anginal 1.82175 0.49009 3.717 0.000201 ***
## cpAsintomático 1.48045 0.59270 2.498 0.012497 *
## fbsGlucosa > 120 mg/dl 0.44482 0.55488 0.802 0.422757
## restecgAnormalidad ST-T 0.57230 0.37069 1.544 0.122613
## restecgHipertrofia ventricular -1.13487 1.74251 -0.651 0.514863
## exangSí -0.87147 0.41296 -2.110 0.034831 *
## slopePlano -0.05269 0.70507 -0.075 0.940426
## slopeDescendente 1.41885 0.73642 1.927 0.054018 .
## ca1 -2.15704 0.46621 -4.627 3.71e-06 ***
## ca2 -3.00147 0.67830 -4.425 9.64e-06 ***
## ca3 -2.64770 0.87913 -3.012 0.002598 **
## thalDefecto fijo 0.93952 0.71605 1.312 0.189494
## thalDefecto reversible -1.22189 0.72424 -1.687 0.091579 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 408.40 on 295 degrees of freedom
## Residual deviance: 200.08 on 281 degrees of freedom
## AIC: 230.08
##
## Number of Fisher Scoring iterations: 6