library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 4.4.3
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Cargando paquete requerido: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggplot2)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Cargando paquete requerido: carData
## Warning: package 'carData' was built under R version 4.4.2
## 
## Adjuntando el paquete: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
# 1. Cargar y preparar los datos
datos <- read_excel("C:/Users/ANA/Downloads/Heart disease Env.xlsx", sheet = "Data")

# Ver la estructura de los datos
str(datos)
## tibble [303 × 10] (S3: tbl_df/tbl/data.frame)
##  $ age    : num [1:303] 63 37 41 56 57 57 56 44 52 57 ...
##  $ sex    : num [1:303] 1 1 0 1 0 1 0 1 1 1 ...
##  $ cp     : num [1:303] 3 2 1 1 0 0 1 1 2 2 ...
##  $ fbs    : num [1:303] 1 0 0 0 0 0 0 0 1 0 ...
##  $ restecg: num [1:303] 0 1 0 1 1 1 0 1 1 1 ...
##  $ exang  : num [1:303] 0 0 0 0 1 0 0 0 0 0 ...
##  $ slope  : num [1:303] 0 0 2 2 2 1 1 2 2 2 ...
##  $ ca     : num [1:303] 0 0 0 0 0 0 0 0 0 0 ...
##  $ thal   : num [1:303] 1 2 2 2 2 1 2 3 3 2 ...
##  $ target : num [1:303] 1 1 1 1 1 1 1 1 1 1 ...
# 2. Recodificar las variables con correcciones para ca y thal
datos_recod <- datos %>%
  # Filtrar observaciones problemáticas primero
  filter(ca != 4, thal != 0) %>%
  
  # Luego recodificar las variables
  mutate(
    sex = factor(sex, levels = c(0, 1), labels = c("Mujer", "Hombre")),
    cp = factor(cp, levels = 0:3, 
                labels = c("Angina típica", "Angina atípica", 
                           "Dolor no anginal", "Asintomático")),
    fbs = factor(fbs, levels = c(0, 1), 
                 labels = c("Glucosa ≤ 120 mg/dl", "Glucosa > 120 mg/dl")),
    restecg = factor(restecg, levels = 0:2,
                     labels = c("Normal", "Anormalidad ST-T", 
                                "Hipertrofia ventricular")),
    exang = factor(exang, levels = c(0, 1), 
                   labels = c("No", "Sí")),
    slope = factor(slope, levels = 0:2,
                   labels = c("Ascendente", "Plano", "Descendente")),
    thal = factor(thal, levels = 1:3,  # Solo niveles 1-3 después de filtrar
                  labels = c("Normal", "Defecto fijo", "Defecto reversible")),
    target = factor(target, levels = c(0, 1),
                    labels = c("Sin enfermedad", "Con enfermedad")),
    ca = factor(ca, levels = 0:3)  # ca ahora solo con niveles 0-3
  )

# Verificar las distribuciones corregidas
cat("\nDistribución de ca después de filtrar:\n")
## 
## Distribución de ca después de filtrar:
table(datos_recod$ca)
## 
##   0   1   2   3 
## 173  65  38  20
cat("\nDistribución de thal después de filtrar:\n")
## 
## Distribución de thal después de filtrar:
table(datos_recod$thal)
## 
##             Normal       Defecto fijo Defecto reversible 
##                 18                163                115
# El resto del código permanece exactamente igual desde aquí...
# 3. Preparar variables para MCA
vars_activas <- c("cp", "fbs", "restecg", "exang", "slope", "ca", "thal")
vars_supl <- c("age", "sex")
var_ilust <- "target"

# 4. Realizar el Análisis de Correspondencias Múltiples (MCA)
res.mca <- MCA(datos_recod, 
               quanti.sup = which(colnames(datos_recod) == "age"),
               quali.sup = c(which(colnames(datos_recod) %in% c("sex", "target"))),
               graph = FALSE)

# 5. Visualización de los resultados
fviz_mca_var(res.mca, 
             repel = TRUE, 
             col.var = "contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             ggtheme = theme_minimal(),
             title = "MCA - Gráfico de categorías variables")

fviz_mca_ind(res.mca, 
             habillage = which(colnames(datos_recod) == "target"),
             addEllipses = TRUE,
             palette = c("#00AFBB", "#FC4E07"),
             ggtheme = theme_minimal(),
             title = "MCA - Gráfico de individuos por condición cardíaca")

fviz_contrib(res.mca, choice = "var", axes = 1, top = 15)

fviz_contrib(res.mca, choice = "var", axes = 2, top = 15)

fviz_mca_var(res.mca, 
             choice = "mca.cor", 
             repel = TRUE,
             ggtheme = theme_minimal(),
             title = "Correlación entre variables y dimensiones MCA")

# 6. Interpretación de resultados
eig.val <- get_eigenvalue(res.mca)
fviz_screeplot(res.mca, addlabels = TRUE, ylim = c(0, 45))

summary(res.mca)
## 
## Call:
## MCA(X = datos_recod, quanti.sup = which(colnames(datos_recod) ==  
##      "age"), quali.sup = c(which(colnames(datos_recod) %in% c("sex",  
##      "target"))), graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance               0.319   0.187   0.170   0.162   0.147   0.144   0.141
## % of var.             15.961   9.356   8.510   8.102   7.369   7.223   7.046
## Cumulative % of var.  15.961  25.316  33.826  41.929  49.297  56.521  63.567
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.135   0.123   0.120   0.101   0.097   0.084   0.067
## % of var.              6.762   6.175   6.024   5.054   4.843   4.211   3.364
## Cumulative % of var.  70.329  76.503  82.527  87.582  92.425  96.636 100.000
## 
## Individuals (the 10 first)
##                                    Dim.1    ctr   cos2    Dim.2    ctr   cos2  
## 1                               |  0.197  0.041  0.006 |  2.273  9.326  0.745 |
## 2                               | -0.510  0.275  0.096 |  0.572  0.591  0.122 |
## 3                               | -0.755  0.603  0.424 | -0.053  0.005  0.002 |
## 4                               | -0.895  0.847  0.597 | -0.245  0.109  0.045 |
## 5                               | -0.071  0.005  0.005 | -0.401  0.291  0.161 |
## 6                               |  0.269  0.077  0.025 |  0.318  0.183  0.035 |
## 7                               | -0.444  0.209  0.146 | -0.035  0.002  0.001 |
## 8                               | -0.558  0.330  0.215 | -0.338  0.206  0.079 |
## 9                               | -0.391  0.162  0.080 |  0.357  0.229  0.066 |
## 10                              | -0.783  0.649  0.621 | -0.098  0.018  0.010 |
##                                  Dim.3    ctr   cos2  
## 1                               -0.246  0.120  0.009 |
## 2                                0.455  0.412  0.077 |
## 3                               -0.151  0.045  0.017 |
## 4                               -0.141  0.039  0.015 |
## 5                               -0.072  0.010  0.005 |
## 6                                0.204  0.083  0.015 |
## 7                               -0.016  0.001  0.000 |
## 8                               -0.204  0.082  0.029 |
## 9                               -0.039  0.003  0.001 |
## 10                               0.205  0.084  0.043 |
## 
## Categories (the 10 first)
##                                     Dim.1     ctr    cos2  v.test     Dim.2
## Angina típica                   |   0.747  11.911   0.508  12.245 |  -0.246
## Angina atípica                  |  -1.049   8.149   0.218  -8.023 |  -0.270
## Dolor no anginal                |  -0.606   4.608   0.143  -6.497 |   0.175
## Asintomático                    |  -0.161   0.091   0.002  -0.805 |   1.452
## Glucosa ≤ 120 mg/dl             |  -0.032   0.039   0.006  -1.323 |  -0.241
## Glucosa > 120 mg/dl             |   0.187   0.227   0.006   1.323 |   1.418
## restecg_Normal                  |   0.263   1.522   0.067   4.435 |   0.279
## restecg_Anormalidad ST-T        |  -0.291   1.879   0.083  -4.961 |  -0.304
## restecg_Hipertrofia ventricular |   1.134   0.778   0.018   2.279 |   1.062
## No                              |  -0.480   6.921   0.472 -11.799 |   0.163
##                                     ctr    cos2  v.test     Dim.3     ctr
## Angina típica                     2.198   0.055  -4.027 |  -0.069   0.190
## Angina atípica                    0.923   0.014  -2.067 |  -0.355   1.750
## Dolor no anginal                  0.653   0.012   1.872 |   0.645   9.778
## Asintomático                     12.516   0.178   7.241 |  -1.147   8.576
## Glucosa ≤ 120 mg/dl               3.788   0.342 -10.038 |   0.076   0.417
## Glucosa > 120 mg/dl              22.288   0.342  10.038 |  -0.449   2.454
## restecg_Normal                    2.919   0.075   4.702 |  -0.085   0.298
## restecg_Anormalidad ST-T          3.515   0.091  -5.194 |  -0.055   0.125
## restecg_Hipertrofia ventricular   1.163   0.015   2.134 |   5.095  29.448
## No                                1.358   0.054   4.002 |   0.029   0.048
##                                    cos2  v.test  
## Angina típica                     0.004  -1.131 |
## Angina atípica                    0.025  -2.715 |
## Dolor no anginal                  0.162   6.911 |
## Asintomático                      0.111  -5.717 |
## Glucosa ≤ 120 mg/dl               0.034   3.177 |
## Glucosa > 120 mg/dl               0.034  -3.177 |
## restecg_Normal                    0.007  -1.432 |
## restecg_Anormalidad ST-T          0.003  -0.933 |
## restecg_Hipertrofia ventricular   0.356  10.243 |
## No                                0.002   0.719 |
## 
## Categorical variables (eta2)
##                                   Dim.1 Dim.2 Dim.3  
## cp                              | 0.553 0.213 0.242 |
## fbs                             | 0.006 0.342 0.034 |
## restecg                         | 0.093 0.099 0.356 |
## exang                           | 0.472 0.054 0.002 |
## slope                           | 0.365 0.265 0.053 |
## ca                              | 0.281 0.144 0.488 |
## thal                            | 0.464 0.192 0.017 |
## 
## Supplementary categories
##                                     Dim.1    cos2  v.test     Dim.2    cos2
## Mujer                           |  -0.323   0.049  -3.816 |  -0.027   0.000
## Hombre                          |   0.153   0.049   3.816 |   0.013   0.000
## Sin enfermedad                  |   0.763   0.495  12.084 |  -0.103   0.009
## Con enfermedad                  |  -0.649   0.495 -12.084 |   0.087   0.009
##                                  v.test     Dim.3    cos2  v.test  
## Mujer                            -0.321 |   0.131   0.008   1.542 |
## Hombre                            0.321 |  -0.062   0.008  -1.542 |
## Sin enfermedad                   -1.629 |  -0.037   0.001  -0.579 |
## Con enfermedad                    1.629 |   0.031   0.001   0.579 |
## 
## Supplementary categorical variables (eta2)
##                                   Dim.1 Dim.2 Dim.3  
## sex                             | 0.049 0.000 0.008 |
## target                          | 0.495 0.009 0.001 |
## 
## Supplementary continuous variable
##                                    Dim.1    Dim.2    Dim.3  
## age                             |  0.297 |  0.113 | -0.008 |
# 7. Análisis adicional con GLM
modelo_glm <- glm(target ~ cp + fbs + restecg + exang + slope + ca + thal,
                 data = datos_recod, family = binomial())
summary(modelo_glm)
## 
## Call:
## glm(formula = target ~ cp + fbs + restecg + exang + slope + ca + 
##     thal, family = binomial(), data = datos_recod)
## 
## Coefficients:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -0.28822    0.95863  -0.301 0.763677    
## cpAngina atípica                1.07176    0.54950   1.950 0.051124 .  
## cpDolor no anginal              1.82175    0.49009   3.717 0.000201 ***
## cpAsintomático                  1.48045    0.59270   2.498 0.012497 *  
## fbsGlucosa > 120 mg/dl          0.44482    0.55488   0.802 0.422757    
## restecgAnormalidad ST-T         0.57230    0.37069   1.544 0.122613    
## restecgHipertrofia ventricular -1.13487    1.74251  -0.651 0.514863    
## exangSí                        -0.87147    0.41296  -2.110 0.034831 *  
## slopePlano                     -0.05269    0.70507  -0.075 0.940426    
## slopeDescendente                1.41885    0.73642   1.927 0.054018 .  
## ca1                            -2.15704    0.46621  -4.627 3.71e-06 ***
## ca2                            -3.00147    0.67830  -4.425 9.64e-06 ***
## ca3                            -2.64770    0.87913  -3.012 0.002598 ** 
## thalDefecto fijo                0.93952    0.71605   1.312 0.189494    
## thalDefecto reversible         -1.22189    0.72424  -1.687 0.091579 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 408.40  on 295  degrees of freedom
## Residual deviance: 200.08  on 281  degrees of freedom
## AIC: 230.08
## 
## Number of Fisher Scoring iterations: 6