Predicción de Accidentes Cardiovasculares

Autores:

Jorge Leonardo Solis: jorge.solis@correounivalle.edu.co

Héctor Fabio Ocampo: hector.ocampo@correounivalle.edu.co

Cargando los Datos

objects(); ls()

## character(0)

## character(0)

wd="D:/Maestria/Primer Semestre/Métodos/Proyecto"
setwd(wd) 

library("easypackages")
lib_req<-c("car","lmtest","visdat","corrplot","MASS","olsrr","pROC")
easypackages::packages(lib_req)

#### Cargando Los Datos
stroke <- read.table("healthcare-dataset-stroke-data.csv",sep=",",header=T,dec=".",na.strings="N/A")
visdat::vis_miss(stroke)              # VisualizaciÃ³n de Datos Faltantes

summary(stroke)

##        id           gender               age         hypertension    
##  Min.   :   67   Length:5110        Min.   : 0.08   Min.   :0.00000  
##  1st Qu.:17741   Class :character   1st Qu.:25.00   1st Qu.:0.00000  
##  Median :36932   Mode  :character   Median :45.00   Median :0.00000  
##  Mean   :36518                      Mean   :43.23   Mean   :0.09746  
##  3rd Qu.:54682                      3rd Qu.:61.00   3rd Qu.:0.00000  
##  Max.   :72940                      Max.   :82.00   Max.   :1.00000  
##                                                                      
##  heart_disease     ever_married        work_type         Residence_type    
##  Min.   :0.00000   Length:5110        Length:5110        Length:5110       
##  1st Qu.:0.00000   Class :character   Class :character   Class :character  
##  Median :0.00000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :0.05401                                                           
##  3rd Qu.:0.00000                                                           
##  Max.   :1.00000                                                           
##                                                                            
##  avg_glucose_level      bmi        smoking_status         stroke       
##  Min.   : 55.12    Min.   :10.30   Length:5110        Min.   :0.00000  
##  1st Qu.: 77.25    1st Qu.:23.50   Class :character   1st Qu.:0.00000  
##  Median : 91.89    Median :28.10   Mode  :character   Median :0.00000  
##  Mean   :106.15    Mean   :28.89                      Mean   :0.04873  
##  3rd Qu.:114.09    3rd Qu.:33.10                      3rd Qu.:0.00000  
##  Max.   :271.74    Max.   :97.60                      Max.   :1.00000  
##                    NA's   :201

##Eliminar columna Id

stroke$id=NULL

Revisando los Datos

Revisando la Columna Gender

## Revisión Columna Gender
unique(stroke$gender)

## [1] "Male"   "Female" "Other"

stroke=transform(stroke, gender=factor(gender,levels=c("Male","Female","Other"),labels=c("Male","Female","Other")))

tableGender = table(stroke$gender)
legend = c(paste("Male: ",tableGender[1]), paste("Female: ",tableGender[2]),paste("Other: ",tableGender[3]))
barplot(tableGender ,col=c("blue","red","black") , legen=legend)

#eliminando el registro con genero other
stroke = stroke[!(stroke$gender=="Other"),]
stroke=transform(stroke, gender=factor(gender,levels=c("Male","Female"),labels=c("Male","Female")))

tableGender = table(stroke$gender)
legend = c(paste("Male: ",tableGender[1]), paste("Female: ",tableGender[2]))
barplot(tableGender ,col=c("blue","red") , legen=legend)

Revisando las demás Columnas

## RevisiÃ³n Columna Age

par(mfrow=c(1,2))
hist(x = stroke$age)
boxplot(stroke$age)

## Revision Columna hypertension
unique(stroke$hypertension)

## [1] 0 1

stroke=transform(stroke, hypertension=factor(hypertension,levels=0:1,labels=c("No Tiene Hipertension","Si Tiene Hipertension")))

tableHypertension = table(stroke$hypertension)
legend = c(paste("No Tiene Hipertension: ",tableHypertension[1]), paste("Si Tiene Hipertencion: ",tableHypertension[2]))
barplot(tableHypertension ,col=c("Blue","red") , legen=legend)

## Revision Columna heart_disease
unique(stroke$heart_disease)

## [1] 1 0

stroke=transform(stroke, heart_disease=factor(heart_disease,levels=0:1,labels=c("No Tiene Enfermedades del Corazon","Si Tiene Enfermedades del Corazon")))

tableHeart_disease = table(stroke$heart_disease)
legend = c(paste("No Tiene Enfermedades del Corazon: ",tableHeart_disease[1]), paste("Si Tiene Enfermedades del Corazon: ",tableHeart_disease[2]))
barplot(tableHeart_disease ,col=c("Blue","red") , legen=legend)

## Revision Columna ever_married
unique(stroke$ever_married)

## [1] "Yes" "No"

stroke=transform(stroke, ever_married=factor(ever_married,levels=c("Yes","No"),labels=c("Si","No")))

tableEver_married = table(stroke$ever_married)
legend = c(paste("SÃ ha estado Casado: ",tableEver_married[1]), paste("No ha estado casado: ",tableEver_married[2]))
barplot(tableEver_married ,col=c("Blue","red") , legen=legend)

## Revision Columna work_type
unique(stroke$work_type)

## [1] "Private"       "Self-employed" "Govt_job"      "children"     
## [5] "Never_worked"

stroke=transform(stroke, work_type=factor(work_type,levels=c("Private","Self-employed","Govt_job","children","Never_worked"),labels=c("Private","Self-employed","Govt_job","children","Never_worked")))

tableWork_type = table(stroke$work_type)
legend = c(tableWork_type[1],tableWork_type[2],tableWork_type[3],tableWork_type[4],tableWork_type[5])
barplot(tableWork_type ,col=c("Blue","red","black","pink","yellow") , legen=legend)

## Revision Columna Residence_type
unique(stroke$Residence_type)

## [1] "Urban" "Rural"

stroke=transform(stroke, Residence_type=factor(Residence_type,levels=c("Urban","Rural"),labels=c("Urbano","Rural")))

tableResidence_type = table(stroke$Residence_type)
legend = c(paste("Urbano: ",tableResidence_type[1]), paste("Rural: ",tableResidence_type[2]))
barplot(tableResidence_type ,col=c("Blue","red") , legen=legend)


## RevisiÃ³n Columna avg_glucose_level

par(mfrow=c(1,2))

hist(x = stroke$avg_glucose_level)
boxplot(stroke$avg_glucose_level)

## RevisiÃ³n Columna bmi

valoresNulosIMC = is.na(stroke$bmi)
promedioIMC = sum(stroke$bmi[!valoresNulosIMC])/length(stroke$bmi[!valoresNulosIMC])
stroke$bmi[valoresNulosIMC]=promedioIMC

par(mfrow=c(1,2))
hist(x = stroke$bmi)
boxplot(stroke$bmi)

## Revision Columna smoking_status

unique(stroke$smoking_status)

## [1] "formerly smoked" "never smoked"    "smokes"          "Unknown"

stroke=transform(stroke, smoking_status=factor(smoking_status,levels=c("formerly smoked","never smoked","smokes","Unknown"),labels=c("formerly smoked","never smoked","smokes","Unknown")))

tableSmoking_status = table(stroke$smoking_status)
legend = c(tableSmoking_status[1],tableSmoking_status[2],tableSmoking_status[3],tableSmoking_status[4])
barplot(tableSmoking_status ,col=c("Blue","red","black","pink") , legen=legend)


## Revision Columna stroke
unique(stroke$stroke)

## [1] 1 0

stroke=transform(stroke, stroke=factor(stroke,levels=0:1,labels=c("No","Si")))

tableStroke = table(stroke$stroke)
legend = c(paste("No SufriÃ³ Accidente Cardiovascular: ",tableStroke[1]), paste("Si SufriÃ³ Accidente Cardiovascular: ",tableStroke[2]))
barplot(tableStroke ,col=c("Blue","red") , legen=legend)

summary(stroke)

##     gender          age                       hypertension 
##  Male  :2115   Min.   : 0.08   No Tiene Hipertension:4611  
##  Female:2994   1st Qu.:25.00   Si Tiene Hipertension: 498  
##                Median :45.00                               
##                Mean   :43.23                               
##                3rd Qu.:61.00                               
##                Max.   :82.00                               
##                            heart_disease  ever_married         work_type   
##  No Tiene Enfermedades del Corazon:4833   Si:3353      Private      :2924  
##  Si Tiene Enfermedades del Corazon: 276   No:1756      Self-employed: 819  
##                                                        Govt_job     : 657  
##                                                        children     : 687  
##                                                        Never_worked :  22  
##                                                                            
##  Residence_type avg_glucose_level      bmi                smoking_status
##  Urbano:2596    Min.   : 55.12    Min.   :10.30   formerly smoked: 884  
##  Rural :2513    1st Qu.: 77.24    1st Qu.:23.80   never smoked   :1892  
##                 Median : 91.88    Median :28.40   smokes         : 789  
##                 Mean   :106.14    Mean   :28.89   Unknown        :1544  
##                 3rd Qu.:114.09    3rd Qu.:32.80                         
##                 Max.   :271.74    Max.   :97.60                         
##  stroke   
##  No:4860  
##  Si: 249  
##           
##           
##           
##

Graficando las relaciones entre las variables.

#### Graficando las relaciones entre las variables predictorias y la respuesta
par(mfrow=c(4,3))

tabla<-prop.table(table(stroke$stroke))
coord<-barplot(tabla, col=c("blue","red"),ylim=c(0,1), main="Stroke")
text(coord,tabla,labels=round(tabla,2), pos=3)

#Graficando las varaibla cuantitativas
lapply(names(stroke[,-c(1,3,4,5,6,7,10,11)]),function(y){
  boxplot(stroke[,y]~stroke[,"stroke"],ylab= y, xlab="stroke",boxwex = 0.5,col=NULL)
  stripchart(stroke[,y] ~ stroke[,"stroke"], vertical = T,
             method = "jitter", pch = 19,
             col = c("blue","red"), add = T)
})

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL

#Graficando las variables cualitativas
lapply(names(stroke[,-c(2,8,9,11)]),function(y){
  tabla<-prop.table(table(stroke$stroke,stroke[,y]))
  coord<-barplot(tabla, col=c("blue","red"),ylim=c(0,1), main=paste("Stroke Vs ",y) )
  text(coord,tabla,labels=round(tabla,2), pos=3)
})

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL

Creando un Primer Modelo Saturado.

#### Ajustando un modelo Saturado.
Modelo_saturado = glm(formula= stroke~., data=stroke,family = "binomial")  # Ajusta un Modelo saturado --> todas las variables
summary(Modelo_saturado)

## 
## Call:
## glm(formula = stroke ~ ., family = "binomial", data = stroke)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1266  -0.3199  -0.1639  -0.0868   3.5608  
## 
## Coefficients:
##                                                  Estimate Std. Error z value
## (Intercept)                                     -7.754204   0.579771 -13.375
## genderFemale                                    -0.013535   0.141867  -0.095
## age                                              0.074977   0.005842  12.834
## hypertensionSi Tiene Hipertension                0.400596   0.164959   2.428
## heart_diseaseSi Tiene Enfermedades del Corazon   0.281023   0.191098   1.471
## ever_marriedNo                                   0.184643   0.225391   0.819
## work_typeSelf-employed                          -0.376874   0.166589  -2.262
## work_typeGovt_job                               -0.142818   0.206502  -0.692
## work_typechildren                                0.836207   0.820461   1.019
## work_typeNever_worked                           -9.504686 309.249371  -0.031
## Residence_typeRural                             -0.083365   0.138323  -0.603
## avg_glucose_level                                0.003976   0.001199   3.317
## bmi                                              0.003539   0.011288   0.313
## smoking_statusnever smoked                      -0.206053   0.175911  -1.171
## smoking_statussmokes                             0.113882   0.215347   0.529
## smoking_statusUnknown                           -0.072098   0.208368  -0.346
##                                                Pr(>|z|)    
## (Intercept)                                     < 2e-16 ***
## genderFemale                                   0.923991    
## age                                             < 2e-16 ***
## hypertensionSi Tiene Hipertension              0.015163 *  
## heart_diseaseSi Tiene Enfermedades del Corazon 0.141408    
## ever_marriedNo                                 0.412664    
## work_typeSelf-employed                         0.023679 *  
## work_typeGovt_job                              0.489185    
## work_typechildren                              0.308112    
## work_typeNever_worked                          0.975481    
## Residence_typeRural                            0.546719    
## avg_glucose_level                              0.000911 ***
## bmi                                            0.753911    
## smoking_statusnever smoked                     0.241459    
## smoking_statussmokes                           0.596923    
## smoking_statusUnknown                          0.729333    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1990.3  on 5108  degrees of freedom
## Residual deviance: 1581.1  on 5093  degrees of freedom
## AIC: 1613.1
## 
## Number of Fisher Scoring iterations: 14

# Predicciones del modelo saturado
Prob_modelo_saturado  = predict(Modelo_saturado,type = "response")

# Buscando el punto de corte para el modelo saturado
roc <- pROC::roc(stroke$stroke , Prob_modelo_saturado , auc = TRUE, ci = TRUE)
print(auc(roc))

## Area under the curve: 0.8469

pROC::plot.roc(roc, legacy.axes = TRUE, print.thres = "best", print.auc = TRUE,
               auc.polygon = FALSE, max.auc.polygon = FALSE, auc.polygon.col = "gainsboro",
               col = 2, grid = TRUE)

# Generar clasificaciones - Convertir probabilidades predichas en clasificaciones
pc=0.040                              # Seleccionamos un punto de corte dicho por la curva ROC
Class_Modelo_saturado = as.factor(ifelse(Prob_modelo_saturado>pc,"Si","No")) 

# Evaluando la bondad de clasificacion
table(stroke$stroke,Class_Modelo_saturado,dnn=c("observado","predicho"))          # Matriz de confusiÃ³n

##          predicho
## observado   No   Si
##        No 3379 1481
##        Si   32  217

caret::confusionMatrix(Class_Modelo_saturado,stroke$stroke,positive = "Si")

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   No   Si
##         No 3379   32
##         Si 1481  217
##                                           
##                Accuracy : 0.7039          
##                  95% CI : (0.6911, 0.7164)
##     No Information Rate : 0.9513          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1507          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.87149         
##             Specificity : 0.69527         
##          Pos Pred Value : 0.12780         
##          Neg Pred Value : 0.99062         
##              Prevalence : 0.04874         
##          Detection Rate : 0.04247         
##    Detection Prevalence : 0.33235         
##       Balanced Accuracy : 0.78338         
##                                           
##        'Positive' Class : Si              
##

ICC_Class_Modelo_saturado=caret::confusionMatrix(Class_Modelo_saturado,stroke$stroke,positive = "Si")
ICC_Class_Modelo_saturado$byClass

##          Sensitivity          Specificity       Pos Pred Value 
##           0.87148594           0.69526749           0.12779741 
##       Neg Pred Value            Precision               Recall 
##           0.99061859           0.12779741           0.87148594 
##                   F1           Prevalence       Detection Rate 
##           0.22290704           0.04873752           0.04247407 
## Detection Prevalence    Balanced Accuracy 
##           0.33235467           0.78337672

Ind_class=list()
Ind_class$Modelo_Saturado=ICC_Class_Modelo_saturado$byClass
data.frame(Ind_class)

##                      Modelo_Saturado
## Sensitivity               0.87148594
## Specificity               0.69526749
## Pos Pred Value            0.12779741
## Neg Pred Value            0.99061859
## Precision                 0.12779741
## Recall                    0.87148594
## F1                        0.22290704
## Prevalence                0.04873752
## Detection Rate            0.04247407
## Detection Prevalence      0.33235467
## Balanced Accuracy         0.78337672

Backward

utilizar un algoritmo backward para eliminar variables que no aportan al modelo y así tener simplicidad y generar una mejor predicción

#### Realizar un backward para generar un modelo con menos variables.
Modelo_RLM1=step(Modelo_saturado,direction = "backward")

## Start:  AIC=1613.06
## stroke ~ gender + age + hypertension + heart_disease + ever_married + 
##     work_type + Residence_type + avg_glucose_level + bmi + smoking_status
## 
##                     Df Deviance    AIC
## - smoking_status     3   1583.9 1609.9
## - gender             1   1581.1 1611.1
## - bmi                1   1581.2 1611.2
## - work_type          4   1587.3 1611.3
## - Residence_type     1   1581.4 1611.4
## - ever_married       1   1581.7 1611.7
## <none>                   1581.1 1613.1
## - heart_disease      1   1583.2 1613.2
## - hypertension       1   1586.7 1616.7
## - avg_glucose_level  1   1591.8 1621.8
## - age                1   1785.5 1815.5
## 
## Step:  AIC=1609.88
## stroke ~ gender + age + hypertension + heart_disease + ever_married + 
##     work_type + Residence_type + avg_glucose_level + bmi
## 
##                     Df Deviance    AIC
## - gender             1   1584.0 1608.0
## - bmi                1   1584.0 1608.0
## - work_type          4   1590.1 1608.1
## - Residence_type     1   1584.4 1608.4
## - ever_married       1   1584.4 1608.4
## <none>                   1583.9 1609.9
## - heart_disease      1   1586.4 1610.4
## - hypertension       1   1589.3 1613.3
## - avg_glucose_level  1   1594.5 1618.5
## - age                1   1790.2 1814.2
## 
## Step:  AIC=1607.97
## stroke ~ age + hypertension + heart_disease + ever_married + 
##     work_type + Residence_type + avg_glucose_level + bmi
## 
##                     Df Deviance    AIC
## - bmi                1   1584.1 1606.1
## - work_type          4   1590.2 1606.2
## - Residence_type     1   1584.5 1606.5
## - ever_married       1   1584.5 1606.5
## <none>                   1584.0 1608.0
## - heart_disease      1   1586.6 1608.6
## - hypertension       1   1589.4 1611.4
## - avg_glucose_level  1   1594.8 1616.8
## - age                1   1790.2 1812.2
## 
## Step:  AIC=1606.06
## stroke ~ age + hypertension + heart_disease + ever_married + 
##     work_type + Residence_type + avg_glucose_level
## 
##                     Df Deviance    AIC
## - work_type          4   1590.2 1604.2
## - Residence_type     1   1584.6 1604.6
## - ever_married       1   1584.6 1604.6
## <none>                   1584.1 1606.1
## - heart_disease      1   1586.7 1606.7
## - hypertension       1   1589.6 1609.6
## - avg_glucose_level  1   1595.6 1615.6
## - age                1   1793.3 1813.3
## 
## Step:  AIC=1604.23
## stroke ~ age + hypertension + heart_disease + ever_married + 
##     Residence_type + avg_glucose_level
## 
##                     Df Deviance    AIC
## - Residence_type     1   1590.8 1602.8
## - ever_married       1   1590.9 1602.9
## <none>                   1590.2 1604.2
## - heart_disease      1   1593.1 1605.1
## - hypertension       1   1595.6 1607.6
## - avg_glucose_level  1   1602.6 1614.6
## - age                1   1810.2 1822.2
## 
## Step:  AIC=1602.76
## stroke ~ age + hypertension + heart_disease + ever_married + 
##     avg_glucose_level
## 
##                     Df Deviance    AIC
## - ever_married       1   1591.5 1601.5
## <none>                   1590.8 1602.8
## - heart_disease      1   1593.6 1603.6
## - hypertension       1   1596.0 1606.0
## - avg_glucose_level  1   1603.2 1613.2
## - age                1   1811.4 1821.4
## 
## Step:  AIC=1601.45
## stroke ~ age + hypertension + heart_disease + avg_glucose_level
## 
##                     Df Deviance    AIC
## <none>                   1591.5 1601.5
## - heart_disease      1   1594.4 1602.4
## - hypertension       1   1596.7 1604.7
## - avg_glucose_level  1   1603.6 1611.6
## - age                1   1847.3 1855.3

summary(Modelo_RLM1)

## 
## Call:
## glm(formula = stroke ~ age + hypertension + heart_disease + avg_glucose_level, 
##     family = "binomial", data = stroke)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0587  -0.3215  -0.1732  -0.0828   3.7706  
## 
## Coefficients:
##                                                 Estimate Std. Error z value
## (Intercept)                                    -7.488996   0.357890 -20.925
## age                                             0.068920   0.005140  13.408
## hypertensionSi Tiene Hipertension               0.381396   0.162599   2.346
## heart_diseaseSi Tiene Enfermedades del Corazon  0.329972   0.187724   1.758
## avg_glucose_level                               0.004121   0.001162   3.547
##                                                Pr(>|z|)    
## (Intercept)                                     < 2e-16 ***
## age                                             < 2e-16 ***
## hypertensionSi Tiene Hipertension               0.01899 *  
## heart_diseaseSi Tiene Enfermedades del Corazon  0.07879 .  
## avg_glucose_level                               0.00039 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1990.3  on 5108  degrees of freedom
## Residual deviance: 1591.4  on 5104  degrees of freedom
## AIC: 1601.4
## 
## Number of Fisher Scoring iterations: 7

# Predicciones del modelo saturado
Prob_modelo_backward = predict(Modelo_RLM1,type = "response")
# Buscando el punto de corte para el modelo saturado
roc <- pROC::roc(stroke$stroke , Prob_modelo_backward , auc = TRUE, ci = TRUE)
print(auc(roc))

## Area under the curve: 0.8442

pROC::plot.roc(roc, legacy.axes = TRUE, print.thres = "best", print.auc = TRUE,
               auc.polygon = FALSE, max.auc.polygon = FALSE, auc.polygon.col = "gainsboro",
               col = 2, grid = TRUE)

# Generar clasificaciones - Convertir probabilidades predichas en clasificaciones
pc=0.032                              # Seleccionamos un punto de corte dicho por la curva ROC
Class_Modelo_backward = as.factor(ifelse(Prob_modelo_backward>pc,"Si","No")) 

# Evaluando la bondad de clasificacion
table(stroke$stroke,Class_Modelo_backward,dnn=c("observado","predicho"))          # Matriz de confusiÃ³n

##          predicho
## observado   No   Si
##        No 3109 1751
##        Si   24  225

caret::confusionMatrix(Class_Modelo_backward,stroke$stroke,positive = "Si")

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   No   Si
##         No 3109   24
##         Si 1751  225
##                                           
##                Accuracy : 0.6526          
##                  95% CI : (0.6393, 0.6656)
##     No Information Rate : 0.9513          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1266          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.90361         
##             Specificity : 0.63971         
##          Pos Pred Value : 0.11387         
##          Neg Pred Value : 0.99234         
##              Prevalence : 0.04874         
##          Detection Rate : 0.04404         
##    Detection Prevalence : 0.38677         
##       Balanced Accuracy : 0.77166         
##                                           
##        'Positive' Class : Si              
##

ICC_Class_Modelo_backward=caret::confusionMatrix(Class_Modelo_backward,stroke$stroke,positive = "Si")
ICC_Class_Modelo_backward$byClass

##          Sensitivity          Specificity       Pos Pred Value 
##           0.90361446           0.63971193           0.11386640 
##       Neg Pred Value            Precision               Recall 
##           0.99233961           0.11386640           0.90361446 
##                   F1           Prevalence       Detection Rate 
##           0.20224719           0.04873752           0.04403993 
## Detection Prevalence    Balanced Accuracy 
##           0.38676845           0.77166320

Ind_class$ModeloBackward=ICC_Class_Modelo_backward$byClass

Comparación entre los dos modelos

data.frame(Ind_class)

##                      Modelo_Saturado ModeloBackward
## Sensitivity               0.87148594     0.90361446
## Specificity               0.69526749     0.63971193
## Pos Pred Value            0.12779741     0.11386640
## Neg Pred Value            0.99061859     0.99233961
## Precision                 0.12779741     0.11386640
## Recall                    0.87148594     0.90361446
## F1                        0.22290704     0.20224719
## Prevalence                0.04873752     0.04873752
## Detection Rate            0.04247407     0.04403993
## Detection Prevalence      0.33235467     0.38676845
## Balanced Accuracy         0.78337672     0.77166320

Preducción De Accidentes Cardiovasculares

Predicción de Accidentes Cardiovasculares

Autores:

Jorge Leonardo Solis: jorge.solis@correounivalle.edu.co

Héctor Fabio Ocampo: hector.ocampo@correounivalle.edu.co

Cargando los Datos

Revisando los Datos

Revisando la Columna Gender

Revisando las demás Columnas

Graficando las relaciones entre las variables.

Creando un Primer Modelo Saturado.

Backward

Comparación entre los dos modelos