CLASE 10. Regresión Logística Binaria Múltiple en R

Autor/a

Gerson Rivera

Fecha de publicación

25 julio 2024

GENERALIDADES

NOTA.

El contenido del documento, define los pasos para la construcción de un modelo de Regresión Logística Binaria Múltiple, sabiendo que cada código tiene su interpretación formal, respecto a la estructura en R.

library(readr)
misdatos <- read_csv("heart_failure_clinical_records_dataset.csv")
Rows: 299 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (13): age, anaemia, creatinine_phosphokinase, diabetes, ejection_fractio...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(misdatos)
misdatos=as.data.frame(misdatos)
str(misdatos)
'data.frame':   299 obs. of  13 variables:
 $ age                     : num  75 55 65 50 65 90 75 60 65 80 ...
 $ anaemia                 : num  0 0 0 1 1 1 1 1 0 1 ...
 $ creatinine_phosphokinase: num  582 7861 146 111 160 ...
 $ diabetes                : num  0 0 0 0 1 0 0 1 0 0 ...
 $ ejection_fraction       : num  20 38 20 20 20 40 15 60 65 35 ...
 $ high_blood_pressure     : num  1 0 0 0 0 1 0 0 0 1 ...
 $ platelets               : num  265000 263358 162000 210000 327000 ...
 $ serum_creatinine        : num  1.9 1.1 1.3 1.9 2.7 2.1 1.2 1.1 1.5 9.4 ...
 $ serum_sodium            : num  130 136 129 137 116 132 137 131 138 133 ...
 $ sex                     : num  1 1 1 1 0 1 1 1 0 1 ...
 $ smoking                 : num  0 0 1 0 0 1 0 1 0 1 ...
 $ time                    : num  4 6 7 7 8 8 10 10 10 10 ...
 $ DEATH_EVENT             : num  1 1 1 1 1 1 1 1 1 1 ...
head(misdatos)
  age anaemia creatinine_phosphokinase diabetes ejection_fraction
1  75       0                      582        0                20
2  55       0                     7861        0                38
3  65       0                      146        0                20
4  50       1                      111        0                20
5  65       1                      160        1                20
6  90       1                       47        0                40
  high_blood_pressure platelets serum_creatinine serum_sodium sex smoking time
1                   1    265000              1.9          130   1       0    4
2                   0    263358              1.1          136   1       0    6
3                   0    162000              1.3          129   1       1    7
4                   0    210000              1.9          137   1       0    7
5                   0    327000              2.7          116   0       0    8
6                   1    204000              2.1          132   1       1    8
  DEATH_EVENT
1           1
2           1
3           1
4           1
5           1
6           1
set.seed(2021)
n=nrow(misdatos)
n
[1] 299
d_ind=sample(n,n*0.70) #0.7*299 
records=misdatos[d_ind,] #training
d_test=misdatos[-d_ind,] #test
names(records)
 [1] "age"                      "anaemia"                 
 [3] "creatinine_phosphokinase" "diabetes"                
 [5] "ejection_fraction"        "high_blood_pressure"     
 [7] "platelets"                "serum_creatinine"        
 [9] "serum_sodium"             "sex"                     
[11] "smoking"                  "time"                    
[13] "DEATH_EVENT"             
cor(records[c(1,3,5,7,8,9)])
                                 age creatinine_phosphokinase ejection_fraction
age                       1.00000000             -0.018732424        0.02564676
creatinine_phosphokinase -0.01873242              1.000000000       -0.00801319
ejection_fraction         0.02564676             -0.008013190        1.00000000
platelets                -0.10971729             -0.002011334        0.04137405
serum_creatinine          0.13568621              0.054162850        0.05174527
serum_sodium             -0.07268846              0.072730561        0.10135513
                            platelets serum_creatinine serum_sodium
age                      -0.109717290       0.13568621  -0.07268846
creatinine_phosphokinase -0.002011334       0.05416285   0.07273056
ejection_fraction         0.041374045       0.05174527   0.10135513
platelets                 1.000000000      -0.08736598   0.07051430
serum_creatinine         -0.087365981       1.00000000  -0.14122197
serum_sodium              0.070514305      -0.14122197   1.00000000
library(MASS)
my_model=glm(data = records,DEATH_EVENT~.,
             family = binomial())
summary(my_model)

Call:
glm(formula = DEATH_EVENT ~ ., family = binomial(), data = records)

Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
(Intercept)               1.102e+01  7.542e+00   1.461  0.14410    
age                       5.624e-02  1.927e-02   2.918  0.00352 ** 
anaemia                   4.698e-01  4.160e-01   1.129  0.25874    
creatinine_phosphokinase  4.584e-04  3.483e-04   1.316  0.18807    
diabetes                  1.068e-01  4.252e-01   0.251  0.80174    
ejection_fraction        -7.459e-02  1.905e-02  -3.916 9.01e-05 ***
high_blood_pressure      -2.212e-01  4.511e-01  -0.490  0.62391    
platelets                -1.896e-06  2.294e-06  -0.826  0.40870    
serum_creatinine          5.489e-01  1.926e-01   2.849  0.00438 ** 
serum_sodium             -7.284e-02  5.314e-02  -1.371  0.17043    
sex                      -1.338e+00  5.321e-01  -2.514  0.01195 *  
smoking                   2.293e-01  5.223e-01   0.439  0.66061    
time                     -2.250e-02  3.911e-03  -5.754 8.73e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 259.12  on 208  degrees of freedom
Residual deviance: 153.24  on 196  degrees of freedom
AIC: 179.24

Number of Fisher Scoring iterations: 6
library(carData)
library(car)

vif es factor de inflacion de la varianza para detectar multicolinealidad

vif(my_model)
                     age                  anaemia creatinine_phosphokinase 
                1.129135                 1.046886                 1.150944 
                diabetes        ejection_fraction      high_blood_pressure 
                1.077959                 1.220016                 1.076420 
               platelets         serum_creatinine             serum_sodium 
                1.059796                 1.116415                 1.096600 
                     sex                  smoking                     time 
                1.624757                 1.451073                 1.269458 

Quitar dos variables con el mayor p-valor

my_model2=glm(data=records,DEATH_EVENT~.-high_blood_pressure-smoking,
              family = binomial(link = "logit"))
summary(my_model2)

Call:
glm(formula = DEATH_EVENT ~ . - high_blood_pressure - smoking, 
    family = binomial(link = "logit"), data = records)

Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
(Intercept)               1.043e+01  7.403e+00   1.409  0.15877    
age                       5.516e-02  1.918e-02   2.876  0.00403 ** 
anaemia                   4.404e-01  4.110e-01   1.072  0.28387    
creatinine_phosphokinase  4.491e-04  3.389e-04   1.325  0.18509    
diabetes                  8.655e-02  4.237e-01   0.204  0.83815    
ejection_fraction        -7.481e-02  1.905e-02  -3.928 8.57e-05 ***
platelets                -1.913e-06  2.283e-06  -0.838  0.40205    
serum_creatinine          5.381e-01  1.888e-01   2.850  0.00437 ** 
serum_sodium             -6.842e-02  5.201e-02  -1.316  0.18831    
sex                      -1.197e+00  4.562e-01  -2.624  0.00870 ** 
time                     -2.232e-02  3.887e-03  -5.742 9.35e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 259.12  on 208  degrees of freedom
Residual deviance: 153.64  on 198  degrees of freedom
AIC: 175.64

Number of Fisher Scoring iterations: 6
my_model2$aic
[1] 175.6393

Quitamos de nuevo dos variables con el mayor p-valor

my_model2=glm(data=records,DEATH_EVENT~.-high_blood_pressure-smoking,
              family = binomial(link = "logit"))
summary(my_model2)

Call:
glm(formula = DEATH_EVENT ~ . - high_blood_pressure - smoking, 
    family = binomial(link = "logit"), data = records)

Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
(Intercept)               1.043e+01  7.403e+00   1.409  0.15877    
age                       5.516e-02  1.918e-02   2.876  0.00403 ** 
anaemia                   4.404e-01  4.110e-01   1.072  0.28387    
creatinine_phosphokinase  4.491e-04  3.389e-04   1.325  0.18509    
diabetes                  8.655e-02  4.237e-01   0.204  0.83815    
ejection_fraction        -7.481e-02  1.905e-02  -3.928 8.57e-05 ***
platelets                -1.913e-06  2.283e-06  -0.838  0.40205    
serum_creatinine          5.381e-01  1.888e-01   2.850  0.00437 ** 
serum_sodium             -6.842e-02  5.201e-02  -1.316  0.18831    
sex                      -1.197e+00  4.562e-01  -2.624  0.00870 ** 
time                     -2.232e-02  3.887e-03  -5.742 9.35e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 259.12  on 208  degrees of freedom
Residual deviance: 153.64  on 198  degrees of freedom
AIC: 175.64

Number of Fisher Scoring iterations: 6
my_model3=glm(data=records,DEATH_EVENT~.-high_blood_pressure-smoking-platelets-diabetes,
              family = binomial(link = "logit"))
summary(my_model3)

Call:
glm(formula = DEATH_EVENT ~ . - high_blood_pressure - smoking - 
    platelets - diabetes, family = binomial(link = "logit"), 
    data = records)

Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
(Intercept)              10.1176501  7.2122510   1.403  0.16066    
age                       0.0543893  0.0187074   2.907  0.00364 ** 
anaemia                   0.4637488  0.4101194   1.131  0.25815    
creatinine_phosphokinase  0.0004687  0.0003342   1.403  0.16075    
ejection_fraction        -0.0738926  0.0188889  -3.912 9.16e-05 ***
serum_creatinine          0.5413446  0.1876921   2.884  0.00392 ** 
serum_sodium             -0.0699922  0.0511068  -1.370  0.17083    
sex                      -1.1717596  0.4515627  -2.595  0.00946 ** 
time                     -0.0219631  0.0038254  -5.741 9.39e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 259.12  on 208  degrees of freedom
Residual deviance: 154.39  on 200  degrees of freedom
AIC: 172.39

Number of Fisher Scoring iterations: 6
my_model3$aic
[1] 172.388
my_model$aic
[1] 179.2415
my_model4=glm(data=records,DEATH_EVENT~.-high_blood_pressure-smoking-platelets-diabetes-creatinine_phosphokinase-serum_sodium ,
              family = binomial(link = "logit"))
summary(my_model4)

Call:
glm(formula = DEATH_EVENT ~ . - high_blood_pressure - smoking - 
    platelets - diabetes - creatinine_phosphokinase - serum_sodium, 
    family = binomial(link = "logit"), data = records)

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)        0.708705   1.385597   0.511  0.60902    
age                0.053327   0.017994   2.964  0.00304 ** 
anaemia            0.401068   0.403233   0.995  0.31992    
ejection_fraction -0.073797   0.018512  -3.986 6.71e-05 ***
serum_creatinine   0.569549   0.176790   3.222  0.00127 ** 
sex               -1.077485   0.441182  -2.442  0.01460 *  
time              -0.021078   0.003659  -5.761 8.37e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 259.12  on 208  degrees of freedom
Residual deviance: 158.21  on 202  degrees of freedom
AIC: 172.21

Number of Fisher Scoring iterations: 5
my_model5=glm(data=records,DEATH_EVENT~.-high_blood_pressure-smoking-platelets-diabetes-creatinine_phosphokinase-serum_sodium-anaemia-sex-1 ,family = binomial(link = "logit"))
summary(my_model5)

Call:
glm(formula = DEATH_EVENT ~ . - high_blood_pressure - smoking - 
    platelets - diabetes - creatinine_phosphokinase - serum_sodium - 
    anaemia - sex - 1, family = binomial(link = "logit"), data = records)

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
age                0.047627   0.010969   4.342 1.41e-05 ***
ejection_fraction -0.062928   0.015835  -3.974 7.07e-05 ***
serum_creatinine   0.612431   0.178637   3.428 0.000607 ***
time              -0.020307   0.003297  -6.159 7.31e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 289.74  on 209  degrees of freedom
Residual deviance: 165.67  on 205  degrees of freedom
AIC: 173.67

Number of Fisher Scoring iterations: 5
library(zoo)
Warning: package 'zoo' was built under R version 4.3.3

Attaching package: 'zoo'
The following objects are masked from 'package:base':

    as.Date, as.Date.numeric
library(lmtest)
Warning: package 'lmtest' was built under R version 4.3.3
waldtest(my_model,my_model5)
Wald test

Model 1: DEATH_EVENT ~ age + anaemia + creatinine_phosphokinase + diabetes + 
    ejection_fraction + high_blood_pressure + platelets + serum_creatinine + 
    serum_sodium + sex + smoking + time
Model 2: DEATH_EVENT ~ (age + anaemia + creatinine_phosphokinase + diabetes + 
    ejection_fraction + high_blood_pressure + platelets + serum_creatinine + 
    serum_sodium + sex + smoking + time) - high_blood_pressure - 
    smoking - platelets - diabetes - creatinine_phosphokinase - 
    serum_sodium - anaemia - sex - 1
  Res.Df Df      F Pr(>F)
1    196                 
2    205 -9 1.1655 0.3191

Bondad de Ajuste

library(ResourceSelection)
Warning: package 'ResourceSelection' was built under R version 4.3.3
ResourceSelection 0.3-6      2023-06-27
hoslem.test(records$DEATH_EVENT,fitted(my_model5))

    Hosmer and Lemeshow goodness of fit (GOF) test

data:  records$DEATH_EVENT, fitted(my_model5)
X-squared = 8.2596, df = 8, p-value = 0.4085
coef(my_model5)
              age ejection_fraction  serum_creatinine              time 
       0.04762671       -0.06292788        0.61243102       -0.02030746 
my_model5$coefficients
              age ejection_fraction  serum_creatinine              time 
       0.04762671       -0.06292788        0.61243102       -0.02030746 
confint(my_model5)
Waiting for profiling to be done...
                        2.5 %      97.5 %
age                0.02712785  0.07051571
ejection_fraction -0.09572358 -0.03334280
serum_creatinine   0.26755137  0.98855160
time              -0.02731683 -0.01431199

Pronósticos

y=predict(my_model5,type='response')
y
        166         231          70         192         251         102 
0.395337332 0.100403958 0.782907081 0.034144461 0.028029152 0.424621188 
        110         103          23         146         123         188 
0.259192557 0.758388640 0.765159807 0.205505369 0.268245654 0.284474942 
        125          26         164         101         159         240 
0.658578649 0.892393393 0.138394211 0.605745862 0.318827534 0.022433962 
         68         133          73         191         274         171 
0.759213560 0.144733928 0.730288932 0.345307371 0.006284166 0.141918277 
        242         297         198         278          79         162 
0.091030220 0.001125953 0.111120945 0.032947115 0.511360771 0.116445356 
        150         114          88          17         230         148 
0.244337847 0.186439489 0.142548706 0.882792890 0.166200267 0.082422799 
         67         130         169         121         201         155 
0.630084525 0.567515754 0.160839958 0.126902100 0.039934661 0.258517279 
        293         126         207         210         279          19 
0.010544370 0.093459443 0.019462792 0.040066777 0.016734066 0.887802844 
         16         236         226         284         127         106 
0.784343438 0.045229512 0.029048934 0.024157346 0.593008011 0.688981827 
         44          22          38         120          27         245 
0.556017419 0.855899307 0.681832007 0.710163515 0.894019698 0.045541110 
         13          33         174         149         115          89 
0.669440978 0.555441613 0.205516540 0.624847107 0.315922634 0.168383525 
         95         173          75         137         291         214 
0.291992370 0.061052966 0.782476760 0.090986708 0.002359596 0.073019672 
        285          94          46         176           7          29 
0.008812797 0.659333870 0.608957966 0.036596226 0.959298131 0.967554059 
        129         147         109         113         227         256 
0.339419859 0.221494096 0.376563903 0.490090614 0.101769551 0.039725469 
        259         235         218         253         138         275 
0.026319711 0.020270242 0.425192989 0.014714604 0.680988745 0.032510885 
         65         289          37         276         157          80 
0.030836646 0.025786636 0.758269882 0.008719470 0.213261305 0.237968571 
        197         187         220          76          58         205 
0.036184536 0.020026246 0.040899673 0.633045075 0.404040780 0.102382882 
        223          64         172          57         139         263 
0.026332184 0.335013147 0.079130007 0.844007734 0.303561812 0.083761035 
        225          20         151          45           1         143 
0.086160350 0.421664523 0.448455981 0.286071315 0.967574113 0.237067347 
        117         258         219         209         228         132 
0.083304779 0.024430386 0.091891616 0.054962390 0.040937434 0.830444338 
        239         249          85         272          39         128 
0.044213115 0.017450683 0.565921143 0.011074388 0.854349946 0.066897466 
        141         165         112         193         158         136 
0.469796577 0.155694064 0.337208199 0.036937801 0.265743863 0.402432254 
        215          71         202         267         262          53 
0.071994962 0.189174633 0.008185057 0.082229261 0.020488416 0.904424693 
        177          43         287          77         118          69 
0.167473913 0.638044435 0.032907162 0.189293127 0.580171199 0.760477293 
         48         232         222         156         252         260 
0.505375720 0.081744096 0.031114435 0.472189689 0.030496720 0.007506146 
        216         244          14         108         264         189 
0.131299292 0.056536818 0.608346786 0.214983764 0.008684507 0.075285552 
        154          93         175         277         281         269 
0.180190295 0.061925716 0.179396727 0.040300421 0.026942039 0.009453343 
        255          41          83          49         211          84 
0.006658840 0.928676190 0.777612889 0.988034265 0.280642932 0.533577040 
        134          24         160         163          56         145 
0.104151261 0.230006195 0.141206330 0.187969008 0.945121432 0.655415823 
        208          63         105          74         243         229 
0.169483091 0.468098441 0.343677511 0.328134597 0.019285768 0.594126598 
        282          11          32         182         196         107 
0.068718354 0.968549768 0.923097769 0.232025677 0.152198121 0.230909790 
        194         237         206          72         119           8 
0.206892447 0.041289750 0.025693645 0.418053795 0.115306305 0.389962353 
        204         296         180           9          28          50 
0.408642548 0.010558667 0.079136308 0.430698001 0.683581841 0.642539655 
        299          31         233          82         292         153 
0.005177953 0.934508416 0.018155176 0.315160545 0.023504159 0.091495439 
        221          15          81          92         111 
0.335829566 0.693091861 0.582933699 0.271640102 0.305730548 
pred=ifelse(as.double(y)>0.5,1,0)
pred
  [1] 0 0 1 0 0 0 0 1 1 0 0 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1
 [38] 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 1 1 1 1 0 1 1 0 1 0 0 0 0 1 0 0 0 0 1
 [75] 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0
[112] 0 1 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 1 1 0 0 0
[149] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 0 1 1 0 0 0 0 0 1 0 1 1 0 0
[186] 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0
data.frame(y=y,pred=pred)
              y pred
166 0.395337332    0
231 0.100403958    0
70  0.782907081    1
192 0.034144461    0
251 0.028029152    0
102 0.424621188    0
110 0.259192557    0
103 0.758388640    1
23  0.765159807    1
146 0.205505369    0
123 0.268245654    0
188 0.284474942    0
125 0.658578649    1
26  0.892393393    1
164 0.138394211    0
101 0.605745862    1
159 0.318827534    0
240 0.022433962    0
68  0.759213560    1
133 0.144733928    0
73  0.730288932    1
191 0.345307371    0
274 0.006284166    0
171 0.141918277    0
242 0.091030220    0
297 0.001125953    0
198 0.111120945    0
278 0.032947115    0
79  0.511360771    1
162 0.116445356    0
150 0.244337847    0
114 0.186439489    0
88  0.142548706    0
17  0.882792890    1
230 0.166200267    0
148 0.082422799    0
67  0.630084525    1
130 0.567515754    1
169 0.160839958    0
121 0.126902100    0
201 0.039934661    0
155 0.258517279    0
293 0.010544370    0
126 0.093459443    0
207 0.019462792    0
210 0.040066777    0
279 0.016734066    0
19  0.887802844    1
16  0.784343438    1
236 0.045229512    0
226 0.029048934    0
284 0.024157346    0
127 0.593008011    1
106 0.688981827    1
44  0.556017419    1
22  0.855899307    1
38  0.681832007    1
120 0.710163515    1
27  0.894019698    1
245 0.045541110    0
13  0.669440978    1
33  0.555441613    1
174 0.205516540    0
149 0.624847107    1
115 0.315922634    0
89  0.168383525    0
95  0.291992370    0
173 0.061052966    0
75  0.782476760    1
137 0.090986708    0
291 0.002359596    0
214 0.073019672    0
285 0.008812797    0
94  0.659333870    1
46  0.608957966    1
176 0.036596226    0
7   0.959298131    1
29  0.967554059    1
129 0.339419859    0
147 0.221494096    0
109 0.376563903    0
113 0.490090614    0
227 0.101769551    0
256 0.039725469    0
259 0.026319711    0
235 0.020270242    0
218 0.425192989    0
253 0.014714604    0
138 0.680988745    1
275 0.032510885    0
65  0.030836646    0
289 0.025786636    0
37  0.758269882    1
276 0.008719470    0
157 0.213261305    0
80  0.237968571    0
197 0.036184536    0
187 0.020026246    0
220 0.040899673    0
76  0.633045075    1
58  0.404040780    0
205 0.102382882    0
223 0.026332184    0
64  0.335013147    0
172 0.079130007    0
57  0.844007734    1
139 0.303561812    0
263 0.083761035    0
225 0.086160350    0
20  0.421664523    0
151 0.448455981    0
45  0.286071315    0
1   0.967574113    1
143 0.237067347    0
117 0.083304779    0
258 0.024430386    0
219 0.091891616    0
209 0.054962390    0
228 0.040937434    0
132 0.830444338    1
239 0.044213115    0
249 0.017450683    0
85  0.565921143    1
272 0.011074388    0
39  0.854349946    1
128 0.066897466    0
141 0.469796577    0
165 0.155694064    0
112 0.337208199    0
193 0.036937801    0
158 0.265743863    0
136 0.402432254    0
215 0.071994962    0
71  0.189174633    0
202 0.008185057    0
267 0.082229261    0
262 0.020488416    0
53  0.904424693    1
177 0.167473913    0
43  0.638044435    1
287 0.032907162    0
77  0.189293127    0
118 0.580171199    1
69  0.760477293    1
48  0.505375720    1
232 0.081744096    0
222 0.031114435    0
156 0.472189689    0
252 0.030496720    0
260 0.007506146    0
216 0.131299292    0
244 0.056536818    0
14  0.608346786    1
108 0.214983764    0
264 0.008684507    0
189 0.075285552    0
154 0.180190295    0
93  0.061925716    0
175 0.179396727    0
277 0.040300421    0
281 0.026942039    0
269 0.009453343    0
255 0.006658840    0
41  0.928676190    1
83  0.777612889    1
49  0.988034265    1
211 0.280642932    0
84  0.533577040    1
134 0.104151261    0
24  0.230006195    0
160 0.141206330    0
163 0.187969008    0
56  0.945121432    1
145 0.655415823    1
208 0.169483091    0
63  0.468098441    0
105 0.343677511    0
74  0.328134597    0
243 0.019285768    0
229 0.594126598    1
282 0.068718354    0
11  0.968549768    1
32  0.923097769    1
182 0.232025677    0
196 0.152198121    0
107 0.230909790    0
194 0.206892447    0
237 0.041289750    0
206 0.025693645    0
72  0.418053795    0
119 0.115306305    0
8   0.389962353    0
204 0.408642548    0
296 0.010558667    0
180 0.079136308    0
9   0.430698001    0
28  0.683581841    1
50  0.642539655    1
299 0.005177953    0
31  0.934508416    1
233 0.018155176    0
82  0.315160545    0
292 0.023504159    0
153 0.091495439    0
221 0.335829566    0
15  0.693091861    1
81  0.582933699    1
92  0.271640102    0
111 0.305730548    0
nd=data.frame(age=50.0,ejection_fraction=35.0,serum_creatinine=0.75,time=67)
nd
  age ejection_fraction serum_creatinine time
1  50                35             0.75   67
#predict(my_model5, newdata = nd)
#rm(list = ls())
str(records)
'data.frame':   209 obs. of  13 variables:
 $ age                     : num  80 60 65 64 50 75 45 80 68 50 ...
 $ anaemia                 : num  0 0 0 1 0 0 0 0 1 0 ...
 $ creatinine_phosphokinase: num  776 166 113 62 2522 ...
 $ diabetes                : num  1 0 1 0 0 0 1 0 0 0 ...
 $ ejection_fraction       : num  38 30 25 60 30 45 35 25 35 30 ...
 $ high_blood_pressure     : num  1 0 0 0 1 1 0 0 1 0 ...
 $ platelets               : num  192000 62000 497000 309000 404000 ...
 $ serum_creatinine        : num  1.3 1.7 1.83 1.5 0.5 1.18 1.3 1.1 0.9 0.7 ...
 $ serum_sodium            : num  135 127 135 135 139 137 142 144 140 141 ...
 $ sex                     : num  0 0 1 0 0 1 1 1 1 1 ...
 $ smoking                 : num  0 0 0 0 0 0 1 1 1 1 ...
 $ time                    : num  130 207 67 174 214 87 88 87 20 112 ...
 $ DEATH_EVENT             : num  1 1 1 0 0 0 0 0 1 0 ...
head(records)
    age anaemia creatinine_phosphokinase diabetes ejection_fraction
166  80       0                      776        1                38
231  60       0                      166        0                30
70   65       0                      113        1                25
192  64       1                       62        0                60
251  50       0                     2522        0                30
102  75       0                      582        0                45
    high_blood_pressure platelets serum_creatinine serum_sodium sex smoking
166                   1    192000             1.30          135   0       0
231                   0     62000             1.70          127   0       0
70                    0    497000             1.83          135   1       0
192                   0    309000             1.50          135   0       0
251                   1    404000             0.50          139   0       0
102                   1    263358             1.18          137   1       0
    time DEATH_EVENT
166  130           1
231  207           1
70    67           1
192  174           0
251  214           0
102   87           0
library(MLmetrics)

Attaching package: 'MLmetrics'
The following object is masked from 'package:base':

    Recall
Accuracy(pred,records$DEATH_EVENT)
[1] 0.8277512
pred2=ifelse(my_model$fitted.values>0.5,1,0)
pred2
166 231  70 192 251 102 110 103  23 146 123 188 125  26 164 101 159 240  68 133 
  1   0   1   0   0   0   0   1   1   0   0   0   1   1   1   0   0   0   1   0 
 73 191 274 171 242 297 198 278  79 162 150 114  88  17 230 148  67 130 169 121 
  1   0   0   0   0   0   0   0   1   0   0   0   0   1   0   0   1   0   0   0 
201 155 293 126 207 210 279  19  16 236 226 284 127 106  44  22  38 120  27 245 
  0   0   0   0   0   0   0   1   1   0   0   0   1   1   0   1   1   1   1   0 
 13  33 174 149 115  89  95 173  75 137 291 214 285  94  46 176   7  29 129 147 
  1   1   0   1   0   0   0   0   1   0   0   0   0   1   1   0   1   1   0   0 
109 113 227 256 259 235 218 253 138 275  65 289  37 276 157  80 197 187 220  76 
  0   0   0   0   0   0   0   0   1   0   0   0   1   0   0   0   0   0   0   1 
 58 205 223  64 172  57 139 263 225  20 151  45   1 143 117 258 219 209 228 132 
  0   0   0   0   0   1   0   0   0   1   1   0   1   0   0   0   0   0   0   1 
239 249  85 272  39 128 141 165 112 193 158 136 215  71 202 267 262  53 177  43 
  0   0   1   0   1   0   0   0   0   0   0   0   0   0   0   0   0   1   0   1 
287  77 118  69  48 232 222 156 252 260 216 244  14 108 264 189 154  93 175 277 
  0   0   1   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0 
281 269 255  41  83  49 211  84 134  24 160 163  56 145 208  63 105  74 243 229 
  0   0   0   1   1   1   0   0   0   0   0   0   1   1   0   0   0   0   0   1 
282  11  32 182 196 107 194 237 206  72 119   8 204 296 180   9  28  50 299  31 
  0   1   1   0   0   0   0   0   0   0   0   0   0   0   0   0   1   1   0   1 
233  82 292 153 221  15  81  92 111 
  0   0   0   0   0   1   1   0   0 
Accuracy(pred2,records$DEATH_EVENT)
[1] 0.8564593