Load libraries

library("stats")
library("psych")
library("readxl")
library("MASS")
library("ISLR")
library("fRegression")
library("vcd")
library("openxlsx")

Heart disease data set

# Read the Framingham CSV into `db`, the data frame used by the models below.
# NOTE(review): the path is absolute and machine-specific; confirm it exists
# on the machine running this script.
db <- read.csv(
  file = "C:\\Users\\alfon\\OneDrive\\Desktop\\framingham.csv"
)

Modelo logit

# Binomial GLM with a logit link: `male` is the response and age,
# cigsPerDay and glucose are the predictors. Rows with missing values in
# any model variable are dropped by glm()'s default NA handling.
modelo_logit1 <- glm(
  formula = male ~ age + cigsPerDay + glucose,
  family = binomial(link = "logit"),
  data = db
)
# Coefficient table, deviances and AIC for the fitted logit model.
summary(modelo_logit1)
## 
## Call:
## glm(formula = male ~ age + cigsPerDay + glucose, family = binomial(link = "logit"), 
##     data = db)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1473  -0.9121  -0.8348   1.0420   1.6104  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.431766   0.234957  -6.094  1.1e-09 ***
## age          0.009876   0.004121   2.396   0.0166 *  
## cigsPerDay   0.062639   0.003278  19.108  < 2e-16 ***
## glucose      0.001844   0.001431   1.289   0.1975    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 5251.6  on 3824  degrees of freedom
## Residual deviance: 4817.5  on 3821  degrees of freedom
##   (413 observations deleted due to missingness)
## AIC: 4825.5
## 
## Number of Fisher Scoring iterations: 4

Modelo probit

# Same specification as the logit model (male ~ age + cigsPerDay + glucose),
# but fitted with a probit link so the two links can be compared.
modelo_probit1 <- glm(
  formula = male ~ age + cigsPerDay + glucose,
  family = binomial(link = "probit"),
  data = db
)
# Coefficient table, deviances and AIC for the fitted probit model.
summary(modelo_probit1)
## 
## Call:
## glm(formula = male ~ age + cigsPerDay + glucose, family = binomial(link = "probit"), 
##     data = db)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2059  -0.9132  -0.8376   1.0430   1.6076  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.8692777  0.1431236  -6.074 1.25e-09 ***
## age          0.0058408  0.0025135   2.324   0.0201 *  
## cigsPerDay   0.0385157  0.0019431  19.822  < 2e-16 ***
## glucose      0.0011311  0.0008793   1.286   0.1983    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 5251.6  on 3824  degrees of freedom
## Residual deviance: 4817.5  on 3821  degrees of freedom
##   (413 observations deleted due to missingness)
## AIC: 4825.5
## 
## Number of Fisher Scoring iterations: 4

Criterios de información

# Akaike information criterion of the logit fit; stored and then displayed.
CIA_Logit1 <- AIC(modelo_logit1)
print(CIA_Logit1)
## [1] 4825.536
# Akaike information criterion of the probit fit; stored and then displayed
# for comparison against the logit model's AIC.
CIA_Probit1 <- AIC(modelo_probit1)
print(CIA_Probit1)
## [1] 4825.496
# Predicted probability that male == 1 under the probit model for a single
# hypothetical observation (age = 32, cigsPerDay = 10, glucose = 98).
# type = "response" asks for the value on the probability scale instead of
# the linear predictor. The new observation is passed explicitly as
# `newdata` in a single data frame (the previous nested data.frame() call
# was redundant).
predict(
  modelo_probit1,
  newdata = data.frame(age = 32, cigsPerDay = 10, glucose = 98),
  type = "response"
)
##         1 
## 0.4260776
# Turn each in-sample fitted probability from the probit model into a class
# label using a 0.5 cutoff: 1 when the probability exceeds 0.5, otherwise 0.
predicciones <- ifelse(modelo_probit1$fitted.values > 0.5, 1, 0)

# Confusion matrix: predicted classes (rows) against the observed `male`
# values kept in the model frame (columns), i.e. only the rows actually
# used in the fit.
matriz_confusion <- table(
  predicciones,
  modelo_probit1$model$male,
  dnn = c("predicciones", "observaciones")
)
print(matriz_confusion)
##             observaciones
## predicciones    0    1
##            0 1690  833
##            1  443  859