Autores:

library(readxl)
rm(list = ls())
getwd()
## [1] "C:/Users/Embag/OneDrive/Documents"
setwd("C:/Users/Embag/OneDrive/Documents")

Importación de la Tabla de Contingencia

diabetes=read_excel("TC.xlsx")
str(diabetes)
## tibble [8 × 6] (S3: tbl_df/tbl/data.frame)
##  $ Sexo            : chr [1:8] "M" "M" "M" "M" ...
##  $ Fuma            : chr [1:8] "F_no" "F_no" "F_Si" "F_Si" ...
##  $ Actividad_Fisica: chr [1:8] "AF_no" "AF_Si" "AF_no" "AF_Si" ...
##  $ No_Diabetes     : num [1:8] 14780 58582 13112 34485 8256 ...
##  $ Pre_Diabetes    : num [1:8] 469 993 398 744 255 632 367 773
##  $ Diabetes        : num [1:8] 3872 6219 3689 4631 2031 ...

Despúes de importar los datos, escogemos las categorías de referencia que nos serán de ayuda al momento de la interpretación de los Chances.

diabetes<-within(diabetes, Actividad_Fisica<-relevel(factor(Actividad_Fisica),ref='AF_no'))
diabetes<-within(diabetes, Fuma<-relevel(factor(Fuma),ref='F_no'))
diabetes<-within(diabetes, Sexo<-relevel(factor(Sexo),ref='M'))

Después, ajustamos diferentes Modelos Multinomiales empezando por el modelo nulo hasta terminar con el modelo saturado. Para cada uno de los modelos se obtuvo el Desvío y AIC.

Modelo 1 | Modelo Nulo

library(VGAM)
## Loading required package: stats4
## Loading required package: splines
# Modelo 1
fit1<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~1,family=multinomial() ,data=diabetes)
summary(fit1)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ 1, 
##     family = multinomial(), data = diabetes)
## 
## Coefficients: 
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1 -3.831814   0.014853  -258.0   <2e-16 ***
## (Intercept):2 -1.799402   0.005742  -313.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
## 
## Residual deviance: 4756.143 on 14 degrees of freedom
## 
## Log-likelihood: -2450.394 on 14 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
## 
## 
## Reference group is level  3  of the response
AIC(fit1)
## [1] 4904.789

Modelo 2. Agregando la variable Sexo

# Modelo 2
fit2<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo,family=multinomial() ,data=diabetes)
summary(fit2)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo, 
##     family = multinomial(), data = diabetes)
## 
## Coefficients: 
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept):1 -3.838403   0.019806 -193.796   <2e-16 ***
## (Intercept):2 -1.882503   0.007911 -237.963   <2e-16 ***
## SexoH:1        0.015117   0.029940    0.505    0.614    
## SexoH:2        0.182043   0.011507   15.820   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
## 
## Residual deviance: 4506.378 on 12 degrees of freedom
## 
## Log-likelihood: -2325.512 on 12 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
## 
## 
## Reference group is level  3  of the response
AIC(fit2)
## [1] 4659.023

Modelo 3. Agregando la variable Fuma

# Modelo 3
fit3<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo+Fuma,family=multinomial() ,data=diabetes)
summary(fit3)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo + 
##     Fuma, family = multinomial(), data = diabetes)
## 
## Coefficients: 
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept):1 -3.945688   0.023943 -164.796   <2e-16 ***
## (Intercept):2 -2.030125   0.009556 -212.437   <2e-16 ***
## SexoH:1       -0.008517   0.030073   -0.283    0.777    
## SexoH:2        0.150288   0.011576   12.983   <2e-16 ***
## FumaF_Si:1     0.255000   0.029844    8.545   <2e-16 ***
## FumaF_Si:2     0.342160   0.011559   29.600   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
## 
## Residual deviance: 3582.52 on 10 degrees of freedom
## 
## Log-likelihood: -1863.583 on 10 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
## 
## 
## Reference group is level  3  of the response
AIC(fit3)
## [1] 3739.166

Modelo 4. Agregando la variable Actividad_Fisica

# Modelo 4
fit4<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo+Fuma+Actividad_Fisica,family=multinomial() ,data=diabetes)
summary(fit4)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo + 
##     Fuma + Actividad_Fisica, family = multinomial(), data = diabetes)
## 
## Coefficients: 
##                         Estimate Std. Error  z value Pr(>|z|)    
## (Intercept):1           -3.57593    0.03290 -108.706  < 2e-16 ***
## (Intercept):2           -1.51578    0.01270 -119.331  < 2e-16 ***
## SexoH:1                  0.01162    0.03012    0.386      0.7    
## SexoH:2                  0.18032    0.01167   15.447  < 2e-16 ***
## FumaF_Si:1               0.21652    0.02998    7.221 5.15e-13 ***
## FumaF_Si:2               0.28448    0.01168   24.346  < 2e-16 ***
## Actividad_FisicaAF_Si:1 -0.49368    0.03205  -15.401  < 2e-16 ***
## Actividad_FisicaAF_Si:2 -0.70660    0.01228  -57.549  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
## 
## Residual deviance: 290.2748 on 8 degrees of freedom
## 
## Log-likelihood: -217.4602 on 8 degrees of freedom
## 
## Number of Fisher scoring iterations: 3 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
## 
## 
## Reference group is level  3  of the response
AIC(fit4)
## [1] 450.9203

Modelo 5. Agregando la interacción Sexo:Fuma

# Modelo 5
fit5<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo*Fuma+Actividad_Fisica,family=multinomial() ,data=diabetes)
summary(fit5)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo * 
##     Fuma + Actividad_Fisica, family = multinomial(), data = diabetes)
## 
## Coefficients: 
##                         Estimate Std. Error  z value Pr(>|z|)    
## (Intercept):1           -3.54618    0.03472 -102.134  < 2e-16 ***
## (Intercept):2           -1.47015    0.01353 -108.677  < 2e-16 ***
## SexoH:1                 -0.06546    0.04300   -1.522 0.127959    
## SexoH:2                  0.07022    0.01676    4.189  2.8e-05 ***
## FumaF_Si:1               0.14752    0.04004    3.684 0.000229 ***
## FumaF_Si:2               0.18297    0.01608   11.381  < 2e-16 ***
## Actividad_FisicaAF_Si:1 -0.49352    0.03205  -15.398  < 2e-16 ***
## Actividad_FisicaAF_Si:2 -0.70636    0.01228  -57.522  < 2e-16 ***
## SexoH:FumaF_Si:1         0.15690    0.06038    2.599 0.009360 ** 
## SexoH:FumaF_Si:2         0.21611    0.02342    9.226  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
## 
## Residual deviance: 200.6259 on 6 degrees of freedom
## 
## Log-likelihood: -172.6357 on 6 degrees of freedom
## 
## Number of Fisher scoring iterations: 3 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
## 
## 
## Reference group is level  3  of the response
AIC(fit5)
## [1] 365.2714

Modelo 6. Agregando la interacción Sexo:Actividad_Fisica

# Modelo 6
fit6<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo*Fuma+Sexo*Actividad_Fisica,family=multinomial() ,data=diabetes)
summary(fit6)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo * 
##     Fuma + Sexo * Actividad_Fisica, family = multinomial(), data = diabetes)
## 
## Coefficients: 
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1                 -3.542307   0.039883 -88.817  < 2e-16 ***
## (Intercept):2                 -1.389158   0.015261 -91.026  < 2e-16 ***
## SexoH:1                       -0.075718   0.063887  -1.185 0.235943    
## SexoH:2                       -0.119484   0.024245  -4.928  8.3e-07 ***
## FumaF_Si:1                     0.147790   0.040103   3.685 0.000228 ***
## FumaF_Si:2                     0.172748   0.016149  10.697  < 2e-16 ***
## Actividad_FisicaAF_Si:1       -0.496482   0.042305 -11.736  < 2e-16 ***
## Actividad_FisicaAF_Si:2       -0.827874   0.016533 -50.074  < 2e-16 ***
## SexoH:FumaF_Si:1               0.155887   0.060642   2.571 0.010151 *  
## SexoH:FumaF_Si:2               0.238684   0.023514  10.151  < 2e-16 ***
## SexoH:Actividad_FisicaAF_Si:1  0.008851   0.064796   0.137 0.891345    
## SexoH:Actividad_FisicaAF_Si:2  0.269122   0.024756  10.871  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
## 
## Residual deviance: 81.6451 on 4 degrees of freedom
## 
## Log-likelihood: -113.1453 on 4 degrees of freedom
## 
## Number of Fisher scoring iterations: 3 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
## 
## 
## Reference group is level  3  of the response
AIC(fit6)
## [1] 250.2906

Modelo 7. Agregando la interacción Fuma:Actividad_Fisica

# Modelo 7
fit7<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo*Fuma+Fuma*Actividad_Fisica+Sexo*Actividad_Fisica,family=multinomial() ,data=diabetes)
summary(fit7)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo * 
##     Fuma + Fuma * Actividad_Fisica + Sexo * Actividad_Fisica, 
##     family = multinomial(), data = diabetes)
## 
## Coefficients: 
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1                    -3.44521    0.04370 -78.830  < 2e-16 ***
## (Intercept):2                    -1.32988    0.01680 -79.170  < 2e-16 ***
## SexoH:1                          -0.04174    0.06287  -0.664 0.506778    
## SexoH:2                          -0.09077    0.02425  -3.743 0.000182 ***
## FumaF_Si:1                       -0.05567    0.05865  -0.949 0.342484    
## FumaF_Si:2                        0.05140    0.02219   2.317 0.020530 *  
## Actividad_FisicaAF_Si:1          -0.63425    0.05108 -12.417  < 2e-16 ***
## Actividad_FisicaAF_Si:2          -0.91818    0.02004 -45.806  < 2e-16 ***
## SexoH:FumaF_Si:1                  0.14332    0.06046   2.370 0.017764 *  
## SexoH:FumaF_Si:2                  0.22005    0.02358   9.333  < 2e-16 ***
## FumaF_Si:Actividad_FisicaAF_Si:1  0.30177    0.06439   4.686 2.78e-06 ***
## FumaF_Si:Actividad_FisicaAF_Si:2  0.19611    0.02482   7.900 2.79e-15 ***
## SexoH:Actividad_FisicaAF_Si:1    -0.02996    0.06510  -0.460 0.645350    
## SexoH:Actividad_FisicaAF_Si:2     0.24083    0.02494   9.656  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
## 
## Residual deviance: 2.2229 on 2 degrees of freedom
## 
## Log-likelihood: -73.4342 on 2 degrees of freedom
## 
## Number of Fisher scoring iterations: 3 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
## 
## 
## Reference group is level  3  of the response
AIC(fit7)
## [1] 174.8685

Modelo 8 | Modelo Saturado

# Modelo 8
fit8<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo*Fuma*Actividad_Fisica,family=multinomial() ,data=diabetes)
## Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 = Xm2, :
## iterations terminated because half-step sizes are very small
## Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 = Xm2, : some
## quantities such as z, residuals, SEs may be inaccurate due to convergence at a
## half-step
summary(fit8)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo * 
##     Fuma * Actividad_Fisica, family = multinomial(), data = diabetes)
## 
## Coefficients: 
##                                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1                          -3.45043    0.04690 -73.566  < 2e-16 ***
## (Intercept):2                          -1.33950    0.01805 -74.197  < 2e-16 ***
## SexoH:1                                -0.02700    0.07901  -0.342 0.732511    
## SexoH:2                                -0.06291    0.03065  -2.052 0.040123 *  
## FumaF_Si:1                             -0.04440    0.06920  -0.642 0.521088    
## FumaF_Si:2                              0.07133    0.02595   2.749 0.005976 ** 
## Actividad_FisicaAF_Si:1                -0.62702    0.05678 -11.043  < 2e-16 ***
## Actividad_FisicaAF_Si:2                -0.90331    0.02245 -40.245  < 2e-16 ***
## SexoH:FumaF_Si:1                        0.11575    0.10792   1.073 0.283486    
## SexoH:FumaF_Si:2                        0.17067    0.04081   4.182 2.89e-05 ***
## SexoH:Actividad_FisicaAF_Si:1          -0.04979    0.09420  -0.529 0.597098    
## SexoH:Actividad_FisicaAF_Si:2           0.20101    0.03669   5.479 4.28e-08 ***
## FumaF_Si:Actividad_FisicaAF_Si:1        0.28562    0.08477   3.369 0.000754 ***
## FumaF_Si:Actividad_FisicaAF_Si:2        0.16374    0.03311   4.946 7.59e-07 ***
## SexoH:FumaF_Si:Actividad_FisicaAF_Si:1  0.03838    0.13029   0.295 0.768320    
## SexoH:FumaF_Si:Actividad_FisicaAF_Si:2  0.07395    0.05000   1.479 0.139152    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
## 
## Residual deviance: -2.161e-13 on 0 degrees of freedom
## 
## Log-likelihood: -72.3228 on 0 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
## 
## 
## Reference group is level  3  of the response
AIC(fit8)
## [1] 176.6455

Tabla con el AIC para todos los modelos

#AIC por modelos
models=(list(fit1, fit2, fit3, fit4, fit5, fit6, fit7, fit8))
aic_values<-data.frame(matrix(ncol = 8, nrow = 1))
colnames(aic_values)<-c("fit1","fit2","fit3","fit4","fit5","fit6","fit7","fit8")
for(i in 1:length(models)){
  aic_values[1,i] <- round(AIC(models[[i]]),3)
}
print(aic_values)
##       fit1     fit2     fit3   fit4    fit5    fit6    fit7    fit8
## 1 4904.789 4659.023 3739.166 450.92 365.271 250.291 174.868 176.646

Interpretaciones

Sexo H vs M

##### sexo H vs M #####
# exp(b11 + b41F + b61AF)

# OR(preD vs NoD | sexo H vs M) si F=0 y AF=0
exp(-0.04174)
## [1] 0.9591191
# OR(preD vs NoD | sexo H vs M) si F=1 y AF=0
exp(-0.04174 -0.05567)
## [1] 0.907184
# OR(preD vs NoD | sexo H vs M) si F=0 y AF=1
exp(-0.04174 -0.63425)
## [1] 0.5086526
# OR(preD vs NoD | sexo H vs M) si F=1 y AF=1
exp(-0.04174 -0.05567 -0.63425)
## [1] 0.4811097
# OR(D vs NoD | sexo H vs M) si F=0 y AF=0
exp(-0.09077)
## [1] 0.9132277
# OR(D vs NoD | sexo H vs M) si F=1 y AF=0
exp(-0.09077 +0.05140)
## [1] 0.9613949
# OR(D vs NoD | sexo H vs M) si F=0 y AF=1
exp(-0.09077 -0.91818)
## [1] 0.3646016
# OR(D vs NoD | sexo H vs M) si F=1 y AF=1
exp(-0.09077 +0.05140 -0.91818)
## [1] 0.3838321

Fumar Sí vs No

##### Fumar si vs no #####
# exp(b21 + b41S + b51AF)

# OR(preD vs NoD | Fumar si vs no) si S=0 y AF=0
exp(-0.05567)
## [1] 0.9458512
# OR(preD vs NoD | Fumar si vs no) si S=1 y AF=0
exp(-0.05567 -0.04174)
## [1] 0.907184
# OR(preD vs NoD | Fumar si vs no) si S=0 y AF=1
exp(-0.05567 -0.63425)
## [1] 0.5016162
# OR(preD vs NoD | Fumar si vs no) si S=1 y AF=1
exp(-0.05567 -0.04174 -0.63425)
## [1] 0.4811097
# OR(D vs NoD | Fumar si vs no) si S=0 y AF=0
exp(0.05140)
## [1] 1.052744
# OR(D vs NoD | Fumar si vs no) si S=1 y AF=0
exp(0.05140 -0.09077)
## [1] 0.9613949
# OR(D vs NoD | Fumar si vs no) si S=0 y AF=1
exp(0.05140 -0.91818)
## [1] 0.4203027
# OR(D vs NoD | Fumar si vs no) si S=1 y AF=1
exp(0.05140 -0.09077 -0.91818)
## [1] 0.3838321

Actividad Física Sí vs No

##### AF si vs no #####
# exp(b31 + b51F + b61S)

# OR(preD vs NoD | AF si vs no) si F=0 y S=0
exp(-0.63425)
## [1] 0.5303331
# OR(preD vs NoD | AF si vs no) si F=1 y S=0
exp(-0.63425 -0.05567)
## [1] 0.5016162
# OR(preD vs NoD | AF si vs no) si F=0 y S=1
exp(-0.63425 -0.04174)
## [1] 0.5086526
# OR(preD vs NoD | AF si vs no) si F=1 y S=1
exp(-0.63425 -0.05567 -0.04174)
## [1] 0.4811097
# OR(D vs NoD | AF si vs no) si F=0 y S=0
exp(-0.91818)
## [1] 0.399245
# OR(D vs NoD | AF si vs no) si F=1 y S=0
exp(-0.91818 +0.05140)
## [1] 0.4203027
# OR(D vs NoD | AF si vs no) si F=0 y S=1
exp(-0.91818 -0.09077)
## [1] 0.3646016
# OR(D vs NoD | AF si vs no) si F=1 y S=1
exp(-0.91818 +0.05140 -0.09077)
## [1] 0.3838321