library(readxl)
rm(list = ls())
getwd()
## [1] "C:/Users/Embag/OneDrive/Documents"
setwd("C:/Users/Embag/OneDrive/Documents")
diabetes=read_excel("TC.xlsx")
str(diabetes)
## tibble [8 × 6] (S3: tbl_df/tbl/data.frame)
## $ Sexo : chr [1:8] "M" "M" "M" "M" ...
## $ Fuma : chr [1:8] "F_no" "F_no" "F_Si" "F_Si" ...
## $ Actividad_Fisica: chr [1:8] "AF_no" "AF_Si" "AF_no" "AF_Si" ...
## $ No_Diabetes : num [1:8] 14780 58582 13112 34485 8256 ...
## $ Pre_Diabetes : num [1:8] 469 993 398 744 255 632 367 773
## $ Diabetes : num [1:8] 3872 6219 3689 4631 2031 ...
Despúes de importar los datos, escogemos las categorías de referencia que nos serán de ayuda al momento de la interpretación de los Chances.
diabetes<-within(diabetes, Actividad_Fisica<-relevel(factor(Actividad_Fisica),ref='AF_no'))
diabetes<-within(diabetes, Fuma<-relevel(factor(Fuma),ref='F_no'))
diabetes<-within(diabetes, Sexo<-relevel(factor(Sexo),ref='M'))
Después, ajustamos diferentes Modelos Multinomiales empezando por el modelo nulo hasta terminar con el modelo saturado. Para cada uno de los modelos se obtuvo el Desvío y AIC.
library(VGAM)
## Loading required package: stats4
## Loading required package: splines
# Modelo 1
fit1<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~1,family=multinomial() ,data=diabetes)
summary(fit1)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ 1,
## family = multinomial(), data = diabetes)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -3.831814 0.014853 -258.0 <2e-16 ***
## (Intercept):2 -1.799402 0.005742 -313.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
##
## Residual deviance: 4756.143 on 14 degrees of freedom
##
## Log-likelihood: -2450.394 on 14 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
##
##
## Reference group is level 3 of the response
AIC(fit1)
## [1] 4904.789
# Modelo 2
fit2<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo,family=multinomial() ,data=diabetes)
summary(fit2)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo,
## family = multinomial(), data = diabetes)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -3.838403 0.019806 -193.796 <2e-16 ***
## (Intercept):2 -1.882503 0.007911 -237.963 <2e-16 ***
## SexoH:1 0.015117 0.029940 0.505 0.614
## SexoH:2 0.182043 0.011507 15.820 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
##
## Residual deviance: 4506.378 on 12 degrees of freedom
##
## Log-likelihood: -2325.512 on 12 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
##
##
## Reference group is level 3 of the response
AIC(fit2)
## [1] 4659.023
# Modelo 3
fit3<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo+Fuma,family=multinomial() ,data=diabetes)
summary(fit3)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo +
## Fuma, family = multinomial(), data = diabetes)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -3.945688 0.023943 -164.796 <2e-16 ***
## (Intercept):2 -2.030125 0.009556 -212.437 <2e-16 ***
## SexoH:1 -0.008517 0.030073 -0.283 0.777
## SexoH:2 0.150288 0.011576 12.983 <2e-16 ***
## FumaF_Si:1 0.255000 0.029844 8.545 <2e-16 ***
## FumaF_Si:2 0.342160 0.011559 29.600 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
##
## Residual deviance: 3582.52 on 10 degrees of freedom
##
## Log-likelihood: -1863.583 on 10 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
##
##
## Reference group is level 3 of the response
AIC(fit3)
## [1] 3739.166
# Modelo 4
fit4<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo+Fuma+Actividad_Fisica,family=multinomial() ,data=diabetes)
summary(fit4)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo +
## Fuma + Actividad_Fisica, family = multinomial(), data = diabetes)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -3.57593 0.03290 -108.706 < 2e-16 ***
## (Intercept):2 -1.51578 0.01270 -119.331 < 2e-16 ***
## SexoH:1 0.01162 0.03012 0.386 0.7
## SexoH:2 0.18032 0.01167 15.447 < 2e-16 ***
## FumaF_Si:1 0.21652 0.02998 7.221 5.15e-13 ***
## FumaF_Si:2 0.28448 0.01168 24.346 < 2e-16 ***
## Actividad_FisicaAF_Si:1 -0.49368 0.03205 -15.401 < 2e-16 ***
## Actividad_FisicaAF_Si:2 -0.70660 0.01228 -57.549 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
##
## Residual deviance: 290.2748 on 8 degrees of freedom
##
## Log-likelihood: -217.4602 on 8 degrees of freedom
##
## Number of Fisher scoring iterations: 3
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
##
##
## Reference group is level 3 of the response
AIC(fit4)
## [1] 450.9203
# Modelo 5
fit5<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo*Fuma+Actividad_Fisica,family=multinomial() ,data=diabetes)
summary(fit5)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo *
## Fuma + Actividad_Fisica, family = multinomial(), data = diabetes)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -3.54618 0.03472 -102.134 < 2e-16 ***
## (Intercept):2 -1.47015 0.01353 -108.677 < 2e-16 ***
## SexoH:1 -0.06546 0.04300 -1.522 0.127959
## SexoH:2 0.07022 0.01676 4.189 2.8e-05 ***
## FumaF_Si:1 0.14752 0.04004 3.684 0.000229 ***
## FumaF_Si:2 0.18297 0.01608 11.381 < 2e-16 ***
## Actividad_FisicaAF_Si:1 -0.49352 0.03205 -15.398 < 2e-16 ***
## Actividad_FisicaAF_Si:2 -0.70636 0.01228 -57.522 < 2e-16 ***
## SexoH:FumaF_Si:1 0.15690 0.06038 2.599 0.009360 **
## SexoH:FumaF_Si:2 0.21611 0.02342 9.226 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
##
## Residual deviance: 200.6259 on 6 degrees of freedom
##
## Log-likelihood: -172.6357 on 6 degrees of freedom
##
## Number of Fisher scoring iterations: 3
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
##
##
## Reference group is level 3 of the response
AIC(fit5)
## [1] 365.2714
# Modelo 6
fit6<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo*Fuma+Sexo*Actividad_Fisica,family=multinomial() ,data=diabetes)
summary(fit6)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo *
## Fuma + Sexo * Actividad_Fisica, family = multinomial(), data = diabetes)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -3.542307 0.039883 -88.817 < 2e-16 ***
## (Intercept):2 -1.389158 0.015261 -91.026 < 2e-16 ***
## SexoH:1 -0.075718 0.063887 -1.185 0.235943
## SexoH:2 -0.119484 0.024245 -4.928 8.3e-07 ***
## FumaF_Si:1 0.147790 0.040103 3.685 0.000228 ***
## FumaF_Si:2 0.172748 0.016149 10.697 < 2e-16 ***
## Actividad_FisicaAF_Si:1 -0.496482 0.042305 -11.736 < 2e-16 ***
## Actividad_FisicaAF_Si:2 -0.827874 0.016533 -50.074 < 2e-16 ***
## SexoH:FumaF_Si:1 0.155887 0.060642 2.571 0.010151 *
## SexoH:FumaF_Si:2 0.238684 0.023514 10.151 < 2e-16 ***
## SexoH:Actividad_FisicaAF_Si:1 0.008851 0.064796 0.137 0.891345
## SexoH:Actividad_FisicaAF_Si:2 0.269122 0.024756 10.871 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
##
## Residual deviance: 81.6451 on 4 degrees of freedom
##
## Log-likelihood: -113.1453 on 4 degrees of freedom
##
## Number of Fisher scoring iterations: 3
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
##
##
## Reference group is level 3 of the response
AIC(fit6)
## [1] 250.2906
# Modelo 7
fit7<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo*Fuma+Fuma*Actividad_Fisica+Sexo*Actividad_Fisica,family=multinomial() ,data=diabetes)
summary(fit7)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo *
## Fuma + Fuma * Actividad_Fisica + Sexo * Actividad_Fisica,
## family = multinomial(), data = diabetes)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -3.44521 0.04370 -78.830 < 2e-16 ***
## (Intercept):2 -1.32988 0.01680 -79.170 < 2e-16 ***
## SexoH:1 -0.04174 0.06287 -0.664 0.506778
## SexoH:2 -0.09077 0.02425 -3.743 0.000182 ***
## FumaF_Si:1 -0.05567 0.05865 -0.949 0.342484
## FumaF_Si:2 0.05140 0.02219 2.317 0.020530 *
## Actividad_FisicaAF_Si:1 -0.63425 0.05108 -12.417 < 2e-16 ***
## Actividad_FisicaAF_Si:2 -0.91818 0.02004 -45.806 < 2e-16 ***
## SexoH:FumaF_Si:1 0.14332 0.06046 2.370 0.017764 *
## SexoH:FumaF_Si:2 0.22005 0.02358 9.333 < 2e-16 ***
## FumaF_Si:Actividad_FisicaAF_Si:1 0.30177 0.06439 4.686 2.78e-06 ***
## FumaF_Si:Actividad_FisicaAF_Si:2 0.19611 0.02482 7.900 2.79e-15 ***
## SexoH:Actividad_FisicaAF_Si:1 -0.02996 0.06510 -0.460 0.645350
## SexoH:Actividad_FisicaAF_Si:2 0.24083 0.02494 9.656 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
##
## Residual deviance: 2.2229 on 2 degrees of freedom
##
## Log-likelihood: -73.4342 on 2 degrees of freedom
##
## Number of Fisher scoring iterations: 3
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
##
##
## Reference group is level 3 of the response
AIC(fit7)
## [1] 174.8685
# Modelo 8
fit8<-vglm(cbind(Pre_Diabetes,Diabetes,No_Diabetes)~Sexo*Fuma*Actividad_Fisica,family=multinomial() ,data=diabetes)
## Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 = Xm2, :
## iterations terminated because half-step sizes are very small
## Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 = Xm2, : some
## quantities such as z, residuals, SEs may be inaccurate due to convergence at a
## half-step
summary(fit8)
## Call:
## vglm(formula = cbind(Pre_Diabetes, Diabetes, No_Diabetes) ~ Sexo *
## Fuma * Actividad_Fisica, family = multinomial(), data = diabetes)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -3.45043 0.04690 -73.566 < 2e-16 ***
## (Intercept):2 -1.33950 0.01805 -74.197 < 2e-16 ***
## SexoH:1 -0.02700 0.07901 -0.342 0.732511
## SexoH:2 -0.06291 0.03065 -2.052 0.040123 *
## FumaF_Si:1 -0.04440 0.06920 -0.642 0.521088
## FumaF_Si:2 0.07133 0.02595 2.749 0.005976 **
## Actividad_FisicaAF_Si:1 -0.62702 0.05678 -11.043 < 2e-16 ***
## Actividad_FisicaAF_Si:2 -0.90331 0.02245 -40.245 < 2e-16 ***
## SexoH:FumaF_Si:1 0.11575 0.10792 1.073 0.283486
## SexoH:FumaF_Si:2 0.17067 0.04081 4.182 2.89e-05 ***
## SexoH:Actividad_FisicaAF_Si:1 -0.04979 0.09420 -0.529 0.597098
## SexoH:Actividad_FisicaAF_Si:2 0.20101 0.03669 5.479 4.28e-08 ***
## FumaF_Si:Actividad_FisicaAF_Si:1 0.28562 0.08477 3.369 0.000754 ***
## FumaF_Si:Actividad_FisicaAF_Si:2 0.16374 0.03311 4.946 7.59e-07 ***
## SexoH:FumaF_Si:Actividad_FisicaAF_Si:1 0.03838 0.13029 0.295 0.768320
## SexoH:FumaF_Si:Actividad_FisicaAF_Si:2 0.07395 0.05000 1.479 0.139152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
##
## Residual deviance: -2.161e-13 on 0 degrees of freedom
##
## Log-likelihood: -72.3228 on 0 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
##
##
## Reference group is level 3 of the response
AIC(fit8)
## [1] 176.6455
#AIC por modelos
models=(list(fit1, fit2, fit3, fit4, fit5, fit6, fit7, fit8))
aic_values<-data.frame(matrix(ncol = 8, nrow = 1))
colnames(aic_values)<-c("fit1","fit2","fit3","fit4","fit5","fit6","fit7","fit8")
for(i in 1:length(models)){
aic_values[1,i] <- round(AIC(models[[i]]),3)
}
print(aic_values)
## fit1 fit2 fit3 fit4 fit5 fit6 fit7 fit8
## 1 4904.789 4659.023 3739.166 450.92 365.271 250.291 174.868 176.646
##### sexo H vs M #####
# exp(b11 + b41F + b61AF)
# OR(preD vs NoD | sexo H vs M) si F=0 y AF=0
exp(-0.04174)
## [1] 0.9591191
# OR(preD vs NoD | sexo H vs M) si F=1 y AF=0
exp(-0.04174 -0.05567)
## [1] 0.907184
# OR(preD vs NoD | sexo H vs M) si F=0 y AF=1
exp(-0.04174 -0.63425)
## [1] 0.5086526
# OR(preD vs NoD | sexo H vs M) si F=1 y AF=1
exp(-0.04174 -0.05567 -0.63425)
## [1] 0.4811097
# OR(D vs NoD | sexo H vs M) si F=0 y AF=0
exp(-0.09077)
## [1] 0.9132277
# OR(D vs NoD | sexo H vs M) si F=1 y AF=0
exp(-0.09077 +0.05140)
## [1] 0.9613949
# OR(D vs NoD | sexo H vs M) si F=0 y AF=1
exp(-0.09077 -0.91818)
## [1] 0.3646016
# OR(D vs NoD | sexo H vs M) si F=1 y AF=1
exp(-0.09077 +0.05140 -0.91818)
## [1] 0.3838321
##### Fumar si vs no #####
# exp(b21 + b41S + b51AF)
# OR(preD vs NoD | Fumar si vs no) si S=0 y AF=0
exp(-0.05567)
## [1] 0.9458512
# OR(preD vs NoD | Fumar si vs no) si S=1 y AF=0
exp(-0.05567 -0.04174)
## [1] 0.907184
# OR(preD vs NoD | Fumar si vs no) si S=0 y AF=1
exp(-0.05567 -0.63425)
## [1] 0.5016162
# OR(preD vs NoD | Fumar si vs no) si S=1 y AF=1
exp(-0.05567 -0.04174 -0.63425)
## [1] 0.4811097
# OR(D vs NoD | Fumar si vs no) si S=0 y AF=0
exp(0.05140)
## [1] 1.052744
# OR(D vs NoD | Fumar si vs no) si S=1 y AF=0
exp(0.05140 -0.09077)
## [1] 0.9613949
# OR(D vs NoD | Fumar si vs no) si S=0 y AF=1
exp(0.05140 -0.91818)
## [1] 0.4203027
# OR(D vs NoD | Fumar si vs no) si S=1 y AF=1
exp(0.05140 -0.09077 -0.91818)
## [1] 0.3838321
##### AF si vs no #####
# exp(b31 + b51F + b61S)
# OR(preD vs NoD | AF si vs no) si F=0 y S=0
exp(-0.63425)
## [1] 0.5303331
# OR(preD vs NoD | AF si vs no) si F=1 y S=0
exp(-0.63425 -0.05567)
## [1] 0.5016162
# OR(preD vs NoD | AF si vs no) si F=0 y S=1
exp(-0.63425 -0.04174)
## [1] 0.5086526
# OR(preD vs NoD | AF si vs no) si F=1 y S=1
exp(-0.63425 -0.05567 -0.04174)
## [1] 0.4811097
# OR(D vs NoD | AF si vs no) si F=0 y S=0
exp(-0.91818)
## [1] 0.399245
# OR(D vs NoD | AF si vs no) si F=1 y S=0
exp(-0.91818 +0.05140)
## [1] 0.4203027
# OR(D vs NoD | AF si vs no) si F=0 y S=1
exp(-0.91818 -0.09077)
## [1] 0.3646016
# OR(D vs NoD | AF si vs no) si F=1 y S=1
exp(-0.91818 +0.05140 -0.09077)
## [1] 0.3838321