# Path to the diabetes dataset (CSV) on the local machine.
t <- "C:\\Users\\pc\\Downloads\\CAC NGHIEN CUU\\BAI GIANG CAC MON\\GS TUAN\\BG 12.6.19\\Diabetes data.csv"
# Read the CSV into a data frame.
db <- read.csv(t)
# Preview the first rows of the data frame. The earlier call was head(t),
# which only echoed the path string (that was the output recorded here)
# instead of showing any data.
head(db)
# Frequency of each category of the original diabetes variable.
table(db[["diabetes"]])
## 
##    IFG Normal    Yes 
##    243   2680    242
# Create a two-valued diabetes variable: 1 when diabetes is "Yes",
# 0 for every other category.
db[["diab"]] <- as.numeric(db[["diabetes"]] == "Yes")
table(db[["diab"]])
## 
##    0    1 
## 2923  242
# Logistic regression model: binary outcome diab explained by whr.
m <- glm(formula = diab ~ whr, data = db, family = binomial)
# epiDisplay provides logistic.display(), which prints the odds ratio table.
library(epiDisplay)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet
logistic.display(m)
## 
## Logistic regression predicting diab 
##  
##                  OR(95%CI)             P(Wald's test) P(LR-test)
## whr (cont. var.) 77.81 (14.53,416.54)  < 0.001        < 0.001   
##                                                                 
## Log-likelihood = -838.4938
## No. of observations = 3154
## AIC value = 1680.9876
# Interpreting the result: look at the distribution of whr.
# Mean of whr with missing values removed. The earlier call passed
# `na.omit=T`, which is not an argument of mean(); it was silently absorbed
# by `...`, the NAs were kept, and the result was NA. `na.rm = TRUE` is the
# argument that drops them.
mean(db$whr, na.rm = TRUE)
# (should agree with the Mean reported by summary() below, 0.8678)
summary(db$whr)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.6100  0.8100  0.8700  0.8678  0.9200  1.3600      11
# Standard deviation of whr, again ignoring the missing values.
sd(db$whr, na.rm = TRUE)
## [1] 0.07653587
# Compute the odds ratio per standard deviation of whr.
# Create a new variable whr1 = whr divided by 0.076, the standard deviation
# (rounded from the sd() result 0.07653587).
db[["whr1"]] <- db[["whr"]] / 0.076
# Refit the logistic model on the rescaled predictor (this replaces `m`).
m <- glm(formula = diab ~ whr1, data = db, family = binomial)
logistic.display(m)
## 
## Logistic regression predicting diab 
##  
##                   OR(95%CI)         P(Wald's test) P(LR-test)
## whr1 (cont. var.) 1.39 (1.23,1.58)  < 0.001        < 0.001   
##                                                              
## Log-likelihood = -838.4938
## No. of observations = 3154
## AIC value = 1680.9876
# Full coefficient table of the rescaled model.
summary(m)
## 
## Call:
## glm(formula = diab ~ whr1, family = binomial, data = db)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0144  -0.4257  -0.3834  -0.3308   2.6044  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -6.31797    0.76436  -8.266  < 2e-16 ***
## whr1         0.33092    0.06506   5.087 3.64e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1702.6  on 3153  degrees of freedom
## Residual deviance: 1677.0  on 3152  degrees of freedom
##   (11 observations deleted due to missingness)
## AIC: 1681
## 
## Number of Fisher Scoring iterations: 5
# Manual check of the odds ratio from the linear predictor.
# If whr = 1 then y1 = -6.3 + 4.354 * 1; if whr = 2 then y2 = -6.3 + 4.354 * 2.
# The odds ratio for a one-unit increase in whr is exp(y2 - y1).
y1 <- -6.3 + 4.354 * 1
y2 <- -6.3 + 4.354 * 2
exp(y2 - y1)
## [1] 77.789
# Same comparison for an increase of 0.076 in whr (whr = 1 versus
# whr = 1.076), with the slope rounded to 4.35.
y1 <- -6.3 + 4.35 * 1
y2 <- -6.3 + 4.35 * 1.076
exp(y2 - y1)
## [1] 1.391803
# Logistic regression of diab on gender, shown as an odds ratio table.
m2 <- glm(formula = diab ~ gender, data = db, family = binomial)
logistic.display(m2)
## 
## Logistic regression predicting diab 
##  
##                        OR(95%CI)         P(Wald's test) P(LR-test)
## gender: Male vs Female 1.43 (1.09,1.88)  0.009          0.01      
##                                                                   
## Log-likelihood = -851.3575
## No. of observations = 3165
## AIC value = 1706.715
# Logistic regression of diab on age, shown as an odds ratio table.
m3 <- glm(formula = diab ~ age, data = db, family = binomial)
logistic.display(m3)
## 
## Logistic regression predicting diab 
##  
##                  OR(95%CI)               P(Wald's test) P(LR-test)
## age (cont. var.) 1.0022 (0.9913,1.0132)  0.693          0.694     
##                                                                   
## Log-likelihood = -854.6003
## No. of observations = 3165
## AIC value = 1713.2007
# Interpretation: m3 relates age to diabetes; there is little association,
# with p > 0.05 and an OR of approximately 1.