# Location of the diabetes dataset (CSV file) on the local machine.
# NOTE(review): `t` shadows base R's transpose function t(); the name is kept
# unchanged in case other code refers to it.
t <- "C:\\Users\\pc\\Downloads\\CAC NGHIEN CUU\\BAI GIANG CAC MON\\GS TUAN\\BG 12.6.19\\Diabetes data.csv"
# Load the CSV into a data frame.
db <- read.csv(t)
# Preview the first rows of the data frame. Calling head() on `t` only echoed
# the path string, so the data itself was never inspected.
head(db)
# Frequency of each diabetes category as recorded in the data.
table(db$diabetes)
##
## IFG Normal Yes
## 243 2680 242
# Derive a two-valued outcome: 1 when diabetes is "Yes", 0 for every other
# category (IFG and Normal are pooled together).
db$diab <- as.numeric(db$diabetes == "Yes")
table(db$diab)
##
## 0 1
## 2923 242
# Logistic regression model: binary diabetes outcome (diab) as a function of
# whr (presumably waist-to-hip ratio -- confirm against the data dictionary).
m <- glm(formula = diab ~ whr, family = binomial, data = db)
# epiDisplay supplies logistic.display(), used to tabulate odds ratios.
library(epiDisplay)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet
logistic.display(m)
##
## Logistic regression predicting diab
##
## OR(95%CI) P(Wald's test) P(LR-test)
## whr (cont. var.) 77.81 (14.53,416.54) < 0.001 < 0.001
##
## Log-likelihood = -838.4938
## No. of observations = 3154
## AIC value = 1680.9876
# Interpreting the result: look at the distribution of whr first.
# Mean of whr with missing values removed. mean() only recognizes na.rm;
# the earlier `na.omit=T` was silently absorbed by `...`, so the NAs were kept
# and the call returned NA. The value should agree with the Mean reported by
# summary() below (0.8678).
mean(db$whr, na.rm = TRUE)
summary(db$whr)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.6100 0.8100 0.8700 0.8678 0.9200 1.3600 11
# Standard deviation of whr, again ignoring the missing values.
sd(db$whr, na.rm = TRUE)
## [1] 0.07653587
# Odds ratio per standard deviation of whr.
# New variable whr1 = whr divided by 0.076, the (rounded) standard deviation,
# so a one-unit change in whr1 corresponds to a 0.076 change in whr.
db$whr1 <- db$whr / 0.076
# Refit the model on the rescaled predictor; this overwrites the earlier `m`.
m <- glm(formula = diab ~ whr1, family = binomial, data = db)
logistic.display(m)
##
## Logistic regression predicting diab
##
## OR(95%CI) P(Wald's test) P(LR-test)
## whr1 (cont. var.) 1.39 (1.23,1.58) < 0.001 < 0.001
##
## Log-likelihood = -838.4938
## No. of observations = 3154
## AIC value = 1680.9876
# Coefficient table on the log-odds scale for the rescaled model.
summary(m)
##
## Call:
## glm(formula = diab ~ whr1, family = binomial, data = db)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0144 -0.4257 -0.3834 -0.3308 2.6044
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.31797 0.76436 -8.266 < 2e-16 ***
## whr1 0.33092 0.06506 5.087 3.64e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1702.6 on 3153 degrees of freedom
## Residual deviance: 1677.0 on 3152 degrees of freedom
## (11 observations deleted due to missingness)
## AIC: 1681
##
## Number of Fisher Scoring iterations: 5
# Hand check of the odds ratios from rounded coefficients (intercept -6.3,
# slope 4.354 on the original whr scale).
# With whr = 1 the linear predictor is y1; with whr = 2 it is y2. The
# exponentiated difference is the odds ratio for a one-unit increase in whr.
y1 <- -6.3 + 4.354 * 1
y2 <- -6.3 + 4.354 * 2
exp(y2 - y1)
## [1] 77.789
# Same check for whr rising from 1 to 1.076, i.e. by 0.076 (the rounded
# standard deviation used to build whr1); the slope is rounded to 4.35 here.
y1 <- -6.3 + 4.35 * 1
y2 <- -6.3 + 4.35 * 1.076
exp(y2 - y1)
## [1] 1.391803
#
# Logistic regression of the binary diabetes outcome on gender.
m2 <- glm(formula = diab ~ gender, family = binomial, data = db)
# Odds ratio table for the gender model.
logistic.display(m2)
##
## Logistic regression predicting diab
##
## OR(95%CI) P(Wald's test) P(LR-test)
## gender: Male vs Female 1.43 (1.09,1.88) 0.009 0.01
##
## Log-likelihood = -851.3575
## No. of observations = 3165
## AIC value = 1706.715
# Logistic regression of the binary diabetes outcome on age.
m3 <- glm(formula = diab ~ age, family = binomial, data = db)
# Odds ratio table for the age model.
logistic.display(m3)
##
## Logistic regression predicting diab
##
## OR(95%CI) P(Wald's test) P(LR-test)
## age (cont. var.) 1.0022 (0.9913,1.0132) 0.693 0.694
##
## Log-likelihood = -854.6003
## No. of observations = 3165
## AIC value = 1713.2007
# Interpretation: m3 relates age to diabetes; the association is weak, with
# p > 0.05 and an odds ratio of approximately 1.