# Load the diabetes dataset from a local CSV file and preview its first rows.
# NOTE(review): the path below is an absolute, machine-specific Windows
# location; it has to be adjusted before running on another computer.
t <- "C:\\Users\\pc\\Downloads\\CAC NGHIEN CUU\\BAI GIANG CAC MON\\GS TUAN\\BG 12.6.19\\Diabetes data.csv"
# Read text columns (e.g. gender, diabetes) as factors on every R version.
# Since R 4.0.0 read.csv() keeps strings as character by default, and
# TukeyHSD() on an aov fit requires the grouping variable to be a factor.
db <- read.csv(t, stringsAsFactors = TRUE)
head(db)
##   id age gender height weight waist hip sysbp diabp active hypertension
## 1  1  76 Female    163     53    90  93   160    90      0            1
## 2  1  40 Female    149     51    74  94   100    60      0            0
## 3  1  51 Female    151     55    91 100   120    80      0            0
## 4  1  43 Female    158     62    78  96   120    80      1            0
## 5  2  72 Female    148     47    91  95   130    60      1            0
## 6  2  44   Male    155     48    69  86   120    80      0            0
##     bmi  whr diabetes
## 1 19.95 0.97      IFG
## 2 22.97 0.79   Normal
## 3 24.12 0.91   Normal
## 4 24.84 0.81   Normal
## 5 21.46 0.96      IFG
## 6 19.98 0.80   Normal

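# Comparing the waist-hip ratio (whr) across diabetes status: t.test is not used here because diabetes is a categorical variable with three groups (IFG, Normal, Yes) rather than two, so ANOVA is used further below.
# Compare the waist-hip ratio between the genders (two groups, so a t-test applies).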

# Welch two-sample t-test of waist-hip ratio (whr) between the gender groups.
# The extra arguments only spell out t.test()'s default settings.
t.test(
  db$whr ~ db$gender,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  db$whr by db$gender
## t = -18.518, df = 2227.4, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.05432834 -0.04392358
## sample estimates:
## mean in group Female   mean in group Male 
##            0.8521515            0.9012774

# Analysis of variance (aov or anova): it only tells us whether the groups differ at all, not which group differs from which, so TukeyHSD is also needed to see which groups differ from each other.

# One-way analysis of variance of waist-hip ratio (whr) by diabetes status,
# followed by the ANOVA table for the fitted model.
m <- aov(formula = db$whr ~ db$diabetes)
summary(object = m)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## db$diabetes    2  0.327 0.16373   28.44 5.75e-13 ***
## Residuals   3151 18.142 0.00576                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 11 observations deleted due to missingness
# Post-hoc Tukey HSD comparison on the fitted ANOVA: shows which pairs of
# groups differ from each other (defaults written out explicitly).
TukeyHSD(m, ordered = FALSE, conf.level = 0.95)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = db$whr ~ db$diabetes)
## 
## $`db$diabetes`
##                     diff         lwr         upr     p adj
## Normal-IFG -0.0280205977 -0.03996465 -0.01607654 0.0000001
## Yes-IFG     0.0005454203 -0.01564594  0.01673679 0.9965662
## Yes-Normal  0.0285660180  0.01659927  0.04053277 0.0000001