# Load the data: read the diabetes CSV file into the data frame `db`
# and preview its first rows.
db <- read.csv(file = "C:\\Users\\Thu Bo\\Desktop\\Diabetes data.csv")
head(db)
##   id age gender height weight waist hip sysbp diabp active hypertension
## 1  1  76 Female    163     53    90  93   160    90      0            1
## 2  1  40 Female    149     51    74  94   100    60      0            0
## 3  1  51 Female    151     55    91 100   120    80      0            0
## 4  1  43 Female    158     62    78  96   120    80      1            0
## 5  2  72 Female    148     47    91  95   130    60      1            0
## 6  2  44   Male    155     48    69  86   120    80      0            0
##     bmi  whr diabetes
## 1 19.95 0.97      IFG
## 2 22.97 0.79   Normal
## 3 24.12 0.91   Normal
## 4 24.84 0.81   Normal
## 5 21.46 0.96      IFG
## 6 19.98 0.80   Normal
# Create new variables.
# `diab` is a 0/1 indicator: 1 when `diabetes` equals "Yes", 0 for any
# other non-missing value (e.g. "IFG" and "Normal" both map to 0).
db[["diab"]] <- ifelse(db[["diabetes"]] == "Yes", 1, 0)
# `group` is a readable label derived from `diab`; it is filled in two
# passes, one per indicator value.
db[["group"]][db[["diab"]] == 1] <- "Diabetes"
db[["group"]][db[["diab"]] == 0] <- "Non-diabetes"
# Preview the data frame with the two new columns appended.
head(db)
##   id age gender height weight waist hip sysbp diabp active hypertension
## 1  1  76 Female    163     53    90  93   160    90      0            1
## 2  1  40 Female    149     51    74  94   100    60      0            0
## 3  1  51 Female    151     55    91 100   120    80      0            0
## 4  1  43 Female    158     62    78  96   120    80      1            0
## 5  2  72 Female    148     47    91  95   130    60      1            0
## 6  2  44   Male    155     48    69  86   120    80      0            0
##     bmi  whr diabetes diab        group
## 1 19.95 0.97      IFG    0 Non-diabetes
## 2 22.97 0.79   Normal    0 Non-diabetes
## 3 24.12 0.91   Normal    0 Non-diabetes
## 4 24.84 0.81   Normal    0 Non-diabetes
## 5 21.46 0.96      IFG    0 Non-diabetes
## 6 19.98 0.80   Normal    0 Non-diabetes
# Describe the waist-hip ratio (whr) by diabetes status (diab).
library(DescTools)
# Large scipen so small values (such as the p-value) print in fixed
# notation instead of scientific notation.
options(scipen = 999)
# Grouped descriptive summary of whr for diab = 0 vs diab = 1.
Desc(db$whr ~ db$diab)
## ------------------------------------------------------------------------- 
## db$whr ~ db$diab
## 
## Summary: 
## n pairs: 3'165, valid: 3'154 (99.7%), missings: 11 (0.3%), groups: 2
## 
##                         
##               0        1
## mean      0.866    0.892
## median    0.870    0.880
## sd        0.077    0.069
## IQR       0.110    0.080
## n         2'913      241
## np      92.359%   7.641%
## NAs          10        1
## 0s            0        0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 25.728, df = 1, p-value = 0.0000003931

# Boxplot of bmi split by diabetes indicator (diab).
# NOTE(review): this plots bmi, while the preceding summary describes
# whr -- confirm that bmi is the intended variable here.
boxplot(db$bmi ~ db$diab)

# Histogram showing the distribution of whr (blue bars, white borders).
hist(db$whr, border = "white", col = "blue")

# Scatter plot of whr (y axis) against bmi (x axis), drawn with solid
# blue points.
plot(db$whr ~ db$bmi, col = "blue", pch = 16)