# Load the data: read the diabetes CSV file into the data frame `db`
# and print its first rows.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running this script on another computer.
db <- read.csv(file = "C:\\Users\\Thu Bo\\Desktop\\Diabetes data.csv")
head(db)
## id age gender height weight waist hip sysbp diabp active hypertension
## 1 1 76 Female 163 53 90 93 160 90 0 1
## 2 1 40 Female 149 51 74 94 100 60 0 0
## 3 1 51 Female 151 55 91 100 120 80 0 0
## 4 1 43 Female 158 62 78 96 120 80 1 0
## 5 2 72 Female 148 47 91 95 130 60 1 0
## 6 2 44 Male 155 48 69 86 120 80 0 0
## bmi whr diabetes
## 1 19.95 0.97 IFG
## 2 22.97 0.79 Normal
## 3 24.12 0.91 Normal
## 4 24.84 0.81 Normal
## 5 21.46 0.96 IFG
## 6 19.98 0.80 Normal
# Create new variables:
#   diab  - numeric flag: 1 when the diabetes column equals "Yes", else 0
#   group - text label for the same split ("Diabetes" / "Non-diabetes")
db$diab <- ifelse(db$diabetes == "Yes", 1, 0)
# Pick the label by position: diab 0 -> first label, diab 1 -> second label.
# A missing diab value yields a missing group value.
db$group <- c("Non-diabetes", "Diabetes")[db$diab + 1]
head(db)
## id age gender height weight waist hip sysbp diabp active hypertension
## 1 1 76 Female 163 53 90 93 160 90 0 1
## 2 1 40 Female 149 51 74 94 100 60 0 0
## 3 1 51 Female 151 55 91 100 120 80 0 0
## 4 1 43 Female 158 62 78 96 120 80 1 0
## 5 2 72 Female 148 47 91 95 130 60 1 0
## 6 2 44 Male 155 48 69 86 120 80 0 0
## bmi whr diabetes diab group
## 1 19.95 0.97 IFG 0 Non-diabetes
## 2 22.97 0.79 Normal 0 Non-diabetes
## 3 24.12 0.91 Normal 0 Non-diabetes
## 4 24.84 0.81 Normal 0 Non-diabetes
## 5 21.46 0.96 IFG 0 Non-diabetes
## 6 19.98 0.80 Normal 0 Non-diabetes
# Describe the whr column by diabetes status (the 0/1 diab flag).
library(DescTools)
# Raise the scientific-notation penalty so numbers are printed in fixed
# notation (e.g. the p-value in the Desc() output).
options(scipen = 999)
Desc(db$whr ~ db$diab)
## -------------------------------------------------------------------------
## db$whr ~ db$diab
##
## Summary:
## n pairs: 3'165, valid: 3'154 (99.7%), missings: 11 (0.3%), groups: 2
##
##
## 0 1
## mean 0.866 0.892
## median 0.870 0.880
## sd 0.077 0.069
## IQR 0.110 0.080
## n 2'913 241
## np 92.359% 7.641%
## NAs 10 1
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 25.728, df = 1, p-value = 0.0000003931

# Box plot: bmi for each value of the diab flag.
boxplot(db$bmi ~ db$diab)

# Histogram: distribution of whr, blue bars with white borders.
hist(db$whr, border = "white", col = "blue")

# Scatter plot: whr (y axis) against bmi (x axis), solid blue points.
plot(db$whr ~ db$bmi, col = "blue", pch = 16)
