# Read the diabetes practice dataset into `db` and preview the first rows.
# NOTE(review): the path is absolute and machine-specific; adjust it (or make
# it relative to the project) before running on another machine.
t <- "/Users/locnguyen/Documents/R Console/Datasets for practice/Diabetes data.csv"
# Text columns (gender, diabetes) are loaded as factors explicitly. The
# recorded output in this script reports `db$diabetes (factor)`, which was the
# read.csv() default before R 4.0; since R 4.0 the default is character.
db <- read.csv(t, stringsAsFactors = TRUE)
head(db)
## id age gender height weight waist hip sysbp diabp active hypertension
## 1 1 76 Female 163 53 90 93 160 90 0 1
## 2 1 40 Female 149 51 74 94 100 60 0 0
## 3 1 51 Female 151 55 91 100 120 80 0 0
## 4 1 43 Female 158 62 78 96 120 80 1 0
## 5 2 72 Female 148 47 91 95 130 60 1 0
## 6 2 44 Male 155 48 69 86 120 80 0 0
## bmi whr diabetes
## 1 19.95 0.97 IFG
## 2 22.97 0.79 Normal
## 3 24.12 0.91 Normal
## 4 24.84 0.81 Normal
## 5 21.46 0.96 IFG
## 6 19.98 0.80 Normal
# DescTools supplies Desc(), used for the descriptive summaries in this script.
library(DescTools)

# Binary indicator: 1 when the diabetes status is "Yes", 0 for any other status.
db$diab <- as.numeric(db$diabetes == "Yes")

# Text label that mirrors the binary indicator.
db$group <- ifelse(db$diab == 1, "Diabetes", "Non-diabetes")

# Frequency summary of the original diabetes status column.
Desc(db$diabetes)
## -------------------------------------------------------------------------
## db$diabetes (factor)
##
## length n NAs unique levels dupes
## 3e+03 3e+03 0 3e+00 3e+00 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 Normal 3e+03 84.7% 3e+03 84.7%
## 2 IFG 2e+02 7.7% 3e+03 92.4%
## 3 Yes 2e+02 7.6% 3e+03 100.0%
# Cross-tabulate diabetes status by gender. For this pair of categorical
# columns Desc() prints the contingency table together with chi-squared tests
# and association measures (see the recorded output below).
Desc(db$diabetes ~ db$gender)
## -------------------------------------------------------------------------
## db$diabetes ~ db$gender
##
##
## Summary:
## n: 3e+03, rows: 3e+00, columns: 2e+00
##
## Pearson's Chi-squared test:
## X-squared = 9.1224, df = 2, p-value = 0.01045
## Likelihood Ratio:
## X-squared = 8.8723, df = 2, p-value = 0.01184
## Mantel-Haenszel Chi-squared:
## X-squared = 0.82614, df = 1, p-value = 0.3634
##
## Phi-Coefficient 0.054
## Contingency Coeff. 0.054
## Cramer's V 0.054
##
##
## db$gender Female Male Sum
## db$diabetes
##
## IFG freq 2e+02 9e+01 2e+02
## perc 5.0% 2.7% 7.7%
## p.row 64.6% 35.4% .
## p.col 7.3% 8.6% .
##
## Normal freq 2e+03 8e+02 3e+03
## perc 58.7% 26.0% 84.7%
## p.row 69.3% 30.7% .
## p.col 85.9% 82.0% .
##
## Yes freq 1e+02 1e+02 2e+02
## perc 4.6% 3.0% 7.6%
## p.row 60.7% 39.3% .
## p.col 6.8% 9.5% .
##
## Sum freq 2e+03 1e+03 3e+03
## perc 68.3% 31.7% 100.0%
## p.row . . .
## p.col . . .
##
# Summarise waist-hip ratio (whr) within each level of the binary `diab`
# indicator. Desc() reports per-group statistics and a Kruskal-Wallis test
# (see the recorded output below).
Desc(db$whr ~ db$diab)
## -------------------------------------------------------------------------
## db$whr ~ db$diab
##
## Summary:
## n pairs: 3e+03, valid: 3e+03 (99.7%), missings: 1e+01 (0.3%), groups: 2
##
##
## 0 1
## mean 8.658e-01 8.920e-01
## median 8.700e-01 8.800e-01
## sd 7.682e-02 6.873e-02
## IQR 1.100e-01 8.000e-02
## n 3e+03 2e+02
## np 92.359% 7.641%
## NAs 1e+01 1e+00
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 25.728, df = 1, p-value = 3.931e-07
# Compare BMI between diabetic (diab == 1) and non-diabetic (diab == 0) subjects
boxplot(db$bmi ~ db$diab)
# Histogram of the WHR distribution: blue bars with white outlines.
# The outline argument is `border`; it was previously misspelled "boder", which
# is not a graphical parameter, so it was rejected with warnings and the white
# outline was never applied.
hist(db$whr, col = "blue", border = "white")
# Scatter plot of the relationship between BMI (x-axis) and WHR (y-axis)
plot(db$whr ~ db$bmi, pch=16, col="blue")