# Read the diabetes practice dataset from a local CSV file.

# NOTE(review): this is an absolute, machine-specific path, and the variable
# name `t` is shared with base::t(). Both are kept unchanged here so that the
# script behaves exactly as before.
t <- "/Users/locnguyen/Documents/R Console/Datasets for practice/Diabetes data.csv"

db <- read.csv(file = t)

# Preview the first six rows to sanity-check the import.
head(db, n = 6L)
##   id age gender height weight waist hip sysbp diabp active hypertension
## 1  1  76 Female    163     53    90  93   160    90      0            1
## 2  1  40 Female    149     51    74  94   100    60      0            0
## 3  1  51 Female    151     55    91 100   120    80      0            0
## 4  1  43 Female    158     62    78  96   120    80      1            0
## 5  2  72 Female    148     47    91  95   130    60      1            0
## 6  2  44   Male    155     48    69  86   120    80      0            0
##     bmi  whr diabetes
## 1 19.95 0.97      IFG
## 2 22.97 0.79   Normal
## 3 24.12 0.91   Normal
## 4 24.84 0.81   Normal
## 5 21.46 0.96      IFG
## 6 19.98 0.80   Normal

# Coding data

# Binary outcome: 1 when the diabetes status is "Yes", 0 otherwise.
# Every status other than "Yes" (the recorded output shows "Normal" and
# "IFG") is therefore coded 0.
db$diab <- ifelse(db$diabetes == "Yes", yes = 1, no = 0)

# Text label that mirrors the binary indicator. The first assignment creates
# the `group` column; the second fills in the remaining rows.
db$group[db$diab == 0] <- "Non-diabetes"
db$group[db$diab == 1] <- "Diabetes"

# Analysis of data

library(DescTools)
# Describe the diabetes status column on its own. The recorded output below
# is a frequency table of its three levels (Normal, IFG, Yes).
Desc(db$diabetes)
## ------------------------------------------------------------------------- 
## db$diabetes (factor)
## 
##   length      n    NAs unique levels  dupes
##    3e+03  3e+03      0  3e+00  3e+00      y
##          100.0%   0.0%                     
## 
##     level   freq   perc  cumfreq  cumperc
## 1  Normal  3e+03  84.7%    3e+03    84.7%
## 2     IFG  2e+02   7.7%    3e+03    92.4%
## 3     Yes  2e+02   7.6%    3e+03   100.0%

# Describe diabetes status against gender. The recorded output below is a
# contingency table with row/column percentages, chi-squared tests and
# association measures (Phi, contingency coefficient, Cramer's V).
Desc(db$diabetes ~ db$gender)
## ------------------------------------------------------------------------- 
## db$diabetes ~ db$gender
## 
## 
## Summary: 
## n: 3e+03, rows: 3e+00, columns: 2e+00
## 
## Pearson's Chi-squared test:
##   X-squared = 9.1224, df = 2, p-value = 0.01045
## Likelihood Ratio:
##   X-squared = 8.8723, df = 2, p-value = 0.01184
## Mantel-Haenszel Chi-squared:
##   X-squared = 0.82614, df = 1, p-value = 0.3634
## 
## Phi-Coefficient        0.054
## Contingency Coeff.     0.054
## Cramer's V             0.054
## 
##                                               
##               db$gender   Female   Male    Sum
## db$diabetes                                   
##                                               
## IFG           freq         2e+02  9e+01  2e+02
##               perc          5.0%   2.7%   7.7%
##               p.row        64.6%  35.4%      .
##               p.col         7.3%   8.6%      .
##                                               
## Normal        freq         2e+03  8e+02  3e+03
##               perc         58.7%  26.0%  84.7%
##               p.row        69.3%  30.7%      .
##               p.col        85.9%  82.0%      .
##                                               
## Yes           freq         1e+02  1e+02  2e+02
##               perc          4.6%   3.0%   7.6%
##               p.row        60.7%  39.3%      .
##               p.col         6.8%   9.5%      .
##                                               
## Sum           freq         2e+03  1e+03  3e+03
##               perc         68.3%  31.7% 100.0%
##               p.row            .      .      .
##               p.col            .      .      .
## 

# Describe `whr` split by the binary diabetes indicator (0 / 1). The recorded
# output below gives per-group mean, median, sd and IQR, plus a
# Kruskal-Wallis rank sum test.
# NOTE(review): `whr` looks like waist / hip (e.g. 90 / 93 ~ 0.97 in the first
# sample row) -- confirm against the data dictionary.
Desc(db$whr ~ db$diab)
## ------------------------------------------------------------------------- 
## db$whr ~ db$diab
## 
## Summary: 
## n pairs: 3e+03, valid: 3e+03 (99.7%), missings: 1e+01 (0.3%), groups: 2
## 
##                             
##                 0          1
## mean    8.658e-01  8.920e-01
## median  8.700e-01  8.800e-01
## sd      7.682e-02  6.873e-02
## IQR     1.100e-01  8.000e-02
## n           3e+03      2e+02
## np        92.359%     7.641%
## NAs         1e+01      1e+00
## 0s              0          0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 25.728, df = 1, p-value = 3.931e-07

# Compare BMI between the diabetes (1) and non-diabetes (0) groups
# using side-by-side box plots.
boxplot(db$bmi ~ db$diab)

# Histogram of the WHR distribution: blue bars with a white outline.
# The outline argument is spelled `border`; the earlier misspelling `boder`
# was passed through `...` to the low-level plotting calls, which warned
# that it is not a graphical parameter and left the outline at its default.
hist(db$whr, col = "blue", border = "white")

# Scatter plot of the relationship between BMI (x-axis) and WHR (y-axis),
# drawn with solid blue points.
plot(db$whr ~ db$bmi, col = "blue", pch = 16)