# Table of general characteristics of the data (using factor() for the
# categorical variable).
# Read the diabetes dataset from a local CSV file into `db`, then attach
# the table1 package.
db <- read.csv("D:\\data analysis\\dataset\\Diabetes data.csv")
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Descriptive summary of the listed variables, stratified by gender (one
# column per gender plus an overall column). hypertension is wrapped in
# factor() so that it is tabulated as categories rather than summarised as a
# number. Each variable appears exactly once in the formula.
table1(
  ~ age + height + hip + weight + waist + factor(hypertension) + bmi + whr +
    diabetes | gender,
  data = db
)
| | Female (n=2161) | Male (n=1004) | Overall (n=3165) |
---|---|---|---|
age | |||
Mean (SD) | 52.0 (11.7) | 53.3 (12.5) | 52.4 (12.0) |
Median [Min, Max] | 51.0 [30.0, 93.0] | 52.0 [30.0, 89.0] | 51.0 [30.0, 93.0] |
height | |||
Mean (SD) | 153 (5.52) | 163 (6.11) | 157 (7.33) |
Median [Min, Max] | 153 [132, 173] | 163 [140, 180] | 156 [132, 180] |
Missing | 6 (0.3%) | 1 (0.1%) | 7 (0.2%) |
hip | |||
Mean (SD) | 93.9 (7.66) | 95.4 (7.64) | 94.4 (7.68) |
Median [Min, Max] | 93.0 [64.0, 124] | 95.0 [56.0, 130] | 94.0 [56.0, 130] |
Missing | 9 (0.4%) | 2 (0.2%) | 11 (0.3%) |
weight | |||
Mean (SD) | 56.5 (9.34) | 63.8 (10.5) | 58.8 (10.3) |
Median [Min, Max] | 55.0 [29.0, 98.0] | 63.0 [38.0, 115] | 58.0 [29.0, 115] |
Missing | 2 (0.1%) | 0 (0%) | 2 (0.1%) |
waist | |||
Mean (SD) | 80.1 (10.2) | 86.0 (9.70) | 82.0 (10.4) |
Median [Min, Max] | 79.0 [54.0, 120] | 87.0 [59.0, 120] | 82.0 [54.0, 120] |
Missing | 9 (0.4%) | 2 (0.2%) | 11 (0.3%) |
factor(hypertension) | |||
0 | 1010 (46.7%) | 319 (31.8%) | 1329 (42.0%) |
1 | 1151 (53.3%) | 685 (68.2%) | 1836 (58.0%) |
bmi | |||
Mean (SD) | 24.0 (3.64) | 23.9 (3.49) | 24.0 (3.59) |
Median [Min, Max] | 23.6 [13.3, 43.8] | 23.7 [14.0, 39.8] | 23.7 [13.3, 43.8] |
Missing | 8 (0.4%) | 1 (0.1%) | 9 (0.3%) |
whr | |||
Mean (SD) | 0.852 (0.0761) | 0.901 (0.0660) | 0.868 (0.0765) |
Median [Min, Max] | 0.850 [0.610, 1.21] | 0.900 [0.680, 1.36] | 0.870 [0.610, 1.36] |
Missing | 9 (0.4%) | 2 (0.2%) | 11 (0.3%) |
diabetes | |||
IFG | 157 (7.3%) | 86 (8.6%) | 243 (7.7%) |
Normal | 1857 (85.9%) | 823 (82.0%) | 2680 (84.7%) |
Yes | 147 (6.8%) | 95 (9.5%) | 242 (7.6%) |
# Chi-squared comparison using prop.test.
# Collapse the diabetes status into two levels: "Yes" stays "Yes", every
# other value becomes "No". Then cross-tabulate the new variable by gender.
db$diab <- ifelse(db$diabetes == "Yes", "Yes", "No")
table(db$diab, db$gender)
##
## Female Male
## No 2014 909
## Yes 147 95
# Gender-by-diabetes contingency table (rows: gender, columns: diab).
# Note: the first column is "No", so the proportions reported by prop.test()
# are the shares of participants WITHOUT diabetes in each gender.
tab <- table(db$gender, db$diab)
prop.test(tab)
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: tab
## X-squared = 6.496, df = 1, p-value = 0.01081
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.004880546 0.048314356
## sample estimates:
## prop 1 prop 2
## 0.9319759 0.9053785
# Use Desc() from DescTools to run the chi-squared analysis of diab by gender
library(DescTools)
Desc(db$diab~db$gender)
## -------------------------------------------------------------------------
## db$diab ~ db$gender
##
##
## Summary:
## n: 3e+03, rows: 2e+00, columns: 2e+00
##
## Pearson's Chi-squared test (cont. adj):
## X-squared = 6.496, df = 1, p-value = 0.01081
## Fisher's exact test p-value = 0.009682
## McNemar's chi-squared = 548.41, df = 1, p-value < 2.2e-16
##
## estimate lwr.ci upr.ci'
##
## odds ratio 1.432 1.093 1.875
## rel. risk (col1) 1.134 1.022 1.259
## rel. risk (col2) 0.792 0.671 0.935
##
##
## Phi-Coefficient 0.047
## Contingency Coeff. 0.047
## Cramer's V 0.047
##
##
## db$gender Female Male Sum
## db$diab
##
## No freq 2e+03 9e+02 3e+03
## perc 63.6% 28.7% 92.4%
## p.row 68.9% 31.1% .
## p.col 93.2% 90.5% .
##
## Yes freq 1e+02 1e+02 2e+02
## perc 4.6% 3.0% 7.6%
## p.row 60.7% 39.3% .
## p.col 6.8% 9.5% .
##
## Sum freq 2e+03 1e+03 3e+03
## perc 68.3% 31.7% 100.0%
## p.row . . .
## p.col . . .
##
##
## ----------
## ' 95% conf. level
# One-way ANOVA of BMI across the diabetes groups, followed by Tukey HSD
# pairwise comparisons between the groups.
m <- aov(db$bmi ~ db$diabetes)
TukeyHSD(m)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = db$bmi ~ db$diabetes)
##
## $`db$diabetes`
## diff lwr upr p adj
## Normal-IFG -0.8236127 -1.3882150 -0.259010503 0.0018313
## Yes-IFG -0.7643882 -1.5297871 0.001010667 0.0503925
## Yes-Normal 0.0592245 -0.5064508 0.624899835 0.9673231
# Create a new data frame holding only the rows where gender is "Female"
# (rows with a missing gender are excluded), then check its dimensions.
women <- db[which(db$gender == "Female"), , drop = FALSE]
dim(women)
## [1] 2161 15
# Repeat the one-way ANOVA of BMI by diabetes group on the female subset,
# followed by Tukey HSD pairwise comparisons.
m <- aov(bmi ~ diabetes, data = women)
TukeyHSD(m)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = bmi ~ diabetes, data = women)
##
## $diabetes
## diff lwr upr p adj
## Normal-IFG -0.5165746 -1.228157 0.1950074 0.2044133
## Yes-IFG -0.8256674 -1.808504 0.1571697 0.1198536
## Yes-Normal -0.3090928 -1.042806 0.4246202 0.5845178
# Draw the multivariate correlation plot between the continuous variables in
# the dataset: first keep only those columns, then attach psych.
dat <- db[
  ,
  c("age", "height", "weight", "bmi", "hip", "whr")
]
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:DescTools':
##
## AUC, ICC, SD
# Draw the pairwise panel plot for the selected columns in `dat`
pairs.panels(dat)