# Load the diabetes dataset from CSV into `db` and preview its first rows.
# NOTE: the path is an absolute, machine-specific Windows location; point it
# at your own copy of "Diabetes data.csv" when running elsewhere.
t <- "C:\\Users\\pc\\Downloads\\CAC NGHIEN CUU\\BAI GIANG CAC MON\\GS TUAN\\BG 12.6.19\\Diabetes data.csv"
# stringsAsFactors = TRUE is spelled out so text columns (gender, diabetes)
# are factors on every R version. It was the read.csv() default before
# R 4.0.0 (the recorded output comes from R 3.5.3); from 4.0.0 on the default
# is FALSE, and TukeyHSD() on the aov() fits further down requires the
# grouping variable to be a factor.
db <- read.csv(t, stringsAsFactors = TRUE)
head(db)
## id age gender height weight waist hip sysbp diabp active hypertension
## 1 1 76 Female 163 53 90 93 160 90 0 1
## 2 1 40 Female 149 51 74 94 100 60 0 0
## 3 1 51 Female 151 55 91 100 120 80 0 0
## 4 1 43 Female 158 62 78 96 120 80 1 0
## 5 2 72 Female 148 47 91 95 130 60 1 0
## 6 2 44 Male 155 48 69 86 120 80 0 0
## bmi whr diabetes
## 1 19.95 0.97 IFG
## 2 22.97 0.79 Normal
## 3 24.12 0.91 Normal
## 4 24.84 0.81 Normal
## 5 21.46 0.96 IFG
## 6 19.98 0.80 Normal
# Analysis of data: describe the variables by gender
library(table1)
## Warning: package 'table1' was built under R version 3.5.3
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Descriptive summary of every variable, stratified by gender.
# active and hypertension are passed as plain 0/1 numeric columns here, so
# they are summarised with mean/SD rather than as category counts.
table1(
  ~ age + height + weight + bmi + waist + hip + whr +
    active + hypertension + diabetes | gender,
  data = db
)
Female (n=2161) |
Male (n=1004) |
Overall (n=3165) |
|
---|---|---|---|
age | |||
Mean (SD) | 52.0 (11.7) | 53.3 (12.5) | 52.4 (12.0) |
Median [Min, Max] | 51.0 [30.0, 93.0] | 52.0 [30.0, 89.0] | 51.0 [30.0, 93.0] |
height | |||
Mean (SD) | 153 (5.52) | 163 (6.11) | 157 (7.33) |
Median [Min, Max] | 153 [132, 173] | 163 [140, 180] | 156 [132, 180] |
Missing | 6 (0.3%) | 1 (0.1%) | 7 (0.2%) |
weight | |||
Mean (SD) | 56.5 (9.34) | 63.8 (10.5) | 58.8 (10.3) |
Median [Min, Max] | 55.0 [29.0, 98.0] | 63.0 [38.0, 115] | 58.0 [29.0, 115] |
Missing | 2 (0.1%) | 0 (0%) | 2 (0.1%) |
bmi | |||
Mean (SD) | 24.0 (3.64) | 23.9 (3.49) | 24.0 (3.59) |
Median [Min, Max] | 23.6 [13.3, 43.8] | 23.7 [14.0, 39.8] | 23.7 [13.3, 43.8] |
Missing | 8 (0.4%) | 1 (0.1%) | 9 (0.3%) |
waist | |||
Mean (SD) | 80.1 (10.2) | 86.0 (9.70) | 82.0 (10.4) |
Median [Min, Max] | 79.0 [54.0, 120] | 87.0 [59.0, 120] | 82.0 [54.0, 120] |
Missing | 9 (0.4%) | 2 (0.2%) | 11 (0.3%) |
hip | |||
Mean (SD) | 93.9 (7.66) | 95.4 (7.64) | 94.4 (7.68) |
Median [Min, Max] | 93.0 [64.0, 124] | 95.0 [56.0, 130] | 94.0 [56.0, 130] |
Missing | 9 (0.4%) | 2 (0.2%) | 11 (0.3%) |
whr | |||
Mean (SD) | 0.852 (0.0761) | 0.901 (0.0660) | 0.868 (0.0765) |
Median [Min, Max] | 0.850 [0.610, 1.21] | 0.900 [0.680, 1.36] | 0.870 [0.610, 1.36] |
Missing | 9 (0.4%) | 2 (0.2%) | 11 (0.3%) |
active | |||
Mean (SD) | 0.569 (0.495) | 0.450 (0.498) | 0.531 (0.499) |
Median [Min, Max] | 1.00 [0.00, 1.00] | 0.00 [0.00, 1.00] | 1.00 [0.00, 1.00] |
hypertension | |||
Mean (SD) | 0.533 (0.499) | 0.682 (0.466) | 0.580 (0.494) |
Median [Min, Max] | 1.00 [0.00, 1.00] | 1.00 [0.00, 1.00] | 1.00 [0.00, 1.00] |
diabetes | |||
IFG | 157 (7.3%) | 86 (8.6%) | 243 (7.7%) |
Normal | 1857 (85.9%) | 823 (82.0%) | 2680 (84.7%) |
Yes | 147 (6.8%) | 95 (9.5%) | 242 (7.6%) |
# active and hypertension are categorical (0/1) variables, so wrap them in
# factor() first; table1 then reports them as binary categories (counts and
# percentages). This table is stratified by diabetes status.
table1(
  ~ age + height + weight + bmi + waist + hip + whr +
    factor(active) + factor(hypertension) | diabetes,
  data = db
)
IFG (n=243) |
Normal (n=2680) |
Yes (n=242) |
Overall (n=3165) |
|
---|---|---|---|---|
age | ||||
Mean (SD) | 55.8 (11.1) | 52.0 (12.1) | 52.7 (11.6) | 52.4 (12.0) |
Median [Min, Max] | 55.0 [30.0, 86.0] | 51.0 [30.0, 93.0] | 51.0 [30.0, 82.0] | 51.0 [30.0, 93.0] |
height | ||||
Mean (SD) | 157 (6.98) | 156 (7.33) | 158 (7.52) | 157 (7.33) |
Median [Min, Max] | 157 [132, 176] | 156 [133, 180] | 157 [139, 178] | 156 [132, 180] |
Missing | 0 (0%) | 6 (0.2%) | 1 (0.4%) | 7 (0.2%) |
weight | ||||
Mean (SD) | 61.1 (10.4) | 58.5 (10.2) | 59.6 (10.5) | 58.8 (10.3) |
Median [Min, Max] | 60.0 [39.0, 96.0] | 58.0 [29.0, 113] | 58.8 [36.0, 115] | 58.0 [29.0, 115] |
Missing | 1 (0.4%) | 1 (0.0%) | 0 (0%) | 2 (0.1%) |
bmi | ||||
Mean (SD) | 24.7 (3.48) | 23.9 (3.62) | 23.9 (3.27) | 24.0 (3.59) |
Median [Min, Max] | 24.6 [17.6, 37.5] | 23.6 [13.3, 43.8] | 23.7 [15.8, 39.8] | 23.7 [13.3, 43.8] |
Missing | 1 (0.4%) | 7 (0.3%) | 1 (0.4%) | 9 (0.3%) |
waist | ||||
Mean (SD) | 84.9 (10.6) | 81.6 (10.4) | 83.7 (9.51) | 82.0 (10.4) |
Median [Min, Max] | 85.0 [57.0, 119] | 81.0 [54.0, 120] | 83.0 [63.0, 120] | 82.0 [54.0, 120] |
Missing | 1 (0.4%) | 9 (0.3%) | 1 (0.4%) | 11 (0.3%) |
hip | ||||
Mean (SD) | 95.2 (7.98) | 94.4 (7.66) | 93.9 (7.58) | 94.4 (7.68) |
Median [Min, Max] | 95.0 [77.0, 119] | 94.0 [56.0, 130] | 94.0 [69.0, 115] | 94.0 [56.0, 130] |
Missing | 1 (0.4%) | 9 (0.3%) | 1 (0.4%) | 11 (0.3%) |
whr | ||||
Mean (SD) | 0.891 (0.0679) | 0.863 (0.0772) | 0.892 (0.0687) | 0.868 (0.0765) |
Median [Min, Max] | 0.890 [0.730, 1.12] | 0.860 [0.610, 1.36] | 0.880 [0.680, 1.17] | 0.870 [0.610, 1.36] |
Missing | 1 (0.4%) | 9 (0.3%) | 1 (0.4%) | 11 (0.3%) |
factor(active) | ||||
0 | 116 (47.7%) | 1251 (46.7%) | 117 (48.3%) | 1484 (46.9%) |
1 | 127 (52.3%) | 1429 (53.3%) | 125 (51.7%) | 1681 (53.1%) |
factor(hypertension) | ||||
0 | 64 (26.3%) | 1171 (43.7%) | 94 (38.8%) | 1329 (42.0%) |
1 | 179 (73.7%) | 1509 (56.3%) | 148 (61.2%) | 1836 (58.0%) |
# Compare mean BMI between the two genders with a two-sample t-test
# (t.test() defaults to the Welch, unequal-variance version).
t.test(formula = db$bmi ~ db$gender)
##
## Welch Two Sample t-test
##
## data: db$bmi by db$gender
## t = 0.66677, df = 2034.2, p-value = 0.505
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1749957 0.3552877
## sample estimates:
## mean in group Female mean in group Male
## 23.98515 23.89500
# Conclusion: no statistically significant difference in mean BMI between the two genders (p = 0.505)
# Does diabetes differ between men and women? `diabetes` has three values, so
# a t-test is not appropriate. Build a two-valued variable `diab` instead:
# "Yes" when diabetes == "Yes", otherwise "No".
db[["diab"]] <- ifelse(db[["diabetes"]] == "Yes", "Yes", "No")
# Count subjects per diabetes status (rows) within each gender (columns).
tab <- table(db[["diab"]], db[["gender"]])
tab
##
## Female Male
## No 2014 909
## Yes 147 95
# Test the difference between two proportions (diabetes in men vs. women)
# using the chi-square based prop.test().
# NOTE: prop.test() treats each ROW of a 2x2 table as a group and the first
# column as the "success" count. With diabetes status in the rows, the
# estimated proportions are the share of women within the No and Yes groups;
# the X-squared statistic and p-value do not depend on this orientation.
prop.test(x = tab)
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: tab
## X-squared = 6.496, df = 1, p-value = 0.01081
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.01557132 0.14758891
## sample estimates:
## prop 1 prop 2
## 0.6890181 0.6074380
# Conclusion: the proportion with diabetes differs between men and women (p = 0.011)
# Rebuild the 2x2 table with gender in the rows and diabetes status in the
# columns, so that each row is one comparison group.
tab <- table(db[["gender"]], db[["diab"]])
tab
##
## No Yes
## Female 2014 147
## Male 909 95
# Two-sample test of equal proportions with gender as the grouping rows.
# The first column of `tab` ("No") is counted as success, so prop 1 / prop 2
# are the proportions WITHOUT diabetes among women / men.
prop.test(x = tab)
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: tab
## X-squared = 6.496, df = 1, p-value = 0.01081
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.004880546 0.048314356
## sample estimates:
## prop 1 prop 2
## 0.9319759 0.9053785
# Difference in BMI across the three diabetes groups: diabetes has three
# groups, so use analysis of variance (ANOVA).
# aov() compares BMI between the three groups.
m <- aov(formula = bmi ~ diabetes, data = db)
summary(m)
## Df Sum Sq Mean Sq F value Pr(>F)
## diabetes 2 151 75.27 5.851 0.00291 **
## Residuals 3153 40567 12.87
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 9 observations deleted due to missingness
# TukeyHSD() shows the difference between each pair of groups, with
# family-wise 95% confidence intervals (the default level, made explicit).
TukeyHSD(m, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = bmi ~ diabetes, data = db)
##
## $diabetes
## diff lwr upr p adj
## Normal-IFG -0.8236127 -1.3882150 -0.259010503 0.0018313
## Yes-IFG -0.7643882 -1.5297871 0.001010667 0.0503925
## Yes-Normal 0.0592245 -0.5064508 0.624899835 0.9673231
# Build a data set containing only the female subjects.
# %in% never yields NA, so rows with a missing gender are dropped.
women <- db[db$gender %in% "Female", ]
dim(women)
## [1] 2161 15
# Repeat the one-way ANOVA of BMI by diabetes group, restricted to women.
# This overwrites the earlier whole-sample fit stored in `m`.
m <- aov(formula = bmi ~ diabetes, data = women)
summary(m)
## Df Sum Sq Mean Sq F value Pr(>F)
## diabetes 2 55 27.50 2.076 0.126
## Residuals 2150 28475 13.24
## 8 observations deleted due to missingness
# Pairwise group comparisons (Tukey HSD, 95% family-wise level) for the
# women-only ANOVA fit.
TukeyHSD(m, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = bmi ~ diabetes, data = women)
##
## $diabetes
## diff lwr upr p adj
## Normal-IFG -0.5165746 -1.228157 0.1950074 0.2044133
## Yes-IFG -0.8256674 -1.808504 0.1571697 0.1198536
## Yes-Normal -0.3090928 -1.042806 0.4246202 0.5845178
# Draw a multi-panel correlation plot of the continuous variables in the
# data set.
# First build a new data frame holding only the continuous variables.
dat <- db[c("age", "height", "weight", "bmi", "waist", "hip", "whr")]
library(psych)
pairs.panels(dat)