# Bài tập ngày 2
## Việc 1: Phân tích mô tả
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
library(compareGroups)
# Load the obesity data set into `ob` and print its structure.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file location exists — consider a project-relative
# path.
ob <- read.csv(
  "C:\\Users\\ADM\\OneDrive - dau\\Desktop\\Obesity data.csv"
)
str(ob)
## 'data.frame': 1217 obs. of 13 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ gender : chr "F" "M" "F" "F" ...
## $ height : int 150 165 157 156 160 153 155 167 165 158 ...
## $ weight : int 49 52 57 53 51 47 58 65 54 60 ...
## $ bmi : num 21.8 19.1 23.1 21.8 19.9 20.1 24.1 23.3 19.8 24 ...
## $ age : int 53 65 64 56 54 52 66 50 61 58 ...
## $ WBBMC : int 1312 1309 1230 1171 1681 1358 1546 2276 1778 1404 ...
## $ wbbmd : num 0.88 0.84 0.84 0.8 0.98 0.91 0.96 1.11 0.96 0.86 ...
## $ fat : int 17802 8381 19221 17472 7336 14904 20233 17749 10795 21365 ...
## $ lean : int 28600 40229 36057 33094 40621 30068 35599 43301 38613 35534 ...
## $ pcfat : num 37.3 16.8 34 33.8 14.8 32.2 35.3 28 21.1 36.6 ...
## $ hypertension: int 0 1 1 1 0 1 1 1 0 1 ...
## $ diabetes : int 1 0 0 0 0 0 1 1 0 0 ...
# Per-column summary of the raw data.
summary(ob)
## id gender height weight
## Min. : 1.0 Length:1217 Min. :136.0 Min. :34.00
## 1st Qu.: 309.0 Class :character 1st Qu.:151.0 1st Qu.:49.00
## Median : 615.0 Mode :character Median :155.0 Median :54.00
## Mean : 614.5 Mean :156.7 Mean :55.14
## 3rd Qu.: 921.0 3rd Qu.:162.0 3rd Qu.:61.00
## Max. :1227.0 Max. :185.0 Max. :95.00
## bmi age WBBMC wbbmd fat
## Min. :14.5 Min. :13.00 Min. : 695 Min. :0.650 Min. : 4277
## 1st Qu.:20.2 1st Qu.:35.00 1st Qu.:1481 1st Qu.:0.930 1st Qu.:13768
## Median :22.2 Median :48.00 Median :1707 Median :1.010 Median :16955
## Mean :22.4 Mean :47.15 Mean :1725 Mean :1.009 Mean :17288
## 3rd Qu.:24.3 3rd Qu.:58.00 3rd Qu.:1945 3rd Qu.:1.090 3rd Qu.:20325
## Max. :37.1 Max. :88.00 Max. :3040 Max. :1.350 Max. :40825
## lean pcfat hypertension diabetes
## Min. :19136 Min. : 9.2 Min. :0.000 Min. :0.0000
## 1st Qu.:30325 1st Qu.:27.0 1st Qu.:0.000 1st Qu.:0.0000
## Median :33577 Median :32.4 Median :1.000 Median :0.0000
## Mean :35463 Mean :31.6 Mean :0.507 Mean :0.1109
## 3rd Qu.:39761 3rd Qu.:36.8 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :63059 Max. :48.4 Max. :1.000 Max. :1.0000
# List the column names of `ob`.
names(ob)
## [1] "id" "gender" "height" "weight" "bmi"
## [6] "age" "WBBMC" "wbbmd" "fat" "lean"
## [11] "pcfat" "hypertension" "diabetes"
## Chuẩn bị dữ liệu
# Recode the categorical columns of `ob` as factors, then re-inspect the
# structure to confirm the new column types.
ob[["gender"]] <- as.factor(ob[["gender"]])

# The 0/1 indicator columns get descriptive level labels.
ob[["hypertension"]] <- factor(
  ob[["hypertension"]],
  levels = c(0, 1),
  labels = c("Không có HA", "Có HA")
)
ob[["diabetes"]] <- factor(
  ob[["diabetes"]],
  levels = c(0, 1),
  labels = c("Không có TĐ", "Có TĐ")
)

str(ob)
## 'data.frame': 1217 obs. of 13 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ gender : Factor w/ 2 levels "F","M": 1 2 1 1 2 1 1 2 2 1 ...
## $ height : int 150 165 157 156 160 153 155 167 165 158 ...
## $ weight : int 49 52 57 53 51 47 58 65 54 60 ...
## $ bmi : num 21.8 19.1 23.1 21.8 19.9 20.1 24.1 23.3 19.8 24 ...
## $ age : int 53 65 64 56 54 52 66 50 61 58 ...
## $ WBBMC : int 1312 1309 1230 1171 1681 1358 1546 2276 1778 1404 ...
## $ wbbmd : num 0.88 0.84 0.84 0.8 0.98 0.91 0.96 1.11 0.96 0.86 ...
## $ fat : int 17802 8381 19221 17472 7336 14904 20233 17749 10795 21365 ...
## $ lean : int 28600 40229 36057 33094 40621 30068 35599 43301 38613 35534 ...
## $ pcfat : num 37.3 16.8 34 33.8 14.8 32.2 35.3 28 21.1 36.6 ...
## $ hypertension: Factor w/ 2 levels "Không có HA",..: 1 2 2 2 1 2 2 2 1 2 ...
## $ diabetes : Factor w/ 2 levels "Không có TĐ",..: 2 1 1 1 1 1 2 2 1 1 ...
## Việc 2. Phân tích khác biệt giữa 2 nhóm: Tạo Bảng mô tả
### So sánh đặc điểm theo nhóm diabetes (Tiểu đường)
#### Công thức: `~ bien_1 + bien_2 | nhom_chia` (sử dụng `data = ob`)
# Descriptive table of age, gender, bmi and pcfat, stratified by diabetes.
table1(
  ~ age + gender + bmi + pcfat | diabetes,
  data = ob
)
| | Không có TĐ (N=1082) | Có TĐ (N=135) | Overall (N=1217) |
|---|---|---|---|
| age | |||
| Mean (SD) | 47.1 (17.4) | 47.5 (16.2) | 47.2 (17.3) |
| Median [Min, Max] | 49.0 [13.0, 87.0] | 47.0 [16.0, 88.0] | 48.0 [13.0, 88.0] |
| gender | |||
| F | 760 (70.2%) | 102 (75.6%) | 862 (70.8%) |
| M | 322 (29.8%) | 33 (24.4%) | 355 (29.2%) |
| bmi | |||
| Mean (SD) | 22.4 (3.04) | 22.4 (3.21) | 22.4 (3.06) |
| Median [Min, Max] | 22.2 [14.5, 37.1] | 22.2 [15.4, 36.3] | 22.2 [14.5, 37.1] |
| pcfat | |||
| Mean (SD) | 31.6 (7.23) | 31.8 (6.79) | 31.6 (7.18) |
| Median [Min, Max] | 32.4 [9.20, 48.4] | 33.0 [10.3, 47.4] | 32.4 [9.20, 48.4] |
# Descriptive table of age and body measurements, stratified by gender.
# NOTE(review): an identical call appears again further down under the gender
# heading; this occurrence sits in the diabetes section and looks like a
# duplicate — confirm whether it is intended.
table1(
  ~ age + height + weight + bmi + fat + lean | gender,
  data = ob
)
| | F (N=862) | M (N=355) | Overall (N=1217) |
|---|---|---|---|
| age | |||
| Mean (SD) | 48.6 (16.4) | 43.7 (18.8) | 47.2 (17.3) |
| Median [Min, Max] | 49.0 [14.0, 85.0] | 44.0 [13.0, 88.0] | 48.0 [13.0, 88.0] |
| height | |||
| Mean (SD) | 153 (5.55) | 165 (6.73) | 157 (7.98) |
| Median [Min, Max] | 153 [136, 170] | 165 [146, 185] | 155 [136, 185] |
| weight | |||
| Mean (SD) | 52.3 (7.72) | 62.0 (9.59) | 55.1 (9.40) |
| Median [Min, Max] | 51.0 [34.0, 95.0] | 62.0 [38.0, 95.0] | 54.0 [34.0, 95.0] |
| bmi | |||
| Mean (SD) | 22.3 (3.05) | 22.7 (3.04) | 22.4 (3.06) |
| Median [Min, Max] | 22.1 [15.2, 37.1] | 22.5 [14.5, 34.7] | 22.2 [14.5, 37.1] |
| fat | |||
| Mean (SD) | 18200 (4950) | 15000 (5110) | 17300 (5210) |
| Median [Min, Max] | 17700 [6220, 40800] | 15100 [4280, 29900] | 17000 [4280, 40800] |
| lean | |||
| Mean (SD) | 32000 (3970) | 43800 (5820) | 35500 (7030) |
| Median [Min, Max] | 31500 [19100, 53400] | 43400 [28600, 63100] | 33600 [19100, 63100] |
### So sánh đặc điểm theo gender (Giới tính)
#### So sánh các chỉ số nhân trắc theo giới tính (sử dụng `data = ob`)
# Descriptive table of age and body measurements, stratified by gender.
table1(
  ~ age + height + weight + bmi + fat + lean | gender,
  data = ob
)
| | F (N=862) | M (N=355) | Overall (N=1217) |
|---|---|---|---|
| age | |||
| Mean (SD) | 48.6 (16.4) | 43.7 (18.8) | 47.2 (17.3) |
| Median [Min, Max] | 49.0 [14.0, 85.0] | 44.0 [13.0, 88.0] | 48.0 [13.0, 88.0] |
| height | |||
| Mean (SD) | 153 (5.55) | 165 (6.73) | 157 (7.98) |
| Median [Min, Max] | 153 [136, 170] | 165 [146, 185] | 155 [136, 185] |
| weight | |||
| Mean (SD) | 52.3 (7.72) | 62.0 (9.59) | 55.1 (9.40) |
| Median [Min, Max] | 51.0 [34.0, 95.0] | 62.0 [38.0, 95.0] | 54.0 [34.0, 95.0] |
| bmi | |||
| Mean (SD) | 22.3 (3.05) | 22.7 (3.04) | 22.4 (3.06) |
| Median [Min, Max] | 22.1 [15.2, 37.1] | 22.5 [14.5, 34.7] | 22.2 [14.5, 37.1] |
| fat | |||
| Mean (SD) | 18200 (4950) | 15000 (5110) | 17300 (5210) |
| Median [Min, Max] | 17700 [6220, 40800] | 15100 [4280, 29900] | 17000 [4280, 40800] |
| lean | |||
| Mean (SD) | 32000 (3970) | 43800 (5820) | 35500 (7030) |
| Median [Min, Max] | 31500 [19100, 53400] | 43400 [28600, 63100] | 33600 [19100, 63100] |
### Tạo Bảng so sánh (sử dụng compareGroups)
# Compare characteristics between the hypertension groups: build the
# comparison object, convert it to a summary table, and print it.
my_compare <- compareGroups(
  hypertension ~ age + gender + bmi + pcfat + diabetes,
  data = ob
)
my_table <- createTable(my_compare)
print(my_table)
##
## --------Summary descriptives table by 'hypertension'---------
##
## _________________________________________________
## Không có HA Có HA p.overall
## N=600 N=617
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## age 46.7 (17.5) 47.6 (17.0) 0.326
## gender: 0.569
## F 430 (71.7%) 432 (70.0%)
## M 170 (28.3%) 185 (30.0%)
## bmi 22.3 (3.01) 22.5 (3.10) 0.123
## pcfat 31.3 (7.22) 31.9 (7.14) 0.205
## diabetes: 0.003
## Không có TĐ 550 (91.7%) 532 (86.2%)
## Có TĐ 50 (8.33%) 85 (13.8%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯