# Bài tập ngày 2

## Việc 1: Phân tích mô tả

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(compareGroups)
# Load the obesity data set from a CSV file and display its structure.
# NOTE(review): the absolute Windows path is machine-specific; confirm it
# resolves before running this elsewhere.
ob <- read.csv(
  file = "C:\\Users\\ADM\\OneDrive - dau\\Desktop\\Obesity data.csv"
)
str(ob)
## 'data.frame':    1217 obs. of  13 variables:
##  $ id          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ gender      : chr  "F" "M" "F" "F" ...
##  $ height      : int  150 165 157 156 160 153 155 167 165 158 ...
##  $ weight      : int  49 52 57 53 51 47 58 65 54 60 ...
##  $ bmi         : num  21.8 19.1 23.1 21.8 19.9 20.1 24.1 23.3 19.8 24 ...
##  $ age         : int  53 65 64 56 54 52 66 50 61 58 ...
##  $ WBBMC       : int  1312 1309 1230 1171 1681 1358 1546 2276 1778 1404 ...
##  $ wbbmd       : num  0.88 0.84 0.84 0.8 0.98 0.91 0.96 1.11 0.96 0.86 ...
##  $ fat         : int  17802 8381 19221 17472 7336 14904 20233 17749 10795 21365 ...
##  $ lean        : int  28600 40229 36057 33094 40621 30068 35599 43301 38613 35534 ...
##  $ pcfat       : num  37.3 16.8 34 33.8 14.8 32.2 35.3 28 21.1 36.6 ...
##  $ hypertension: int  0 1 1 1 0 1 1 1 0 1 ...
##  $ diabetes    : int  1 0 0 0 0 0 1 1 0 0 ...
# Print per-column summary statistics for every column of `ob`.
summary(ob)
##        id            gender              height          weight     
##  Min.   :   1.0   Length:1217        Min.   :136.0   Min.   :34.00  
##  1st Qu.: 309.0   Class :character   1st Qu.:151.0   1st Qu.:49.00  
##  Median : 615.0   Mode  :character   Median :155.0   Median :54.00  
##  Mean   : 614.5                      Mean   :156.7   Mean   :55.14  
##  3rd Qu.: 921.0                      3rd Qu.:162.0   3rd Qu.:61.00  
##  Max.   :1227.0                      Max.   :185.0   Max.   :95.00  
##       bmi            age            WBBMC          wbbmd            fat       
##  Min.   :14.5   Min.   :13.00   Min.   : 695   Min.   :0.650   Min.   : 4277  
##  1st Qu.:20.2   1st Qu.:35.00   1st Qu.:1481   1st Qu.:0.930   1st Qu.:13768  
##  Median :22.2   Median :48.00   Median :1707   Median :1.010   Median :16955  
##  Mean   :22.4   Mean   :47.15   Mean   :1725   Mean   :1.009   Mean   :17288  
##  3rd Qu.:24.3   3rd Qu.:58.00   3rd Qu.:1945   3rd Qu.:1.090   3rd Qu.:20325  
##  Max.   :37.1   Max.   :88.00   Max.   :3040   Max.   :1.350   Max.   :40825  
##       lean           pcfat       hypertension      diabetes     
##  Min.   :19136   Min.   : 9.2   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:30325   1st Qu.:27.0   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :33577   Median :32.4   Median :1.000   Median :0.0000  
##  Mean   :35463   Mean   :31.6   Mean   :0.507   Mean   :0.1109  
##  3rd Qu.:39761   3rd Qu.:36.8   3rd Qu.:1.000   3rd Qu.:0.0000  
##  Max.   :63059   Max.   :48.4   Max.   :1.000   Max.   :1.0000
# List the column names of `ob`.
names(ob)
##  [1] "id"           "gender"       "height"       "weight"       "bmi"         
##  [6] "age"          "WBBMC"        "wbbmd"        "fat"          "lean"        
## [11] "pcfat"        "hypertension" "diabetes"

## Chuẩn bị dữ liệu

# Turn the categorical columns of `ob` into factors so that downstream
# tables report counts and percentages instead of numeric summaries.

# gender is read in as character ("F"/"M"); convert it to a factor.
ob[["gender"]] <- factor(ob[["gender"]])

# hypertension is coded 0/1: 0 -> "Không có HA", 1 -> "Có HA".
ob[["hypertension"]] <- factor(
  ob[["hypertension"]],
  levels = c(0, 1),
  labels = c("Không có HA", "Có HA")
)

# diabetes is coded 0/1: 0 -> "Không có TĐ", 1 -> "Có TĐ".
ob[["diabetes"]] <- factor(
  ob[["diabetes"]],
  levels = c(0, 1),
  labels = c("Không có TĐ", "Có TĐ")
)

# Re-inspect the structure to confirm the three columns are now factors.
str(ob)
## 'data.frame':    1217 obs. of  13 variables:
##  $ id          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ gender      : Factor w/ 2 levels "F","M": 1 2 1 1 2 1 1 2 2 1 ...
##  $ height      : int  150 165 157 156 160 153 155 167 165 158 ...
##  $ weight      : int  49 52 57 53 51 47 58 65 54 60 ...
##  $ bmi         : num  21.8 19.1 23.1 21.8 19.9 20.1 24.1 23.3 19.8 24 ...
##  $ age         : int  53 65 64 56 54 52 66 50 61 58 ...
##  $ WBBMC       : int  1312 1309 1230 1171 1681 1358 1546 2276 1778 1404 ...
##  $ wbbmd       : num  0.88 0.84 0.84 0.8 0.98 0.91 0.96 1.11 0.96 0.86 ...
##  $ fat         : int  17802 8381 19221 17472 7336 14904 20233 17749 10795 21365 ...
##  $ lean        : int  28600 40229 36057 33094 40621 30068 35599 43301 38613 35534 ...
##  $ pcfat       : num  37.3 16.8 34 33.8 14.8 32.2 35.3 28 21.1 36.6 ...
##  $ hypertension: Factor w/ 2 levels "Không có HA",..: 1 2 2 2 1 2 2 2 1 2 ...
##  $ diabetes    : Factor w/ 2 levels "Không có TĐ",..: 2 1 1 1 1 1 2 2 1 1 ...

## Việc 2. Phân tích khác biệt giữa 2 nhóm: Tạo Bảng mô tả

### So sánh đặc điểm theo nhóm diabetes (Tiểu đường)

#### Công thức: ~ bien_1 + bien_2 | nhom_chia (sử dụng data = ob)

# Descriptive table of age, gender, bmi and pcfat, with one column per
# diabetes level.
table1(
  x = ~ age + gender + bmi + pcfat | diabetes,
  data = ob
)
Không có TĐ
(N=1082)
Có TĐ
(N=135)
Overall
(N=1217)
age
Mean (SD) 47.1 (17.4) 47.5 (16.2) 47.2 (17.3)
Median [Min, Max] 49.0 [13.0, 87.0] 47.0 [16.0, 88.0] 48.0 [13.0, 88.0]
gender
F 760 (70.2%) 102 (75.6%) 862 (70.8%)
M 322 (29.8%) 33 (24.4%) 355 (29.2%)
bmi
Mean (SD) 22.4 (3.04) 22.4 (3.21) 22.4 (3.06)
Median [Min, Max] 22.2 [14.5, 37.1] 22.2 [15.4, 36.3] 22.2 [14.5, 37.1]
pcfat
Mean (SD) 31.6 (7.23) 31.8 (6.79) 31.6 (7.18)
Median [Min, Max] 32.4 [9.20, 48.4] 33.0 [10.3, 47.4] 32.4 [9.20, 48.4]
# Descriptive table of age, height, weight, bmi, fat and lean, with one
# column per gender level.
# NOTE(review): this sits in the diabetes section but stratifies by gender,
# and the same table is produced again in the gender section below --
# confirm whether this call is intentional.
table1(
  x = ~ age + height + weight + bmi + fat + lean | gender,
  data = ob
)
F
(N=862)
M
(N=355)
Overall
(N=1217)
age
Mean (SD) 48.6 (16.4) 43.7 (18.8) 47.2 (17.3)
Median [Min, Max] 49.0 [14.0, 85.0] 44.0 [13.0, 88.0] 48.0 [13.0, 88.0]
height
Mean (SD) 153 (5.55) 165 (6.73) 157 (7.98)
Median [Min, Max] 153 [136, 170] 165 [146, 185] 155 [136, 185]
weight
Mean (SD) 52.3 (7.72) 62.0 (9.59) 55.1 (9.40)
Median [Min, Max] 51.0 [34.0, 95.0] 62.0 [38.0, 95.0] 54.0 [34.0, 95.0]
bmi
Mean (SD) 22.3 (3.05) 22.7 (3.04) 22.4 (3.06)
Median [Min, Max] 22.1 [15.2, 37.1] 22.5 [14.5, 34.7] 22.2 [14.5, 37.1]
fat
Mean (SD) 18200 (4950) 15000 (5110) 17300 (5210)
Median [Min, Max] 17700 [6220, 40800] 15100 [4280, 29900] 17000 [4280, 40800]
lean
Mean (SD) 32000 (3970) 43800 (5820) 35500 (7030)
Median [Min, Max] 31500 [19100, 53400] 43400 [28600, 63100] 33600 [19100, 63100]

### So sánh đặc điểm theo gender (Giới tính)

#### So sánh các chỉ số nhân trắc theo giới tính (sử dụng data = ob)

# Descriptive table of age, height, weight, bmi, fat and lean, with one
# column per gender level.
table1(
  x = ~ age + height + weight + bmi + fat + lean | gender,
  data = ob
)
F
(N=862)
M
(N=355)
Overall
(N=1217)
age
Mean (SD) 48.6 (16.4) 43.7 (18.8) 47.2 (17.3)
Median [Min, Max] 49.0 [14.0, 85.0] 44.0 [13.0, 88.0] 48.0 [13.0, 88.0]
height
Mean (SD) 153 (5.55) 165 (6.73) 157 (7.98)
Median [Min, Max] 153 [136, 170] 165 [146, 185] 155 [136, 185]
weight
Mean (SD) 52.3 (7.72) 62.0 (9.59) 55.1 (9.40)
Median [Min, Max] 51.0 [34.0, 95.0] 62.0 [38.0, 95.0] 54.0 [34.0, 95.0]
bmi
Mean (SD) 22.3 (3.05) 22.7 (3.04) 22.4 (3.06)
Median [Min, Max] 22.1 [15.2, 37.1] 22.5 [14.5, 34.7] 22.2 [14.5, 37.1]
fat
Mean (SD) 18200 (4950) 15000 (5110) 17300 (5210)
Median [Min, Max] 17700 [6220, 40800] 15100 [4280, 29900] 17000 [4280, 40800]
lean
Mean (SD) 32000 (3970) 43800 (5820) 35500 (7030)
Median [Min, Max] 31500 [19100, 53400] 43400 [28600, 63100] 33600 [19100, 63100]

### Tạo Bảng so sánh (Sử dụng compareGroups)

# Compare age, gender, bmi, pcfat and diabetes between the two hypertension
# groups, then render and print the resulting comparison table.
my_compare <- compareGroups(
  formula = hypertension ~ age + gender + bmi + pcfat + diabetes,
  data = ob
)
my_table <- createTable(x = my_compare)
print(my_table)
## 
## --------Summary descriptives table by 'hypertension'---------
## 
## _________________________________________________ 
##                 Không có HA    Có HA    p.overall 
##                    N=600       N=617              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## age             46.7 (17.5) 47.6 (17.0)   0.326   
## gender:                                   0.569   
##     F           430 (71.7%) 432 (70.0%)           
##     M           170 (28.3%) 185 (30.0%)           
## bmi             22.3 (3.01) 22.5 (3.10)   0.123   
## pcfat           31.3 (7.22) 31.9 (7.14)   0.205   
## diabetes:                                 0.003   
##     Không có TĐ 550 (91.7%) 532 (86.2%)           
##     Có TĐ       50 (8.33%)  85 (13.8%)            
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯