# Report the dimensions of `ob`; the recorded output below shows 1217 rows
# and 13 columns.
dim(ob)
## [1] 1217 13
# Preview the first 10 rows of `ob`.
head(ob,10)
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat hypertension
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3 0
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8 1
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0 1
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8 1
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8 0
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2 1
## 7 7 F 155 58 24.1 66 1546 0.96 20233 35599 35.3 1
## 8 8 M 167 65 23.3 50 2276 1.11 17749 43301 28.0 1
## 9 9 M 165 54 19.8 61 1778 0.96 10795 38613 21.1 0
## 10 10 F 158 60 24.0 58 1404 0.86 21365 35534 36.6 1
## diabetes
## 1 1
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 1
## 8 1
## 9 0
## 10 0
# Show the last rows of `ob`; no row count is given, and six rows appear in
# the recorded output below.
tail(ob)
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat
## 1212 1222 F 153 50 21.4 59 1309 0.87 18328 29147 37.6
## 1213 1223 F 150 44 19.6 44 1474 0.95 12906 28534 30.1
## 1214 1224 F 148 51 23.3 58 1522 0.97 14938 33931 29.6
## 1215 1225 F 149 50 22.5 57 1409 0.93 16777 30598 34.4
## 1216 1226 F 144 49 23.6 67 1266 0.90 20094 27272 41.3
## 1217 1227 F 141 45 22.6 58 1228 0.91 14567 28111 33.2
## hypertension diabetes
## 1212 1 0
## 1213 0 1
## 1214 0 0
## 1215 1 0
## 1216 1 0
## 1217 0 0
# Per-column summary of `ob`; in the recorded output below `gender` is
# reported as a character column and the other columns as numeric quantiles.
summary(ob)
## id gender height weight
## Min. : 1.0 Length:1217 Min. :136.0 Min. :34.00
## 1st Qu.: 309.0 Class :character 1st Qu.:151.0 1st Qu.:49.00
## Median : 615.0 Mode :character Median :155.0 Median :54.00
## Mean : 614.5 Mean :156.7 Mean :55.14
## 3rd Qu.: 921.0 3rd Qu.:162.0 3rd Qu.:61.00
## Max. :1227.0 Max. :185.0 Max. :95.00
## bmi age WBBMC wbbmd fat
## Min. :14.5 Min. :13.00 Min. : 695 Min. :0.650 Min. : 4277
## 1st Qu.:20.2 1st Qu.:35.00 1st Qu.:1481 1st Qu.:0.930 1st Qu.:13768
## Median :22.2 Median :48.00 Median :1707 Median :1.010 Median :16955
## Mean :22.4 Mean :47.15 Mean :1725 Mean :1.009 Mean :17288
## 3rd Qu.:24.3 3rd Qu.:58.00 3rd Qu.:1945 3rd Qu.:1.090 3rd Qu.:20325
## Max. :37.1 Max. :88.00 Max. :3040 Max. :1.350 Max. :40825
## lean pcfat hypertension diabetes
## Min. :19136 Min. : 9.2 Min. :0.000 Min. :0.0000
## 1st Qu.:30325 1st Qu.:27.0 1st Qu.:0.000 1st Qu.:0.0000
## Median :33577 Median :32.4 Median :1.000 Median :0.0000
## Mean :35463 Mean :31.6 Mean :0.507 Mean :0.1109
## 3rd Qu.:39761 3rd Qu.:36.8 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :63059 Max. :48.4 Max. :1.000 Max. :1.0000
# Recode gender into a 0/1 indicator in two ways.
# Way 1: logical-index assignment; `sex` stores the character codes
# "0" (gender == "M") and "1" (gender == "F").
ob$sex[ob$gender == "M"] <- "0"
ob$sex[ob$gender == "F"] <- "1"
# Way 2: vectorised ifelse(); `sex.b` stores the numeric code 1 where
# gender == "F" and 0 otherwise.
ob$sex.b <- ifelse(ob$gender == "F", 1, 0)
# Cross-tabulate both codings to check that they agree.
table(ob$sex, ob$sex.b)
##
## 0 1
## 0 355 0
## 1 0 862
# Using tidyverse
#for many variable
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#Trinh bay so lieu
##Xem Mean, Median (Min-Max) của các biến
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Overall descriptive table for the listed variables. hypertension and
# diabetes are passed as plain columns here, so the output below reports
# them with Mean (SD) and Median [Min, Max] like the other variables.
table1(
  ~ height + weight + bmi + age + WBBMC + wbbmd + fat + lean + pcfat +
    hypertension + diabetes,
  data = ob
)
Overall (N=1217) |
|
---|---|
height | |
Mean (SD) | 157 (7.98) |
Median [Min, Max] | 155 [136, 185] |
weight | |
Mean (SD) | 55.1 (9.40) |
Median [Min, Max] | 54.0 [34.0, 95.0] |
bmi | |
Mean (SD) | 22.4 (3.06) |
Median [Min, Max] | 22.2 [14.5, 37.1] |
age | |
Mean (SD) | 47.2 (17.3) |
Median [Min, Max] | 48.0 [13.0, 88.0] |
WBBMC | |
Mean (SD) | 1720 (363) |
Median [Min, Max] | 1710 [695, 3040] |
wbbmd | |
Mean (SD) | 1.01 (0.113) |
Median [Min, Max] | 1.01 [0.650, 1.35] |
fat | |
Mean (SD) | 17300 (5210) |
Median [Min, Max] | 17000 [4280, 40800] |
lean | |
Mean (SD) | 35500 (7030) |
Median [Min, Max] | 33600 [19100, 63100] |
pcfat | |
Mean (SD) | 31.6 (7.18) |
Median [Min, Max] | 32.4 [9.20, 48.4] |
hypertension | |
Mean (SD) | 0.507 (0.500) |
Median [Min, Max] | 1.00 [0, 1.00] |
diabetes | |
Mean (SD) | 0.111 (0.314) |
Median [Min, Max] | 0 [0, 1.00] |
# Same overall table, but diabetes is wrapped in as.factor() so it is
# reported as a count and percentage per level instead of Mean/Median.
# No stratifying variable is used in this call.
table1(
  ~ height + weight + bmi + age + WBBMC + wbbmd + fat + lean + pcfat +
    hypertension + as.factor(diabetes),
  data = ob
)
Overall (N=1217) |
|
---|---|
height | |
Mean (SD) | 157 (7.98) |
Median [Min, Max] | 155 [136, 185] |
weight | |
Mean (SD) | 55.1 (9.40) |
Median [Min, Max] | 54.0 [34.0, 95.0] |
bmi | |
Mean (SD) | 22.4 (3.06) |
Median [Min, Max] | 22.2 [14.5, 37.1] |
age | |
Mean (SD) | 47.2 (17.3) |
Median [Min, Max] | 48.0 [13.0, 88.0] |
WBBMC | |
Mean (SD) | 1720 (363) |
Median [Min, Max] | 1710 [695, 3040] |
wbbmd | |
Mean (SD) | 1.01 (0.113) |
Median [Min, Max] | 1.01 [0.650, 1.35] |
fat | |
Mean (SD) | 17300 (5210) |
Median [Min, Max] | 17000 [4280, 40800] |
lean | |
Mean (SD) | 35500 (7030) |
Median [Min, Max] | 33600 [19100, 63100] |
pcfat | |
Mean (SD) | 31.6 (7.18) |
Median [Min, Max] | 32.4 [9.20, 48.4] |
hypertension | |
Mean (SD) | 0.507 (0.500) |
Median [Min, Max] | 1.00 [0, 1.00] |
as.factor(diabetes) | |
0 | 1082 (88.9%) |
1 | 135 (11.1%) |
# Show the interquartile range: continuous variables are rendered as
# Mean(SD) and Median[Q1, Q3]; hypertension and diabetes are both factors.
table1(
  ~ height + weight + bmi + age + WBBMC + wbbmd + fat + lean + pcfat +
    as.factor(hypertension) + as.factor(diabetes),
  data = ob,
  render.continuous = c(. = "Mean(SD)", . = "Median[Q1, Q3]")
)
Overall (N=1217) |
|
---|---|
height | |
Mean(SD) | 157(7.98) |
Median[Q1, Q3] | 155[151, 162] |
weight | |
Mean(SD) | 55.1(9.40) |
Median[Q1, Q3] | 54.0[49.0, 61.0] |
bmi | |
Mean(SD) | 22.4(3.06) |
Median[Q1, Q3] | 22.2[20.2, 24.3] |
age | |
Mean(SD) | 47.2(17.3) |
Median[Q1, Q3] | 48.0[35.0, 58.0] |
WBBMC | |
Mean(SD) | 1720(363) |
Median[Q1, Q3] | 1710[1480, 1950] |
wbbmd | |
Mean(SD) | 1.01(0.113) |
Median[Q1, Q3] | 1.01[0.930, 1.09] |
fat | |
Mean(SD) | 17300(5210) |
Median[Q1, Q3] | 17000[13800, 20300] |
lean | |
Mean(SD) | 35500(7030) |
Median[Q1, Q3] | 33600[30300, 39800] |
pcfat | |
Mean(SD) | 31.6(7.18) |
Median[Q1, Q3] | 32.4[27.0, 36.8] |
as.factor(hypertension) | |
0 | 600 (49.3%) |
1 | 617 (50.7%) |
as.factor(diabetes) | |
0 | 1082 (88.9%) |
1 | 135 (11.1%) |
# Stratify by gender: the `| gender` term produces one column per gender
# level plus an Overall column in the output below.
table1(
  ~ height + weight + bmi + age + WBBMC + wbbmd + fat + lean + pcfat +
    as.factor(hypertension) + as.factor(diabetes) | gender,
  data = ob,
  render.continuous = c(. = "Mean(SD)", . = "Median[Q1, Q3]")
)
F (N=862) |
M (N=355) |
Overall (N=1217) |
|
---|---|---|---|
height | |||
Mean(SD) | 153(5.55) | 165(6.73) | 157(7.98) |
Median[Q1, Q3] | 153[150, 157] | 165[160, 169] | 155[151, 162] |
weight | |||
Mean(SD) | 52.3(7.72) | 62.0(9.59) | 55.1(9.40) |
Median[Q1, Q3] | 51.0[47.0, 57.0] | 62.0[55.0, 68.0] | 54.0[49.0, 61.0] |
bmi | |||
Mean(SD) | 22.3(3.05) | 22.7(3.04) | 22.4(3.06) |
Median[Q1, Q3] | 22.1[20.1, 24.1] | 22.5[20.8, 24.9] | 22.2[20.2, 24.3] |
age | |||
Mean(SD) | 48.6(16.4) | 43.7(18.8) | 47.2(17.3) |
Median[Q1, Q3] | 49.0[39.0, 59.0] | 44.0[24.0, 56.0] | 48.0[35.0, 58.0] |
WBBMC | |||
Mean(SD) | 1600(293) | 2030(336) | 1720(363) |
Median[Q1, Q3] | 1610[1410, 1800] | 2030[1810, 2250] | 1710[1480, 1950] |
wbbmd | |||
Mean(SD) | 0.988(0.111) | 1.06(0.101) | 1.01(0.113) |
Median[Q1, Q3] | 0.990[0.910, 1.07] | 1.06[0.990, 1.13] | 1.01[0.930, 1.09] |
fat | |||
Mean(SD) | 18200(4950) | 15000(5110) | 17300(5210) |
Median[Q1, Q3] | 17700[14800, 21100] | 15100[11400, 18200] | 17000[13800, 20300] |
lean | |||
Mean(SD) | 32000(3970) | 43800(5820) | 35500(7030) |
Median[Q1, Q3] | 31500[29300, 34500] | 43400[40300, 47600] | 33600[30300, 39800] |
pcfat | |||
Mean(SD) | 34.7(5.19) | 24.2(5.76) | 31.6(7.18) |
Median[Q1, Q3] | 34.7[31.5, 38.3] | 24.6[20.4, 28.0] | 32.4[27.0, 36.8] |
as.factor(hypertension) | |||
0 | 430 (49.9%) | 170 (47.9%) | 600 (49.3%) |
1 | 432 (50.1%) | 185 (52.1%) | 617 (50.7%) |
as.factor(diabetes) | |||
0 | 760 (88.2%) | 322 (90.7%) | 1082 (88.9%) |
1 | 102 (11.8%) | 33 (9.3%) | 135 (11.1%) |
##So sanh hai nhom
library(compareGroups)
# Compare the listed characteristics between the two gender groups.
# `age` used to appear twice in the formula, which printed a duplicated
# `age` row; each variable is now listed once.
# NOTE(review): diabetes is a 0/1 indicator but is summarised as mean (SD)
# below; consider `method = c(diabetes = 3)` in compareGroups() to treat it
# as categorical -- TODO confirm against the compareGroups documentation.
createTable(
  compareGroups(
    gender ~ age + bmi + WBBMC + wbbmd + fat + lean + pcfat + diabetes,
    data = ob
  )
)
##
## --------Summary descriptives table by 'gender'---------
##
## ____________________________________________
## F M p.overall
## N=862 N=355
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## age 48.6 (16.4) 43.7 (18.8) <0.001
## bmi 22.3 (3.05) 22.7 (3.04) 0.013
## WBBMC 1599 (293) 2030 (336) <0.001
## wbbmd 0.99 (0.11) 1.06 (0.10) <0.001
## fat 18240 (4954) 14978 (5113) <0.001
## lean 32045 (3966) 43762 (5819) <0.001
## pcfat 34.7 (5.19) 24.2 (5.76) <0.001
## diabetes 0.12 (0.32) 0.09 (0.29) 0.181
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Quantitative variables only: the same gender comparison without diabetes.
# `age` used to appear twice in the formula, which printed a duplicated
# `age` row; each variable is now listed once. No new variable is created.
createTable(
  compareGroups(
    gender ~ age + bmi + WBBMC + wbbmd + fat + lean + pcfat,
    data = ob
  )
)
##
## --------Summary descriptives table by 'gender'---------
##
## _________________________________________
## F M p.overall
## N=862 N=355
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## age 48.6 (16.4) 43.7 (18.8) <0.001
## bmi 22.3 (3.05) 22.7 (3.04) 0.013
## WBBMC 1599 (293) 2030 (336) <0.001
## wbbmd 0.99 (0.11) 1.06 (0.10) <0.001
## fat 18240 (4954) 14978 (5113) <0.001
## lean 32045 (3966) 43762 (5819) <0.001
## pcfat 34.7 (5.19) 24.2 (5.76) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
#Ve bieu do
##histogram
library(ggplot2)
# Histogram of percent body fat (pcfat) for the whole sample.
p <- ggplot(data = ob, aes(x = pcfat))
# Add a fill colour, axis labels and a title.
p +
  geom_histogram(fill = "blue", col = "white") +
  labs(
    x = "Percent body fat",
    y = "Number of people",
    title = "Phân bố tỉ trọng mỡ"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Split by gender: the fill aesthetic is mapped to gender.
p <- ggplot(data = ob, aes(x = pcfat, fill = gender))
p1 <- p +
  geom_histogram(col = "white") +
  labs(
    x = "Percent body fat",
    y = "Number of people",
    title = "Phân bố tỉ trọng mỡ"
  )
p1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Density curves instead of counts. The y axis of geom_density() is a
# density, not a head count, so it is labelled "Density".
p +
  geom_density() +
  labs(
    x = "Percent body fat",
    y = "Density",
    title = "Phân bố tỉ trọng mỡ"
  )
# Make the fill semi-transparent so the overlap between groups is visible.
p +
  geom_density(alpha = 0.5) +
  labs(
    x = "Percent body fat",
    y = "Density",
    title = "Phân bố tỉ trọng mỡ"
  )
# Scatter plot of pcfat against bmi: p1 shows the points only, p2 adds a
# smoother from geom_smooth() with its default settings.
p <- ggplot(data = ob, aes(x = bmi, y = pcfat))
p1 <- p + geom_point()
p2 <- p1 + geom_smooth()
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Draw p1 and p2 together in a two-column layout.
grid.arrange(p1, p2, ncol=2)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# By gender: both the colour and fill aesthetics are mapped to gender.
p <- ggplot(
  data = ob,
  aes(x = bmi, y = pcfat, col = gender, fill = gender)
)
# p1 uses geom_smooth() defaults; p2 fits a cubic polynomial in x with
# method = "lm".
p1 <- p + geom_point() + geom_smooth()
p2 <- p +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2) + I(x^3))
# Only p2 is printed here; p1 is built but not displayed in this section.
p2