t=file.choose()
p <- read.csv(t)
names(p)
##  [1] "id"     "gender" "height" "weight" "bmi"    "age"    "bmc"    "bmd"   
##  [9] "fat"    "lean"   "pcfat"
str(p)
## 'data.frame':    1217 obs. of  11 variables:
##  $ id    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ gender: chr  "F" "M" "F" "F" ...
##  $ height: int  150 165 157 156 160 153 155 167 165 158 ...
##  $ weight: int  49 52 57 53 51 47 58 65 54 60 ...
##  $ bmi   : num  21.8 19.1 23.1 21.8 19.9 20.1 24.1 23.3 19.8 24 ...
##  $ age   : int  53 65 64 56 54 52 66 50 61 58 ...
##  $ bmc   : int  1312 1309 1230 1171 1681 1358 1546 2276 1778 1404 ...
##  $ bmd   : num  0.88 0.84 0.84 0.8 0.98 0.91 0.96 1.11 0.96 0.86 ...
##  $ fat   : int  17802 8381 19221 17472 7336 14904 20233 17749 10795 21365 ...
##  $ lean  : int  28600 40229 36057 33094 40621 30068 35599 43301 38613 35534 ...
##  $ pcfat : num  37.3 16.8 34 33.8 14.8 32.2 35.3 28 21.1 36.6 ...
summary(p)
##        id            gender              height          weight     
##  Min.   :   1.0   Length:1217        Min.   :136.0   Min.   :34.00  
##  1st Qu.: 309.0   Class :character   1st Qu.:151.0   1st Qu.:49.00  
##  Median : 615.0   Mode  :character   Median :155.0   Median :54.00  
##  Mean   : 614.5                      Mean   :156.7   Mean   :55.14  
##  3rd Qu.: 921.0                      3rd Qu.:162.0   3rd Qu.:61.00  
##  Max.   :1227.0                      Max.   :185.0   Max.   :95.00  
##       bmi            age             bmc            bmd             fat       
##  Min.   :14.5   Min.   :13.00   Min.   : 695   Min.   :0.650   Min.   : 4277  
##  1st Qu.:20.2   1st Qu.:35.00   1st Qu.:1481   1st Qu.:0.930   1st Qu.:13768  
##  Median :22.2   Median :48.00   Median :1707   Median :1.010   Median :16955  
##  Mean   :22.4   Mean   :47.15   Mean   :1725   Mean   :1.009   Mean   :17288  
##  3rd Qu.:24.3   3rd Qu.:58.00   3rd Qu.:1945   3rd Qu.:1.090   3rd Qu.:20325  
##  Max.   :37.1   Max.   :88.00   Max.   :3040   Max.   :1.350   Max.   :40825  
##       lean           pcfat     
##  Min.   :19136   Min.   : 9.2  
##  1st Qu.:30325   1st Qu.:27.0  
##  Median :33577   Median :32.4  
##  Mean   :35463   Mean   :31.6  
##  3rd Qu.:39761   3rd Qu.:36.8  
##  Max.   :63059   Max.   :48.4
head(p)
##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0
## 4  4      F    156     53 21.8  56 1171 0.80 17472 33094  33.8
## 5  5      M    160     51 19.9  54 1681 0.98  7336 40621  14.8
## 6  6      F    153     47 20.1  52 1358 0.91 14904 30068  32.2
head(p,3)
##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0
p %>% count(age)
##    age  n
## 1   13  1
## 2   14  4
## 3   16  3
## 4   17  3
## 5   18 21
## 6   19 47
## 7   20 24
## 8   21 23
## 9   22 23
## 10  23 28
## 11  24 21
## 12  25 11
## 13  26 10
## 14  27  7
## 15  28  9
## 16  29  6
## 17  30 14
## 18  31  8
## 19  32  7
## 20  33 12
## 21  34 16
## 22  35 15
## 23  36 17
## 24  37 19
## 25  38 13
## 26  39 20
## 27  40 20
## 28  41 25
## 29  42 24
## 30  43 28
## 31  44 25
## 32  45 24
## 33  46 33
## 34  47 24
## 35  48 27
## 36  49 33
## 37  50 33
## 38  51 38
## 39  52 35
## 40  53 25
## 41  54 33
## 42  55 37
## 43  56 20
## 44  57 25
## 45  58 24
## 46  59 16
## 47  60 16
## 48  61 14
## 49  62 13
## 50  63 16
## 51  64 14
## 52  65 15
## 53  66 14
## 54  67 13
## 55  68 11
## 56  69 15
## 57  70 19
## 58  71 14
## 59  72 14
## 60  73 13
## 61  74  7
## 62  75 10
## 63  76 13
## 64  77 11
## 65  78  9
## 66  79  6
## 67  80 10
## 68  81  2
## 69  82  5
## 70  83  2
## 71  84  5
## 72  85  3
## 73  87  1
## 74  88  1
table(p$age)
## 
## 13 14 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 
##  1  4  3  3 21 47 24 23 23 28 21 11 10  7  9  6 14  8  7 12 16 15 17 19 13 20 
## 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 
## 20 25 24 28 25 24 33 24 27 33 33 38 35 25 33 37 20 25 24 16 16 14 13 16 14 15 
## 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 87 88 
## 14 13 11 15 19 14 14 13  7 10 13 11  9  6 10  2  5  2  5  3  1  1
p %>% count(age)
##    age  n
## 1   13  1
## 2   14  4
## 3   16  3
## 4   17  3
## 5   18 21
## 6   19 47
## 7   20 24
## 8   21 23
## 9   22 23
## 10  23 28
## 11  24 21
## 12  25 11
## 13  26 10
## 14  27  7
## 15  28  9
## 16  29  6
## 17  30 14
## 18  31  8
## 19  32  7
## 20  33 12
## 21  34 16
## 22  35 15
## 23  36 17
## 24  37 19
## 25  38 13
## 26  39 20
## 27  40 20
## 28  41 25
## 29  42 24
## 30  43 28
## 31  44 25
## 32  45 24
## 33  46 33
## 34  47 24
## 35  48 27
## 36  49 33
## 37  50 33
## 38  51 38
## 39  52 35
## 40  53 25
## 41  54 33
## 42  55 37
## 43  56 20
## 44  57 25
## 45  58 24
## 46  59 16
## 47  60 16
## 48  61 14
## 49  62 13
## 50  63 16
## 51  64 14
## 52  65 15
## 53  66 14
## 54  67 13
## 55  68 11
## 56  69 15
## 57  70 19
## 58  71 14
## 59  72 14
## 60  73 13
## 61  74  7
## 62  75 10
## 63  76 13
## 64  77 11
## 65  78  9
## 66  79  6
## 67  80 10
## 68  81  2
## 69  82  5
## 70  83  2
## 71  84  5
## 72  85  3
## 73  87  1
## 74  88  1
table(p$age)
## 
## 13 14 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 
##  1  4  3  3 21 47 24 23 23 28 21 11 10  7  9  6 14  8  7 12 16 15 17 19 13 20 
## 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 
## 20 25 24 28 25 24 33 24 27 33 33 38 35 25 33 37 20 25 24 16 16 14 13 16 14 15 
## 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 87 88 
## 14 13 11 15 19 14 14 13  7 10 13 11  9  6 10  2  5  2  5  3  1  1
p$age_grp = cut(p$age, breaks = seq(13,88,5))
c = p %>% count(age_grp)
table(p$age_grp, p$gender)
##          
##             F   M
##   (13,18]  16  15
##   (18,23]  74  71
##   (23,28]  41  17
##   (28,33]  32  15
##   (33,38]  51  29
##   (38,43]  93  24
##   (43,48] 107  26
##   (48,53] 114  50
##   (53,58] 109  30
##   (58,63]  56  19
##   (63,68]  48  19
##   (68,73]  63  12
##   (73,78]  36  14
##   (78,83]  18   7
##   (83,88]   4   6
c
##    age_grp   n
## 1  (13,18]  31
## 2  (18,23] 145
## 3  (23,28]  58
## 4  (28,33]  47
## 5  (33,38]  80
## 6  (38,43] 117
## 7  (43,48] 133
## 8  (48,53] 164
## 9  (53,58] 139
## 10 (58,63]  75
## 11 (63,68]  67
## 12 (68,73]  75
## 13 (73,78]  50
## 14 (78,83]  25
## 15 (83,88]  10
## 16    <NA>   1
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.0.5
c1= p %>% count(age_grp,gender)
c1
##    age_grp gender   n
## 1  (13,18]      F  16
## 2  (13,18]      M  15
## 3  (18,23]      F  74
## 4  (18,23]      M  71
## 5  (23,28]      F  41
## 6  (23,28]      M  17
## 7  (28,33]      F  32
## 8  (28,33]      M  15
## 9  (33,38]      F  51
## 10 (33,38]      M  29
## 11 (38,43]      F  93
## 12 (38,43]      M  24
## 13 (43,48]      F 107
## 14 (43,48]      M  26
## 15 (48,53]      F 114
## 16 (48,53]      M  50
## 17 (53,58]      F 109
## 18 (53,58]      M  30
## 19 (58,63]      F  56
## 20 (58,63]      M  19
## 21 (63,68]      F  48
## 22 (63,68]      M  19
## 23 (68,73]      F  63
## 24 (68,73]      M  12
## 25 (73,78]      F  36
## 26 (73,78]      M  14
## 27 (78,83]      F  18
## 28 (78,83]      M   7
## 29 (83,88]      F   4
## 30 (83,88]      M   6
## 31    <NA>      M   1
c1 %>%
  ggplot(aes(x=age_grp, y= n)) +
    geom_bar(stat="identity", fill="#f68060", alpha=.6, width=.5) +
    xlab("Age group") + ylab("No.of people") + ggtitle("No.in age group") + 
    theme_economist()

shapiro.test(p$age)
## 
##  Shapiro-Wilk normality test
## 
## data:  p$age
## W = 0.97366, p-value = 4.281e-14
shapiro.test(p$height)
## 
##  Shapiro-Wilk normality test
## 
## data:  p$height
## W = 0.9827, p-value = 7.179e-11
shapiro.test(p$pcfat)
## 
##  Shapiro-Wilk normality test
## 
## data:  p$pcfat
## W = 0.98284, p-value = 8.193e-11
# p nhỏ hơn 0,05 rất nhiều nên ko có phân phối chuẩn
ggplot(data = p, mapping = aes(x=age))+
  geom_histogram(color = "red", fill = "#7A9B57")+
  ggtitle("ABC")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

p %>% 
  ggplot(aes(age)) +
  geom_histogram(color = "red", fill = "#7A9B57")+
  ggtitle("ABC")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

library(ggthemes)
c %>%
  ggplot(aes(x=age_grp, y= n)) +
    geom_bar(stat="identity", fill="#f68060", alpha=.6, width=.5) +
    xlab("Age group") + ylab("No.of people") + ggtitle("No.in age group") + 
    theme_economist()

ggplot(data = p, mapping = aes(x=pcfat))+
  geom_histogram(color = "red", fill = "#7A9B57")+
  ggtitle("Pcfat")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

p$pcfat_grp = cut(p$pcfat, breaks = seq(9.2,48.4,5))
pc = p %>% count(pcfat_grp)
pc
##     pcfat_grp   n
## 1  (9.2,14.2]  18
## 2 (14.2,19.2]  52
## 3 (19.2,24.2] 110
## 4 (24.2,29.2] 223
## 5 (29.2,34.2] 334
## 6 (34.2,39.2] 318
## 7 (39.2,44.2] 141
## 8        <NA>  21
library(ggthemes)
pc %>%
  ggplot(aes(x=pcfat_grp, y= n)) +
    geom_bar(stat="identity", fill="#f68060", alpha=.5, width=.6) +
    xlab("pcfat group") + ylab("No.people") + ggtitle("No.in pcfat group") + 
    theme_economist()

pcg = table(p$pcfat_grp,p$gender)
pcg
##              
##                 F   M
##   (9.2,14.2]    0  18
##   (14.2,19.2]   4  48
##   (19.2,24.2]  17  93
##   (24.2,29.2]  94 129
##   (29.2,34.2] 280  54
##   (34.2,39.2] 306  12
##   (39.2,44.2] 141   0
library(ggplot2); library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.0.5
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
m = ggplot(data=p, aes(x=pcfat))
m1 = m +
geom_histogram(color="white",
fill="blue")
m1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

m = m +
geom_histogram(aes(y=..density..),color="white", fill="blue")
m2 = m + geom_density(col="red")
grid.arrange(m1, m2, ncol=2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

p$gender[p$gender == "F"] = "female"
p$gender[p$gender == "M"] = "male"
h = ggplot(data=p, aes(x=pcfat,
fill = gender))
h1 = h +
geom_histogram(position="dodge")
h1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

h2 = ggplot(data=p, aes(x=pcfat,
fill=gender, color=gender)) +
geom_density(alpha = 0.1)
h2

grid.arrange(h1, h2, nrow=2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

p$bmigrp[p$bmi < 18.5 ] = "Thiếu cân"
p$bmigrp[p$bmi >= 18.5 & p$bmi < 24.9 ] = "Bình thường"
p$bmigrp[p$bmi >= 24.9 & p$bmi < 29.9 ] = "Thừa cân"
p$bmigrp[p$bmi >= 29.9 & p$bmi <= 34.9 ] = "Béo phì"
p$bmigrp[p$bmi >= 35 ] = "Béo phì nặng"
p$bmigrp = factor(p$bmigrp, levels = c("Thiếu cân", "Bình thường", "Thừa cân", "Béo phì", "Béo phì nặng"))
library(sjPlot)
## Warning: package 'sjPlot' was built under R version 4.0.5
## Registered S3 method overwritten by 'parameters':
##   method                         from      
##   format.parameters_distribution datawizard
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
library(sjmisc)
## Warning: package 'sjmisc' was built under R version 4.0.5
## 
## Attaching package: 'sjmisc'
## The following object is masked from 'package:purrr':
## 
##     is_empty
## The following object is masked from 'package:tidyr':
## 
##     replace_na
## The following object is masked from 'package:tibble':
## 
##     add_case
t= table(p$bmigrp)
t
## 
##    Thi<U+1EBF>u cân  Bình thu<U+1EDD>ng     Th<U+1EEB>a cân             Béo phì 
##                 107                 857                 238                  12 
## Béo phì n<U+1EB7>ng 
##                   3
barplot(t,col="light blue")

sjt.xtab(p$bmigrp, p$gender, show.row.prc = T, show.col.prc = T)
bmigrp gender Total
female male
Thi<U+1EBF>u cân 76
71 %
8.8 %
31
29 %
8.7 %
107
100 %
8.8 %
Bình thu<U+1EDD>ng 622
72.6 %
72.2 %
235
27.4 %
66.2 %
857
100 %
70.4 %
Th<U+1EEB>a cân 153
64.3 %
17.7 %
85
35.7 %
23.9 %
238
100 %
19.6 %
Béo phì 8
66.7 %
0.9 %
4
33.3 %
1.1 %
12
100 %
1 %
Béo phì n<U+1EB7>ng 3
100 %
0.3 %
0
0 %
0 %
3
100 %
0.2 %
Total 862
70.8 %
100 %
355
29.2 %
100 %
1217
100 %
100 %
χ2=7.540 · df=4 · Cramer’s V=0.079 · Fisher’s p=0.118
r = ggplot(data = p, aes(x=bmigrp, fill = gender))
r = r + geom_bar(aes(y = (..count..)))
r = r + xlab("Tình trạng") + ylab("số người")
r

r = ggplot(data = p, aes(x=bmigrp, fill = gender))
r = r + geom_bar(aes(y = (..count..)/sum(..count..)))
r = r + xlab("Tình trạng") + ylab("tỉ lệ")
r

d = ggplot(data=p, aes(x=gender,
y=pcfat, fill=gender))
d + geom_boxplot()

boxplot(p$pcfat ~ p$bmigrp, col="light green", border="red")

boxplot(p$age ~ p$bmigrp, col="light green", border="red")

library(DescTools)
## Warning: package 'DescTools' was built under R version 4.0.5
## 
## Attaching package: 'DescTools'
## The following object is masked from 'package:sjmisc':
## 
##     %nin%
Desc(p$bmi)
## ------------------------------------------------------------------------------ 
## p$bmi (numeric)
## 
##   length       n    NAs  unique     0s   mean  meanCI'
##    1'217   1'217      0     149      0  22.40   22.22
##           100.0%   0.0%           0.0%          22.57
##                                                      
##      .05     .10    .25  median    .75    .90     .95
##    17.70   18.66  20.20   22.20  24.30  26.30   27.50
##                                                      
##    range      sd  vcoef     mad    IQR   skew    kurt
##    22.60    3.06   0.14    2.97   4.10   0.53    0.95
##                                                      
## lowest : 14.5, 15.2, 15.4 (2), 15.6, 15.8
## highest: 32.5, 34.7 (2), 35.6, 36.3, 37.1
## 
## ' 95%-CI (classic)

Desc(p$bmigrp)
## ------------------------------------------------------------------------------ 
## p$bmigrp (factor)
## 
##   length      n    NAs unique levels  dupes
##    1'217  1'217      0      5      5      y
##          100.0%   0.0%                     
## 
##                  level  freq   perc  cumfreq  cumperc
## 1   Bình thu<U+1EDD>ng   857  70.4%      857    70.4%
## 2      Th<U+1EEB>a cân   238  19.6%    1'095    90.0%
## 3     Thi<U+1EBF>u cân   107   8.8%    1'202    98.8%
## 4              Béo phì    12   1.0%    1'214    99.8%
## 5  Béo phì n<U+1EB7>ng     3   0.2%    1'217   100.0%

Desc(p$age ~ p$bmigrp)
## ------------------------------------------------------------------------------ 
## p$age ~ p$bmigrp
## 
## Summary: 
## n pairs: 1'217, valid: 1'217 (100.0%), missings: 0 (0.0%), groups: 5
## 
##                                                                      
##            Thi<U+1EBF>u cân   Bình thu<U+1EDD>ng      Th<U+1EEB>a cân
## mean                 40.888               46.256               53.034
## median               35.000               48.000               52.000
## sd                   20.432               16.963               15.374
## IQR                  36.000               23.000               22.750
## n                       107                  857                  238
## np                   8.792%              70.419%              19.556%
## NAs                       0                    0                    0
## 0s                        0                    0                    0
##                                                 
##                     Béo phì  Béo phì n<U+1EB7>ng
## mean                 47.833               57.333
## median               50.500               57.000
## sd                   16.140                3.512
## IQR                  15.500                3.500
## n                        12                    3
## np                   0.986%               0.247%
## NAs                       0                    0
## 0s                        0                    0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 42.913, df = 4, p-value = 1.078e-08

p$gender=ifelse(p$gender == "male", 1, 0)
library(psych)
## Warning: package 'psych' was built under R version 4.0.5
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:DescTools':
## 
##     AUC, ICC, SD
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
p1 = p[,c("gender", "height",   "weight",   "bmi",  "age",  "bmc",  "bmd",  "fat",  "lean", "pcfat")]
pairs.panels(p1)

cor(p1)
##             gender      height      weight         bmi         age          bmc
## gender  1.00000000  0.67077713  0.46955838  0.07096352 -0.12823407  0.539203649
## height  0.67077713  1.00000000  0.59766675 -0.01640275 -0.37311646  0.691945350
## weight  0.46955838  0.59766675  1.00000000  0.78691712 -0.04819844  0.610006100
## bmi     0.07096352 -0.01640275  0.78691712  1.00000000  0.22862793  0.225461272
## age    -0.12823407 -0.37311646 -0.04819844  0.22862793  1.00000000 -0.462907393
## bmc     0.53920365  0.69194535  0.61000610  0.22546127 -0.46290739  1.000000000
## bmd     0.29154032  0.38160279  0.34056024  0.12863581 -0.44155558  0.883668572
## fat    -0.28441678 -0.06349351  0.57584040  0.76921338  0.21754740  0.007271522
## lean    0.75818630  0.76816704  0.84047786  0.45385450 -0.19438829  0.739428246
## pcfat  -0.66576817 -0.47982065  0.05655573  0.44091833  0.30710026 -0.403526172
##                bmd          fat       lean       pcfat
## gender  0.29154032 -0.284416777  0.7581863 -0.66576817
## height  0.38160279 -0.063493511  0.7681670 -0.47982065
## weight  0.34056024  0.575840396  0.8404779  0.05655573
## bmi     0.12863581  0.769213384  0.4538545  0.44091833
## age    -0.44155558  0.217547400 -0.1943883  0.30710026
## bmc     0.88366857  0.007271522  0.7394282 -0.40352617
## bmd     1.00000000 -0.060155111  0.4348068 -0.30440674
## fat    -0.06015511  1.000000000  0.1150510  0.82416190
## lean    0.43480680  0.115051048  1.0000000 -0.44557451
## pcfat  -0.30440674  0.824161905 -0.4455745  1.00000000
corrplot(cor(p1), type = "upper", method = "number")

corrplot(cor(p1), method = "circle")

p$gender=ifelse(p$gender == 1, "male", "female")
z = ggplot(data=p, aes(x=age,
y=pcfat))
z1 = z + geom_point()
z2 = z1 +
geom_smooth()
grid.arrange(z1, z2, ncol=2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

k = ggplot(data=p, aes(x=age, y=pcfat, col=gender, fill=gender))
k1 = k + geom_point() + geom_smooth()
k2 = k + geom_point() + geom_smooth(method="lm", formula=y~x+I(x^2)+I(x^3))
grid.arrange(k1, k2, ncol=2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

library(table1)
## Warning: package 'table1' was built under R version 4.0.5
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
table1(~age + height + weight + bmi + pcfat | gender,
data=p)
female
(N=862)
male
(N=355)
Overall
(N=1217)
age
Mean (SD) 48.6 (16.4) 43.7 (18.8) 47.2 (17.3)
Median [Min, Max] 49.0 [14.0, 85.0] 44.0 [13.0, 88.0] 48.0 [13.0, 88.0]
height
Mean (SD) 153 (5.55) 165 (6.73) 157 (7.98)
Median [Min, Max] 153 [136, 170] 165 [146, 185] 155 [136, 185]
weight
Mean (SD) 52.3 (7.72) 62.0 (9.59) 55.1 (9.40)
Median [Min, Max] 51.0 [34.0, 95.0] 62.0 [38.0, 95.0] 54.0 [34.0, 95.0]
bmi
Mean (SD) 22.3 (3.05) 22.7 (3.04) 22.4 (3.06)
Median [Min, Max] 22.1 [15.2, 37.1] 22.5 [14.5, 34.7] 22.2 [14.5, 37.1]
pcfat
Mean (SD) 34.7 (5.19) 24.2 (5.76) 31.6 (7.18)
Median [Min, Max] 34.7 [14.6, 48.4] 24.6 [9.20, 39.0] 32.4 [9.20, 48.4]
#Ảnh hưởng của tuổi đến tỉ trọng mỡ ?
l1 = lm(pcfat ~ age, data=p)
summary(l1)
## 
## Call:
## lm(formula = pcfat ~ age, data = p)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.7114  -4.0069   0.8378   4.9514  18.2192 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25.58408    0.57003   44.88   <2e-16 ***
## age          0.12769    0.01135   11.25   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.839 on 1215 degrees of freedom
## Multiple R-squared:  0.09431,    Adjusted R-squared:  0.09357 
## F-statistic: 126.5 on 1 and 1215 DF,  p-value: < 2.2e-16
#Phương trình: pcfat = 25.6 + 0.13*age
#Diễn giải: Mỗi năm tăng độ tuổi, tỉ trọng mỡ tăng 0.13% (SE 0.011), và mối liên quan này có ý nghĩa thống kê (P < 0.0001)
l1 = lm(pcfat ~ age, data=p)
plot(p$pcfat ~ p$age, pch=16, col="blue")
abline(l1, col="red")

#Tỉ trọng mỡ khác nhau giữa nam và nữ
l2 = lm(pcfat ~ gender, data=p)
summary(l2)
## 
## Call:
## lm(formula = pcfat ~ gender, data = p)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.0724  -3.2724   0.1484   3.6276  14.8439 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  34.6724     0.1826   189.9   <2e-16 ***
## gendermale  -10.5163     0.3381   -31.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.362 on 1215 degrees of freedom
## Multiple R-squared:  0.4432, Adjusted R-squared:  0.4428 
## F-statistic: 967.3 on 1 and 1215 DF,  p-value: < 2.2e-16
#Phương trình pcfat = 34.7 – 10.5*gender(M)
#Diễn giải: Nam có tỉ trọng mỡ thấp hơn nữ 10.5% (SE 0.34%), và sự khác biệt này có ý nghĩa thống kê (P < 0.0001). 
#Khác biệt giữa nam và nữ giải thích 44% những khác biệt về phương sai của tỉ trọng mỡ.