Dùng bộ dữ liệu Obesity

# Load the obesity dataset from its CSV file into a data frame.
dat <- read.csv(
  file = "C:/Users/ADMIN/OneDrive/Statistical courses/Dinh Tien Hoang-Jun2019/Datasets cho thuc hanh/obesity data.csv"
)
# Report the dimensions (rows, columns) of the data frame.
dim(dat)
## [1] 1217   11
# Print the first rows to inspect the available columns.
head(dat)
##   id gender height weight  bmi age WBBMC wbbmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53  1312  0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65  1309  0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64  1230  0.84 19221 36057  34.0
## 4  4      F    156     53 21.8  56  1171  0.80 17472 33094  33.8
## 5  5      M    160     51 19.9  54  1681  0.98  7336 40621  14.8
## 6  6      F    153     47 20.1  52  1358  0.91 14904 30068  32.2

Biểu đồ phân bố Histogram

library (ggplot2); library(gridExtra)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
# Base plot object: maps the pcfat column of dat to the x axis.
# Later statements add layers to `p`.
p <- ggplot(dat, mapping = aes(x = pcfat))
# Histogram of pcfat with the default geom_histogram() settings.
p + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with white bar outlines and blue bar fill.
p + geom_histogram(fill = "blue", colour = "white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of pcfat with the y axis mapped to the computed density
# rather than the raw bin counts.
# after_stat(density) replaces the `..density..` notation, which ggplot2
# deprecated in 3.4.0 (it now emits a warning); after_stat() requires
# ggplot2 >= 3.3.0.
p1 <- p + geom_histogram(
  mapping = aes(y = after_stat(density)),
  color = "white",
  fill = "blue"
)
p1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density-scaled histogram of pcfat with a red kernel density curve drawn
# on top of it.
# after_stat(density) replaces the `..density..` notation, which ggplot2
# deprecated in 3.4.0 (it now emits a warning); after_stat() requires
# ggplot2 >= 3.3.0.
p2 <- p +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    color = "white",
    fill = "blue"
  ) +
  geom_density(color = "red")
p2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Draw p1 and p2 next to each other in a two-column layout.
grid.arrange(grobs = list(p1, p2), ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Phân tích theo giới tính

# Rebuild the base plot with bar fill mapped to gender, so each gender
# gets its own fill colour.
p <- ggplot(dat, mapping = aes(x = pcfat, fill = gender))
# Histogram with the bars of each gender placed side by side ("dodge").
p1 <- p + geom_histogram(position = "dodge")

# Density curves of pcfat per gender; outline and fill are both mapped to
# gender, and alpha = 0.5 makes the fills semi-transparent.
p2 <- ggplot(dat, mapping = aes(x = pcfat, colour = gender, fill = gender)) +
  geom_density(alpha = 0.5)
# Stack the histogram (p1) above the density plot (p2) in two rows.
grid.arrange(grobs = list(p1, p2), nrow = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.