Dùng bộ dữ liệu Obesity
# Load the obesity data set from a local CSV file into the data frame `dat`.
# NOTE(review): the path is absolute and machine-specific -- confirm it exists
# on the machine that runs this script.
dat <- read.csv(
  file = "C:/Users/ADMIN/OneDrive/Statistical courses/Dinh Tien Hoang-Jun2019/Datasets cho thuc hanh/obesity data.csv"
)
# Report the dimensions (rows, columns) of the data frame.
dim(dat)
## [1] 1217 11
# Preview the first rows to check the column names and values.
head(dat)
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2
Biểu đồ phân bố (histogram) của biến pcfat
library (ggplot2); library(gridExtra)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
# Base plot object with `pcfat` on the x axis; layers are added to it below.
p <- ggplot(dat, aes(x = pcfat))
# Histogram with the default appearance and the default number of bins.
p + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with white bar outlines and a blue fill.
p + geom_histogram(colour = "white", fill = "blue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of `pcfat` with the y axis mapped to density instead of counts.
# NOTE(review): the `..density..` notation is deprecated in ggplot2 >= 3.4.0
# in favour of `after_stat(density)` -- confirm the installed version before
# switching.
p1 <- p +
  geom_histogram(aes(y = ..density..), colour = "white", fill = "blue")
p1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with a red density curve layered on top of the bars.
p2 <- p1 + geom_density(colour = "red")
p2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Draw both versions next to each other in a two-column layout.
grid.arrange(p1, p2, ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Phân tích phân bố của pcfat theo giới tính
# Base plot of `pcfat` with the fill colour mapped to `gender`.
p <- ggplot(dat, aes(x = pcfat, fill = gender))
# Histogram with the bars of each gender placed side by side ("dodge").
p1 <- p + geom_histogram(position = "dodge")
# Semi-transparent density curves, outlined and filled by `gender`.
p2 <- ggplot(dat, aes(x = pcfat, colour = gender, fill = gender)) +
  geom_density(alpha = 0.5)
# Stack the histogram above the density plot in a two-row layout.
grid.arrange(p1, p2, nrow = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
