# Load the obesity data set and recode the variable "gender".
# NOTE(review): the path below is absolute and machine-specific; point it at
# the local copy of the data before running this script elsewhere.
ob <- read.csv("/Users/osx/Desktop/Dataset for TDTU workshop 4-2022/obesity data.csv")

# Recode gender as the strings "1" (was "F") and "0" (was "M").
# Converting to character first keeps the recode working when the column was
# read as a factor (R < 4.0 or stringsAsFactors = TRUE), where assigning an
# unknown level would yield NA. %in% never returns NA, so missing values are
# simply left untouched.
ob$gender <- as.character(ob$gender)
ob$gender[ob$gender %in% "F"] <- "1"
ob$gender[ob$gender %in% "M"] <- "0"

# Cut the variable "bmi" into 4 groups.
# right = FALSE makes the intervals closed on the left:
#   [0, 18.5) underweight, [18.5, 25) normal, [25, 30) overweight,
#   [30, Inf) obese
# so a BMI of exactly 18.5, 25 or 30 falls into the higher category. With the
# default right = TRUE those boundary values land in the lower category.
ob$obese <- cut(
  ob$bmi,
  breaks = c(0, 18.5, 25.0, 30.0, Inf),
  labels = c("underweight", "normal", "overweight", "obese"),
  right = FALSE
)

# Rescale lean and fat mass by 1/1000 (presumably grams to kilograms --
# TODO confirm the units against the data dictionary).
ob$lean <- ob$lean / 1000
ob$fat <- ob$fat / 1000
library(ggplot2)
library(gridExtra)
# Histograms of percent body fat (pcfat), drawn side by side:
#   q  - counts per bin
#   q1 - density scale with a kernel density curve overlaid
# bins = 30 is the value stat_bin() falls back to when nothing is given;
# stating it explicitly keeps the same binning and avoids the
# "Pick better value with `binwidth`" message.
q <- ggplot(data = ob, aes(x = pcfat)) +
  geom_histogram(bins = 30, fill = "blue", color = "white")

# after_stat(density) replaces the deprecated ..density.. notation
# (needs ggplot2 >= 3.3.0). The y axis of this panel is a density, not a
# count, so it is labelled accordingly.
q1 <- ggplot(data = ob, aes(x = pcfat, y = after_stat(density))) +
  geom_histogram(bins = 30, fill = "blue", color = "white") +
  geom_density(color = "red") +
  labs(
    title = "Distribution of percent body fat",
    x = "Percent body fat",
    y = "Density"
  )

grid.arrange(q, q1, ncol = 2)
# Plot the distribution of percent body fat (pcfat) by gender.
# gender holds the codes assigned when the data were loaded
# ("1" = F, "0" = M), so the bars are filled and dodged per code.
# bins = 30 is the stat_bin() fallback, stated explicitly so the binning is
# unchanged and no "Pick better value with `binwidth`" message is emitted.
# Note: this reuses (overwrites) the name q.
q <- ggplot(data = ob, aes(x = pcfat, fill = gender)) +
  geom_histogram(bins = 30, position = "dodge")

# print() explicitly so the plot is also drawn when the script is run via
# source(), where top-level values are not auto-printed.
print(q)