# Location of the obesity dataset (CSV file) on the local disk.
t <- "D:\\Datasets for practice\\obesity data.csv"
# Read the CSV into a data frame and preview its first rows.
ob <- read.csv(file = t)
head(x = ob)
##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0
## 4  4      F    156     53 21.8  56 1171 0.80 17472 33094  33.8
## 5  5      M    160     51 19.9  54 1681 0.98  7336 40621  14.8
## 6  6      F    153     47 20.1  52 1358 0.91 14904 30068  32.2

# Describe the distribution of pcfat for men and women

library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
# Histogram of pcfat on the density scale (blue bars with white outlines)
# with a red kernel density curve layered on top.
p <- ggplot(ob, aes(x = pcfat)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    fill = "blue",
    colour = "white"
  ) +
  geom_density(colour = "red")
p
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Distribution of pcfat by gender: one semi-transparent density curve per
# gender, with both the fill and the outline colour mapped to gender.
# The plot object is built in a single assignment.
p <- ggplot(data = ob, aes(x = pcfat, fill = gender, col = gender)) +
  geom_density(alpha = 0.1)
p

# Create a categorical variable that splits subjects into four BMI groups.

head(ob, 3)
##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0
# Classify bmi with half-open intervals [lower, upper):
#   [-Inf, 18.5) underweight, [18.5, 25) normal,
#   [25, 30) overweight,      [30, Inf) obese.
# right = FALSE makes each lower bound inclusive, so a bmi of exactly 30 is
# labelled "obese" (a strict `> 30` test would leave it as NA).
# as.character() keeps the column a plain character vector rather than a
# factor, so downstream plots order the groups alphabetically.
ob$obesity <- as.character(cut(
  ob$bmi,
  breaks = c(-Inf, 18.5, 25, 30, Inf),
  labels = c("underweight", "normal", "overweight", "obese"),
  right = FALSE
))
head(ob, 3)
##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat obesity
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3  normal
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8  normal
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0  normal

# Plot the distribution of pcfat for the four BMI groups.

ggplot(ob, aes(x = pcfat, fill = obesity, colour = obesity)) +
  geom_density(alpha = 0.1)

# Box plot practice: geom_boxplot() draws the boxes and geom_jitter() adds
# the individual observations on top of them.

# p holds the box plot alone; p1 is the box plot with jittered points.
p <- ggplot(ob, aes(x = gender, y = pcfat, colour = gender)) +
  geom_boxplot()
p1 <- p + geom_jitter(alpha = 0.3)
p1

# Density curves of pcfat by gender (semi-transparent fill), stored as p2.

p <- ggplot(ob, aes(x = pcfat, fill = gender, colour = gender))
p2 <- p + geom_density(alpha = 0.1)
p2

library(gridExtra)
# Show the box plot (p1) and the density plot (p2) in two columns.
grid.arrange(p1, p2, ncol = 2)

# Correlation (scatter) plot of pcfat against bmi.

p <- ggplot(ob, aes(x = bmi, y = pcfat))
p + geom_point()

# The same scatter plot, with points coloured by gender.

p <- ggplot(ob, aes(x = bmi, y = pcfat, fill = gender, colour = gender))
p + geom_point()

# Add fitted curves with geom_smooth():
#   method = lm  fits a linear model
#   formula      y ~ x + I(x^2) requests a second-degree polynomial
#   labs(x, y)   sets the x-axis and y-axis titles

p <- ggplot(ob, aes(x = bmi, y = pcfat, fill = gender, colour = gender))
# Straight-line fit per gender.
p + geom_point() + geom_smooth(method = lm)

# Quadratic fit per gender.
p + geom_point() + geom_smooth(method = lm, formula = y ~ x + I(x^2))

# Quadratic fit per gender with descriptive axis titles.
p + geom_point() +
  geom_smooth(method = lm, formula = y ~ x + I(x^2)) +
  labs(x = "Body mass index", y = "percent body fat")

# Change the theme.

library(ggthemes)
# Gender-coloured scatter of pcfat against bmi with fitted curves, axis
# titles, a plot title and the black-and-white theme.
# NOTE(review): both a straight-line fit and a quadratic fit are layered
# here - confirm that drawing both is intended.
p <- ggplot(ob, aes(x = bmi, y = pcfat, fill = gender, colour = gender)) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2)) +
  labs(x = "Body mass index", y = "percent body fat") +
  ggtitle("Bieu do tuong quan") +
  theme_bw()
p