# Location of the obesity dataset (CSV) on the local machine.
t <- "D:\\Datasets for practice\\obesity data.csv"

# Read the CSV into the data frame `ob` and preview its first rows.
ob <- read.csv(file = t)
head(ob)
## id gender height weight bmi age bmc bmd fat lean pcfat
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
# Histogram of pcfat (percent body fat) drawn on the density scale, with a
# kernel density curve overlaid in red.
# after_stat(density) replaces the `..density..` notation, which ggplot2
# deprecated in 3.4.0; after_stat() requires ggplot2 >= 3.3.0.
p <- ggplot(data = ob, aes(x = pcfat)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "blue",
    col = "white"
  ) +
  geom_density(col = "red")
p
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Distribution of pcfat by gender: one semi-transparent density curve per
# gender, with fill and outline colour both mapped to gender.
# The plot is built in a single expression so `p` is assigned only once.
p <- ggplot(data = ob, aes(x = pcfat, fill = gender, col = gender)) +
  geom_density(alpha = 0.1)
p
# Create a classification variable that assigns each row to one of 4 BMI groups.
head(ob, 3)
## id gender height weight bmi age bmc bmd fat lean pcfat
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0
# Groups are half-open intervals [lower, upper) (right = FALSE):
#   bmi < 18.5        -> "underweight"
#   18.5 <= bmi < 25  -> "normal"
#   25 <= bmi < 30    -> "overweight"
#   bmi >= 30         -> "obese"
# Using one cut() call guarantees every boundary value lands in exactly one
# group; in particular bmi == 30 is "obese" instead of being left as NA.
# A missing bmi still yields NA. The result is stored as character (not
# factor) so the column type matches plain string assignment.
ob$obesity <- as.character(
  cut(
    ob$bmi,
    breaks = c(-Inf, 18.5, 25, 30, Inf),
    labels = c("underweight", "normal", "overweight", "obese"),
    right = FALSE
  )
)
head(ob, 3)
## id gender height weight bmi age bmc bmd fat lean pcfat obesity
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3 normal
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8 normal
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0 normal
# Plot the distribution of pcfat for the 4 obesity groups: one
# semi-transparent density curve per value of `obesity`.
ggplot(
  data = ob,
  mapping = aes(x = pcfat, fill = obesity, colour = obesity)
) +
  geom_density(alpha = 0.1)
# Boxplot practice.
#   geom_boxplot: draws the box plot.
#   geom_jitter:  adds the individual observations on top of the boxes.
# `p` keeps the plain boxplot; `p1` is the boxplot with jittered points.
p <- ggplot(data = ob, mapping = aes(x = gender, y = pcfat, colour = gender)) +
  geom_boxplot()
p1 <- p + geom_jitter(alpha = 0.3)
p1
# Density plot of pcfat by gender, stored as `p2`.
p <- ggplot(data = ob, mapping = aes(x = pcfat, fill = gender, colour = gender))
p2 <- p + geom_density(alpha = 0.1)
p2

# Show `p1` (built earlier in this script) and `p2` side by side in two
# columns.
library(gridExtra)
grid.arrange(p1, p2, ncol = 2)
# Correlation (scatter) plot of pcfat against bmi.
p <- ggplot(data = ob, mapping = aes(x = bmi, y = pcfat))
p + geom_point()

# Same scatter plot, with points coloured by gender.
p <- ggplot(
  data = ob,
  mapping = aes(x = bmi, y = pcfat, fill = gender, colour = gender)
)
p + geom_point()
# Add a fitted line with geom_smooth.
#   method = lm: fit with a linear model (straight line by default).
#   formula:     y ~ x + I(x^2) makes the fitted curve quadratic.
#   axis labels: name the horizontal and vertical axes.
p <- ggplot(
  data = ob,
  mapping = aes(x = bmi, y = pcfat, fill = gender, colour = gender)
)

# Straight-line fit per gender.
p + geom_point() + geom_smooth(method = lm)

# Quadratic fit per gender.
p + geom_point() + geom_smooth(method = lm, formula = y ~ x + I(x^2))

# Quadratic fit per gender, with descriptive axis labels.
p +
  geom_point() +
  geom_smooth(method = lm, formula = y ~ x + I(x^2)) +
  labs(x = "Body mass index", y = "percent body fat")
# Change the plot theme.
# NOTE(review): nothing in this section appears to use ggthemes; theme_bw()
# is provided by ggplot2. The library call is kept as-is.
library(ggthemes)

# Scatter plot of pcfat against bmi by gender with two overlaid fits per
# gender (a straight line and a quadratic curve), axis labels, a title and
# the black-and-white theme.
p <- ggplot(
  data = ob,
  mapping = aes(x = bmi, y = pcfat, fill = gender, colour = gender)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2)) +
  labs(x = "Body mass index", y = "percent body fat") +
  ggtitle("Bieu do tuong quan") +
  theme_bw()
p