# Load the obesity practice data set and preview its first rows.
# The CSV location is an absolute, machine-specific Windows path.
t <- "C:\\Users\\SONY\\Google Drive\\HOC TAP\\CH 22\\Phan tich du lieu\\Workshop GS Tuan\\Phan tich du lieu va ung dung\\Datasets for practice\\obesity data.csv"
ob <- read.csv(file = t)
# NOTE(review): attach() puts the columns of `ob` on the search path. Every
# plotting call visible in this script passes `data = ob` explicitly, so this
# looks unnecessary -- confirm nothing else relies on it before removing.
attach(ob)
head(x = ob)
##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0
## 4  4      F    156     53 21.8  56 1171 0.80 17472 33094  33.8
## 5  5      M    160     51 19.9  54 1681 0.98  7336 40621  14.8
## 6  6      F    153     47 20.1  52 1358 0.91 14904 30068  32.2
library(ggplot2)
# Histogram of pcfat for the whole sample.
p <- ggplot(data = ob, aes(x = pcfat))
p + geom_histogram(fill = "blue", col = "white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram on the density scale with a kernel density curve on top.
# after_stat(density) replaces the `..density..` notation, which ggplot2
# deprecated in 3.4.0; after_stat() requires ggplot2 >= 3.3.0.
p +
  geom_histogram(aes(y = after_stat(density)), fill = "blue", col = "white") +
  geom_density(col = "red", alpha = 0.05)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of pcfat by gender: fill and outline colour both map to gender.
p1 <- ggplot(data = ob, aes(x = pcfat, fill = gender, col = gender))
p1 + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same breakdown on the density scale, using 50 bins.
# The layer inherits `ob` from p1, so the former `dat = ob` argument (which
# only worked through partial matching of `data`) is not needed.
# after_stat(density) replaces the deprecated `..density..` notation and
# requires ggplot2 >= 3.3.0.
p1 + geom_histogram(aes(y = after_stat(density)), bins = 50)

# Kernel density curves by gender. The plot is stored in `histogram` because
# it is passed to grid.arrange() further down the script.
histogram <- p1 + geom_density(alpha = 0.1)
histogram

# Box plot of pcfat by gender. The base plot is built once and reused below.
p2 <- ggplot(data = ob, aes(x = gender, y = pcfat, color = gender))
p2 + geom_boxplot()

# Show every observation on top of the boxes.
# outlier.shape = NA hides the box plot's own outlier markers; geom_jitter()
# already draws all points, so outliers would otherwise appear twice.
p2 + geom_boxplot(outlier.shape = NA) + geom_jitter(alpha = 0.3)

# Notched box plot with the raw points overlaid. The plot is stored in
# `boxplot` because it is passed to grid.arrange() further down the script.
boxplot <- p2 +
  geom_boxplot(notch = TRUE, notchwidth = 0.3, outlier.shape = NA) +
  geom_jitter(alpha = 0.3)
boxplot

# Display several plots together using gridExtra

library(gridExtra)
# Lay out the two saved plots in a two-column grid.
grid.arrange(
  histogram,
  boxplot,
  ncol = 2
)

# Scatter plot of pcfat (y axis) against bmi (x axis).
p3 <- ggplot(ob, aes(bmi, pcfat))
p3 + geom_point()

# Scatter plot of pcfat against bmi, with colour and fill mapped to gender.
p4 <- ggplot(
  data = ob,
  mapping = aes(x = bmi, y = pcfat, fill = gender, col = gender)
)
p4 + geom_point()

# Add a smoothed trend line using geom_smooth()'s default method.
p4 +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Replace the default smoother with a quadratic linear-model fit.
p4 +
  geom_point() +
  geom_smooth(method = lm, formula = y ~ x + I(x^2))

# Finished figure: quadratic fit by gender with axis labels, a centred title
# and the Economist theme from ggthemes.
library(ggthemes)
p4 +
  geom_point() +
  geom_smooth(method = lm, formula = y ~ x + I(x^2)) +
  labs(
    x = "Body mass index",
    y = "Body percent fat",
    title = "Phan tich moi tuong quan"
  ) +
  theme_economist() +
  theme(plot.title = element_text(hjust = 0.5))