# Location of the practice dataset (CSV).
# NOTE(review): absolute, machine-specific Windows path -- adjust before
# running this script on another computer.
t <- "C:\\Users\\SONY\\Google Drive\\HOC TAP\\CH 22\\Phan tich du lieu\\Workshop GS Tuan\\Phan tich du lieu va ung dung\\Datasets for practice\\obesity data.csv"
# Read the CSV into the data frame `ob`.
# The data frame is deliberately NOT attach()ed: attach() puts a copy of the
# columns on the search path, where they can silently mask (or be masked by)
# other objects. The plotting code in this script passes `data = ob`
# explicitly, so no attached columns are required.
ob <- read.csv(t)
# Preview the first rows; the recorded console output follows.
head(ob)
## id gender height weight bmi age bmc bmd fat lean pcfat
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2
library(ggplot2)
# Histogram of pcfat (counts) with the default binning.
p <- ggplot(data = ob, aes(x = pcfat))
p + geom_histogram(fill = "blue", colour = "white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of pcfat on the density scale with a density curve on top.
# after_stat(density) replaces the deprecated `..density..` notation.
p +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "blue",
    colour = "white"
  ) +
  geom_density(colour = "red", alpha = 0.05)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of pcfat split by gender (fill and outline mapped to gender).
p1 <- ggplot(data = ob, aes(x = pcfat, fill = gender, colour = gender))
p1 + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Same histogram on the density scale instead of raw counts, with 50 bins.
# `p1` already carries the data, so the layer does not repeat it (the original
# passed it through the partially matched argument `dat`); after_stat(density)
# replaces the deprecated `..density..` notation.
p1 + geom_histogram(aes(y = after_stat(density)), bins = 50)
# Density curves of pcfat by gender. Despite its name, `histogram` holds this
# density plot; it is reused later in the script by grid.arrange().
histogram <- p1 + geom_density(alpha = 0.1)
histogram
# Boxplot of pcfat by gender. The base plot `p2` is built once and reused for
# the three variants below.
p2 <- ggplot(data = ob, aes(x = gender, y = pcfat, colour = gender))
p2 + geom_boxplot()
# Show every observation as a semi-transparent jittered point over the boxes.
p2 + geom_boxplot() + geom_jitter(alpha = 0.3)
# Notched boxplot (the notch highlights the median) with the jittered points.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned. The plot is stored as `boxplot` for reuse by grid.arrange().
boxplot <- p2 +
  geom_boxplot(notch = TRUE, notchwidth = 0.3) +
  geom_jitter(alpha = 0.3)
boxplot
# Display several plots together with gridExtra: the stored `histogram` and
# `boxplot` plot objects are placed side by side in two columns.
library(gridExtra)
grid.arrange(histogram, boxplot, ncol = 2)
# Scatter plot of pcfat against bmi for the whole sample.
p3 <- ggplot(ob, aes(bmi, pcfat))
p3 + geom_point()
# Same scatter plot with points coloured by gender.
p4 <- ggplot(ob, aes(bmi, pcfat, fill = gender, colour = gender))
p4 + geom_point()
# Add a smoothed trend per gender (default smoother).
p4 +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Replace the default smoother with a quadratic (second-degree) linear model.
p4 +
  geom_point() +
  geom_smooth(method = lm, formula = y ~ x + I(x^2))
# Final version: axis labels, centred title and the Economist theme.
library(ggthemes)
p4 +
  geom_point() +
  geom_smooth(method = lm, formula = y ~ x + I(x^2)) +
  labs(
    x = "Body mass index",
    y = "Body percent fat",
    title = "Phan tich moi tuong quan"
  ) +
  theme_economist() +
  theme(plot.title = element_text(hjust = 0.5))