#Reading data

# Load the obesity data set into `ob` and preview its first rows.
# `x` holds the path that is actually read. The file picker is opened only
# as a fallback when the default path does not exist and the session is
# interactive; previously it was always shown and its result was ignored.
x <- "C:\\Users\\Q-Anh\\Desktop\\Prediction Model - NVT Prof\\Dataset\\obesity data.csv"
if (!file.exists(x)) {
  if (!interactive()) {
    stop("Data file not found: ", x, call. = FALSE)
  }
  x <- file.choose()
}
ob <- read.csv(x, header = TRUE)
head(ob)
##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0
## 4  4      F    156     53 21.8  56 1171 0.80 17472 33094  33.8
## 5  5      M    160     51 19.9  54 1681 0.98  7336 40621  14.8
## 6  6      F    153     47 20.1  52 1358 0.91 14904 30068  32.2

#histogram plotting

# Attach ggplot2. library() stops with an error if the package is missing,
# whereas require() only returns FALSE and lets later plotting calls fail.
library(ggplot2)
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
# Histogram of percent body fat (pcfat) for the whole sample, on the density
# scale. The bars use a fixed fill and outline colour, so gender is not
# mapped here: constant fill/colour set on the layer override mapped ones,
# which made the former fill = gender / color = gender mappings dead code.
p1 <- ggplot(data = ob, aes(x = pcfat)) +
  geom_histogram(aes(y = ..density..), fill = "#FF00FF", col = "#228B22") +
  theme(legend.position = "none")
p1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density plot of pcfat by gender

# Overlaid density curves of pcfat, one per gender, with the legend hidden.
p2 <- ggplot(ob, aes(pcfat, colour = gender, fill = gender)) +
  geom_density(alpha = 0.3) +
  theme(legend.position = "none")
p2

#create new variable

# Derive the BMI category for every row in one vectorised step.
# findInterval() returns 0..3 for the left-closed bands
#   bmi < 18.5, 18.5 <= bmi < 25, 25 <= bmi < 30, bmi >= 30
# and that index (+1) picks the matching label. A missing bmi yields NA.
# The column stays a character vector and is rebuilt in full on every run.
ob$obesity <- c("Underweight", "Normal", "Overweight", "Obese")[
  findInterval(ob$bmi, c(18.5, 25, 30)) + 1L
]

# Density plot of pcfat by obesity category

# Density curves of pcfat split by obesity category; the fill is kept very
# light (alpha = 0.1) so overlapping groups remain visible.
p3 <- ggplot(ob, aes(pcfat, colour = obesity, fill = obesity)) +
  geom_density(alpha = 0.1)
p3

# Box plot of pcfat by gender with the individual observations jittered on
# top. This section had been collapsed into a single comment line, so the
# plot was never built or displayed at this point in the script.
library(ggplot2)
p4 <- ggplot(data = ob, aes(x = gender, y = pcfat, col = gender)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.1)
p4

# Density curves of pcfat by gender (lightly filled)
p5 <- ggplot(data = ob, aes(x = pcfat, fill = gender, col = gender)) +
  geom_density(alpha = 0.05)
p5

#Illustrate 2 plots

library(gridExtra)
library(ggthemes)
# Build the gender box plot (with jittered points) and the gender density
# plot, then draw them next to each other in a two-column layout.
p4 <- ggplot(ob, aes(gender, pcfat, colour = gender)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.1)
p5 <- ggplot(ob, aes(pcfat, colour = gender, fill = gender)) +
  geom_density(alpha = 0.05)
grid.arrange(grobs = list(p4, p5), ncol = 2)

#scatter plotting

# Scatter plot of pcfat against BMI, coloured by gender, with a quadratic
# linear-model trend fitted per gender and the Economist theme applied.
p6 <- ggplot(ob, aes(bmi, pcfat, colour = gender, fill = gender)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2)) +
  labs(x = "Body Mass Index", y = "Percentage Body Fat") +
  theme_economist(base_size = 8, base_family = "sans", horizontal = TRUE)
p6