# Reading data
# Default location of the obesity dataset (CSV with a header row).
x <- "C:\\Users\\Q-Anh\\Desktop\\Prediction Model - NVT Prof\\Dataset\\obesity data.csv"
# Only ask for a file when the default location is missing, so the script
# runs unattended on the original machine and still works elsewhere.
if (!file.exists(x)) {
  if (!interactive()) {
    stop("Dataset not found at '", x, "' and no interactive session is ",
         "available to choose another file.", call. = FALSE)
  }
  x <- file.choose()
}
# `x` holds the path that is actually read.
ob <- read.csv(x, header = TRUE)
head(ob)
## id gender height weight bmi age bmc bmd fat lean pcfat
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2
# Histogram of percent body fat (pcfat) on the density scale
# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(ggplot2)
# The bars use fixed colours, so gender is deliberately NOT mapped here:
# mapping fill/colour to gender would still split the data into groups and
# stack two per-gender density histograms drawn in the same colour.
# after_stat(density) replaces the deprecated `..density..` notation
# (requires ggplot2 >= 3.3.0); bins = 30 makes the default bin count explicit.
p1 <- ggplot(data = ob, aes(x = pcfat)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    fill = "#FF00FF",
    colour = "#228B22"
  ) +
  theme(legend.position = "none")
p1
# Density curves of pcfat, one semi-transparent curve per gender, no legend
p2 <- ggplot(ob, aes(pcfat)) +
  geom_density(aes(colour = gender, fill = gender), alpha = 0.3) +
  theme(legend.position = "none")
p2
# Derive a BMI category (character column `obesity`) for every row.
# findInterval() uses left-closed intervals, so it returns
#   0 for bmi < 18.5, 1 for [18.5, 25), 2 for [25, 30) and 3 for bmi >= 30;
# adding 1 turns that into an index into the label vector. Missing bmi
# values stay NA.
ob$obesity <- c("Underweight", "Normal", "Overweight", "Obese")[
  findInterval(ob$bmi, c(18.5, 25, 30)) + 1L
]
# Density curves of pcfat, one per BMI category
p3 <- ggplot(ob, aes(pcfat)) +
  geom_density(aes(colour = obesity, fill = obesity), alpha = 0.1)
p3
# Box plot of pcfat by gender, with the individual observations jittered
# on top of the boxes
p4 <- ggplot(data = ob, aes(x = gender, y = pcfat, col = gender)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.1)
p4
# Density plot of pcfat by gender
p5 <- ggplot(data = ob, aes(x = pcfat, fill = gender, col = gender)) +
  geom_density(alpha = 0.05)
p5
#Illustrate 2 plots
library(gridExtra)
library(ggthemes)
# Left panel: box plot of pcfat by gender with jittered observations
p4 <- ggplot(ob, aes(gender, pcfat, colour = gender)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.1)
# Right panel: density of pcfat by gender
p5 <- ggplot(ob, aes(pcfat, colour = gender, fill = gender)) +
  geom_density(alpha = 0.05)
# Draw both panels next to each other in a two-column layout
grid.arrange(p4, p5, ncol = 2)
# Scatter plot of pcfat against BMI, coloured by gender, with a quadratic
# least-squares fit drawn for each gender
p6 <- ggplot(ob, aes(bmi, pcfat, colour = gender, fill = gender)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2)) +
  labs(x = "Body Mass Index", y = "Percentage Body Fat") +
  theme_economist(base_size = 8, base_family = "sans", horizontal = TRUE)
p6