Library
suppressMessages(library(ggplot2))
suppressMessages(library(tidyverse))
suppressMessages(library(ggthemes))
1. Import data
# Read the obesity data set from CSV into the data frame `ob`.
ob <- read.csv("~/Desktop/R-dir/R studying/dataset/obesity data.csv")
- wbbmd: whole body bone mineral density
- pcfat: percent body fat
2. Histogram of pcfat distribution
# Plain histogram of percent body fat using ggplot2's default binning.
ggplot(ob, aes(x = pcfat)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram, with blue bars outlined in white so adjacent bins are
# visually separated.
ggplot(ob, aes(x = pcfat)) +
  geom_histogram(fill = "blue", colour = "white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram with axis labels, a title, and the Economist theme (ggthemes).
ggplot(ob, aes(x = pcfat)) +
  geom_histogram(fill = "blue", colour = "white") +
  labs(
    x = "Percent body fat",
    y = "Number of people",
    title = "Distribution of body fat"
  ) +
  theme_economist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

3. The distribution of body fat (pcfat) by sex (gender)
##
# Histogram of percent body fat with bars filled by gender; the fill
# aesthetic is mapped at the plot level, so every layer inherits it.
ggplot(ob, aes(x = pcfat, fill = gender)) +
  geom_histogram(colour = "white") +
  labs(
    x = "Percent body fat",
    y = "Number of people",
    title = "Distribution of body fat"
  ) +
  theme_economist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Equivalent plot, but the fill aesthetic is mapped inside the histogram
# layer only rather than at the plot level.
ggplot(ob, aes(x = pcfat)) +
  geom_histogram(mapping = aes(fill = gender), colour = "white") +
  labs(
    x = "Percent body fat",
    y = "Number of people",
    title = "Distribution of body fat"
  ) +
  theme_economist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##
# Kernel density of percent body fat, one semi-transparent curve per gender.
# geom_density() plots a density estimate on the y axis (not a head count),
# so the axis is labelled "Density".
ggplot(ob, aes(x = pcfat, fill = gender)) +
  geom_density(alpha = 0.8) +
  labs(
    x = "Percent body fat",
    y = "Density",
    title = "Distribution of body fat"
  ) +
  theme_economist()

4. Barplot
# Classify each person's BMI into an ordered obesity-status factor.
# Intervals are left-closed, [lower, upper), and contiguous, so every
# non-missing BMI lands in exactly one category:
#   Underweight: bmi < 18.5
#   Normal:      18.5 <= bmi < 25
#   Overweight:  25   <= bmi < 30
#   Obese:       bmi >= 30
# (Using 29.9 as the Overweight upper bound would leave BMI values in
# [29.9, 30) unclassified, i.e. NA.) Missing BMI values stay NA.
ob$OB <- cut(
  ob$bmi,
  breaks = c(-Inf, 18.5, 25, 30, Inf),
  labels = c("Underweight", "Normal", "Overweight", "Obese"),
  right = FALSE
)

# Count people per obesity status within each gender and express each count
# as a percentage of that gender's total, rounded to one decimal place.
# The result stays grouped by gender.
temp <- ob %>%
  group_by(gender) %>%
  count(OB) %>%
  mutate(pct = round(n / sum(n) * 100, 1))
temp
## # A tibble: 8 × 4
## # Groups: gender [2]
## gender OB n pct
## <chr> <fct> <int> <dbl>
## 1 F Underweight 76 8.8
## 2 F Normal 626 72.6
## 3 F Overweight 149 17.3
## 4 F Obese 11 1.3
## 5 M Underweight 31 8.7
## 6 M Normal 239 67.3
## 7 M Overweight 81 22.8
## 8 M Obese 4 1.1
# Side-by-side bars: percentage of each obesity status, one bar per gender.
# geom_col() draws bar heights directly from the y values.
ggplot(temp, aes(x = OB, y = pct, fill = gender, group = gender)) +
  geom_col(position = "dodge")

# Dodged bars with the percentage printed above each bar.
# Bars and labels must be dodged by the same width, otherwise the labels
# drift away from the bar centres; 0.9 is the default bar width. The text
# layer inherits x, y and group from the plot-level mapping.
ggplot(temp, aes(x = OB, y = pct, fill = gender, group = gender)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
  geom_text(
    aes(label = pct),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    col = "blue"
  )

# Final version of the dodged bar chart: percentage labels, axis titles,
# and no legend.
# Bars and labels share the same dodge width (0.9, the default bar width)
# so each label is centred over its own bar.
# NOTE(review): with the legend hidden, nothing identifies which fill colour
# is which gender -- confirm this is intended.
ggplot(temp, aes(x = OB, y = pct, fill = gender, group = gender)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
  geom_text(
    aes(label = pct),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    col = "blue"
  ) +
  xlab("Obesity status") +
  ylab("Percent") +
  theme(legend.position = "none")

# Mean percent body fat per obesity status, rounded to one decimal place.
temp2 <- ob %>%
  group_by(OB) %>%
  summarise(mpcfat = round(mean(pcfat), 1))
temp2
## # A tibble: 4 × 2
## OB mpcfat
## <fct> <dbl>
## 1 Underweight 24.5
## 2 Normal 31.4
## 3 Overweight 35.4
## 4 Obese 38.1
# Mean percent body fat per obesity status, one coloured bar per status.
ggplot(temp2, aes(x = OB, y = mpcfat, fill = OB)) +
  geom_col()

# Same chart without the legend (the x axis already names each status).
ggplot(temp2, aes(x = OB, y = mpcfat, fill = OB)) +
  geom_col() +
  theme(legend.position = "none")

# Mean percent body fat per obesity status with the value printed on top of
# each bar. vjust = -0.5 lifts the label just above the bar instead of
# centring it on the bar's top edge, matching the other labelled bar charts.
ggplot(data = temp2, aes(x = OB, y = mpcfat, fill = OB)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = mpcfat), vjust = -0.5) +
  theme(legend.position = "none")

5. Boxplot
# Boxplot of percent body fat for each gender, outlined in a per-gender
# colour.
ggplot(ob, aes(x = gender, y = pcfat, colour = gender)) +
  geom_boxplot()

# Boxplot with the individual observations overlaid as jittered points.
# - outlier.shape = NA hides the boxplot's own outlier markers; the jitter
#   layer already draws every observation, so outliers would otherwise
#   appear twice.
# - height = 0 jitters horizontally only, so each point keeps its true
#   pcfat value on the y axis.
ggplot(data = ob, aes(x = gender, y = pcfat, col = gender)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(alpha = 0.2, height = 0)

6. Relationship between bmi and pcfat
# Scatter plot of percent body fat against BMI.
ggplot(ob, aes(x = bmi, y = pcfat)) +
  geom_point()

# Same scatter plot with points coloured by gender.
ggplot(ob, aes(x = bmi, y = pcfat, colour = gender)) +
  geom_point()

# Scatter plot by gender with a smoother per gender; the smoothing method
# and formula are left to ggplot2's defaults.
ggplot(ob, aes(x = bmi, y = pcfat, colour = gender)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Scatter plot by gender with a quadratic least-squares fit
# (pcfat ~ bmi + bmi^2) drawn for each gender.
ggplot(ob, aes(x = bmi, y = pcfat, colour = gender)) +
  geom_point() +
  geom_smooth(
    method = "lm",
    formula = y ~ x + I(x^2)
  )

suppressMessages(library(ggExtra))

# Scatter plot with a quadratic fit per gender and the legend placed below
# the panel; stored in `p` so marginal plots can be attached to it.
p <- ggplot(data = ob, aes(x = bmi, y = pcfat, col = gender)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2)) +
  theme(legend.position = "bottom")

# Add marginal plots for bmi and pcfat, coloured and filled per gender.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
ggMarginal(p, groupColour = TRUE, groupFill = TRUE)

suppressMessages(library(ggridges))

# Ridgeline densities of percent body fat: one ridge row per obesity
# status, filled by gender, using the ggridges theme and no legend.
ggplot(ob, aes(x = pcfat, y = OB, fill = gender)) +
  geom_density_ridges() +
  theme_ridges() +
  theme(legend.position = "none") +
  xlab("Percent body fat")
## Picking joint bandwidth of 1.59

# Ridgeline densities of percent body fat styled with the Economist theme
# and no legend.
# A complete theme replaces every theme setting added before it, so
# theme_economist() must come first and the legend tweak after it.
# theme_ridges() is dropped because theme_economist() would fully replace
# it anyway.
ggplot(data = ob, aes(x = pcfat, y = OB, fill = gender)) +
  geom_density_ridges() +
  theme_economist() +
  theme(legend.position = "none") +
  labs(x = "Percent body fat")
## Picking joint bandwidth of 1.59
