Library

suppressMessages(library(ggplot2))
suppressMessages(library(tidyverse))
suppressMessages(library(ggthemes))

1. Import data

# Read the obesity dataset from CSV into the data frame `ob`.
ob <- read.csv(
  file = "~/Desktop/R-dir/R studying/dataset/obesity data.csv"
)

2. Histogram of pcfat distribution

# Plain histogram of percent body fat with the default binning.
ggplot(ob, aes(pcfat)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with blue bars outlined in white.
ggplot(ob, aes(pcfat)) +
  geom_histogram(fill = "blue", colour = "white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Add axis labels, a title and the Economist theme.
ggplot(ob, aes(pcfat)) +
  geom_histogram(fill = "blue", colour = "white") +
  labs(
    x = "Percent body fat",
    y = "Number of people",
    title = "Distribution of body fat"
  ) +
  theme_economist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

3. The distribution of body fat (pcfat) by sex (gender)

##
# Histogram of body fat coloured by gender; `fill` is mapped at the plot level.
ggplot(ob, aes(pcfat, fill = gender)) +
  geom_histogram(colour = "white") +
  labs(
    x = "Percent body fat",
    y = "Number of people",
    title = "Distribution of body fat"
  ) +
  theme_economist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same plot, but `fill` is mapped inside the histogram layer only.
ggplot(ob, aes(pcfat)) +
  geom_histogram(aes(fill = gender), colour = "white") +
  labs(
    x = "Percent body fat",
    y = "Number of people",
    title = "Distribution of body fat"
  ) +
  theme_economist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##
# Overlaid density curves of body fat by gender. The fill is semi-transparent
# (alpha = 0.8) so both groups stay visible where the curves overlap.
ggplot(data = ob, aes(x = pcfat, fill = gender)) +
  geom_density(alpha = 0.8) +
  xlab("Percent body fat") +
  # geom_density() puts a density estimate on the y axis, not a head count.
  ylab("Density") +
  ggtitle("Distribution of body fat") +
  theme_economist()

4. Barplot

# Classify BMI into four ordered weight-status groups. With right = FALSE,
# cut() builds the half-open intervals [-Inf, 18.5), [18.5, 25), [25, 30) and
# [30, Inf), so every non-missing BMI lands in exactly one class. (Hand-written
# ranges that end "Overweight" at 29.9 and start "Obese" at 30 would leave
# 29.9 <= bmi < 30 unclassified, i.e. NA.)
ob$OB <- cut(
  ob$bmi,
  breaks = c(-Inf, 18.5, 25, 30, Inf),
  labels = c("Underweight", "Normal", "Overweight", "Obese"),
  right = FALSE
)

# Count each weight-status group within each gender and express the count as a
# percentage of that gender's total, rounded to one decimal place.
temp <- ob %>%
  group_by(gender) %>%
  count(OB) %>%
  mutate(pct = round(n / sum(n) * 100, 1))
temp
## # A tibble: 8 × 4
## # Groups:   gender [2]
##   gender OB              n   pct
##   <chr>  <fct>       <int> <dbl>
## 1 F      Underweight    76   8.8
## 2 F      Normal        626  72.6
## 3 F      Overweight    149  17.3
## 4 F      Obese          11   1.3
## 5 M      Underweight    31   8.7
## 6 M      Normal        239  67.3
## 7 M      Overweight     81  22.8
## 8 M      Obese           4   1.1
# Dodged bar chart: percentage in each weight-status group, one bar per gender.
ggplot(data = temp, aes(x = OB, y = pct, fill = gender, group = gender)) +
  geom_bar(stat = "identity", position = "dodge")

# Same chart with the percentage printed above each bar. The labels inherit
# x, y and group from the plot and are dodged by 0.9, the same width as the
# bars, so each label is centred over its own bar.
ggplot(data = temp, aes(x = OB, y = pct, fill = gender, group = gender)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
  geom_text(
    aes(label = pct),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    colour = "blue"
  )

# Labelled chart with axis titles and the legend hidden.
ggplot(data = temp, aes(x = OB, y = pct, fill = gender, group = gender)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
  geom_text(
    aes(label = pct),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    colour = "blue"
  ) +
  xlab("Obesity status") +
  ylab("Percent") +
  theme(legend.position = "none")

# Mean percent body fat per weight-status group, rounded to one decimal place.
temp2 <- ob %>%
  group_by(OB) %>%
  summarise(mpcfat = round(mean(pcfat), 1))
temp2
## # A tibble: 4 × 2
##   OB          mpcfat
##   <fct>        <dbl>
## 1 Underweight   24.5
## 2 Normal        31.4
## 3 Overweight    35.4
## 4 Obese         38.1
# Bar chart of mean body fat per weight-status group, one fill colour per group.
ggplot(temp2, aes(OB, mpcfat, fill = OB)) +
  geom_bar(stat = "identity")

# Same chart with the legend hidden.
ggplot(temp2, aes(OB, mpcfat, fill = OB)) +
  geom_bar(stat = "identity") +
  theme(legend.position = "none")

# Legend hidden and each bar labelled with its mean value.
ggplot(temp2, aes(OB, mpcfat, fill = OB)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = mpcfat)) +
  theme(legend.position = "none")

5. Boxplot

# Boxplot of percent body fat for each gender.
ggplot(data = ob, aes(x = gender, y = pcfat, colour = gender)) +
  geom_boxplot()

# Boxplot with the individual observations overlaid as translucent points.
# - outlier.shape = NA hides the boxplot's own outlier markers; the jitter
#   layer already draws every observation, so outliers would appear twice.
# - height = 0 jitters horizontally only, so each point keeps its true pcfat.
ggplot(data = ob, aes(x = gender, y = pcfat, colour = gender)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(alpha = 0.2, height = 0)

6. Relationship between bmi and pcfat

# Scatter plot of percent body fat against BMI.
ggplot(ob, aes(bmi, pcfat)) +
  geom_point()

# Colour the points by gender.
ggplot(ob, aes(bmi, pcfat, colour = gender)) +
  geom_point()

# Add a smoothed trend per gender using geom_smooth()'s default method.
ggplot(ob, aes(bmi, pcfat, colour = gender)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Replace the default smoother with a quadratic linear-model fit per gender.
ggplot(ob, aes(bmi, pcfat, colour = gender)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2))

suppressMessages(library(ggExtra))
# Scatter plot of body fat against BMI with a quadratic fit per gender; the
# legend is moved to the bottom to leave room for the marginal plots.
p <- ggplot(data = ob, aes(x = bmi, y = pcfat, colour = gender)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2)) +
  theme(legend.position = "bottom")

# Add marginal distributions, coloured and filled by the plot's grouping.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
ggMarginal(p, groupColour = TRUE, groupFill = TRUE)

suppressMessages(library(ggridges))

# Ridgeline densities of body fat for each weight-status group, filled by
# gender, using the ridges theme with the legend hidden.
ggplot(data = ob, aes(x = pcfat, y = OB, fill = gender)) +
  geom_density_ridges() +
  theme_ridges() +
  theme(legend.position = "none") +
  labs(x = "Percent body fat")
## Picking joint bandwidth of 1.59

# Same plot with the Economist theme. A complete theme replaces all theme
# settings added before it, so it goes first and the legend tweak follows;
# in the opposite order the legend would reappear.
ggplot(data = ob, aes(x = pcfat, y = OB, fill = gender)) +
  geom_density_ridges() +
  theme_economist() +
  theme(legend.position = "none") +
  labs(x = "Percent body fat")
## Picking joint bandwidth of 1.59