library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)

# Load the obesity data set; the code below reads its `bmi` column.
ob <- read.csv("~/Desktop/Lectures - regression analysis/Datasets/Obesity data.csv")

# Classify BMI into four ordered categories using left-closed intervals:
#   [-Inf, 18.5)  Underweight
#   [18.5, 25)    Normal
#   [25, 30)      Overweight
#   [30, Inf)     Obese
# A single cut() call keeps the intervals contiguous, so a bmi in [29.9, 30)
# is labelled "Overweight" rather than falling between two categories and
# ending up as NA. Rows with a missing bmi remain NA.
# The result is a factor whose levels follow the order of `labels`.
ob$OB <- cut(
  ob$bmi,
  breaks = c(-Inf, 18.5, 25, 30, Inf),
  labels = c("Underweight", "Normal", "Overweight", "Obese"),
  right = FALSE
)

# Percentage of each obesity category within each gender.
# count() keeps the gender grouping, so sum(n) inside mutate() is the
# per-gender total and pct is the within-gender share, expressed in percent
# and rounded to one decimal place.
# ungroup() returns a plain tibble so later steps are not silently grouped.
temp <- ob %>%
  group_by(gender) %>%
  count(OB) %>%
  mutate(pct = round(n / sum(n) * 100, 1)) %>%
  ungroup()

# Dodged bar chart of the within-gender percentage per obesity category,
# with each bar's percentage printed just above it.
#
# Bars and labels share one position_dodge() object: bars are 0.9 wide by
# default, so the text must be dodged by 0.9 as well or the labels drift
# away from the bar centres.
dodge <- position_dodge(width = 0.9)

p <- ggplot(data = temp, aes(x = OB, y = pct, fill = gender, group = gender)) +
  geom_col(position = dodge) +
  # x, y and group are inherited from ggplot(); only the label is new here.
  geom_text(aes(label = pct), position = dodge, vjust = -0.5, colour = "blue")

# Printing at top level draws the plot; the fill legend is hidden.
p + labs(x = "Obesity status", y = "Percent") + theme(legend.position = "none")