library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
# Load the obesity dataset from a local CSV file into a data frame.
# NOTE(review): absolute, machine-specific path -- confirm it exists where this runs.
ob <- read.csv(
  file = "~/Desktop/Lectures - regression analysis/Datasets/Obesity data.csv"
)
# Classify each row's BMI into an ordered weight-status factor.
#
# Intervals are left-closed and contiguous, so every non-missing BMI gets a
# label:
#   [-Inf, 18.5) Underweight
#   [18.5, 25)   Normal
#   [25,   30)   Overweight
#   [30,   Inf]  Obese
# Using a single set of break points avoids gaps between adjacent bands
# (e.g. a BMI of 29.95 must fall in "Overweight", not be left unlabelled).
# Rows with a missing BMI remain NA.
ob$OB <- cut(
  ob$bmi,
  breaks = c(-Inf, 18.5, 25, 30, Inf),
  labels = c("Underweight", "Normal", "Overweight", "Obese"),
  right = FALSE,          # intervals are [lower, upper)
  include.lowest = TRUE   # with right = FALSE this closes the top interval
)
# Tally the BMI categories within each gender, then express each tally as a
# percentage of that gender's total, rounded to one decimal place.
# The result stays grouped by gender.
temp <- ob %>%
  group_by(gender) %>%
  count(OB) %>%
  mutate(pct = round(n / sum(n) * 100, 1))
# Side-by-side bar chart of the percentage in each BMI category, one bar per
# gender, with the percentage printed above each bar.
#
# The bars and the text labels must be dodged by the same width, otherwise the
# labels drift off-centre from their bars; 0.9 is the default bar width that
# dodging uses for bars.
p <- ggplot(data = temp, aes(x = OB, y = pct, fill = gender, group = gender)) +
  geom_col(position = position_dodge(width = 0.9)) +
  geom_text(
    aes(label = pct),                        # x, y and group are inherited
    position = position_dodge(width = 0.9),  # same dodge as the bars
    vjust = -0.5,                            # place the label just above the bar
    colour = "blue"
  )

# Print the plot with axis titles; the legend is hidden.
p + labs(x = "Obesity status", y = "Percent") + theme(legend.position = "none")
