library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata


Plot the total number of TB cases in the world across years:

# Reshape the wide WHO table to one row per country, year, diagnosis type,
# sex and age band. The measurement columns are named like "new_sp_m014" or
# "newrel_f65"; the pattern skips the "new" / "new_" prefix, then captures the
# type, the single-character sex code, and whatever remains as the age band.
who_tidy <- who %>%
  pivot_longer(
    cols = new_sp_m014:newrel_f65,
    names_to = c("type", "sex", "age"),
    names_pattern = "new_?(.*)_(.)(.*)",
    values_to = "cases",
    values_drop_na = TRUE  # keep only country/year/category rows with a count
  ) %>%
  # The ISO codes are redundant with the country name.
  select(-iso2, -iso3)

# Total reported cases per year, summed over every country, type, sex and age
# band. A weighted count adds up `cases` within each year.
who_summary <- who_tidy %>%
  count(year, wt = cases, name = "total_cases")

# Yearly totals as a connected line with a marker on each year.
who_summary %>%
  ggplot(aes(x = year, y = total_cases)) +
  geom_line(linewidth = 1, color = "steelblue") +
  geom_point(size = 2, color = "steelblue") +
  ggtitle("Total Number of TB Cases in the World Across Years") +
  xlab("Year") +
  ylab("Total Cases")


Find out which country has the highest male-to-female ratio of TB cases in 2010:

# Male-to-female ratio of 2010 TB cases per country, keeping the ten largest.
ratio_top10_2010 <- who_tidy %>%
  filter(year == 2010) %>%
  # Sum cases within each country/sex pair (one row per pair, ungrouped).
  count(country, sex, wt = cases, name = "total_cases") %>%
  # Spread the sex codes into columns `f` and `m` so they can be divided.
  pivot_wider(names_from = sex, values_from = total_cases) %>%
  # Drop countries whose female total is zero or missing; the ratio would
  # otherwise be infinite or undefined.
  filter(f > 0) %>%
  mutate(male_to_female_ratio = m / f) %>%
  arrange(desc(male_to_female_ratio)) %>%
  slice_head(n = 10)

# Horizontal bar chart of the ten highest male-to-female TB case ratios (2010).
ggplot(
  data = ratio_top10_2010,
  mapping = aes(
    # Order the bars by ratio rather than alphabetically by country.
    x = reorder(country, male_to_female_ratio),
    y = male_to_female_ratio
  )
) +
  geom_col(fill = "coral") +
  coord_flip() +
  # Make the ratio axis cover at least 0-5 without setting hard scale limits:
  # fixed `limits` treat larger values as out of bounds, so any bar with a
  # ratio above the upper limit would be dropped from the chart.
  expand_limits(y = c(0, 5)) +
  # Place a break at every whole number up to the top of the axis range.
  scale_y_continuous(
    breaks = function(lims) seq(0, floor(lims[2]), by = 1)
  ) +
  labs(
    title = "Top 10 Countries with Highest Male-to-Female TB Ratio (2010)",
    x = "Country",
    y = "Ratio"
  )


Antigua and Barbuda has the highest male-to-female ratio of TB cases in 2010, among countries that reported at least one female case.