library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Tidy the `who` data set into one row per country, year, TB type, sex, and age group.

# Reshape `who` to long form: one row per country, year, TB type, sex and
# age group, with the count stored in `cases` (missing counts are dropped).
# The regex splits each original column name in a single step: the optional
# "_" after "new" lets both "new_sp_m014" and "newrel_f65" match, and the
# three captures are the type, the one-letter sex code, and the age code.
who_tidy <- who %>%
  select(-iso2, -iso3) %>%
  pivot_longer(
    cols = new_sp_m014:newrel_f65,
    names_to = c("type", "sex", "age"),
    names_pattern = "new_?(.*)_(.)(.*)",
    values_to = "cases",
    values_drop_na = TRUE
  )

who_tidy
## # A tibble: 76,046 × 6
##    country      year type  sex   age   cases
##    <chr>       <dbl> <chr> <chr> <chr> <dbl>
##  1 Afghanistan  1997 sp    m     014       0
##  2 Afghanistan  1997 sp    m     1524     10
##  3 Afghanistan  1997 sp    m     2534      6
##  4 Afghanistan  1997 sp    m     3544      3
##  5 Afghanistan  1997 sp    m     4554      5
##  6 Afghanistan  1997 sp    m     5564      2
##  7 Afghanistan  1997 sp    m     65        0
##  8 Afghanistan  1997 sp    f     014       5
##  9 Afghanistan  1997 sp    f     1524     38
## 10 Afghanistan  1997 sp    f     2534     36
## # ℹ 76,036 more rows

Plot the total number of TB cases in the world across years

# Sum the case counts over every country, type, sex and age group per year.
# `.by` groups for this one verb only, so the result is already ungrouped;
# unlike group_by() it does not sort the groups, hence the explicit arrange().
yearly_totals <- who_tidy %>%
  summarize(total_cases = sum(cases), .by = year) %>%
  arrange(year)

# Line chart (with a point marker per year) of the yearly case totals.
yearly_totals %>%
  ggplot(aes(x = year, y = total_cases)) +
  geom_line(color = "steelblue", linewidth = 1) +
  geom_point(color = "darkblue", size = 2) +
  # Show y-axis values with thousands separators instead of sci. notation.
  scale_y_continuous(labels = scales::label_comma()) +
  theme_minimal() +
  labs(
    title = "Total Global Tuberculosis Cases Over Time",
    x = "Year",
    y = "Total Cases"
  )

Find out which country has the highest male-to-female ratio of TB cases in 2010.

# Country (or countries, if tied) with the highest male-to-female ratio of
# TB cases in 2010. Result columns: country, f, m, ratio.
highest_ratio_2010 <- who_tidy %>%
  filter(year == 2010) %>%
  group_by(country, sex) %>%
  summarize(total_cases = sum(cases), .groups = "drop") %>%
  # One row per country with the per-sex totals side by side in columns
  # `f` and `m`; a sex with no rows for a country is filled with 0.
  pivot_wider(
    names_from = sex,
    values_from = total_cases,
    values_fill = 0
  ) %>%
  # Drop countries with zero female cases, where m / f would be Inf or NaN.
  filter(f > 0) %>%
  mutate(ratio = m / f) %>%
  # slice_max() keeps every country tied for the top ratio, whereas
  # arrange() + slice_head(n = 1) would silently return only the tied
  # country that happens to sort first.
  slice_max(ratio, n = 1, with_ties = TRUE)


highest_ratio_2010
## # A tibble: 1 × 4
##   country                 f     m ratio
##   <chr>               <dbl> <dbl> <dbl>
## 1 Antigua and Barbuda     1     5     5