Informative visualization of the WHO TB data

Top 5 countries + Rest of the world (1995 and later)

library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Show the raw (wide) WHO table as shipped
who

# Reshape to one row per country / year / key-component combination
who5 <- who %>%
  # Stack every case-count column into key/cases pairs, dropping missing counts
  pivot_longer(
    cols = new_sp_m014:newrel_f65,
    names_to = "key",
    values_to = "cases",
    values_drop_na = TRUE
  ) %>%
  # "newrel" lacks the underscore the other keys have; insert it so every key
  # splits into the same three pieces below
  mutate(key = str_replace(key, "newrel", "new_rel")) %>%
  separate(key, into = c("new", "var", "sexage")) %>%
  # Drop the leftover "new" piece and the two ISO code columns
  select(-iso2, -iso3, -new) %>%
  # First character is the sex code, the remainder is the age band
  separate(sexage, into = c("sex", "age"), sep = 1)

# Show the tidied table
who5
# Keep the five countries with the largest total case count.
# Totals are taken over every row of who5 (no year restriction here).
top5countries <- who5 %>%
  group_by(country) %>%
  summarise(sumofcountrycases = sum(cases)) %>%
  # Rank 1 = largest total
  filter(rank(desc(sumofcountrycases)) <= 5)

# Total TB cases per country, year and sex, restricted to 1995 and later.
# `sc` is the summed case count for each combination.
who6 <- who5 %>%
  filter(year >= 1995) %>%
  group_by(country, year, sex) %>%
  summarise(sc = sum(cases))

# Yearly per-sex totals for the top five countries only; the join also
# carries each country's overall total (sumofcountrycases) along.
who7 <- who6 %>%
  inner_join(top5countries, by = "country")

# Calculate the number of countries and cases for the rest of the world

# One row per country that is NOT among the top five
who7_rem_countries <- who6 %>%
  anti_join(top5countries, by = "country") %>%
  group_by(country) %>%
  distinct(country) %>%
  ungroup()

# 1x1 tibble; its column `n` holds the number of remaining countries
who7_rem_no_of_countries <- tally(who7_rem_countries)

# Yearly per-sex totals summed over all remaining countries, labelled as a
# single pseudo-country so it can be plotted next to the top five.
who7_rem <- who6 %>%
  anti_join(top5countries, by = "country") %>%
  group_by(year, sex) %>%
  summarise(sc = sum(sc)) %>%
  # paste0() on the scalar count avoids the stray spaces that paste() with its
  # default separator put inside the parentheses and at the end of the label
  mutate(
    country = paste0(
      "Rest of the world (",
      who7_rem_no_of_countries$n,
      " countries)"
    )
  )

# Stack the top-five rows and the aggregated "Rest of the world" rows into
# the single data set that feeds the plot
who8 <- who7 %>%
  bind_rows(who7_rem)

# Show the combined summary used for ggplot
who8
# Stacked area chart of yearly case totals by sex, one panel per country
# (the top five plus the aggregated rest of the world).
ggplot(data = who8, mapping = aes(x = year, y = sc, color = sex, fill = sex)) +
  geom_area() +
  theme_minimal() +
  labs(
    # Take the year range from the plotted data instead of hard-coding it, so
    # the title cannot claim years the data set does not contain
    title = paste0(
      "Tuberculosis Cases (",
      min(who8$year), "-", max(who8$year),
      ") - World Health Organization"
    ),
    x = "Year",
    y = "Number of Cases"
  ) +
  facet_wrap(facets = vars(country))