# Top 5 countries + Rest of the world (1995 and later)
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Show the raw `who` table (presumably the data set shipped with tidyr) before
# any reshaping.
who

# Reshape into long form: one row per country / year / key, then break the key
# into its parts.
who5 <- who %>%
  pivot_longer(
    cols = new_sp_m014:newrel_f65,
    names_to = "key",
    values_to = "cases",
    values_drop_na = TRUE
  ) %>%
  # "newrel" has no underscore after "new", unlike e.g. "new_sp"; insert it so
  # every key splits into the same three pieces.
  mutate(key = str_replace(key, "newrel", "new_rel")) %>%
  separate(key, into = c("new", "var", "sexage")) %>%
  # sep = 1 cuts after the first character: sex letter, then the age code.
  separate(sexage, into = c("sex", "age"), sep = 1) %>%
  # Drop the constant "new" prefix and the ISO code columns.
  select(-new, -iso2, -iso3)

# Show the tidied table.
who5
# Total the cases per country over every row of who5 and keep the countries
# whose total ranks in the top five (rank 1 = largest total).
# NOTE(review): this ranking is not limited to year >= 1995, unlike the data
# that is summarised for plotting further down -- confirm that is intended.
top5countries <- who5 %>%
  group_by(country) %>%
  summarise(sumofcountrycases = sum(cases)) %>%
  mutate(total_rank = rank(desc(sumofcountrycases))) %>%
  filter(total_rank <= 5) %>%
  select(-total_rank)
# Total TB cases per country, year and sex, restricted to 1995 onwards.
# The year filter is a plain row-wise condition, so it is applied before
# grouping.
who6 <- who5 %>%
  filter(year >= 1995) %>%
  group_by(country, year, sex) %>%
  summarise(sc = sum(cases))
# Keep only the who6 rows that belong to a top-5 country. The join also brings
# along each country's overall total (sumofcountrycases) from top5countries.
who7 <- who6 %>%
  inner_join(top5countries, by = "country")
# Collapse every country outside the top five into one "Rest of the world"
# series.

# Rows of who6 for countries that are not in top5countries (computed once and
# reused for both the country count and the yearly totals).
who6_rem <- who6 %>%
  anti_join(top5countries, by = "country")

# One row per remaining country, and the number of such countries.
# ungroup() comes first so distinct() returns only the country column.
who7_rem_countries <- who6_rem %>%
  ungroup() %>%
  distinct(country)
who7_rem_no_of_countries <- tally(who7_rem_countries)

# Yearly totals by sex across the remaining countries.
# The facet label is built with paste0() from the scalar count (column `n` of
# the tally): paste() with its default separator inserted stray spaces inside
# the parentheses and left a trailing blank, and it relied on implicit
# coercion of a whole tibble to text.
who7_rem <- who6_rem %>%
  group_by(year, sex) %>%
  summarise(sc = sum(sc)) %>%
  mutate(
    country = paste0(
      "Rest of the world (", who7_rem_no_of_countries$n, " countries)"
    )
  )
# Stack the "Rest of the world" rows underneath the top-5 country rows so the
# plot can treat it as one more country. who7_rem has no sumofcountrycases
# column, so bind_rows() fills that column with NA for those rows.
who8 <- who7 %>%
  bind_rows(who7_rem)

# Print the combined table that feeds the plot.
who8
# Stacked area chart of yearly TB cases by sex, one panel per entry in
# who8$country.
# The year span in the title is read from the plotted data instead of being
# hard-coded, so the title cannot disagree with the years actually shown
# (the previous literal "1995-2015" ran past the last year in the data).
year_span <- range(who8$year)

ggplot(data = who8, mapping = aes(x = year, y = sc, color = sex, fill = sex)) +
  geom_area() +
  theme_minimal() +
  labs(
    title = paste0(
      "Tuberculosis Cases (", year_span[1], "-", year_span[2],
      ") - World Health Organization"
    ),
    x = "Year",
    y = "Number of Cases"
  ) +
  facet_wrap(facets = vars(country))
