library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
Plot the total number of TB cases in the world across years:
# Reshape the wide `who` TB table into one row per
# country / year / diagnosis type / sex / age band.
#
# The measure columns are named like "new_sp_m014" or "newrel_f65". The
# pattern's optional underscore after "new" absorbs the inconsistent "newrel"
# prefix, so the names are split in a single step: no string patch-up and no
# superseded `separate()` calls. Capture groups map to `type`, `sex` (one
# character) and `age` (the remainder), all kept as character columns.
who_tidy <- who %>%
  pivot_longer(
    cols = new_sp_m014:newrel_f65,
    names_to = c("type", "sex", "age"),
    names_pattern = "new_?(.*)_(.)(.*)",
    values_to = "cases",
    values_drop_na = TRUE # keep only country/year/group cells with a count
  ) %>%
  # The ISO codes duplicate `country`, so they are dropped.
  select(-iso2, -iso3)
# Worldwide yearly totals: one row per `year`, with `total_cases` summing the
# counts over every country, diagnosis type, sex and age band.
who_summary <-
  who_tidy |>
  group_by(year) |>
  summarise(total_cases = sum(cases))
# Line chart of the yearly totals, with a point marking each year.
who_summary |>
  ggplot(aes(year, total_cases)) +
  geom_line(colour = "steelblue", linewidth = 1) +
  geom_point(colour = "steelblue", size = 2) +
  labs(
    x = "Year",
    y = "Total Cases",
    title = "Total Number of TB Cases in the World Across Years"
  )
Find out which country has the highest male-to-female ratio of TB
cases in 2010:
# Ten countries with the largest male-to-female TB case ratio in 2010.
ratio_top10_2010 <-
  who_tidy |>
  filter(year == 2010) |>
  # Weighted count = sum of `cases` per country/sex (NA-safe, ungrouped result).
  count(country, sex, wt = cases, name = "total_cases") |>
  # One row per country with the totals side by side in columns `f` and `m`.
  pivot_wider(names_from = sex, values_from = total_cases) |>
  # Countries with no female cases (zero or missing) have no defined ratio.
  filter(f > 0) |>
  mutate(male_to_female_ratio = m / f) |>
  arrange(desc(male_to_female_ratio)) |>
  head(10)
# Horizontal bar chart of the ten largest male-to-female ratios, with the
# largest at the top.
#
# The ratio axis covers at least 0-5 but grows with the data. A fixed
# `limits = c(0, 5)` on the scale converts any ratio above 5 to NA, and
# `geom_col()` then silently drops that country's bar.
ratio_axis_max <- max(
  5,
  ceiling(ratio_top10_2010$male_to_female_ratio),
  na.rm = TRUE
)

ggplot(
  data = ratio_top10_2010,
  mapping = aes(
    x = reorder(country, male_to_female_ratio),
    y = male_to_female_ratio
  )
) +
  geom_col(fill = "coral") +
  coord_flip() +
  scale_y_continuous(
    limits = c(0, ratio_axis_max),
    breaks = seq(0, ratio_axis_max, by = 1)
  ) +
  labs(
    title = "Top 10 Countries with Highest Male-to-Female TB Ratio (2010)",
    x = "Country",
    y = "Ratio"
  )
Antigua and Barbuda has the highest male-to-female ratio of TB cases in 2010.