Tidying the `who` data set step by step

# Reshape the wide count columns (new_sp_m014 through newrel_f65) into long
# form: the former column name goes into `key`, its value into `cases`, and
# rows whose value is NA are dropped.
who1 <- pivot_longer(
  who,
  cols = new_sp_m014:newrel_f65,
  names_to = "key",
  values_to = "cases",
  values_drop_na = TRUE
)

# Rewrite keys containing "newrel" as "new_rel" so they carry the same
# underscore after "new" as the other keys (e.g. new_sp_m014) and split
# consistently on "_" in the next step.
# The pattern was previously misspelled "rewrel", which never matched, so the
# newrel keys were silently left unchanged.
who2 <- who1 %>%
  mutate(key = str_replace(key, "newrel", "new_rel"))

# Split `key` at each underscore into three columns: `new`, `type`, `sexage`.
who3 <- separate(
  who2,
  col = key,
  into = c("new", "type", "sexage"),
  sep = "_"
)

# Drop the `new`, `iso2` and `iso3` columns; every other column is kept.
who4 <- select(who3, -c(new, iso2, iso3))

# Split `sexage` after its first character: that character becomes `sex`,
# the remainder becomes `age`.
who5 <- separate(
  who4,
  col = sexage,
  into = c("sex", "age"),
  sep = 1
)

Plot the total number of TB cases in the world across years

# Sum `cases` within each year, then plot the yearly totals as points with a
# connecting line drawn on top.
who5 %>%
  group_by(year) %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  summarise(total_cases = sum(cases, na.rm = TRUE)) %>%
  ggplot(aes(x = year, y = total_cases)) +
  geom_point(color = "steelblue") +
  geom_line(color = "black", linewidth = 1) +
  labs(
    title = "the total number of TB cases in the world across years",
    x = "year",
    y = "number of cases"
  )

Find out which country has the highest male-to-female ratio of TB cases in 2010.

# For year 2010: total the cases per country and sex, spread the sexes into
# separate `f` and `m` columns, compute the male-to-female ratio (m / f) and
# list countries from the highest ratio downwards.
# Note: a country with zero female cases gets a ratio of Inf and therefore
# sorts ahead of every finite ratio.
who5 %>%
  filter(year == 2010) %>%
  group_by(country, sex) %>%
  # "drop_last" is what summarise() does by default here (result stays grouped
  # by country); stating it explicitly only silences the regrouping message.
  summarise(
    total_cases = sum(cases, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  pivot_wider(
    names_from = sex,
    values_from = total_cases
  ) %>%
  mutate(male_female_ratio = m / f) %>%
  arrange(desc(male_female_ratio))
## # A tibble: 204 × 4
## # Groups:   country [204]
##    country                    f     m male_female_ratio
##    <chr>                  <dbl> <dbl>             <dbl>
##  1 Anguilla                   0     1            Inf   
##  2 Bermuda                    0     1            Inf   
##  3 British Virgin Islands     0     1            Inf   
##  4 Antigua and Barbuda        1     5              5   
##  5 Seychelles                 3    14              4.67
##  6 Qatar                    105   475              4.52
##  7 Cuba                     184   588              3.20
##  8 Jamaica                   31    97              3.13
##  9 Trinidad and Tobago       53   161              3.04
## 10 American Samoa             1     3              3   
## # ℹ 194 more rows