Tidying the who data set, step by step
# Step 1: gather the case-count columns (new_sp_m014 through newrel_f65)
# into a key/value pair, dropping rows whose count is missing.
who1 <- who %>%
  pivot_longer(
    cols = new_sp_m014:newrel_f65,
    names_to = "key",
    values_to = "cases",
    values_drop_na = TRUE
  )

# Step 2: make the key format consistent. Keys starting with "newrel" lack
# the underscore after "new" that the other keys have, so rewrite them to
# "new_rel" to let every key split into three pieces in the next step.
who2 <- who1 %>%
  mutate(key = str_replace(key, "newrel", "new_rel"))

# Step 3: split the key on "_" into its three components.
who3 <- who2 %>%
  separate(key, c("new", "type", "sexage"), sep = "_")

# Step 4: drop the new, iso2 and iso3 columns.
who4 <- who3 %>%
  select(-new, -iso2, -iso3)

# Step 5: split sexage after its first character into sex and age.
who5 <- who4 %>%
  separate(sexage, c("sex", "age"), sep = 1)
Plot the total number of TB cases in the world across years
# Sum the case counts per year, then draw the yearly totals as points
# connected by a line.
who5 %>%
  group_by(year) %>%
  summarise(total_cases = sum(cases, na.rm = TRUE)) %>%
  ggplot(aes(x = year, y = total_cases)) +
  geom_point(color = "steelblue") +
  geom_line(color = "black", linewidth = 1) +
  labs(
    title = "the total number of TB cases in the world across years",
    x = "year",
    y = "number of cases"
  )

Find out which country has the highest male-to-female ratio of TB
cases in 2010.
# For 2010, total the cases per country and sex, spread the sexes into
# separate columns (f, m), and rank countries by the ratio m / f.
who5 %>%
  filter(year == 2010) %>%
  group_by(country, sex) %>%
  # .groups = "drop_last" spells out summarise()'s default regrouping
  # (result stays grouped by country) and silences the informational message.
  summarise(total_cases = sum(cases, na.rm = TRUE), .groups = "drop_last") %>%
  pivot_wider(
    names_from = sex,
    values_from = total_cases
  ) %>%
  # NOTE: a country with f == 0 and m > 0 gets a ratio of Inf, which sorts
  # ahead of every finite ratio in the descending order below.
  mutate(male_female_ratio = m / f) %>%
  arrange(desc(male_female_ratio))
## # A tibble: 204 × 4
## # Groups: country [204]
## country f m male_female_ratio
## <chr> <dbl> <dbl> <dbl>
## 1 Anguilla 0 1 Inf
## 2 Bermuda 0 1 Inf
## 3 British Virgin Islands 0 1 Inf
## 4 Antigua and Barbuda 1 5 5
## 5 Seychelles 3 14 4.67
## 6 Qatar 105 475 4.52
## 7 Cuba 184 588 3.20
## 8 Jamaica 31 97 3.13
## 9 Trinidad and Tobago 53 161 3.04
## 10 American Samoa 1 3 3
## # ℹ 194 more rows