Wrangle the data to calculate the male-minus-female difference in counts for each age group in selected countries in 2012
# Load the long-format data set from the course repository.
tb_long <- read_rds("https://github.com/datascienceprogram/ids_course_data/raw/master/tb_long.rds")

# Reshape to one row per country / year / age group with the male and female
# counts side by side, compute their difference, and keep only the 2012 rows
# for the countries listed below.
tb_long <- tb_long %>%
  # Spell out the sex codes so the pivoted columns are named `male` / `female`.
  mutate(sex = ifelse(sex == "m", "male", "female")) %>%
  pivot_wider(names_from = sex, values_from = count) %>%
  # Positive `diff` means more male than female counts.
  mutate(diff = male - female) %>%
  filter(!is.na(diff), year == 2012) %>%
  mutate(
    # Age-group codes are the band's lower and upper bound run together
    # ("514" = 5-14, "1524" = 15-24), so "014" is 0-14 and "04" is 0-4.
    # Explicit levels keep the bands in age order instead of the alphabetical
    # order `as.factor()` would give (which puts "5-14 years" after
    # "45-54 years"). Codes not listed here become NA and are dropped below.
    # `droplevels()` keeps only the bands actually present, as `as.factor()`
    # did.
    age_group = droplevels(factor(
      age_group,
      levels = c(
        "04", "514", "014", "1524", "2534", "3544", "4554", "5564", "65"
      ),
      labels = c(
        "0-4 years",
        "5-14 years",
        "0-14 years",
        "15-24 years",
        "25-34 years",
        "35-44 years",
        "45-54 years",
        "55-64 years",
        "65+ years"
      )
    ))
  ) %>%
  filter(
    country %in% c(
      "United States of America",
      "Canada",
      "Mexico",
      "Guatemala",
      "Cuba",
      "Haiti",
      "Dominican Republic",
      "Honduras",
      "Nicaragua",
      "El Salvador",
      "Costa Rica",
      "Panama",
      "Jamaica",
      "Trinidad and Tobago",
      "Belize",
      "Bahamas",
      "Barbados",
      "Saint Lucia",
      "Grenada",
      "Saint Vincent and the Grenadines",
      "Antigua and Barbuda",
      "Dominica",
      "Saint Kitts and Nevis"
    ),
    !is.na(age_group)
  )
Plot the male-minus-female differences by country, with one panel per age group, to investigate the pattern
# Bar chart of `diff` for each country, one panel per `age_group`, with the
# coordinates flipped so the countries run down the vertical axis.
ggplot(tb_long, aes(country, diff, fill = country)) +
  geom_col() +
  # Reference line at zero separates positive from negative differences.
  geom_hline(yintercept = 0) +
  facet_wrap(~ age_group) +
  coord_flip() +
  # The fill only repeats the country axis, so the legend is hidden.
  # NOTE(review): theme elements follow on-screen position, so after
  # coord_flip() `axis.text.x` styles the horizontal (diff) axis rather than
  # the country labels -- confirm this is the intended axis.
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, size = 5)
  )
