library(tidyverse)
library (plotly)
library(gganimate)
The main data source is the WHO Global Tuberculosis Report dataset (https://extranet.who.int/tme/generateCSV.asp?ds=estimates). These data are compiled and published annually by the World Health Organization from national TB programs and global health monitoring systems.
The dataset includes global data (about 200 countries), but this project focuses on 10 Southeast Asian countries: Myanmar, Thailand, Indonesia, Malaysia, Vietnam, the Philippines, Laos, Brunei, Singapore, and Cambodia.
# Read the TB burden CSV and keep only the rows whose `country` is one of
# the ten names listed below. Later figures all start from `TB_data`.
TB_data <- read_csv(
  "C:/Users/User/OneDrive/Coursera Courses/Data visualization & dashboarding with R - John Hopkin/Course 5/Module 3/2) Peer review assignment 2/TB_burden_countries_2025-07-28.csv"
) %>%
  filter(
    country %in% c(
      "Myanmar",
      "Thailand",
      "Indonesia",
      "Malaysia",
      "Viet Nam",
      "Philippines",
      "Lao People's Democratic Republic",
      "Cambodia",
      "Brunei Darussalam",
      "Singapore"
    )
  )
For my first figure, I am going to create a Line Plot: trends in the TB incidence rate (per 100,000 population) (y-axis) over time (2000–2023) (x-axis) for Southeast Asian countries. I will make the plot interactive with ggplotly.
# Figure 1: one line per country of e_inc_100k against year.
# The `text` aesthetic is built only to feed the hover tooltip: ggplotly()
# is told below to show that aesthetic and nothing else.
fig_1 <- ggplot(
  data = TB_data,
  mapping = aes(
    x = year,
    y = e_inc_100k,
    group = country,
    color = country,
    text = paste(
      "Country:", country,
      "<br>Year:", year,
      "<br>Incidence:", e_inc_100k
    )
  )
) +
  geom_line() +
  labs(
    title = "Trend of TB Incidence Over Time",
    x = "Year",
    y = "TB Incidence Rate (per 100,000)",
    color = "Country"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(fig_1, tooltip = "text")
For my second figure, I am going to create a Bar Chart: a comparison of TB mortality (with and without HIV) across Southeast Asian countries. I will animate the plot across the years with gganimate.
# Figure 2 data: keep the two mortality columns under shorter names, then
# reshape to long form so each row is one (country, year, category) value.
TB_data_long <- TB_data %>%
  select(
    country,
    year,
    mor_withoutHIV = e_mort_exc_tbhiv_100k,
    mor_withHIV = e_mort_tbhiv_100k
  ) %>%
  pivot_longer(
    cols = -c(country, year),
    names_to = "category",
    values_to = "mortality"
  )

# Figure 2: horizontal dodged bars, one pair per country, animated by year.
fig_2 <- ggplot(
  data = TB_data_long,
  mapping = aes(x = mortality, y = country, fill = category)
) +
  geom_bar(stat = "identity", position = "dodge") +
  # Labels are matched to the fill categories by position, not by name.
  scale_fill_discrete(
    name = "HIV Status",
    labels = c("TB with HIV", "TB without HIV")
  ) +
  labs(
    title = "TB Mortality in ASEAN Countries",
    subtitle = "Year: {closest_state}",
    x = "TB Mortality Rate (per 100,000)",
    y = "Country"
  ) +
  # One animation state per year; {closest_state} in the subtitle tracks it.
  transition_states(year, transition_length = 3, state_length = 4)

animate(fig_2)
For the third figure, I will display a Scatter Plot: the TB incidence rate (per 100,000 population) on the x-axis against the TB mortality rate (per 100,000 population) on the y-axis for Southeast Asian countries, for the three most recent years (2021, 2022, and 2023).
# Figure 3 data: rows for the years 2021, 2022 and 2023 only.
TB_data_3year <- TB_data %>%
  filter(year %in% 2021:2023)

# Figure 3: incidence vs mortality, one point per country, one panel per year.
ggplot(
  data = TB_data_3year,
  mapping = aes(x = e_inc_100k, y = e_mort_100k, color = country)
) +
  geom_point() +
  facet_wrap(~year) +
  labs(
    title = "TB Incidence vs TB Mortality in ASEAN Countries",
    x = "TB Incidence Rate (per 100,000)",
    y = "TB Mortality Rate (per 100,000)",
    color = "Country"
  )
For the fourth figure, I will display a Box Plot: Plot country on the x-axis and TB treatment coverage on the y-axis for Southeast Asian countries.
# Figure 4: box plot of c_cdr for each country, pooling every year present
# in TB_data (no year filter is applied here).
ggplot(TB_data, aes(x = country, y = c_cdr, fill = country)) +
  geom_boxplot() +
  labs(
    x = "Country",
    y = "TB Treatment Coverage",
    title = "TB Treatment Coverage in ASEAN Countries"
  ) +
  # The fill colour repeats what the x axis already shows, so hide its legend.
  guides(fill = "none") +
  # hjust = 1 right-aligns the rotated labels so each one ends at its own
  # tick; without it the long country names are centred on the tick and are
  # hard to attribute to the correct box.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
For my fifth figure, I am going to create a Stacked Area Plot: TB incidence rate (per 100,000 population) - with or without HIV (y axis) over time (2000–2023) (x axis) for Myanmar.
# Figure 5 data (Myanmar only).
# geom_area() stacks its fill groups, so the groups must be disjoint parts of
# the total. Stacking e_inc_100k (the "All TB" rate) together with its
# HIV-positive subset would count the HIV-positive cases twice and push the
# top of the stack above the real incidence rate. The total is therefore
# split into a "without HIV" remainder and the "with HIV" part, which sum
# back to e_inc_100k.
TB_data_long2 <- TB_data %>%
  filter(country == "Myanmar") %>%
  mutate(
    inc_without_hiv = e_inc_100k - e_inc_tbhiv_100k,
    inc_with_hiv = e_inc_tbhiv_100k
  ) %>%
  select(country, year, inc_without_hiv, inc_with_hiv) %>%
  pivot_longer(
    cols = c(inc_without_hiv, inc_with_hiv),
    names_to = "category",
    values_to = "tb_incidence"
  )

# Figure 5: stacked area whose total height is the overall incidence rate.
ggplot(TB_data_long2, aes(x = year, y = tb_incidence, fill = category)) +
  geom_area() +
  labs(
    x = "Year",
    y = "TB Incidence Rate (per 100,000)",
    title = "Trend of TB Incidence over Time in Myanmar (with or without HIV)"
  ) +
  # breaks pins each label to its category explicitly instead of relying on
  # the default ordering of the category names.
  scale_fill_discrete(
    name = "HIV Status",
    breaks = c("inc_without_hiv", "inc_with_hiv"),
    labels = c("TB without HIV", "TB with HIV")
  )
For my sixth figure, I am going to create a Lollipop Plot: TB incidence rate (per 100,000 population) for Southeast Asian countries in 2023.
# 2023 rows only; this subset is reused by the later 2023 figures.
TB_data_2023 <- TB_data %>%
  filter(year == 2023)

# Figure 6: lollipop chart. Each country gets a black stem from 0 to its
# e_inc_100k value with a blue dot at the end.
ggplot(
  data = TB_data_2023,
  mapping = aes(x = e_inc_100k, y = country)
) +
  geom_segment(
    mapping = aes(x = 0, xend = e_inc_100k, y = country, yend = country),
    color = "black"
  ) +
  geom_point(size = 4, color = "blue") +
  labs(
    x = "TB Incidence Rate (per 100,000 population)",
    y = "Country",
    title = "TB Incidence Rate in ASEAN Countries (2023)"
  )
For my seventh figure, I am going to create a Dot Plot: TB mortality rate (per 100,000 population) for Southeast Asian countries in 2023.
# Figure 7: dot plot of e_mort_100k per country, using the 2023 subset.
ggplot(TB_data_2023, aes(x = country, y = e_mort_100k)) +
  geom_point(color = "blue", size = 3) +
  labs(
    title = "TB Mortality Rate in ASEAN Countries (2023)",
    y = "TB Mortality Rate (per 100,000 population)",
    x = "Country"
  ) +
  # hjust = 1 right-aligns the rotated labels so each one ends at its own
  # tick; without it the long country names are centred on the tick and are
  # hard to match to the correct point.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
For my eighth figure, I am going to create a Pie Chart: TB Treatment Coverage for Southeast Asian countries in 2023.
# Figure 8: base-graphics pie chart with one slice per row of TB_data_2023,
# sized by c_cdr, labelled by country, and coloured from a rainbow palette
# with one colour per country.
with(
  TB_data_2023,
  pie(
    c_cdr,
    labels = country,
    main = "TB Treatment Coverage by Country (2023)",
    col = rainbow(length(country))
  )
)