library(tidyverse)
library (plotly)
library(gganimate)

Import My Data

The main data source is the WHO Global Tuberculosis Report dataset (available at https://extranet.who.int/tme/generateCSV.asp?ds=estimates). These data are compiled and published annually by the World Health Organization from national TB programs and global health monitoring systems.

The dataset covers about 200 countries worldwide, but this project focuses on 10 Southeast Asian countries: Myanmar, Thailand, Indonesia, Malaysia, Vietnam, the Philippines, Laos, Brunei, Singapore, and Cambodia.

# Location of the downloaded WHO TB burden estimates CSV.
tb_csv_path <- "C:/Users/User/OneDrive/Coursera Courses/Data visualization & dashboarding with R - John Hopkin/Course 5/Module 3/2) Peer review assignment 2/TB_burden_countries_2025-07-28.csv"

# Country names are spelled the way they are matched against the `country`
# column of the CSV (e.g. "Viet Nam", not "Vietnam").
sea_countries <- c(
  "Myanmar",
  "Thailand",
  "Indonesia",
  "Malaysia",
  "Viet Nam",
  "Philippines",
  "Lao People's Democratic Republic",
  "Cambodia",
  "Brunei Darussalam",
  "Singapore"
)

# Load the full dataset and keep only the rows for the ten study countries.
TB_data <- read_csv(tb_csv_path) %>%
  filter(country %in% sea_countries)

Figure 1

For my first figure, I am going to create a Line Plot: Trends of TB incidence rate (per 100,000 population) (y axis) over time (2000–2023) (x axis) for Southeast Asian countries. I will make the plot interactive with ggplotly.

# Figure 1: one line per country showing e_inc_100k by year.
# The `text` aesthetic is not used by geom_line(); it only carries the hover
# label that ggplotly() displays because of `tooltip = "text"`.
# `group = country` keeps each country's points joined into a single line even
# though `text` differs on every row.
fig_1 <- TB_data %>%
  ggplot(
    aes(
      x = year,
      y = e_inc_100k,
      group = country,
      color = country,
      text = paste(
        "Country:", country,
        "<br>Year:", year,
        "<br>Incidence:", e_inc_100k
      )
    )
  ) +
  geom_line() +
  labs(
    title = "Trend of TB Incidence Over Time",
    x = "Year",
    y = "TB Incidence Rate (per 100,000)",
    color = "Country"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(fig_1, tooltip = "text")

Figure 2

For my second figure, I am going to create a Bar Chart: Comparison of TB mortality (with and without HIV) across Southeast Asian countries. I will animate the plot across the years with gganimate.

# Figure 2 data: keep the two mortality columns under shorter names, then
# reshape to one row per country, year and mortality category.
TB_data_long <- TB_data %>%
  select(
    country,
    year,
    mor_withoutHIV = e_mort_exc_tbhiv_100k,
    mor_withHIV = e_mort_tbhiv_100k
  ) %>%
  pivot_longer(
    cols = c(mor_withoutHIV, mor_withHIV),
    names_to = "category",
    values_to = "mortality"
  )

# Horizontal dodged bars, one pair per country, animated over `year`.
# The legend labels are positional: they rely on the discrete fill scale
# ordering the category values as "mor_withHIV", then "mor_withoutHIV".
# "{closest_state}" in the subtitle is filled in by transition_states() with
# the year being shown.
fig_2 <- TB_data_long %>%
  ggplot(aes(x = mortality, y = country, fill = category)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_discrete(
    name = "HIV Status",
    labels = c("TB with HIV", "TB without HIV")
  ) +
  labs(
    title = "TB Mortality in ASEAN Countries",
    subtitle = "Year: {closest_state}",
    x = "TB Mortality Rate (per 100,000)",
    y = "Country"
  ) +
  transition_states(year, transition_length = 3, state_length = 4)

# Render the animation.
animate(fig_2)

Figure 3

For the third figure, I will display a Scatter Plot: TB incidence rate (per 100,000 population) on the x-axis against TB mortality rate (per 100,000 population) on the y-axis for Southeast Asian countries, for the most recent 3 years (2021, 2022, 2023).

# Figure 3 data: rows for 2021, 2022 and 2023 only.
TB_data_3year <- TB_data %>%
  filter(year %in% 2021:2023)

# Incidence against mortality, one point per country, one panel per year.
TB_data_3year %>%
  ggplot(aes(x = e_inc_100k, y = e_mort_100k, color = country)) +
  geom_point() +
  facet_wrap(~year) +
  labs(
    title = "TB Incidence vs TB Mortality in ASEAN Countries",
    x = "TB Incidence Rate (per 100,000)",
    y = "TB Mortality Rate (per 100,000)",
    color = "Country"
  )

Figure 4

For the fourth figure, I will display a Box Plot: Plot country on the x-axis and TB treatment coverage on the y-axis for Southeast Asian countries.

# Figure 4: box plot of c_cdr per country, using every year present in TB_data.
ggplot(TB_data, aes(x = country, y = c_cdr, fill = country)) +
  geom_boxplot() +
  labs(
    x = "Country",
    y = "TB Treatment Coverage",
    title = "TB Treatment Coverage in ASEAN Countries"
  ) +
  # The fill colour repeats what the x axis already shows, so hide its legend.
  guides(fill = "none") +
  # hjust = 1 right-aligns each rotated label so it ends at its own tick.
  # Without it the long country names are centred on the tick and are hard to
  # match to the correct box.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Figure 5

For my fifth figure, I am going to create a Stacked Area Plot: TB incidence rate (per 100,000 population) - with or without HIV (y axis) over time (2000–2023) (x axis) for Myanmar.

# Figure 5 data: Myanmar incidence split into two non-overlapping components.
# e_inc_100k is the all-TB rate and e_inc_tbhiv_100k is the TB-with-HIV rate.
# Stacking those two directly would count HIV-positive cases twice, so the
# HIV-negative component is derived by subtraction and stacked instead.
# NOTE(review): assumes e_inc_100k includes the HIV-positive cases, as in the
# WHO data dictionary; confirm against the dictionary for this export.
TB_data_long2 <- TB_data %>%
  filter(country == "Myanmar") %>%
  mutate(e_inc_nohiv_100k = e_inc_100k - e_inc_tbhiv_100k) %>%
  select(country, year, e_inc_nohiv_100k, e_inc_tbhiv_100k) %>%
  pivot_longer(
    cols = c(e_inc_nohiv_100k, e_inc_tbhiv_100k),
    names_to = "category",
    values_to = "tb_incidence"
  )

# Stacked area: the top of the stack equals the all-TB incidence rate.
ggplot(TB_data_long2, aes(x = year, y = tb_incidence, fill = category)) +
  geom_area() +
  labs(
    x = "Year",
    y = "TB Incidence Rate (per 100,000)",
    title = "Trend of TB Incidence over Time in Myanmar (with or without HIV)"
  ) +
  # breaks and labels are given together so each label is tied to its category
  # value rather than to the scale's default ordering.
  scale_fill_discrete(
    name = "HIV Status",
    breaks = c("e_inc_nohiv_100k", "e_inc_tbhiv_100k"),
    labels = c("TB without HIV", "TB with HIV")
  )

Figure 6

For my sixth figure, I am going to create a Lollipop Plot: TB incidence rate (per 100,000 population) for Southeast Asian countries in 2023.

# 2023 rows only; the later 2023 figures reuse this data frame.
TB_data_2023 <- TB_data %>%
  filter(year == 2023)

# Figure 6: lollipop chart. Each country gets a black stem from 0 to its
# incidence rate and a blue dot at the end of the stem.
TB_data_2023 %>%
  ggplot(aes(x = e_inc_100k, y = country)) +
  # `y` is inherited from the plot-level aes; only the stem's start, end and
  # yend need to be set here.
  geom_segment(
    aes(x = 0, xend = e_inc_100k, yend = country),
    color = "black"
  ) +
  geom_point(size = 4, color = "blue") +
  labs(
    x = "TB Incidence Rate (per 100,000 population)",
    y = "Country",
    title = "TB Incidence Rate in ASEAN Countries (2023)"
  )

Figure 7

For my seventh figure, I am going to create a Dot Plot: TB mortality rate (per 100,000 population) for Southeast Asian countries in 2023.

# Figure 7: dot plot of e_mort_100k per country, using the 2023 rows.
ggplot(TB_data_2023, aes(x = country, y = e_mort_100k)) +
  geom_point(color = "blue", size = 3) +
  labs(
    title = "TB Mortality Rate in ASEAN Countries (2023)",
    y = "TB Mortality Rate (per 100,000 population)",
    x = "Country"
  ) +
  # hjust = 1 right-aligns each rotated label so it ends at its own tick.
  # Without it the long country names are centred on the tick and are hard to
  # match to the correct dot.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Figure 8

For my eighth figure, I am going to create a Pie Chart: TB Treatment Coverage for Southeast Asian countries in 2023.

# Figure 8: pie chart of c_cdr by country, using the 2023 rows.
# pie() stops with an error if any slice value is NA, so countries without a
# reported c_cdr are dropped before plotting.
# NOTE(review): each slice shows a country's value relative to the sum of all
# values, not a share of a common total; confirm this is the intended reading.
coverage_2023 <- TB_data_2023[!is.na(TB_data_2023$c_cdr), ]

pie(
  coverage_2023$c_cdr,
  labels = coverage_2023$country,
  main = "TB Treatment Coverage by Country (2023)",
  # One colour per remaining country.
  col = rainbow(nrow(coverage_2023))
)