Assignment 3: Perspectives

Author
Affiliation

Parsa Keyvani

Georgetown University

Code
# Register the Roboto typeface from Google Fonts under the family name
# "Roboto", then turn on showtext rendering so plots can use it.
font_add_google("Roboto", "Roboto")
showtext_auto()

# Read the WHO tuberculosis data that every perspective below starts from.
# NOTE(review): this is an absolute, machine-specific path; the document will
# only render on a machine with the same directory layout.
who_tb <- read_csv(
  "/Users/parsakeyvani/Desktop/Adv Data viz/Assignments /spring-2024-lab3-perspectives-keyvanip/data/who_tb.csv"
)

# Shared plot theme: theme_bw() with white backgrounds, Roboto text, and no
# grid lines. Added to every ggplot in this document.
my_theme <- theme_bw() +
  theme(
    # Typography
    title = element_text(size = 12, family = "Roboto"),
    axis.title = element_text(size = 10, family = "Roboto"),
    axis.text = element_text(family = "Roboto"),
    # Backgrounds
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white"),
    # Grid lines are removed entirely
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

Visual Perspective 1

Question

What are the top 10 countries with the highest combined TB value in 2012?

Code
# Perspective 1: the ten countries with the largest combined TB value in 2012.
#
# Steps: drop incomplete rows, keep 2012, total `value` per country, keep the
# ten largest totals, and rescale them to thousands for plotting.
p1 <- who_tb %>%
  drop_na() %>%
  filter(year == 2012) %>%
  group_by(country) %>%
  summarise(value = sum(value)) %>%
  # slice_max() names the ranking column explicitly (top_n() is superseded and
  # silently picked the last column). with_ties = FALSE caps the result at ten
  # rows so it can never exceed the ten-colour palette used below. The result
  # is already sorted from largest to smallest.
  slice_max(value, n = 10, with_ties = FALSE) %>%
  mutate(
    value = value / 1000,
    # Shorten the long name for the axis. "DR Congo" is used rather than
    # "Congo" so it is not mistaken for the Republic of the Congo.
    country = ifelse(
      country == "Democratic Republic of the Congo", "DR Congo", country
    ),
    # Factor levels run from the largest to the smallest total, so the manual
    # fill scale assigns its darkest colour to the largest country.
    country = factor(country, levels = country[order(-value)])
  )

# Horizontal bars; reorder() puts the largest total at the top of the y-axis.
ggplot(p1, aes(x = value, y = reorder(country, value), fill = country)) +
  geom_col() +
  my_theme +
  labs(
    title = "Top 10 Countries With The <span style='color:#004c6d;'>Highest</span> Combined <br>TB Value (in thousands) in 2012",
    x = "Value",
    y = "Country"
  ) +
  scale_fill_manual(values = c(
    "#004c6d", "#215f7e", "#377290", "#4c86a2", "#619ab4",
    "#76afc6", "#8cc4d9", "#a2d9ec", "#b8efff", "#c6fdff"
  )) +
  # element_markdown() is required for the <span>/<br> markup in the title.
  theme(legend.position = "none", plot.title = element_markdown())

Visual Perspective 2

Question

What are the top 10 countries with the lowest combined TB value in 2012?

Code
# Perspective 2: the ten countries with the smallest combined TB value in 2012.
#
# Steps: drop incomplete rows, keep 2012, total `value` per country, discard
# countries whose total is 50 or less, and keep the ten smallest totals.
p2 <- who_tb %>%
  drop_na() %>%
  filter(year == 2012) %>%
  group_by(country) %>%
  summarise(value = sum(value)) %>%
  # The > 50 cut-off is applied to the country total. Applying it to individual
  # records before summing (as was done previously) removed small records from
  # every country and so under-counted the "combined" value being ranked.
  filter(value > 50) %>%
  # slice_min() replaces the superseded top_n(-10); with_ties = FALSE keeps
  # the result within the ten-colour palette. Rows come back sorted ascending.
  slice_min(value, n = 10, with_ties = FALSE) %>%
  # Factor levels run from the largest to the smallest total, so the darkest
  # palette colour goes to the largest of these ten countries.
  mutate(country = factor(country, levels = country[order(-value)]))

# Horizontal bars; ordering by -value puts the smallest total at the top.
ggplot(p2, aes(x = value, y = reorder(country, -value), fill = country)) +
  geom_col() +
  my_theme +
  labs(
    title = "Top 10 Countries With The <span style='color:#a2d9ec;'>Lowest</span> Combined <br>TB Value in 2012",
    x = "Value",
    y = "Country"
  ) +
  scale_fill_manual(values = c(
    "#004c6d", "#215f7e", "#377290", "#4c86a2", "#619ab4",
    "#76afc6", "#8cc4d9", "#a2d9ec", "#b8efff", "#c6fdff"
  )) +
  # element_markdown() is required for the <span>/<br> markup in the title.
  theme(legend.position = "none", plot.title = element_markdown())

Visual Perspective 3

Question

Which age group had the highest TB rate in 2012 for the U.S.?

Code
# Perspective 3: combined 2012 TB value per age group in the United States.
#
# Steps: drop incomplete rows, keep the U.S. in 2012, total `value` per age
# group, relabel the age codes, and flag the group with the largest total.
p3 <- who_tb %>%
  drop_na() %>%
  filter(country == "United States of America", year == 2012) %>%
  group_by(age) %>%
  summarise(value = sum(value)) %>%
  mutate(
    # Turn the compact age codes into readable ranges; codes not listed here
    # pass through unchanged.
    age = case_when(
      age == "014" ~ "0-14",
      age == "1524" ~ "15-24",
      age == "2534" ~ "25-34",
      age == "3544" ~ "35-44",
      age == "4554" ~ "45-54",
      age == "5564" ~ "55-64",
      age == "65" ~ "65+",
      .default = as.character(age)
    ),
    # Derive the highlighted bar from the data instead of assuming it is the
    # last of exactly seven bars.
    is_highest = value == max(value)
  )

# Vertical bars; the group with the largest total gets the dark accent colour
# that matches the word "Highest" in the title.
ggplot(p3, aes(x = age, y = value, fill = is_highest)) +
  geom_col() +
  my_theme +
  labs(
    title = "Age Group With The <span style='color:#004c6d;'>Highest</span> TB rate in 2012 for the U.S.",
    x = "Age Group",
    # Values are plotted as summed, not divided by 1000, so no unit suffix.
    y = "Value"
  ) +
  scale_fill_manual(values = c("FALSE" = "#63a1b4", "TRUE" = "#004c6d")) +
  # element_markdown() is required for the <span> markup in the title.
  theme(legend.position = "none", plot.title = element_markdown())

Visual Perspective 4

Question

How do men and women differ in TB rate over the years in the U.S.?

Code
# Perspective 4: yearly combined TB value in the United States, by gender.
#
# Steps: drop incomplete rows, keep the U.S., total `value` per year and
# gender, and spell out the gender codes for the legend.
p4 <- who_tb %>%
  drop_na() %>%
  filter(country == "United States of America") %>%
  group_by(year, gender) %>%
  # .groups = "drop" returns an ungrouped table instead of one still grouped
  # by year, and silences the regrouping message.
  summarise(value = sum(value), .groups = "drop") %>%
  mutate(gender = case_when(
    gender == "f" ~ "Female",
    gender == "m" ~ "Male",
    .default = as.character(gender)
  )) %>%
  # Capitalised so the legend title reads "Gender".
  rename(Gender = gender)

# One line per gender, with two annotated reference years (1995 and 2008).
ggplot(p4, aes(x = year, y = value, color = Gender)) +
  geom_line() +
  my_theme +
  labs(
    title = "TB Rate 1995-2012 in the U.S (Male vs Female)",
    subtitle = "Reference: World Health Organization. (1995-2008). Tuberculosis control and research strategies:\nMemorandum from a WHO meeting. Bulletin of the World Health Organization timeline.",
    x = "Year",
    # Values are plotted as summed, not divided by 1000, so no unit suffix.
    y = "Value"
  ) +
  # The lines are mapped to `color`, so the palette must be set on the colour
  # scale; scale_fill_manual() had no effect on this plot.
  scale_color_manual(values = c("#003f5c", "#ff6361")) +
  # NOTE(review): a numeric legend.position is deprecated from ggplot2 3.5.0
  # in favour of legend.position.inside; left as is because the installed
  # ggplot2 version is not visible here.
  theme(
    legend.position = c(0.92, 0.85),
    legend.title = element_text(size = 7),
    legend.text = element_text(size = 6),
    plot.subtitle = element_text(size = 7, family = "Roboto")
  ) +
  # Grey markers at hard-coded y positions, with a vertical guide rising from
  # the x-axis to the upper marker.
  annotate("pointrange", x = 1995, y = c(5629, 2462), ymin = -Inf, ymax = 5629,
           colour = "grey", size = 0.3, linewidth = 0.2) +
  annotate("pointrange", x = 2008, y = c(3206, 1536), ymin = -Inf, ymax = 3206,
           colour = "grey", size = 0.3, linewidth = 0.2) +
  annotate("text", x = 1995.2, y = 3800,
           label = "The WHO declares TB a global\nemergency, with deaths from TB higher\nthan in any previous year in history.",
           hjust = 0, color = "black", size = 2) +
  annotate("text", x = 2004, y = 2500,
           label = "The Department of Health launches\nTuberculosis Prevention and\nTreatment in 2008",
           hjust = 0, color = "black", size = 2)

Visual Perspective 5

Question

How does Iran compare to the U.S. in combined TB rate for men and women from 1995 to 2012?

Code
# Perspective 5: yearly combined TB value for Iran and the United States,
# plotted against a world-wide average.

# Yearly totals for the two countries, reshaped to one column per country
# (`Iran`, `USA`) so each can be drawn as its own line.
p5 <- who_tb %>%
  drop_na() %>%
  filter(country %in% c("United States of America", "Iran (Islamic Republic of)")) %>%
  group_by(country, year) %>%
  # Ungroup before reshaping so `country` is an ordinary column.
  summarise(value = sum(value), .groups = "drop") %>%
  mutate(country = case_when(
    country == "Iran (Islamic Republic of)" ~ "Iran",
    country == "United States of America" ~ "USA",
    .default = country
  )) %>%
  pivot_wider(names_from = country, values_from = value)

# Yearly benchmark for 1995-2012: the rounded arithmetic mean of every record
# with value > 50. The column is called `Mean` because mean() is what is
# computed; it was previously mislabelled "Median".
# NOTE(review): this averages individual records, whereas the country lines
# are per-country sums, so the two are not on the same basis. Confirm this is
# the intended benchmark.
world_mean <- who_tb %>%
  drop_na() %>%
  filter(value > 50, year >= 1995 & year < 2013) %>%
  group_by(year) %>%
  summarise(value = round(mean(value))) %>%
  rename(Mean = value)

# Join on the only shared column, stated explicitly.
complete_data <- full_join(p5, world_mean, by = "year")

ggplot(complete_data, aes(x = year)) +
  geom_line(aes(y = USA), color = "brown") +
  geom_line(aes(y = Mean), color = "darkgrey", linetype = "longdash") +
  geom_line(aes(y = Iran), color = "blue") +
  my_theme +
  # A tick every second year across the plotted range.
  scale_x_continuous(
    breaks = seq(min(complete_data$year), max(complete_data$year), by = 2)
  ) +
  # Ticks from the lowest benchmark value up to Iran's peak. na.rm guards
  # against years present in only one side of the full join.
  scale_y_continuous(
    breaks = seq(
      min(complete_data$Mean, na.rm = TRUE),
      max(complete_data$Iran, na.rm = TRUE),
      by = 1200
    )
  ) +
  # Direct labels at the right edge take the place of a legend.
  annotate("text", x = 2012, y = 3950, label = "UNITED STATES", hjust = 1,
           color = "brown", size = 2.5) +
  annotate("text", x = 2012, y = 2300, label = "WORLD MEAN", hjust = 1,
           color = "grey", size = 2.5) +
  annotate("text", x = 2012, y = 5700, label = "IRAN", hjust = 1,
           color = "blue", size = 2.5) +
  labs(
    title = "USA vs IRN in TB Value in Men and Women Combined (1995-2012)",
    x = "Year",
    # Values are plotted as summed, not divided by 1000, so no unit suffix.
    y = "Value"
  )