# Register the "Roboto" family (fetched via font_add_google) and switch on
# showtext rendering so the family can be used in plot text.
font_add_google("Roboto", "Roboto")
showtext_auto()

# Loading the dataset
# NOTE(review): this is an absolute, machine-specific path; it will only
# resolve on the original author's machine. Consider a project-relative path.
who_tb <- read_csv(
  "/Users/parsakeyvani/Desktop/Adv Data viz/Assignments /spring-2024-lab3-perspectives-keyvanip/data/who_tb.csv"
)

# Custom theme shared by every plot in this report: black-and-white base,
# white plot/panel backgrounds, Roboto for all text, and no grid lines.
my_theme <- theme_bw() +
  theme(
    # Backgrounds
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white"),
    # Text
    title = element_text(size = 12, family = "Roboto"),
    axis.title = element_text(size = 10, family = "Roboto"),
    axis.text = element_text(family = "Roboto"),
    # Grid lines
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
Visual Perspective 1
Question
What are the top 10 countries with the highest combined TB value in 2012?
Code
# Preprocessing the data: total the 2012 values per country, keep the ten
# largest totals and express them in thousands.
p1 <- who_tb %>%
  drop_na() %>%
  filter(year == 2012) %>%
  group_by(country) %>%
  summarise(value = sum(value)) %>%
  # slice_max() replaces the superseded top_n() and names the ranking column
  # explicitly. with_ties = FALSE caps the result at 10 rows, so the
  # 10-colour palette below can never run out of values.
  slice_max(value, n = 10, with_ties = FALSE) %>%
  arrange(desc(value)) %>%
  mutate(
    value = value / 1000,
    # Shorten the long name. "DR Congo" is used rather than "Congo" because
    # "Congo" is the name of a different country and would be ambiguous.
    country = ifelse(
      country == "Democratic Republic of the Congo", "DR Congo", country
    )
  )

# Changing to factor with levels ordered from the highest to the lowest
# value, so the palette (dark -> light) is assigned in that order.
p1$country <- factor(p1$country, levels = p1$country[order(-p1$value)])

# Plotting the data: horizontal bars, largest value at the top.
ggplot(p1, aes(x = value, y = reorder(country, value), fill = country)) +
  geom_col() +
  my_theme +
  labs(
    title = "Top 10 Countries With The <span style='color:#004c6d;'>Highest</span> Combined <br>TB Value (in thousands) in 2012",
    x = "Value",
    y = "Country"
  ) +
  scale_fill_manual(
    values = c(
      "#004c6d", "#215f7e", "#377290", "#4c86a2", "#619ab4",
      "#76afc6", "#8cc4d9", "#a2d9ec", "#b8efff", "#c6fdff"
    )
  ) +
  # element_markdown() is needed so the <span>/<br> markup in the title
  # is rendered instead of printed literally.
  theme(legend.position = "none", plot.title = element_markdown())
Visual Perspective 2
Question
What are the 10 countries with the lowest combined TB value in 2012?
Code
# Preprocessing the data: total the 2012 values per country and keep the ten
# smallest totals.
# NOTE(review): `value > 50` is applied to individual rows *before* summing,
# so each country's "combined" total excludes its rows with value <= 50.
# Confirm whether the threshold was meant for the per-country total instead.
p2 <- who_tb %>%
  drop_na() %>%
  filter(year == 2012, value > 50) %>%
  group_by(country) %>%
  summarise(value = sum(value)) %>%
  # slice_min() replaces the superseded top_n(-10) and names the ranking
  # column explicitly. with_ties = FALSE caps the result at 10 rows, so the
  # 10-colour palette below can never run out of values.
  slice_min(value, n = 10, with_ties = FALSE) %>%
  arrange(value)

# Changing to factor with levels ordered from the highest to the lowest
# value, so the palette (dark -> light) is assigned in that order.
p2$country <- factor(p2$country, levels = p2$country[order(-p2$value)])

# Plotting the data: horizontal bars, smallest value at the top.
# reorder(country, -value) sorts the axis by descending value.
ggplot(p2, aes(x = value, y = reorder(country, -value), fill = country)) +
  geom_col() +
  my_theme +
  labs(
    title = "Top 10 Countries With The <span style='color:#a2d9ec;'>Lowest</span> Combined <br>TB Value in 2012",
    x = "Value",
    y = "Country"
  ) +
  scale_fill_manual(
    values = c(
      "#004c6d", "#215f7e", "#377290", "#4c86a2", "#619ab4",
      "#76afc6", "#8cc4d9", "#a2d9ec", "#b8efff", "#c6fdff"
    )
  ) +
  # element_markdown() is needed so the <span>/<br> markup in the title
  # is rendered instead of printed literally.
  theme(legend.position = "none", plot.title = element_markdown())
Visual Perspective 3
Question
Which age group had the highest TB rate in 2012 for the U.S.?
Code
# Preprocessing the data: total the 2012 U.S. values per age group, give the
# age codes readable labels, and flag the group(s) with the largest total.
p3 <- who_tb %>%
  drop_na() %>%
  filter(country == "United States of America", year == 2012) %>%
  group_by(age) %>%
  summarise(value = sum(value)) %>%
  mutate(
    age = case_when(
      age == "014" ~ "0-14",
      age == "1524" ~ "15-24",
      age == "2534" ~ "25-34",
      age == "3544" ~ "35-44",
      age == "4554" ~ "45-54",
      age == "5564" ~ "55-64",
      age == "65" ~ "65>",
      .default = as.character(age)
    ),
    # Derive the highlight from the data instead of assuming that the last
    # bar is the tallest one.
    is_highest = value == max(value)
  )

# Plotting the data: one bar per age group, with the highest bar drawn in the
# same dark colour that the title uses for the word "Highest".
ggplot(p3, aes(x = age, y = value, fill = is_highest)) +
  geom_col() +
  my_theme +
  labs(
    title = "Age Group With The <span style='color:#004c6d;'>Highest</span> TB rate in 2012 for the U.S.",
    x = "Age Group",
    # Values are plotted as summed, without rescaling, so no unit suffix.
    y = "Value"
  ) +
  # Named values keep the mapping correct for any number of age groups.
  scale_fill_manual(values = c("TRUE" = "#004c6d", "FALSE" = "#63a1b4")) +
  # element_markdown() is needed so the <span> markup in the title is
  # rendered instead of printed literally.
  theme(legend.position = "none", plot.title = element_markdown())
Visual Perspective 4
Question
How do men and women differ in TB rate over the years in the U.S.?
Code
# Preprocessing the data: total the U.S. values per year and gender, and
# expand the gender codes into readable labels for the legend.
p4 <- who_tb %>%
  drop_na() %>%
  filter(country == "United States of America") %>%
  group_by(year, gender) %>%
  # .groups = "drop" returns an ungrouped tibble (and silences the
  # regrouping message) so later steps do not run per year by accident.
  summarise(value = sum(value), .groups = "drop") %>%
  mutate(
    gender = case_when(
      gender == "f" ~ "Female",
      gender == "m" ~ "Male",
      .default = as.character(gender)
    )
  ) %>%
  rename(Gender = gender)

# Plotting the data: one line per gender plus two annotated reference years.
ggplot(p4, aes(x = year, y = value, color = Gender)) +
  geom_line() +
  my_theme +
  labs(
    title = "TB Rate 1995-2012 in the U.S (Male vs Female)",
    subtitle = "Reference: World Health Organization. (1995-2008). Tuberculosis control and research strategies:\nMemorandum from a WHO meeting. Bulletin of the World Health Organization timeline.",
    x = "Year",
    # Values are plotted as summed, without rescaling, so no unit suffix.
    y = "Value"
  ) +
  # The lines are mapped to `color`, so the palette has to be set on the
  # colour scale; a fill scale would be ignored here.
  scale_color_manual(values = c("#003f5c", "#ff6361")) +
  theme(
    # NOTE(review): a numeric legend.position is deprecated from ggplot2
    # 3.5.0 (use legend.position.inside); kept for older versions.
    legend.position = c(0.92, 0.85),
    legend.title = element_text(size = 7),
    legend.text = element_text(size = 6),
    plot.subtitle = element_text(size = 7, family = "Roboto")
  ) +
  # Grey vertical markers at 1995 and 2008: a segment from the bottom of the
  # panel up to the higher of the two y values, with a point at each y value.
  annotate(
    "pointrange",
    x = 1995, y = c(5629, 2462), ymin = -Inf, ymax = 5629,
    colour = "grey", size = 0.3, linewidth = 0.2
  ) +
  annotate(
    "pointrange",
    x = 2008, y = c(3206, 1536), ymin = -Inf, ymax = 3206,
    colour = "grey", size = 0.3, linewidth = 0.2
  ) +
  # Explanatory captions placed next to the markers.
  annotate(
    "text",
    x = 1995.2, y = 3800,
    label = "The WHO declares TB a global\n emergency with deaths from TB higher\n than any previous year in history.",
    hjust = 0, color = "black", size = 2
  ) +
  annotate(
    "text",
    x = 2004, y = 2500,
    label = "The Department of Health launches\nTuberculosis Prevention and\nTreatment in 2008",
    hjust = 0, color = "black", size = 2
  )
Visual Perspective 5
Question
How does Iran compare to the U.S. in TB rate for men and women combined from 1995 to 2012?
Code
# Yearly totals for the USA and Iran, reshaped to one row per year with one
# column per country.
p5 <- who_tb %>%
  drop_na() %>%
  filter(
    country %in% c("United States of America", "Iran (Islamic Republic of)")
  ) %>%
  group_by(country, year) %>%
  # Drop the grouping so the reshape below runs on a plain tibble.
  summarise(value = sum(value), .groups = "drop") %>%
  mutate(
    country = case_when(
      country == "Iran (Islamic Republic of)" ~ "Iran",
      country == "United States of America" ~ "USA",
      .default = country
    )
  ) %>%
  pivot_wider(names_from = country, values_from = value)

# Yearly reference line: rounded mean of `value` over all rows from 1995-2012
# with value > 50. The statistic is a mean, so the column and the plot label
# say "Mean" rather than "Median".
world_mean <- who_tb %>%
  drop_na() %>%
  filter(value > 50, year >= 1995 & year < 2013) %>%
  group_by(year) %>%
  summarise(Mean = round(mean(value)))

# Explicit join key: avoids the "Joining with `by = ...`" message and guards
# against matching on any other shared column.
complete_data <- full_join(p5, world_mean, by = "year")

# Plotting the data: one line per country plus the dashed world mean, each
# labelled directly at the right-hand end instead of through a legend.
ggplot(complete_data, aes(x = year)) +
  geom_line(aes(y = USA), color = "brown") +
  geom_line(aes(y = Mean), color = "darkgrey", linetype = "longdash") +
  geom_line(aes(y = Iran), color = "blue") +
  my_theme +
  scale_x_continuous(
    breaks = seq(min(complete_data$year), max(complete_data$year), by = 2)
  ) +
  # na.rm = TRUE: a full join can leave NA in a series for years that are
  # missing on one side, which would otherwise make seq() fail.
  scale_y_continuous(
    breaks = seq(
      min(complete_data$Mean, na.rm = TRUE),
      max(complete_data$Iran, na.rm = TRUE),
      by = 1200
    )
  ) +
  annotate(
    "text",
    x = 2012, y = 3950, label = "UNITED STATES",
    hjust = 1, color = "brown", size = 2.5
  ) +
  annotate(
    "text",
    x = 2012, y = 2300, label = "WORLD MEAN",
    hjust = 1, color = "grey", size = 2.5
  ) +
  annotate(
    "text",
    x = 2012, y = 5700, label = "IRAN",
    hjust = 1, color = "blue", size = 2.5
  ) +
  labs(
    title = "USA vs IRN in TB Value in Men and Women Combined (1995-2012)",
    x = "Year",
    # Values are plotted as summed, without rescaling, so no unit suffix.
    y = "Value"
  )