Introduction

The purpose of this project was to create interactive and animated plots using packages such as plotly and gganimate. The data sets used in our visualizations come from Our World in Data. All the data were cleaned using the data.table package, and the plots use a custom theme to keep a standardized look across our analysis.

Set Up

## Clear environment
# NOTE(review): rm(list = ls()) removes the objects that ls() lists in the
# current environment (names starting with a dot are not listed). It does not
# detach packages or reset options, so it is not a substitute for a fresh R
# session.
rm(list = ls())

## Load libraries
# pacman::p_load() attaches every package listed below; per the pacman
# documentation it also installs packages that are missing - confirm that this
# is acceptable wherever the report is rendered.
pacman::p_load(tidyverse,readr, data.table, kableExtra, leaflet, ggpubr, gganimate, magick,ggthemes, plotly) 

Loading Data

# Read the three CSV inputs with data.table::fread(); each call returns one
# table that the later sections filter, rename and plot.
# NOTE(review): the paths are absolute and user-specific, so they only resolve
# on a machine that has this exact directory layout.
malaria_death_region <- fread(
  input = "/Users/khawajahassan/Practical Data Visualization/Project/global-malaria-deaths-by-world-region.csv"
)
malaria_occurence_country <- fread(
  input = "/Users/khawajahassan/Practical Data Visualization/Project/incidence-of-malaria-sdgs.csv"
)
malaria_death_age <- fread(
  input = "/Users/khawajahassan/Practical Data Visualization/Project/malaria-deaths-by-age.csv"
)

Define Custom Theme

#' Custom ggplot2 theme used by the plots in this report
#'
#' Starts from `theme_minimal()` and overrides a handful of elements with
#' `%+replace%`. Note that `%+replace%` swaps each listed element as a whole:
#' any property that is not set below is left unset for that element instead
#' of keeping the value `theme_minimal()` had given it.
#'
#' @param font Font family applied to the title, subtitle, caption, axis
#'   titles and axis text. Defaults to `"Georgia"`, the value that used to be
#'   hard-coded, so existing `theme_tatti()` calls behave as before.
#' @return A ggplot2 theme object that can be added to a plot with `+`.
theme_tatti <- function(font = "Georgia") {
  theme_minimal() %+replace%
    theme(
      # Grid elements: hide the minor grid lines and the panel background.
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),

      # Text elements.
      plot.title = element_text(
        family = font,
        size = 10,
        hjust = 0.5,  # centred
        vjust = 2,    # lifted slightly away from the panel
        color = "Black"
      ),

      plot.subtitle = element_text(
        family = font,
        size = 10,
        hjust = 0.5,
        color = "#2ca25f"
      ),

      plot.caption = element_text(
        family = font,
        size = 6,
        hjust = 1     # right-aligned
      ),

      axis.title = element_text(
        family = font,
        size = 10
      ),

      axis.text = element_text(
        family = font,
        size = 9
      ),

      # Extra space around the x-axis labels (top = 5, bottom = 10).
      axis.text.x = element_text(
        margin = margin(5, b = 10)
      )
    )
}

Checking for Missing Value

# Count the NA values in every column of each input table.
# vapply() (rather than sapply()) guarantees an integer vector even for a table
# with zero columns. Each result gets its own name so that one check no longer
# overwrites the previous one.
# NOTE: only real NA values are counted here; empty strings ("") are not.
# The original author recorded "no missing value" for all three tables.

# malaria_death_region
na_death_region <- vapply(
  malaria_death_region,
  function(x) sum(is.na(x)),
  integer(1)
)

# malaria_occurence_country
na_occurence_country <- vapply(
  malaria_occurence_country,
  function(x) sum(is.na(x)),
  integer(1)
)

# malaria_death_age
na_death_age <- vapply(
  malaria_death_age,
  function(x) sum(is.na(x)),
  integer(1)
)

# Kept for backward compatibility: the original code left the counts for
# malaria_death_age in `to_filter`.
to_filter <- na_death_age

Regional Distribution of Malaria Deaths

The following visualization shows the number of deaths caused by malaria in each of our defined regions over the period 2000-2015.

# Drop the "Europe" entity and the Code column, which the plot does not use.
malaria_death_region <- malaria_death_region[Entity != "Europe"]
malaria_death_region <- malaria_death_region[, Code := NULL]

# One line-and-point panel per region, stacked in a single column, each with
# its own free axes.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
# `linewidth`; `size` is kept so the code also runs on older versions.
a <- ggplot(malaria_death_region) +
  aes(
    x = Year,
    y = `Malaria Deaths by World Region (WHO (2016))`,
    colour = Entity
  ) +
  geom_line(size = 0.5) +
  geom_point() +
  scale_color_viridis_d(option = "viridis", direction = 1) +
  # Put the x-axis breaks on the years that are actually present in the data.
  # The previous `breaks = 0:200` lay entirely outside the range of calendar
  # years, which left the axis without any tick marks or labels.
  scale_x_continuous(breaks = sort(unique(malaria_death_region$Year))) +
  facet_wrap(vars(Entity), scales = "free", ncol = 1L) +
  ggtitle(" Region Wise Malaria Deaths (2000-2015)") +
  labs(x = "Years", y = "Malaria Deaths by World Region") +
  theme_tatti()

# Reveal the lines progressively along the Year axis.
a + transition_reveal(Year)

Malaria Occurrence count

For this visualization we first had to extract the ten countries with the highest average reported malaria incidence. Based on that final table we then built a box plot that shows, for each country, how the values are distributed in terms of their median and quartile ranges.

# Give the incidence column a syntactic name. setnames() renames by reference,
# so malaria_occurence_country itself is updated.
setnames(
  malaria_occurence_country,
  old = "Incidence of malaria (per 1,000 population at risk)",
  new = "Incidence_of_malaria"
)

# Mean incidence per entity (data.table names the unnamed result column V1),
# sorted from highest to lowest, keeping the first ten rows.
Average_occurence_Country <- malaria_occurence_country[
  , mean(Incidence_of_malaria), by = Entity
][order(-V1)][1:10, ]



# Countries kept for the box plot and the time-series plot.
# NOTE(review): this list is presumably the content of
# Average_occurence_Country$Entity - verify it still matches if the data change.
top_countries <- c(
  "Burkina Faso",
  "Cote d'Ivoire",
  "Central African Republic",
  "Benin",
  "Sierra Leone",
  "Democratic Republic of Congo",
  "Mali",
  "Mozambique",
  "Liberia",
  "Togo"
)

# A single %in% test replaces ten chained `==` / `|` comparisons; the same
# rows are selected, in the same order.
top <- malaria_occurence_country[Entity %in% top_countries]

# Box plot of malaria incidence, one box per country in `top`. The country
# names are shown in the legend (placed on the left), so the x-axis text and
# ticks are blanked out.
boxplot <- ggplot(
  top,
  aes(x = Entity, y = Incidence_of_malaria, fill = Entity)
) +
  geom_boxplot(shape = "circle") +
  scale_fill_viridis_d(option = "inferno", direction = 1) +
  theme_tatti() +
  theme(
    legend.position = "left",
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  labs(
    title = "Top Ten Country with Malaria Cases",
    caption = "Source : Our World in Data",
    x = "",
    y = "Number of Occurence"
  )

# Animate one state per country without wrapping back to the first state;
# shadow_mark() keeps the earlier boxes on screen at half opacity.
boxplot +
  transition_states(Entity, wrap = FALSE) +
  shadow_mark(alpha = 0.5) +
  enter_grow() +
  exit_fade() +
  ease_aes("back-out")

Interactive map with Plotly

This is a really interesting plot made with the plotly package. We restricted the visualization to Africa, since it had the highest number of cases. The map shows how the cases change over the given time range. What makes this graph especially interesting is that you can use the lasso tool to select only the countries you want and see how they vary. Lastly, you can hover over a country to see its name and number of cases.

# Turn empty strings into NA, then drop every row that has a missing value.
is.na(malaria_occurence_country) <- malaria_occurence_country == ""
# complete.cases() flags the same rows as the former apply(..., any(is.na(x)))
# without first coercing the whole table to a character matrix.
row.has.na <- !complete.cases(malaria_occurence_country)

malaria_occurence_country <- malaria_occurence_country[!row.has.na, ]

# Hover label: entity name on the first line, rounded incidence on the second.
malaria_occurence_country$hover <- paste0(
  malaria_occurence_country$Entity,
  "\n",
  round(malaria_occurence_country$Incidence_of_malaria)
)

# Choropleth limited to Africa with one animation frame per year.
# `locationmode = "ISO-3"` replaces the former `locationnode = 'africa'`, which
# was a misspelled attribute name carrying an invalid value; the Code column
# supplies the locations (it holds three-letter codes such as "COD" or "MLI",
# which are also used to filter the time-series plot).
map2 <- plot_geo(
  malaria_occurence_country,
  locationmode = "ISO-3",
  frame = ~Year
) %>%
  add_trace(
    locations = ~Code,
    z = ~Incidence_of_malaria,
    # Fixed colour range so that every frame shares the same scale.
    zmax = max(malaria_occurence_country$Incidence_of_malaria),
    zmin = 0,
    color = ~Incidence_of_malaria,
    text = ~hover,
    hoverinfo = "text"
  ) %>%
  layout(
    geo = list(scope = "africa"),
    title = "Numer of Malaria Occurence in Africa\n2000- 2018"
  )
map2

Time series of Number of deaths by Country

The following diagram shows the top 5 countries with the highest death rate over the given time range. Moreover, we can see that as the years pass the number of deaths falls, due to increased awareness and vaccination being readily available.

## Time series of Number of deaths by Country ##

# Line chart for the five countries selected by code from `top`.
# NOTE(review): the y variable is Incidence_of_malaria, while the axis label
# and title speak of deaths - confirm which wording is intended.
time_series <- top %>%
  filter(Code %in% c("COD", "LBR", "MLI", "MOZ", "SLE")) %>%
  ggplot() +
  aes(
    x = Year,
    y = Incidence_of_malaria,
    colour = Entity,
    group = Entity
  ) +
  geom_line(size = 0.95) +
  scale_color_viridis_d(option = "viridis", direction = 1) +
  labs(
    y = "Incidence of Death",
    title = "Death Rate from Malaria",
    caption = "Source: Our World in Data"
  ) +
  theme_tatti() +
  theme(legend.position = "bottom")

# Reveal the lines year by year; the view follows the data along x while the
# y axis stays fixed. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
time_series +
  transition_reveal(Year) +
  view_follow(fixed_y = TRUE)

Malaria by Age Group

The last visualization is a stacked area chart of the number of deaths by age group from 1990 to 2019.

# Turn empty strings into NA, then drop every row that has a missing value.
is.na(malaria_death_age) <- malaria_death_age == ""
# complete.cases() flags the same rows as the former apply(..., any(is.na(x)))
# without first coercing the whole table to a character matrix.
row.has.naa <- !complete.cases(malaria_death_age)
malaria_death_age <- malaria_death_age[!row.has.naa, ]

# Exclude the "World" entity before the per-year totals are computed.
# NOTE(review): presumably this avoids adding a global aggregate on top of the
# individual entities - confirm against the data.
malaria_death_age <- malaria_death_age[Entity != "World"]

# Shorten the five age-group column names in a single by-reference rename.
setnames(
  malaria_death_age,
  old = c(
    "Deaths - Malaria - Sex: Both - Age: Under 5 (Number)",
    "Deaths - Malaria - Sex: Both - Age: 70+ years (Number)",
    "Deaths - Malaria - Sex: Both - Age: 5-14 years (Number)",
    "Deaths - Malaria - Sex: Both - Age: 15-49 years (Number)",
    "Deaths - Malaria - Sex: Both - Age: 50-69 years (Number)"
  ),
  new = c(
    "Under_5",
    "plus_70",
    "Between_5_14",
    "Between_15_49",
    "Between_50_69"
  )
)

# Total deaths per year and age group.
# Year is grouped as a number. It used to be grouped as a factor and converted
# with as.numeric() afterwards, which returns the factor's level codes
# (1, 2, 3, ...) instead of the calendar years and so mislabelled the x axis.
year_age <- malaria_death_age[
  ,
  .(
    Under_5 = sum(Under_5),
    Between_5_14 = sum(Between_5_14),
    Plus_70 = sum(plus_70),
    Between_15_49 = sum(Between_15_49),
    Between_50_69 = sum(Between_50_69)
  ),
  by = .(Year = as.numeric(Year))
]

# Long format: one row per Year / Category pair. pivot_longer() replaces the
# superseded gather(); only the row order differs, which does not affect the
# plot.
year_age <- year_age %>%
  pivot_longer(cols = -Year, names_to = "Category", values_to = "Number")

# Stacked area chart of deaths by age group over time.
age_dis <- ggplot(year_age) +
  aes(x = Year, y = Number, fill = Category, text = Category) +
  geom_area(size = 1.5) +
  scale_fill_viridis_d(option = "viridis", direction = -1) +
  ggtitle("Death from Malaria by Age") +
  labs(
    caption = "Source : Our World in Data",
    y = "Number of Deaths"
  )
age_dis

Conclusion

In this publication, we used some powerful packages to create animated and interactive plots. However, all of these graphs could also have been produced without animation or interaction, depending on the specific needs of the case.