Motivations

The original chart được tạo bởi Cédric Scherer sử dụng dữ liệu mà cần nhiều hiệu chỉnh lại. Và vì thời điểm chart được thực hiện thì data vẫn chưa được tốt. Mặt khác R codes được sử dụng để tạo ra chart là có những chỗ phức tạp quá mức cần thiết. Dựa trên ý tưởng của Cédric Scherer và với dữ liệu từ COVID19 Package chúng ta có thể đưa ra insights về tình trạng người chết vì Covid ở 25 quốc gia có số lượng người chết lớn nhất như sau:

R Codes

Dưới đây là R codes:

# Clear R environment: 

rm(list = ls())

# Load Covid-19 data: 

covidData <- COVID19::covid19(level = 1) # Ref: https://covid19datahub.io/articles/r.html

#===========================
#  Prepare data for ploting
#===========================

library(tidyverse) # For data manipulation and visualization. 
library(lubridate) # For date-time processing.

some_nations <- c("France", "Chile", "Peru", "South Africa", "Belgium")

covidData %>% 
  filter(!administrative_area_level_1 %in% some_nations) %>% 
  group_by(administrative_area_level_1) %>% 
  slice(which.max(date)) %>% 
  select(date, administrative_area_level_1, deaths) %>% 
  ungroup() %>% 
  arrange(-deaths) %>% 
  slice(1:25) %>% 
  pull(administrative_area_level_1) -> countries12

covidData %>% 
  select(date, administrative_area_level_1, deaths) %>% 
  filter(administrative_area_level_1 %in% countries12) %>% 
  rename(country = administrative_area_level_1) -> df12

df12 %>% 
  group_by(country) %>%
  mutate(lag1_deaths = lag(deaths, n = 1L)) %>% 
  mutate(deaths = replace_na(deaths, 0)) %>% 
  mutate(lag1_deaths = replace_na(lag1_deaths, 0)) %>% 
  mutate(daily_deaths = deaths - lag1_deaths) %>% 
  mutate(daily_deaths = ifelse(daily_deaths < 0, 0, daily_deaths)) -> df12

library(zoo)

df12 %>% 
  mutate(avg7_daily_deaths = rollmean(daily_deaths, 7, align = "right", fill = NA)) %>% 
  mutate(max_deaths_avg7 = max(avg7_daily_deaths, na.rm = TRUE)) %>% 
  mutate(rate_to_max = 100*avg7_daily_deaths / max_deaths_avg7) -> df_for_ploting

df_for_ploting %>% 
  group_by(country) %>% 
  slice(which.max(avg7_daily_deaths)) %>% 
  mutate(rate_to_max = 100) %>% 
  mutate(death_text = as.character(round(avg7_daily_deaths, 0))) -> day_max_deaths

df_for_ploting %>% 
  group_by(country) %>% 
  slice(which.max(date)) %>% 
  arrange(-deaths) %>% 
  mutate(total_death_thousand = round(deaths / 1000, 0)) -> df_total_death

df_for_ploting %>% 
  group_by(country) %>% 
  slice(which.min(date)) -> df_start_date

df_start_date %>% 
  mutate(my_m = month(date, label = TRUE, abbr = TRUE)) %>% 
  mutate(my_d = day(date)) %>% 
  mutate(my_d = case_when(str_count(my_d) == 1 ~ str_c("0", my_d), TRUE ~ as.character(my_d))) %>% 
  mutate(date_text = str_c(my_m, my_d, sep = "-")) -> df_start_date
  
df_for_ploting %>% 
  group_by(country) %>% 
  slice(which.max(date)) %>% 
  mutate(n_deaths = round(avg7_daily_deaths, 0) %>% as.character()) -> df_end_date

#==========================
#   Data Visualization
#==========================

theme_set(theme_minimal())

library(gghighlight) # Ref: https://github.com/yutannihilation/gghighlight

color_now_deaths <- "orange"
color_max_deaths <- "#CE3240" 

library(showtext) # Package for using extra fonts. 

my_font <- "Roboto Condensed" 

# Load font for ploting: 

font_add_google(name = my_font, family = my_font) 

showtext_auto() # Automatically render text. 

p_title <- "<span style = 'color:#CE3240; font-size:17pt'>The Deadliest Days</span><span style = 'font-size:17pt'> Due to COVID-19 for Top-25 Countries by Total Deaths Confirmed"

p_subtitle <- "Each panel shows the trajectory of confirmed deaths due to COVID-19 as percentage of daily deaths relative to the <i style='color:#CE3240'>worst day so far</i> for top-25 countries. For each day,<br>starting with the date of first reported death, the 7-day rolling average of confimed deaths per day is divided by the <b style='color:#CE3240;'>highest average number of daily deaths</b><br>so far defined as 100%. The <i style='color:orange;'>orange points</i> show the lastest number of cofirmed deaths due to COVID-19.</span>"

p_caption <- "Note: Charts contain all confirmed deaths due to COVID-19 from January 01<sup>nd</sup> 2020 to July 26<sup>th</sup> 2022 for top-25 countries.<br>Data: Johns Hopkins University Center for Systems Science and Engineering (CSSE) | Graphic: Nguyen Chi Dung"

library(ggtext) # Ref: https://cran.r-project.org/web/packages/ggtext/index.html

df_for_ploting %>% 
  ggplot(aes(x = date, y = rate_to_max, group = country)) + 
  geom_line(alpha = 0.15, size = 0.7) + 
  gghighlight(use_direct_label = FALSE, unhighlighted_params = list(color = "grey70")) +
  geom_line(size = 0.7, color = "grey30") + 
  geom_point(data = day_max_deaths, color = color_max_deaths, size = 2) + 
  geom_text(data = day_max_deaths, aes(label = death_text), vjust = -0.5, size = 3.5, family = my_font) + 
  geom_point(data = df_end_date, color = "black", size = 2, shape = 21, fill = color_now_deaths) + 
  geom_text(data = df_end_date, aes(label = n_deaths), vjust = -0.5, size = 3.5, family = my_font) + 
  facet_wrap(~ country) + 
  coord_cartesian(clip = "off") + 
  theme(axis.title = element_blank()) + 
  theme(panel.grid.minor = element_blank()) + 
  theme(panel.grid.major.x = element_blank()) + 
  scale_y_continuous(limits = c(0, 125), breaks = seq(0, 100, 25), labels = str_c(seq(0, 100, 25), "%")) + 
  theme(axis.text = element_text(family = my_font, size = 9)) + 
  theme(plot.margin = unit(rep(0.6, 4), "cm")) + 
  theme(panel.spacing = unit(0.5, "lines")) +  
  theme(strip.text = element_text(color = "grey20", family = my_font, hjust = 0, size = 9, face = "bold", vjust = -1.5)) + 
  labs(title = p_title, 
       subtitle = p_subtitle, 
       caption = p_caption) + 
  theme(plot.title.position = "plot") +  
  theme(plot.caption.position = "plot") + 
  theme(plot.title = element_markdown(family = my_font, face = "bold", size = 18)) +  
  theme(plot.subtitle = element_markdown(family = my_font, size = 11, color = "grey30")) + 
  theme(plot.caption = element_markdown(family = my_font, color = "grey40", size = 10, hjust = 0, vjust = -4))