The original chart được tạo bởi Cédric Scherer sử dụng dữ liệu mà cần nhiều hiệu chỉnh lại. Và vì thời điểm chart được thực hiện thì data vẫn chưa được tốt. Mặt khác R codes được sử dụng để tạo ra chart là có những chỗ phức tạp quá mức cần thiết. Dựa trên ý tưởng của Cédric Scherer và với dữ liệu từ COVID19 Package chúng ta có thể đưa ra insights về tình trạng người chết vì Covid ở 25 quốc gia có số lượng người chết lớn nhất như sau:
Dưới đây là R codes:
# Clear R environment:
rm(list = ls())
# Load Covid-19 data:
<- COVID19::covid19(level = 1) # Ref: https://covid19datahub.io/articles/r.html
covidData
#===========================
# Prepare data for ploting
#===========================
library(tidyverse) # For data manipulation and visualization.
library(lubridate) # For date-time processing.
<- c("France", "Chile", "Peru", "South Africa", "Belgium")
some_nations
%>%
covidData filter(!administrative_area_level_1 %in% some_nations) %>%
group_by(administrative_area_level_1) %>%
slice(which.max(date)) %>%
select(date, administrative_area_level_1, deaths) %>%
ungroup() %>%
arrange(-deaths) %>%
slice(1:25) %>%
pull(administrative_area_level_1) -> countries12
%>%
covidData select(date, administrative_area_level_1, deaths) %>%
filter(administrative_area_level_1 %in% countries12) %>%
rename(country = administrative_area_level_1) -> df12
%>%
df12 group_by(country) %>%
mutate(lag1_deaths = lag(deaths, n = 1L)) %>%
mutate(deaths = replace_na(deaths, 0)) %>%
mutate(lag1_deaths = replace_na(lag1_deaths, 0)) %>%
mutate(daily_deaths = deaths - lag1_deaths) %>%
mutate(daily_deaths = ifelse(daily_deaths < 0, 0, daily_deaths)) -> df12
library(zoo)
%>%
df12 mutate(avg7_daily_deaths = rollmean(daily_deaths, 7, align = "right", fill = NA)) %>%
mutate(max_deaths_avg7 = max(avg7_daily_deaths, na.rm = TRUE)) %>%
mutate(rate_to_max = 100*avg7_daily_deaths / max_deaths_avg7) -> df_for_ploting
%>%
df_for_ploting group_by(country) %>%
slice(which.max(avg7_daily_deaths)) %>%
mutate(rate_to_max = 100) %>%
mutate(death_text = as.character(round(avg7_daily_deaths, 0))) -> day_max_deaths
%>%
df_for_ploting group_by(country) %>%
slice(which.max(date)) %>%
arrange(-deaths) %>%
mutate(total_death_thousand = round(deaths / 1000, 0)) -> df_total_death
%>%
df_for_ploting group_by(country) %>%
slice(which.min(date)) -> df_start_date
%>%
df_start_date mutate(my_m = month(date, label = TRUE, abbr = TRUE)) %>%
mutate(my_d = day(date)) %>%
mutate(my_d = case_when(str_count(my_d) == 1 ~ str_c("0", my_d), TRUE ~ as.character(my_d))) %>%
mutate(date_text = str_c(my_m, my_d, sep = "-")) -> df_start_date
%>%
df_for_ploting group_by(country) %>%
slice(which.max(date)) %>%
mutate(n_deaths = round(avg7_daily_deaths, 0) %>% as.character()) -> df_end_date
#==========================
# Data Visualization
#==========================
theme_set(theme_minimal())
library(gghighlight) # Ref: https://github.com/yutannihilation/gghighlight
<- "orange"
color_now_deaths <- "#CE3240"
color_max_deaths
library(showtext) # Package for using extra fonts.
<- "Roboto Condensed"
my_font
# Load font for ploting:
font_add_google(name = my_font, family = my_font)
showtext_auto() # Automatically render text.
<- "<span style = 'color:#CE3240; font-size:17pt'>The Deadliest Days</span><span style = 'font-size:17pt'> Due to COVID-19 for Top-25 Countries by Total Deaths Confirmed"
p_title
<- "Each panel shows the trajectory of confirmed deaths due to COVID-19 as percentage of daily deaths relative to the <i style='color:#CE3240'>worst day so far</i> for top-25 countries. For each day,<br>starting with the date of first reported death, the 7-day rolling average of confimed deaths per day is divided by the <b style='color:#CE3240;'>highest average number of daily deaths</b><br>so far defined as 100%. The <i style='color:orange;'>orange points</i> show the lastest number of cofirmed deaths due to COVID-19.</span>"
p_subtitle
<- "Note: Charts contain all confirmed deaths due to COVID-19 from January 01<sup>nd</sup> 2020 to July 26<sup>th</sup> 2022 for top-25 countries.<br>Data: Johns Hopkins University Center for Systems Science and Engineering (CSSE) | Graphic: Nguyen Chi Dung"
p_caption
library(ggtext) # Ref: https://cran.r-project.org/web/packages/ggtext/index.html
%>%
df_for_ploting ggplot(aes(x = date, y = rate_to_max, group = country)) +
geom_line(alpha = 0.15, size = 0.7) +
gghighlight(use_direct_label = FALSE, unhighlighted_params = list(color = "grey70")) +
geom_line(size = 0.7, color = "grey30") +
geom_point(data = day_max_deaths, color = color_max_deaths, size = 2) +
geom_text(data = day_max_deaths, aes(label = death_text), vjust = -0.5, size = 3.5, family = my_font) +
geom_point(data = df_end_date, color = "black", size = 2, shape = 21, fill = color_now_deaths) +
geom_text(data = df_end_date, aes(label = n_deaths), vjust = -0.5, size = 3.5, family = my_font) +
facet_wrap(~ country) +
coord_cartesian(clip = "off") +
theme(axis.title = element_blank()) +
theme(panel.grid.minor = element_blank()) +
theme(panel.grid.major.x = element_blank()) +
scale_y_continuous(limits = c(0, 125), breaks = seq(0, 100, 25), labels = str_c(seq(0, 100, 25), "%")) +
theme(axis.text = element_text(family = my_font, size = 9)) +
theme(plot.margin = unit(rep(0.6, 4), "cm")) +
theme(panel.spacing = unit(0.5, "lines")) +
theme(strip.text = element_text(color = "grey20", family = my_font, hjust = 0, size = 9, face = "bold", vjust = -1.5)) +
labs(title = p_title,
subtitle = p_subtitle,
caption = p_caption) +
theme(plot.title.position = "plot") +
theme(plot.caption.position = "plot") +
theme(plot.title = element_markdown(family = my_font, face = "bold", size = 18)) +
theme(plot.subtitle = element_markdown(family = my_font, size = 11, color = "grey30")) +
theme(plot.caption = element_markdown(family = my_font, color = "grey40", size = 10, hjust = 0, vjust = -4))