Github repo: https://github.com/roboton/corona
library(tidyverse)
library(rvest)
library(magrittr)
library(lubridate)
Scrape data from worldometers
cor_html <- read_html("https://www.worldometers.info/coronavirus/#countries")
tests_html <- read_html("https://www.worldometers.info/coronavirus/covid-19-testing/")
Clean and join country-level case detail and test data
cor_data <- cor_html %>% html_node("#main_table_countries") %>%
html_table() %>%
# removes commas in numbers
mutate_if(is.character, function(x) { gsub("[,:]", "", x) }) %>%
# turn everything but country column into a numeric
mutate_at(vars(-`Country,Other`), as.numeric) %>%
# compute a mortality rate
replace_na(list("TotalDeaths" = 0)) %>%
mutate(`Case Fatality rate`= TotalDeaths * 100 / TotalCases)
tests_data <- tests_html %>%
html_nodes("table") %>%
# Extract the first table
extract2(1) %>%
html_table(header = T) %>%
# Missing variable names so fill in
as_tibble(.name_repair = "unique") %>%
# Get of rid of commas
mutate_if(is.character, function(x) { gsub("[,:]", "", x) }) %>%
# turn columns except country into numeric
mutate_at(vars(-Country), as.numeric) %>%
# rename South Korea to S. Korea
mutate(Country=if_else(Country == "South Korea", "S. Korea", Country))
(join_data <- cor_data %>%
left_join(tests_data, by=c("Country,Other" = "Country")))
Analysis
Crude mortality rate
join_data %>%
filter(!is.na(Population)) %>%
mutate(`Crude mortality rate`= 100 * TotalDeaths / Population) %>%
ggplot(aes(`Crude mortality rate`,
reorder(`Country,Other`, `Crude mortality rate`),)) +
geom_point() + ylab("Country") +
scale_x_continuous(labels = function(x) format(x, scientific = FALSE))

Positive test rate
join_data %>%
filter(!is.na(Population)) %>%
mutate(`Positive test rate`= 100 * TotalCases / `Tests Performed`) %>%
ggplot(aes(`Positive test rate`,
reorder(`Country,Other`, `Positive test rate`),
color=`Tests per Million People`)) +
geom_point() +
scale_x_continuous(labels = function(x) format(x, scientific = FALSE)) +
ylab("Country")

Case fatality rate by test saturation
One misleading stat we often see are mortality rates based on non-uniform testing. In some countries, testing capabilities are limited so only the very sick are being tested. If you’re only testing the very sick it will give you a mortality rate that is biased upward. Below we take a look at the mortailty rate (as defined by number of deaths / number of positive cases) by the amount of tests per million people within a country.
join_data %>%
select(
`Tests per Million People`, `Case Fatality rate`, `Country,Other`, Population,
TotalCases) %>%
filter(complete.cases(.)) %>%
filter(`Case Fatality rate` > 0) %>%
mutate(low_high = if_else(`Tests per Million People` < 1000, "low", "high")) %>%
ggplot(aes(`Tests per Million People`, `Case Fatality rate`, color=TotalCases,
group=low_high)) +
geom_point() +
geom_line(stat="smooth", method = "lm", alpha=0.3) +
#geom_hline(yintercept=1, alpha=0.5, color="green") +
geom_text(aes(label=`Country,Other`),hjust=0, vjust=2, size=3) +
#geom_line(stat="smooth", method="lm", alpha=0.2, color="blue") +
ylim(0, 7)

LS0tCnRpdGxlOiAiQ292aWQtMTkgYW5hbHlzaXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCkdpdGh1YiByZXBvOiBbaHR0cHM6Ly9naXRodWIuY29tL3JvYm90b24vY29yb25hXShodHRwczovL2dpdGh1Yi5jb20vcm9ib3Rvbi9jb3JvbmEpCgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShydmVzdCkKbGlicmFyeShtYWdyaXR0cikKbGlicmFyeShsdWJyaWRhdGUpCmBgYAoKIyBTY3JhcGUgZGF0YSBmcm9tIHdvcmxkb21ldGVycwoKYGBge3J9CmNvcl9odG1sIDwtIHJlYWRfaHRtbCgiaHR0cHM6Ly93d3cud29ybGRvbWV0ZXJzLmluZm8vY29yb25hdmlydXMvI2NvdW50cmllcyIpCnRlc3RzX2h0bWwgPC0gcmVhZF9odG1sKCJodHRwczovL3d3dy53b3JsZG9tZXRlcnMuaW5mby9jb3JvbmF2aXJ1cy9jb3ZpZC0xOS10ZXN0aW5nLyIpCmBgYAoKIyBDbGVhbiBhbmQgam9pbiBjb3VudHJ5LWxldmVsIGNhc2UgZGV0YWlsIGFuZCB0ZXN0IGRhdGEKCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmNvcl9kYXRhIDwtIGNvcl9odG1sICU+JSBodG1sX25vZGUoIiNtYWluX3RhYmxlX2NvdW50cmllcyIpICU+JQogIGh0bWxfdGFibGUoKSAlPiUKICAjIHJlbW92ZXMgY29tbWFzIGluIG51bWJlcnMKICBtdXRhdGVfaWYoaXMuY2hhcmFjdGVyLCBmdW5jdGlvbih4KSB7IGdzdWIoIlssOl0iLCAiIiwgeCkgfSkgJT4lCiAgIyB0dXJuIGV2ZXJ5dGhpbmcgYnV0IGNvdW50cnkgY29sdW1uIGludG8gYSBudW1lcmljCiAgbXV0YXRlX2F0KHZhcnMoLWBDb3VudHJ5LE90aGVyYCksIGFzLm51bWVyaWMpICU+JQogICMgY29tcHV0ZSBhIG1vcnRhbGl0eSByYXRlCiAgcmVwbGFjZV9uYShsaXN0KCJUb3RhbERlYXRocyIgPSAwKSkgJT4lCiAgbXV0YXRlKGBDYXNlIEZhdGFsaXR5IHJhdGVgPSBUb3RhbERlYXRocyAqIDEwMCAvIFRvdGFsQ2FzZXMpCgp0ZXN0c19kYXRhIDwtIHRlc3RzX2h0bWwgJT4lCiAgaHRtbF9ub2RlcygidGFibGUiKSAlPiUKICAjIEV4dHJhY3QgdGhlIGZpcnN0IHRhYmxlCiAgZXh0cmFjdDIoMSkgJT4lCiAgaHRtbF90YWJsZShoZWFkZXIgPSBUKSAlPiUKICAjIE1pc3NpbmcgdmFyaWFibGUgbmFtZXMgc28gZmlsbCBpbgogIGFzX3RpYmJsZSgubmFtZV9yZXBhaXIgPSAidW5pcXVlIikgJT4lCiAgIyBHZXQgb2YgcmlkIG9mIGNvbW1hcwogIG11dGF0ZV9pZihpcy5jaGFyYWN0ZXIsIGZ1bmN0aW9uKHgpIHsgZ3N1YigiWyw6XSIsICIiLCB4KSB9KSAlPiUKICAjIHR1cm4gY29sdW1ucyBleGNlcHQgY291bnRyeSBpbnRvIG51bWVyaWMKICBtdXRhdGVfYXQodmFycygtQ291bnRyeSksIGFzLm51bWVyaWMpICU+JQogICMgcmVuYW1lIFNvdXRoIEtvcmVhIHRvIFMuIEtvcmVhCiAgbXV0YXRlKENvdW50cnk9aWZfZWxzZShDb3VudHJ5ID09ICJTb3V0aCBLb3JlYSIsICJTLiBLb3JlYSIsIENvdW50cnkpKQoKKGpvaW5fZGF0YSA8LSBjb3JfZGF0YSAlPiUKICBsZWZ0X2pvaW4odGVzdHNfZGF0YSwgYnk9YygiQ291bnRyeSxPdGhlciIgPSAiQ291bnRyeSIpKSkKYGBgCiAgCiMgQW5hbHlzaXMKCiMjIENydWRlIG1vcnRhbGl0eSByYXRlCgpgYGB7cn0Kam9pbl9kYXRhICU+JQogIGZpbHRlcighaXMubmEoUG9wdWxhdGlvbikpICU+JQogIG11dGF0ZShgQ3J1ZGUgbW9ydGFsaXR5IHJhdGVgPSAxMDAgKiBUb3RhbERlYXRocyAvIFBvcHVsYXRpb24pICU+JQogIGdncGxvdChhZXMoYENydWRlIG1vcnRhbGl0eSByYXRlYCwKICAgICAgICAgICAgIHJlb3JkZXIoYENvdW50cnksT3RoZXJgLCBgQ3J1ZGUgbW9ydGFsaXR5IHJhdGVgKSwpKSArCiAgZ2VvbV9wb2ludCgpICsgeWxhYigiQ291bnRyeSIpICsKICBzY2FsZV94X2NvbnRpbnVvdXMobGFiZWxzID0gZnVuY3Rpb24oeCkgZm9ybWF0KHgsIHNjaWVudGlmaWMgPSBGQUxTRSkpCmBgYAoKIyMgUG9zaXRpdmUgdGVzdCByYXRlCgpgYGB7cn0Kam9pbl9kYXRhICU+JQogIGZpbHRlcighaXMubmEoUG9wdWxhdGlvbikpICU+JQogIG11dGF0ZShgUG9zaXRpdmUgdGVzdCByYXRlYD0gMTAwICogVG90YWxDYXNlcyAvIGBUZXN0cyBQZXJmb3JtZWRgKSAlPiUKICBnZ3Bsb3QoYWVzKGBQb3NpdGl2ZSB0ZXN0IHJhdGVgLAogICAgICAgICAgICAgcmVvcmRlcihgQ291bnRyeSxPdGhlcmAsIGBQb3NpdGl2ZSB0ZXN0IHJhdGVgKSwKICAgICAgICAgICAgIGNvbG9yPWBUZXN0cyBwZXIgTWlsbGlvbiBQZW9wbGVgKSkgKwogIGdlb21fcG9pbnQoKSArCiAgc2NhbGVfeF9jb250aW51b3VzKGxhYmVscyA9IGZ1bmN0aW9uKHgpIGZvcm1hdCh4LCBzY2llbnRpZmljID0gRkFMU0UpKSArCiAgeWxhYigiQ291bnRyeSIpCmBgYAoKIyMgQ2FzZSBmYXRhbGl0eSByYXRlIGJ5IHRlc3Qgc2F0dXJhdGlvbgoKT25lIG1pc2xlYWRpbmcgc3RhdCB3ZSBvZnRlbiBzZWUgYXJlIG1vcnRhbGl0eSByYXRlcyBiYXNlZCBvbiBub24tdW5pZm9ybSB0ZXN0aW5nLiAgSW4gc29tZSBjb3VudHJpZXMsIHRlc3RpbmcgY2FwYWJpbGl0aWVzIGFyZSBsaW1pdGVkIHNvIG9ubHkgdGhlIHZlcnkgc2ljayBhcmUgYmVpbmcgdGVzdGVkLiAgSWYgeW91J3JlIG9ubHkgdGVzdGluZyB0aGUgdmVyeSBzaWNrIGl0IHdpbGwgZ2l2ZSB5b3UgYSBtb3J0YWxpdHkgcmF0ZSB0aGF0IGlzIGJpYXNlZCB1cHdhcmQuICBCZWxvdyB3ZSB0YWtlIGEgbG9vayBhdCB0aGUgbW9ydGFpbHR5IHJhdGUgKGFzIGRlZmluZWQgYnkgbnVtYmVyIG9mIGRlYXRocyAvIG51bWJlciBvZiBwb3NpdGl2ZSBjYXNlcykgYnkgdGhlIGFtb3VudCBvZiB0ZXN0cyBwZXIgbWlsbGlvbiBwZW9wbGUgd2l0aGluIGEgY291bnRyeS4KICAKYGBge3J9CmpvaW5fZGF0YSAlPiUKICBzZWxlY3QoCiAgICBgVGVzdHMgcGVyIE1pbGxpb24gUGVvcGxlYCwgYENhc2UgRmF0YWxpdHkgcmF0ZWAsIGBDb3VudHJ5LE90aGVyYCwgUG9wdWxhdGlvbiwKICAgIFRvdGFsQ2FzZXMpICU+JQogIGZpbHRlcihjb21wbGV0ZS5jYXNlcyguKSkgJT4lIAogIGZpbHRlcihgQ2FzZSBGYXRhbGl0eSByYXRlYCA+IDApICU+JQogIG11dGF0ZShsb3dfaGlnaCA9IGlmX2Vsc2UoYFRlc3RzIHBlciBNaWxsaW9uIFBlb3BsZWAgPCAxMDAwLCAibG93IiwgImhpZ2giKSkgJT4lCiAgZ2dwbG90KGFlcyhgVGVzdHMgcGVyIE1pbGxpb24gUGVvcGxlYCwgYENhc2UgRmF0YWxpdHkgcmF0ZWAsIGNvbG9yPVRvdGFsQ2FzZXMsCiAgICAgICAgICAgICBncm91cD1sb3dfaGlnaCkpICsKICBnZW9tX3BvaW50KCkgKwogIGdlb21fbGluZShzdGF0PSJzbW9vdGgiLCBtZXRob2QgPSAibG0iLCBhbHBoYT0wLjMpICsKICAjZ2VvbV9obGluZSh5aW50ZXJjZXB0PTEsIGFscGhhPTAuNSwgY29sb3I9ImdyZWVuIikgKwogIGdlb21fdGV4dChhZXMobGFiZWw9YENvdW50cnksT3RoZXJgKSxoanVzdD0wLCB2anVzdD0yLCBzaXplPTMpICsKICAjZ2VvbV9saW5lKHN0YXQ9InNtb290aCIsIG1ldGhvZD0ibG0iLCBhbHBoYT0wLjIsIGNvbG9yPSJibHVlIikgKwogIHlsaW0oMCwgNykKYGBgCgo=