# Today is:
Sys.time()
## [1] "2020-09-10 18:49:58 +07"
library(dplyr, warn.conflicts=F, quietly=T)
library(ggplot2, warn.conflicts=F, quietly=T)
library(plotly, warn.conflicts=F, quietly=T)
# set.seed(1234)
# Download the Our World in Data COVID-19 CSV and load it into `covid`.
filename <- "covid.csv"
# Re-enable the guard below to reuse an existing local copy instead of
# downloading the file on every run.
#if (!file.exists(filename)) {
download.file(
  "https://covid.ourworldindata.org/data/owid-covid-data.csv",
  destfile = filename,
  quiet = TRUE
)
#}
# Read from `filename` so the download target and the input cannot drift
# apart. `stringsAsFactors = FALSE` keeps `location` a character column on
# R < 4.0 as well; later code indexes a named vector by location name.
covid <- read.csv(filename, header = TRUE, stringsAsFactors = FALSE)
# Convert the date column to Date so it can be compared and subtracted.
covid$date <- as.Date(covid$date)
# Fill the missing values of one column of the global `covid` data frame.
#
# For every row whose `label` value is NA, the replacement is the midpoint
# between
#   * the largest value recorded for the same location on an earlier date
#     (floored at 0), and
#   * the smallest value recorded for the same location on a later date
#     (capped at the column-wide maximum).
# If that midpoint is itself NA/NaN, 0 is used. Non-missing values are
# returned unchanged.
#
# label: name of a column of `covid` (character scalar).
# Returns a vector with one element per row of `covid`; an empty vector when
# `covid` has no rows.
fn.fix <- function(label) {
  stopifnot(label %in% names(covid))

  values <- covid[[label]]
  locations <- covid$location
  dates <- covid$date

  fixed <- values
  missing_rows <- which(is.na(values))
  if (length(missing_rows) == 0L) {
    return(fixed)
  }

  # Loop-invariant upper cap; computed once instead of once per missing row.
  overall_max <- max(values, na.rm = TRUE)

  # Only rows that are actually missing need any work.
  for (n in missing_rows) {
    same_location <- locations == locations[n]
    # which() drops NA comparisons, matching what subset() did.
    earlier <- values[which(same_location & dates < dates[n])]
    later <- values[which(same_location & dates > dates[n])]

    midpoint <- mean(c(
      max(0, earlier, na.rm = TRUE),
      min(overall_max, later, na.rm = TRUE)
    ))
    fixed[n] <- if (is.na(midpoint)) 0 else midpoint
  }

  fixed
}
# Add a gap-filled copy of total_cases, then print Thailand's rows with a
# positive filled value alongside the raw column for comparison.
covid[["total_cases_fixed"]] <- fn.fix("total_cases")
covid[covid$location == "Thailand" & covid$total_cases_fixed > 0, ] %>%
  select(location, date, total_cases, total_cases_fixed)

# Same treatment for the per-million columns; print the deaths comparison.
covid[["total_cases_per_million_fixed"]] <- fn.fix("total_cases_per_million")
covid[["total_deaths_per_million_fixed"]] <- fn.fix("total_deaths_per_million")
covid[
  covid$location == "Thailand" & covid$total_deaths_per_million_fixed > 0,
] %>%
  select(
    location, date, total_deaths_per_million, total_deaths_per_million_fixed
  )
# One row per distinct location; per-location summaries are added later.
location_lookup <- distinct(covid, location)
# Per-location maximum of one `covid` column, floored at 0.
#
# Only rows with `new_cases > 0` contribute; NA values are ignored. A
# location with no contributing rows gets 0.
#
# label: name of a column of the global `covid` data frame.
# Returns a numeric vector with one element per entry of
# `location_lookup$location` (named by location when that column is
# character); an empty numeric vector when there are no locations.
fn.create_lookup <- function(label) {
  values <- covid[[label]]
  # Loop-invariant mask, built once rather than once per location.
  has_new_cases <- covid$new_cases > 0

  vapply(
    location_lookup$location,
    function(loc) {
      max(0, values[covid$location == loc & has_new_cases], na.rm = TRUE)
    },
    numeric(1)
  )
}
# Attach the per-location maxima to the lookup table.
location_lookup[["max_total_cases"]] <- fn.create_lookup("total_cases")
location_lookup[["max_total_cases_per_mil"]] <-
  fn.create_lookup("total_cases_per_million")
location_lookup[["max_total_deaths_per_mil"]] <-
  fn.create_lookup("total_deaths_per_million")

# Print the Thailand row.
location_lookup[location_lookup[["location"]] == "Thailand", ]
# For every location, the first date on which total_cases_fixed reached 100,
# stored as a day number (as.numeric() of the Date) and named by location.
# Locations that never reached 100 get NA.
#
# The day-number representation is deliberate: simplifying a list of Date
# values to a vector drops the Date class anyway, so it is made explicit
# here and the result is always a numeric vector.
date_over_100_lookup <- vapply(
  as.character(location_lookup$location),
  function(loc) {
    dates <- covid$date[
      covid$location == loc & covid$total_cases_fixed >= 100
    ]
    if (length(dates) > 0) {
      as.numeric(min(dates))
    } else {
      NA_real_
    }
  },
  numeric(1)
)
# Number of days between each row's date and the first day its location
# reached 100 cases (looked up by location name in date_over_100_lookup).
# Rows before that day, and rows of locations without such a day, get NA.
covid$day_over_100 <- local({
  # as.character() keeps the lookup name-based even if location is a factor.
  first_day <- unname(date_over_100_lookup[as.character(covid$location)])
  elapsed <- as.numeric(covid$date) - as.numeric(first_day)
  elapsed[!is.na(elapsed) & elapsed < 0] <- NA
  elapsed
})
# Print the first retained row of every location that has a day_over_100
# value, ordered by date.
covid[!is.na(covid$day_over_100), ] %>%
  select(location, date, day_over_100) %>%
  distinct(location, .keep_all = TRUE) %>%
  arrange(date)
# Descending rank of a `location_lookup` column: the largest value gets
# rank 1. Ties receive rank()'s default (averaged) rank.
#
# label: name of a column of the global `location_lookup` data frame.
# Returns a numeric vector with one rank per row of `location_lookup`.
fn.create_nth_lookup <- function(label) {
  scores <- location_lookup[[label]]
  ascending_rank <- rank(scores)
  length(scores) - ascending_rank + 1
}
# Rank locations by cases and by deaths per million (rank 1 = highest).
location_lookup[["nth_cases"]] <-
  fn.create_nth_lookup("max_total_cases_per_mil")
location_lookup[["nth_deaths"]] <-
  fn.create_nth_lookup("max_total_deaths_per_mil")

# Locations ordered by highest total cases per million.
location_lookup %>%
  arrange(desc(max_total_cases_per_mil)) %>%
  select(location, max_total_cases_per_mil)

# Locations ordered by highest total deaths per million.
location_lookup %>%
  arrange(desc(max_total_deaths_per_mil)) %>%
  select(location, max_total_deaths_per_mil)

# Thailand's ranks and maxima.
location_lookup %>%
  subset(location == "Thailand") %>%
  select(
    location, nth_cases, nth_deaths,
    max_total_cases_per_mil, max_total_deaths_per_mil
  )

# Locations ordered by the sum of the two ranks (smallest sum first).
location_lookup %>%
  arrange(nth_cases + nth_deaths) %>%
  select(
    location, nth_cases, nth_deaths,
    max_total_cases_per_mil, max_total_deaths_per_mil
  )

# Locations ordered by absolute total cases, leaving out the "World" row.
location_lookup %>%
  filter(location != "World") %>%
  arrange(desc(max_total_cases)) %>%
  select(
    location, nth_cases, nth_deaths,
    max_total_cases_per_mil, max_total_deaths_per_mil
  )
# Interactive log-scale line chart of new cases per million against the
# number of days since each location passed 100 cases. Shown for the ten
# locations with the highest total cases per million, plus Thailand.
# Rows with no day_over_100 or without positive new_cases are left out.
ggplotly(
  ggplot(
    covid %>% subset(
      !is.na(day_over_100) &
        new_cases > 0 &
        (
          # head() returns fewer than ten names when fewer locations exist,
          # instead of padding the set with NA.
          location %in% head(
            arrange(location_lookup, desc(max_total_cases_per_mil))$location,
            10
          ) |
            location == "Thailand"
        )
    ),
    aes(day_over_100, new_cases_per_million, color = location)
  ) +
    geom_line() +
    scale_y_log10() +
    labs(
      title = "New cases per million since day over 100 (highest total cases per million)",
      x = "Day over 100",
      y = "Number of new cases per million"
    )
)
# Interactive log-scale line chart of new deaths per million against the
# number of days since each location passed 100 cases. Shown for the ten
# locations with the highest total deaths per million, plus Thailand.
# Rows with no day_over_100 or without positive new_deaths are left out.
ggplotly(
  ggplot(
    covid %>% subset(
      !is.na(day_over_100) &
        new_deaths > 0 &
        (
          # head() returns fewer than ten names when fewer locations exist,
          # instead of padding the set with NA.
          location %in% head(
            arrange(location_lookup, desc(max_total_deaths_per_mil))$location,
            10
          ) |
            location == "Thailand"
        )
    ),
    aes(day_over_100, new_deaths_per_million, color = location)
  ) +
    geom_line() +
    scale_y_log10() +
    labs(
      title = "New deaths per million since day over 100 (highest total deaths per million)",
      x = "Day over 100",
      y = "Number of new deaths per million"
    )
)