library(tidyverse)
library(rvest)

# The data is scraped from a table on the Worldometer coronavirus site.
# Why yesterday's table? Because today's data is incomplete: not all countries
# update their data at the same time.
# Scrape the "yesterday" per-country table from the Worldometer page and keep
# it as a tibble.
covid_yesterday <- read_html("https://www.worldometers.info/coronavirus/#countries") %>%
  html_node(xpath = '//*[@id="main_table_countries_yesterday"]') %>%
  html_table() %>%
  as_tibble()

# Top 20 countries by new cases, ordered by new cases per million inhabitants.
covid_yesterday %>%
  janitor::clean_names() %>%
  # Keep only rows that carry a rank number
  # (presumably this drops continent/world summary rows -- verify on the page).
  filter(!is.na(number)) %>%
  select(country_other, new_cases, population) %>%
  # Convert the formatted text columns to numbers.
  mutate(across(c(new_cases, population), parse_number)) %>%
  # slice_max() replaces the superseded top_n(); ties are kept, as before.
  slice_max(new_cases, n = 20) %>%
  mutate(
    # Per-million rate: scale by 1e6 so the value matches the column name.
    new_per_1m = new_cases / population * 1e6,
    # Population expressed in millions.
    pop_m = round(population / 1e6, 2)
  ) %>%
  select(-population) %>%
  arrange(desc(new_per_1m))

# Next, the table on the COVID-19 info page of the Turkish Ministry of Health
# will be imported.
# Website down, so skipping.
# NOTE(review): nothing below is actually guarded, so this section still runs
# when the script is sourced -- confirm whether it should be disabled.
library(webdriver)

# Start a PhantomJS process and open a browser session on its port.
pjs <- run_phantomjs()
ses <- Session$new(port = pjs$port)

tr_covid_url <- "https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html"
ses$go(tr_covid_url)

# Parse every HTML table found in the page source returned by the session;
# dec = "," makes html_table() treat the comma as the decimal mark.
tables <- ses$getSource() %>%
  read_html() %>%
  html_table(dec = ",")

# Number format used when parsing: comma as decimal mark, dot as grouping mark.
tr_locale <- readr::locale(decimal_mark = ",", grouping_mark = ".")

# Daily case counts from the first table on the page.
daily_tr <- tables[[1]] %>%
  janitor::clean_names() %>%
  select(date, number_of_cases_today) %>%
  mutate(
    number_of_cases_today = parse_number(number_of_cases_today, locale = tr_locale),
    date = parse_date(date, "%b %d, %Y")
  ) %>%
  # Drop rows where no daily case count could be parsed.
  filter(!is.na(number_of_cases_today)) %>%
  as_tibble()

# Daily case data start on 2020-11-25, so let's get better data.
# Line chart of the daily case counts held in daily_tr.
daily_tr %>%
  ggplot(aes(date, number_of_cases_today)) +
  geom_line()

# Load the global confirmed-cases time series from the CSSEGISandData COVID-19
# repository and normalise the column names.
initial <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv") %>%
  janitor::clean_names()

# Countries compared in the plot that follows.
select_countries <- c("Turkey", "Brazil", "US", "Poland", "Germany", "France")
# Daily new confirmed cases for the selected countries, one line per country.
p <- initial %>%
  filter(country_region %in% select_countries) %>%
  select(country_region, starts_with("x")) %>%
  pivot_longer(-country_region, names_to = "date", values_to = "cases") %>%
  # The date columns arrive as names of the form "x<m>_<d>_<yy>"; strip the
  # leading "x", restore the slashes and parse them as dates.
  mutate(
    date = date %>%
      str_remove("x") %>%
      str_replace_all("_", "/") %>%
      parse_date("%m/%d/%y")
  ) %>%
  # A country may appear on several rows (e.g. one per province/territory --
  # verify against the source file); sum them so each country has exactly one
  # cumulative value per date.
  group_by(country_region, date) %>%
  summarise(cases = sum(cases), .groups = "drop") %>%
  # Difference the cumulative series within each country, in date order, so
  # lag() never reaches across a country boundary.
  group_by(country_region) %>%
  arrange(date, .by_group = TRUE) %>%
  mutate(new_case = cases - lag(cases)) %>%
  ungroup() %>%
  # Drop the first day of each series (no previous value), non-positive
  # differences, and values of 400000 or more
  # (presumably reporting artefacts -- confirm the cutoff).
  filter(!is.na(new_case), new_case > 0, new_case < 400000) %>%
  ggplot(aes(date, new_case, color = country_region)) +
  geom_line()

# Render the ggplot object as an interactive plotly chart.
library(plotly)
ggplotly(p)