1 Load libraries

library(tidyverse)
library(rvest)

2 Input data - Worldometer

The data is scraped from a table on the Worldometer coronavirus site.

Why yesterday? Because today’s data is incomplete: not all countries update their data at the same time.

# Download the Worldometer page and pull out the HTML table whose id is
# "main_table_countries_yesterday", returning it as a tibble.
covid_yesterday <- local({
  page <- read_html("https://www.worldometers.info/coronavirus/#countries")
  yesterday_node <- html_node(
    page,
    xpath = '//*[@id="main_table_countries_yesterday"]'
  )
  as_tibble(html_table(yesterday_node))
})

3 Clean and analyze data

# Take the 20 rows with the most new cases yesterday and rank them by new
# cases per million inhabitants. The result is printed, not stored.
covid_yesterday %>%
  janitor::clean_names() %>%
  # Keep only rows that have a value in the `number` column.
  # NOTE(review): presumably this is the table's "#" rank column, which is
  # empty on aggregate (continent/world) rows -- confirm against the page.
  filter(!is.na(number)) %>%
  select(country_other, new_cases, population) %>%
  # Convert the scraped text values to numbers.
  mutate(across(c(new_cases, population), parse_number)) %>%
  # slice_max() replaces the superseded top_n(); ties are kept by default.
  slice_max(new_cases, n = 20) %>%
  mutate(
    # Per *million*: the previous factor of 100000 produced cases per 100k
    # while the column is named (and read as) per 1M.
    new_per_1m = new_cases / population * 1e6,
    # Population expressed in millions, rounded to two decimals.
    pop_m = round(population / 1e6, 2)
  ) %>%
  select(-population) %>%
  arrange(desc(new_per_1m))

4 Input data - Turkey Ministry of Health

The table on the COVID-19 information page of the Turkish Ministry of Health will be imported.

# NOTE: the website was down when this was written, so this section was
# skipped.
# Load the Ministry of Health page in a PhantomJS-driven browser session and
# parse every HTML table found in the page source.
library(webdriver)
tr_covid_url <- "https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html"

pjs <- run_phantomjs()
ses <- Session$new(port = pjs$port)
ses$go(tr_covid_url)

# dec = "," : cells use a comma as the decimal mark when converted.
tables <- html_table(read_html(ses$getSource()), dec = ",")

# Number locale for the scraped figures: "," is the decimal mark and "." the
# grouping mark.
tr_locale <- readr::locale(decimal_mark = ",", grouping_mark = ".")

# Build the daily case series from the first scraped table: keep the date and
# daily case count, parse both, and drop rows without a case count.
daily_tr <- tables[[1]] %>%
  janitor::clean_names() %>%
  select(date, number_of_cases_today) %>%
  mutate(
    number_of_cases_today = parse_number(
      number_of_cases_today,
      locale = tr_locale
    ),
    date = parse_date(date, "%b  %d,  %Y")
  ) %>%
  filter(!is.na(number_of_cases_today)) %>%
  as_tibble()

The daily case data starts on 2020-11-25, so let’s get better data.

# Line chart of the daily case counts over time.
ggplot(daily_tr, aes(x = date, y = number_of_cases_today)) +
  geom_line()

5 Johns Hopkins data

# Countries to compare, spelled as they appear in the `country_region` column.
select_countries <- c("Turkey", "Brazil", "US", "Poland", "Germany", "France")

# Global confirmed-cases time series from the CSSE GitHub repository, with
# column names normalised by janitor::clean_names().
initial <- janitor::clean_names(
  read_csv(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
  )
)

# Plot daily new confirmed cases for the selected countries, derived from the
# cumulative counts in `initial`.
p <- initial %>%
  filter(country_region %in% select_countries) %>%
  select(country_region, starts_with("x")) %>%
  pivot_longer(-country_region, names_to = "date", values_to = "cases") %>%
  # Rebuild dates from the cleaned column names: drop the "x" prefix and turn
  # "_" back into "/" so the text parses as month/day/two-digit-year.
  mutate(
    date = date %>%
      str_remove("x") %>%
      str_replace_all("_", "/") %>%
      parse_date("%m/%d/%y")
  ) %>%
  # A country may occupy several rows in the source file (for example one per
  # province or territory). Collapse them to a single cumulative series per
  # country so each country is drawn as one coherent line.
  group_by(country_region, date) %>%
  summarise(cases = sum(cases), .groups = "drop") %>%
  # Difference within each country, in date order. An ungrouped lag() would
  # subtract the previous country's last cumulative value from the first
  # observation of the next one.
  group_by(country_region) %>%
  arrange(date, .by_group = TRUE) %>%
  mutate(new_case = cases - lag(cases)) %>%
  ungroup() %>%
  # Drop the undefined first day, non-positive differences, and implausibly
  # large one-day jumps.
  filter(!is.na(new_case), new_case > 0, new_case < 400000) %>%
  ggplot(aes(date, new_case, color = country_region)) +
  geom_line()
library(plotly)

# Convert the ggplot object `p` into an interactive plotly chart and display it.
ggplotly(p)