library(magrittr) # pipe operations
library(lubridate) # date operations
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(tidyverse) # ggplot2, tidyr, dplyr...
## -- Attaching packages ------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.5
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ---------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x tidyr::extract()         masks magrittr::extract()
## x dplyr::filter()          masks stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x purrr::set_names()       masks magrittr::set_names()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()
library(gridExtra) # multiple grid-based plots on a page
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggforce) # accelerating ggplot2
library(kableExtra) # complex tables
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(leaflet) ## map

## load the three raw time series (confirmed, deaths, recovered) into R;
## all three files live in the same remote directory
jhu.dir <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'
data.confirmed <- read.csv(paste0(jhu.dir, 'time_series_19-covid-Confirmed.csv'))
data.deaths <- read.csv(paste0(jhu.dir, 'time_series_19-covid-Deaths.csv'))
data.recovered <- read.csv(paste0(jhu.dir, 'time_series_19-covid-Recovered.csv'))
## number of rows and columns of the confirmed table
dim(data.confirmed)
## [1] 477  63
## preview: first 10 rows and first 10 columns of the confirmed table
kable(data.confirmed[1:10, 1:10], 'pandoc', booktabs=TRUE,
      caption='Raw Data (Confirmed, First 10 Columns only)') %>%
  kable_styling(font_size=6, latex_options=c('striped', 'hold_position', 'repeat_header'))
## Warning in kable_styling(., font_size = 6, latex_options = c("striped", : Please
## specify format in kable. kableExtra can customize either HTML or LaTeX outputs.
## See https://haozhu233.github.io/kableExtra/ for details.
Raw Data (Confirmed, First 10 Columns only)
Province.State Country.Region Lat Long X1.22.20 X1.23.20 X1.24.20 X1.25.20 X1.26.20 X1.27.20
Thailand 15.0000 101.0000 2 3 5 7 8 8
Japan 36.0000 138.0000 2 1 2 2 4 4
Singapore 1.2833 103.8333 0 1 3 3 4 5
Nepal 28.1667 84.2500 0 0 0 1 1 1
Malaysia 2.5000 112.5000 0 0 0 3 4 4
British Columbia Canada 49.2827 -123.1207 0 0 0 0 0 0
New South Wales Australia -33.8688 151.2093 0 0 0 0 3 4
Victoria Australia -37.8136 144.9631 0 0 0 0 1 1
Queensland Australia -28.0167 153.4000 0 0 0 0 0 0
Cambodia 11.5500 104.9167 0 0 0 0 0 1
n.col <- ncol(data.confirmed)
## get dates from column names
## The date columns (5th column onwards) are named like 'X1.22.20'
## (month.day.year with an 'X' prefix). Strip only the leading 'X' before
## parsing: a fixed-width substr(2, 8) would cut off the end of names with a
## two-digit month and day (e.g. 'X10.15.20' -> '10.15.2').
dates <- names(data.confirmed)[5:n.col] %>% sub('^X', '', .) %>% mdy()
range(dates)
## [1] "2020-01-22" "2020-03-20"
min.date <- min(dates)
max.date <- max(dates)
## latest date as display text, reused in plot titles and table captions
max.date.txt <- max.date %>% format('%d %B %Y')

## select last column, which is the number of latest confirmed cases
x <- data.confirmed
x$confirmed <- x[, ncol(x)]
## Keep only locations that have reported at least one case: log2(0) is -Inf,
## which would produce an invalid marker radius further down.
x %<>% select(c(Country.Region, Province.State, Lat, Long, confirmed)) %>%
  filter(confirmed > 0) %>%
  mutate(txt=paste0(Country.Region, ' - ', Province.State, ': ', confirmed))
## base map with the default tile layer
m <- leaflet(width=1200, height=800) %>% addTiles()
# circle marker (units in pixels); the radius grows with log2 of the case count
m %<>% addCircleMarkers(x$Long, x$Lat,
                        radius=2+log2(x$confirmed), stroke=FALSE,
                        color='red', fillOpacity=0.5,
                        popup=x$txt)
## each call below renders the same map centred on a different region
# world
m
## China
m %>% setView(95, 35, zoom=4)
## Australia and New Zealand
m %>% setView(135, -27, zoom=4)
## US and Canada
m %>% setView(-105, 40, zoom=4)
## Europe
m %>% setView(10, 50, zoom=4)
## Brazil
m %>% setView(-39, -25, zoom=4)
## data cleaning and transformation
##
## Reshape one raw wide table (one row per location, one column per date)
## into a long data frame of daily counts per country.
##
## data:    data frame with columns Province.State, Country.Region, Lat and
##          Long, plus one count column per date named like 'X1.22.20'
## returns: a plain data frame with columns `country`, `date` (parsed with
##          mdy()) and `count` (sum over all rows of that country on that
##          date, ignoring NAs)
cleanData <- function(data) {
  ## remove some columns
  data %<>% select(-c(Province.State, Lat, Long)) %>% rename(country=Country.Region)
  ## convert from wide to long format
  data %<>% gather(key=date, value=count, -country)
  ## Parse the date from the former column name by stripping only the leading
  ## 'X'. A fixed-width substr(2, 8) cuts off the end of names with a
  ## two-digit month and day (e.g. 'X10.15.20' -> '10.15.2').
  data %<>% mutate(date = mdy(sub('^X', '', date)))
  ## aggregate by country
  data %<>% group_by(country, date) %>%
    summarise(count=sum(count, na.rm=TRUE)) %>%
    as.data.frame()
  data
}
## clean the three datasets and name each count column after its measure
data.confirmed <- rename(cleanData(data.confirmed), confirmed=count)
data.deaths <- rename(cleanData(data.deaths), deaths=count)
data.recovered <- rename(cleanData(data.recovered), recovered=count)
## merge above 3 datasets into one; merge() joins on the columns the tables
## share, which here are country and date
data <- merge(merge(data.confirmed, data.deaths), data.recovered)
## countries/regions with confirmed cases, excl. cruise ships
countries <- setdiff(pull(data, country), 'Cruise Ship')
## last 25 daily records for Brazil
## NOTE(review): the caption below still reads "last 10 Columns" although the
## table shows the last 25 rows -- confirm the intended wording
data %>%
  filter(country=='Brazil') %>%
  tail(25) %>%
  kable('pandoc', booktabs=TRUE,
        caption='Raw Data (with last 10 Columns Only)',
        format.args=list(big.mark=',')) %>%
  kable_styling(latex_options=c('striped', 'hold_position', 'repeat_header'))
## Warning in kable_styling(., latex_options = c("striped", "hold_position", :
## Please specify format in kable. kableExtra can customize either HTML or LaTeX
## outputs. See https://haozhu233.github.io/kableExtra/ for details.
Raw Data (with last 10 Columns Only)
country date confirmed deaths recovered
35 Brazil 2020-02-25 0 0 0
36 Brazil 2020-02-26 1 0 0
37 Brazil 2020-02-27 1 0 0
38 Brazil 2020-02-28 1 0 0
39 Brazil 2020-02-29 2 0 0
40 Brazil 2020-03-01 2 0 0
41 Brazil 2020-03-02 2 0 0
42 Brazil 2020-03-03 2 0 0
43 Brazil 2020-03-04 4 0 0
44 Brazil 2020-03-05 4 0 0
45 Brazil 2020-03-06 13 0 0
46 Brazil 2020-03-07 13 0 0
47 Brazil 2020-03-08 20 0 0
48 Brazil 2020-03-09 25 0 0
49 Brazil 2020-03-10 31 0 0
50 Brazil 2020-03-11 38 0 0
51 Brazil 2020-03-12 52 0 0
52 Brazil 2020-03-13 151 0 0
53 Brazil 2020-03-14 151 0 0
54 Brazil 2020-03-15 162 0 0
55 Brazil 2020-03-16 200 0 1
56 Brazil 2020-03-17 321 1 2
57 Brazil 2020-03-18 372 3 2
58 Brazil 2020-03-19 621 6 2
59 Brazil 2020-03-20 793 11 2
## counts for the whole world: add up every country's counts per date and
## label the result as a pseudo-country named 'World'
data.world <- summarise(group_by(data, date),
                        country='World',
                        confirmed=sum(confirmed),
                        deaths=sum(deaths),
                        recovered=sum(recovered))
## append the world rows to the per-country rows
data <- rbind(data, data.world)
## confirmed cases that are neither recovered nor dead
data <- mutate(data, remaining.confirmed=confirmed - deaths - recovered)


## sort by country and date
data <- arrange(data, country, date)
## daily increases of confirmed, deaths and recovered cases
## set NA to the increases on day1
n <- nrow(data)
day1 <- min(data$date)
## difference to the previous row; rows on the first date get NA because
## their previous row belongs to another country
daily.increase <- function(total, date) {
  ifelse(date == day1, NA, total - lag(total, n=1))
}
data <- mutate(data,
               new.confirmed = daily.increase(confirmed, date),
               new.deaths = daily.increase(deaths, date),
               new.recovered = daily.increase(recovered, date))
## check for decreases in confirmed, recovered and deaths
# idx <- which(data$new.confirmed < 0)
# idx2 <- idx - 1
# data[c(idx, idx2), ] %>% arrange(country, date)
# idx <- which(data$new.recovered < 0)
# idx2 <- idx - 1
# data[c(idx, idx2), ] %>% arrange(country, date)
# idx <- which(data$new.deaths < 0)
# idx2 <- idx - 1
# data[c(idx, idx2), ] %>% arrange(country, date)
## change negative number of new cases to zero
zero.if.negative <- function(v) ifelse(v < 0, 0, v)
data <- mutate(data,
               new.confirmed = zero.if.negative(new.confirmed),
               new.deaths = zero.if.negative(new.deaths),
               new.recovered = zero.if.negative(new.recovered))
## three death rates (in percent, rounded to one decimal):
##   rate.upper - total deaths over closed cases (deaths + recovered)
##   rate.lower - total deaths over total confirmed cases
##   rate.daily - new deaths over the new closed cases of that single day
data <- mutate(data,
               rate.upper = round(100 * deaths / (deaths + recovered), 1),
               rate.lower = round(100 * deaths / confirmed, 1),
               rate.daily = round(100 * new.deaths / (new.deaths + new.recovered), 1))

## ranking by confirmed cases: keep only the rows of the latest date and
## rank them by cumulative confirmed count (rank 1 = most cases)
data.latest <- filter(data, date == max(date))
data.latest <- select(data.latest, country, date,
                      confirmed, new.confirmed, remaining.confirmed,
                      recovered, deaths, new.deaths)
data.latest <- mutate(data.latest, ranking = dense_rank(desc(confirmed)))


## number of top countries to show
k.top <- 25
## the cut-off is k.top + 1 because the 'World' row is ranked as well
top.countries <- data.latest %>%
  arrange(ranking) %>%
  filter(ranking <= k.top + 1) %>%
  pull(country) %>%
  as.character()
print(setdiff(top.countries, 'World'))
##  [1] "China"          "Italy"          "Spain"          "Germany"       
##  [5] "Iran"           "US"             "France"         "Korea, South"  
##  [9] "Switzerland"    "United Kingdom" "Netherlands"    "Austria"       
## [13] "Belgium"        "Norway"         "Sweden"         "Denmark"       
## [17] "Malaysia"       "Portugal"       "Japan"          "Canada"        
## [21] "Czechia"        "Brazil"         "Australia"      "Cruise Ship"   
## [25] "Israel"
## add 'Others'
top.countries %<>% c('Others')
## put all others in a single group of 'Others'; the factor levels follow
## top.countries so that countries are shown in ranking order
df <- data.latest %>% filter(!is.na(country)) %>%
  mutate(country=ifelse(ranking <= k.top + 1, as.character(country), 'Others')) %>%
  mutate(country=country %>% factor(levels=c(top.countries)))
## one row per group; only 'Others' actually aggregates several countries
df %<>% group_by(country) %>%
  summarise(confirmed=sum(confirmed), new.confirmed=sum(new.confirmed),
            remaining.confirmed=sum(remaining.confirmed),
            recovered=sum(recovered), deaths=sum(deaths), new.deaths=sum(new.deaths)) %>%
  mutate(death.rate=(100 * deaths/confirmed) %>% round(1))
df %<>% select(c(country, confirmed, deaths, death.rate,
                 new.confirmed, new.deaths, remaining.confirmed))
## convert from wide to long format, for drawing area plots
df.long <- df %>% filter(country!='World') %>%
  gather(key=type, value=count, -country)
## set factor levels to show them with proper text and in a desirable order
df.long %<>% mutate(type=recode_factor(type,
                                       confirmed='Total Confirmed',
                                       deaths='Total Deaths',
                                       death.rate='Death Rate (%)',
                                       new.confirmed='New Confirmed (compared with one day before)',
                                       new.deaths='New Deaths (compared with one day before)',
                                       remaining.confirmed='Remaining Confirmed'))
## bar chart: one facet per measure, one bar per country
## - the title takes the number of countries from k.top instead of a
##   hard-coded figure
## - the fill scale no longer passes labels=df$txt: df has no `txt` column,
##   so that argument was NULL and only triggered a warning
df.long %>% ggplot(aes(x=country, y=count, fill=country, group=country)) +
  geom_bar(stat='identity') +
  geom_text(aes(label=count, y=count), size=2, vjust=0) +
  xlab('') + ylab('') +
  labs(title=paste0('Top ', k.top, ' Countries with Most Confirmed Cases - ', max.date.txt)) +
  scale_fill_discrete(name='Country') +
  theme(legend.title=element_blank(),
        legend.position='none',
        plot.title=element_text(size=11),
        axis.text=element_text(size=7),
        axis.text.x=element_text(angle=45, hjust=1)) +
  facet_wrap(~type, ncol=1, scales='free_y')
## Warning: Unknown or uninitialised column: 'txt'.

## table of the top countries (plus 'World' and 'Others'); the death rate is
## shown as text with one decimal and a percent sign. The caption takes the
## number of countries from k.top instead of a hard-coded figure.
df %>% mutate(death.rate=death.rate %>% format(nsmall=1) %>% paste0('%')) %>%
  kable('pandoc', booktabs=TRUE, row.names=TRUE, align=c('l', rep('r', 6)),
        caption=paste0('Cases in Top ', k.top, ' Countries - ', max.date.txt,
                       '. See a complete list of all infected countries at the end of this report.'),
        format.args=list(big.mark=',')) %>%
  kable_styling(font_size=7, latex_options=c('striped', 'hold_position', 'repeat_header'))
## Warning in kable_styling(., font_size = 7, latex_options = c("striped", : Please
## specify format in kable. kableExtra can customize either HTML or LaTeX outputs.
## See https://haozhu233.github.io/kableExtra/ for details.
Cases in Top 20 Countries - 20 março 2020. See a complete list of all infected countries at the end of this report.
country confirmed deaths death.rate new.confirmed new.deaths remaining.confirmed
1 World 272,166 11,299 4.2% 29,458 1,432 173,611
2 China 81,250 3,253 4.0% 94 4 6,731
3 Italy 47,021 4,032 8.6% 5,986 627 38,549
4 Spain 20,410 1,043 5.1% 2,447 213 17,779
5 Germany 19,848 67 0.3% 4,528 23 19,601
6 Iran 19,644 1,433 7.3% 1,237 149 11,466
7 US 19,100 244 1.3% 5,423 44 18,856
8 France 12,726 450 3.5% 1,779 207 12,264
9 Korea, South 8,652 94 1.1% 87 3 7,018
10 Switzerland 5,294 54 1.0% 1,219 13 5,225
11 United Kingdom 4,014 178 4.4% 1,298 40 3,769
12 Netherlands 3,003 107 3.6% 536 30 2,894
13 Austria 2,388 6 0.3% 375 0 2,373
14 Belgium 2,257 37 1.6% 462 16 2,219
15 Norway 1,914 7 0.4% 168 0 1,906
16 Sweden 1,639 16 1.0% 200 5 1,607
17 Denmark 1,337 9 0.7% 112 3 1,327
18 Malaysia 1,030 3 0.3% 130 1 940
19 Portugal 1,020 6 0.6% 235 3 1,009
20 Japan 963 33 3.4% 39 4 739
21 Canada 943 12 1.3% 143 3 922
22 Czechia 833 0 0.0% 139 0 829
23 Brazil 793 11 1.4% 172 5 780
24 Australia 791 7 0.9% 110 1 758
25 Cruise Ship 712 7 1.0% 0 0 380
26 Israel 705 0 0.0% 28 0 691
27 Others 13,879 190 1.4% 2,511 39 12,979
## death rates (upper bound, lower bound, daily) of the top countries,
## reshaped to long format with one row per country, date and rate type
data.long <- filter(data, country %in% top.countries)
data.long <- select(data.long, c(country, date, rate.upper, rate.lower, rate.daily))
data.long <- mutate(data.long, country=factor(country, levels=top.countries))
data.long <- gather(data.long, key=type, value=count, -c(country, date))
# relabel the rate types and fix the order in which they are shown
data.long <- mutate(data.long,
                    type=recode_factor(type,
                                       rate.daily='Daily',
                                       rate.lower='Lower bound',
                                       rate.upper='Upper bound'))

## three death rates, one panel per country
rate.max <- max(data.long$count, na.rm=TRUE)
data.long %>%
  ggplot(aes(x=date, y=count, color=type)) +
  geom_line() +
  labs(x='', y='Death Rate (%)') +
  ylim(0, 100) +
  facet_wrap(~country, ncol=4) +
  theme(legend.position='bottom', legend.title=element_blank(),
        legend.text=element_text(size=8),
        legend.key.size=unit(0.5, 'cm'),
        axis.text.x=element_text(angle=45, hjust=1))
## Warning: Removed 1 row(s) containing missing values (geom_path).

## two scatter plots on log-log axes, one labelled point per country
## ('World' excluded), coloured by death rate
## layers common to both plots
shared.layers <- list(geom_point(),
                      labs(col="Death Rate (%)"),
                      scale_color_gradient(low='#56B1F7', high='#132B43'),
                      scale_x_log10(),
                      scale_y_log10())
## total deaths against total confirmed cases
plot1 <- ggplot(filter(df, country!='World'),
                aes(x=confirmed, y=deaths, col=death.rate)) +
  geom_text(aes(label=country), size=2.5, check_overlap=TRUE, vjust=-0.8) +
  shared.layers +
  xlab('Total Confirmed') + ylab('Total Deaths')
## new deaths against new confirmed cases
plot2 <- ggplot(filter(df, country!='World'),
                aes(x=new.confirmed, y=new.deaths, col=death.rate)) +
  geom_text(aes(label=country), size=2.5, check_overlap=TRUE, vjust=-0.7) +
  shared.layers +
  xlab('New Confirmed') + ylab('New Deaths')
## stack the two plots vertically
grid.arrange(plot1, plot2, ncol=1)
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis

## convert from wide to long format, for drawing area plots
data.long <- data %>%
  select(c(country, date, confirmed, remaining.confirmed, recovered, deaths)) %>%
  gather(key=type, value=count, -c(country, date))
## set factor levels to show them in a desirable order
data.long %<>% mutate(type=recode_factor(type, confirmed='Confirmed',
                                         remaining.confirmed='Remaining Confirmed',
                                         recovered='Recovered',
                                         deaths='Deaths'))
## plot: cases by type
## Only plain %>% is used inside the chain: the compound pipe %<>% assigns
## back to its left-hand side and belongs only at the start of a chain.
df <- data.long %>% filter(country %in% top.countries) %>%
  mutate(country=country %>% factor(levels=c(top.countries)))
## base plot shared by the area and line versions: one facet per case type
p <- df %>% filter(country != 'World') %>%
  ggplot(aes(x=date, y=count)) + xlab('') + ylab('Count') +
  theme(legend.title=element_blank(),
        legend.text=element_text(size=8),
        legend.key.size=unit(0.5, 'cm'),
        plot.title=element_text(size=11),
        axis.text.x=element_text(angle=45, hjust=1)) +
  facet_wrap(~type, ncol=2, scales='free_y')
## area plot
plot1 <- p + geom_area(aes(fill=country)) +
  labs(title=paste0('Cases around the World - ', max.date.txt))
## line plot and in log scale
## manual line types and colours for the countries
linetypes <- rep(c("solid", "dashed", "dotted"), each=9)
colors <- rep(c('black', 'blue', 'red', 'green', 'orange', 'purple', 'yellow', 'grey'), 4)
plot2 <- p + geom_line(aes(color=country, linetype=country)) +
  scale_linetype_manual(values=linetypes) +
  scale_color_manual(values=colors) +
  labs(title=paste0('Cases around the World - Log Scale - ', max.date.txt)) +
  scale_y_continuous(trans='log10')
## show 2 plots together
grid.arrange(plot1, plot2, ncol=1)
## Warning: Transformation introduced infinite values in continuous y-axis

## plot: excluding China (and the 'World' total), one facet per case type
p <- ggplot(filter(df, !(country %in% c('World', 'China'))),
            aes(x=date, y=count)) +
  labs(x='', y='Count') +
  facet_wrap(~type, ncol=2, scales='free_y') +
  theme(legend.title=element_blank(),
        legend.text=element_text(size=8),
        legend.key.size=unit(0.5, 'cm'),
        plot.title=element_text(size=11),
        axis.text.x=element_text(angle=45, hjust=1))
## stacked area plot, filled by country
p +
  geom_area(aes(fill=country)) +
  labs(title=paste0('Cases around the World (excl. China) - ', max.date.txt))

## area plot
# plot1 <- p + geom_area(aes(fill=country)) +
# labs(title=paste0('Cases around the World (excl. China) - ', max.date.txt))
## line plot in log scale
# plot2 <- p + geom_line(aes(color=country, linetype=country)) +
# scale_linetype_manual(values=linetypes) +
# scale_color_manual(values=colors) +
# labs(title=paste0('Cases around the World (excl. China) - Log Scale - ', max.date.txt)) +
# scale_y_continuous(trans='log10')
# ## show 2 plots together
# grid.arrange(plot1, plot2, ncol=1)
## if Australia is not among the top countries, add it in and remove 'Others'
if (!('Australia' %in% top.countries)) {
  top.countries %<>% setdiff('Others') %>% c('Australia')
  ## Only plain %>% inside the chain: the compound pipe %<>% assigns back to
  ## its left-hand side and belongs only at the start of a chain.
  df <- data.long %>% filter(country %in% top.countries) %>%
    mutate(country=country %>% factor(levels=c(top.countries)))
}

## The plots below run whether or not the branch above was taken.
## The titles take the number of countries from k.top instead of a
## hard-coded figure.

## cases by country - area plot (everything except the 'Confirmed' type)
df %>% filter(type != 'Confirmed') %>%
  ggplot(aes(x=date, y=count, fill=type)) +
  geom_area(alpha=0.5) +
  # xlab('') + ylab('') +
  labs(title=paste0('Numbers of COVID-19 Cases in Top ', k.top,
                    ' Countries (plus World and Australia) - ',
                    max.date.txt)) +
  scale_fill_manual(values=c('red', 'green', 'black')) +
  theme(legend.title=element_blank(), legend.position='bottom',
        plot.title = element_text(size=9),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        legend.key.size=unit(0.4, 'cm'),
        # legend.text=element_text(size=7),
        strip.text.x=element_text(size=7),
        axis.text=element_text(size=7),
        axis.text.x=element_text(angle=45, hjust=1)) +
  facet_wrap(~country, ncol=4, scales='free_y')

## cases by country - line plot - log scale
p <- df %>% ggplot(aes(x=date, y=count, color=type)) +
  geom_line() +
  labs(title=paste0('Numbers of COVID-19 Cases in Top ', k.top,
                    ' Countries (plus World and Australia) - ',
                    max.date.txt)) +
  scale_color_manual(values=c('purple', 'red', 'green', 'black')) +
  theme(legend.title=element_blank(), legend.position='bottom',
        plot.title = element_text(size=9),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        legend.key.size=unit(0.4, 'cm'),
        # legend.text=element_text(size=7),
        strip.text.x=element_text(size=7),
        axis.text=element_text(size=7),
        axis.text.x=element_text(angle=45, hjust=1)) +
  scale_y_continuous(trans='log10')
## one panel per country, each with its own y-axis range
p + facet_wrap(~country, ncol=4, scales='free_y')
## Warning: Transformation introduced infinite values in continuous y-axis

## plot over multiple pages
# p + facet_wrap_paginate(~country, nrow=4, ncol=3, page=1, scales='free_y')
# p + facet_wrap_paginate(~country, nrow=4, ncol=3, page=2, scales='free_y')
  
  
# data %<>% filter(country=='China')
# data %<>% filter(country=='Australia')
## narrow `data` down to one country; everything below works on this subset
data <- filter(data, country=='Brazil')
n <- nrow(data)
## layers common to both panels: points, a smoothed trend line, axis labels
trend.layers <- list(geom_point(), geom_smooth(),
                     xlab(''), ylab('Count'),
                     theme(axis.text.x=element_text(angle=45, hjust=1)))
## remaining confirmed and its increase
plot1 <- ggplot(data, aes(x=date, y=remaining.confirmed)) +
  trend.layers +
  labs(title='Remaining Confirmed Cases')
plot2 <- ggplot(data, aes(x=date, y=new.confirmed)) +
  trend.layers +
  labs(title='New Confirmed Cases')
## show two plots side by side
grid.arrange(plot1, plot2, ncol=2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

  ## four scatter plots, each with a smoothed line and slanted x-axis labels:
  ## cumulative and new counts of deaths and recovered cases
  ## layers common to all four panels
  panel.layers <- list(geom_point(), geom_smooth(),
                       xlab(''), ylab('Count'),
                       theme(axis.text.x=element_text(angle=45, hjust=1)))
  plot1 <- ggplot(data, aes(x=date, y=deaths)) +
    panel.layers + labs(title='Accumulative Deaths')
  plot2 <- ggplot(data, aes(x=date, y=recovered)) +
    panel.layers + labs(title='Accumulative Recovered Cases')
  plot3 <- ggplot(data, aes(x=date, y=new.deaths)) +
    panel.layers + labs(title='New Deaths')
  plot4 <- ggplot(data, aes(x=date, y=new.recovered)) +
    panel.layers + labs(title='New Recovered Cases')
  ## show four plots together, with 2 plots in each row
  grid.arrange(plot1, plot2, plot3, plot4, nrow=2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

  ## three death rates
  ## The y-axis covers the full 0-100% range. ylim() replaces values outside
  ## the limits with NA, so a lower upper limit would drop days on which a
  ## rate reaches 100%.
  plot1 <- ggplot(data, aes(x=date)) +
    geom_line(aes(y=rate.upper, colour='Upper bound')) +
    geom_line(aes(y=rate.lower, colour='Lower bound')) +
    geom_line(aes(y=rate.daily, colour='Daily')) +
    xlab('') + ylab('Death Rate (%)') + labs(title='Overall') +
    theme(legend.position='bottom', legend.title=element_blank(),
          legend.text=element_text(size=8),
          legend.key.size=unit(0.5, 'cm'),
          axis.text.x=element_text(angle=45, hjust=1)) +
    ylim(0, 100)
  ## focusing on last 2 weeks: the last 15 rows of `data`
  ## (assumes `data` is still sorted by ascending date). tail() also copes
  ## with fewer than 15 rows, unlike indexing with n-(14:0).
  plot2 <- ggplot(tail(data, 15), aes(x=date)) +
    geom_line(aes(y=rate.upper, colour='Upper bound')) +
    geom_line(aes(y=rate.lower, colour='Lower bound')) +
    geom_line(aes(y=rate.daily, colour='Daily')) +
    xlab('') + ylab('Death Rate (%)') + labs(title='Last two weeks') +
    theme(legend.position='bottom', legend.title=element_blank(),
          legend.text=element_text(size=8),
          legend.key.size=unit(0.5, 'cm'),
          axis.text.x=element_text(angle=45, hjust=1))
  # + ylim(0, 10)
  ## show the two plots side by side
  grid.arrange(plot1, plot2, ncol=2)
## Warning: Removed 54 row(s) containing missing values (geom_path).
## Warning: Removed 35 row(s) containing missing values (geom_path).
## Warning: Removed 57 row(s) containing missing values (geom_path).
## Warning: Removed 10 row(s) containing missing values (geom_path).

## Warning: Removed 10 row(s) containing missing values (geom_path).

  ## Caption label: the country (or countries) that `data` actually holds,
  ## read before the country column is dropped below. A fixed caption such
  ## as 'Whole World' is wrong once `data` has been filtered to one country.
  country.label <- data$country %>% as.character() %>% unique() %>% paste(collapse=', ')
  ## sort by date descendingly and re-order columns
  data %<>% arrange(desc(date)) %>%
    select(c(date, confirmed, deaths, recovered, remaining.confirmed,
             new.confirmed, new.deaths, new.recovered, rate.lower, rate.upper, rate.daily))

  ## output as a table
  data %>% kable('pandoc', booktabs=TRUE, longtable=TRUE,
                 caption=paste0('Cases in ', country.label),
                 format.args=list(big.mark=',')) %>%
    kable_styling(font_size=5, latex_options=c('striped', 'hold_position', 'repeat_header'))
## Warning in kable_styling(., font_size = 5, latex_options = c("striped", : Please
## specify format in kable. kableExtra can customize either HTML or LaTeX outputs.
## See https://haozhu233.github.io/kableExtra/ for details.
Cases in the Whole World
date confirmed deaths recovered remaining.confirmed new.confirmed new.deaths new.recovered rate.lower rate.upper rate.daily
2020-03-20 793 11 2 780 172 5 0 1.4 84.6 100
2020-03-19 621 6 2 613 249 3 0 1.0 75.0 100
2020-03-18 372 3 2 367 51 2 0 0.8 60.0 100
2020-03-17 321 1 2 318 121 1 1 0.3 33.3 50
2020-03-16 200 0 1 199 38 0 1 0.0 0.0 0
2020-03-15 162 0 0 162 11 0 0 0.0 NaN NaN
2020-03-14 151 0 0 151 0 0 0 0.0 NaN NaN
2020-03-13 151 0 0 151 99 0 0 0.0 NaN NaN
2020-03-12 52 0 0 52 14 0 0 0.0 NaN NaN
2020-03-11 38 0 0 38 7 0 0 0.0 NaN NaN
2020-03-10 31 0 0 31 6 0 0 0.0 NaN NaN
2020-03-09 25 0 0 25 5 0 0 0.0 NaN NaN
2020-03-08 20 0 0 20 7 0 0 0.0 NaN NaN
2020-03-07 13 0 0 13 0 0 0 0.0 NaN NaN
2020-03-06 13 0 0 13 9 0 0 0.0 NaN NaN
2020-03-05 4 0 0 4 0 0 0 0.0 NaN NaN
2020-03-04 4 0 0 4 2 0 0 0.0 NaN NaN
2020-03-03 2 0 0 2 0 0 0 0.0 NaN NaN
2020-03-02 2 0 0 2 0 0 0 0.0 NaN NaN
2020-03-01 2 0 0 2 0 0 0 0.0 NaN NaN
2020-02-29 2 0 0 2 1 0 0 0.0 NaN NaN
2020-02-28 1 0 0 1 0 0 0 0.0 NaN NaN
2020-02-27 1 0 0 1 0 0 0 0.0 NaN NaN
2020-02-26 1 0 0 1 1 0 0 0.0 NaN NaN
2020-02-25 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-24 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-23 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-22 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-21 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-20 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-19 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-18 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-17 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-16 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-15 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-14 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-13 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-12 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-11 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-10 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-09 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-08 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-07 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-06 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-05 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-04 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-03 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-02 0 0 0 0 0 0 0 NaN NaN NaN
2020-02-01 0 0 0 0 0 0 0 NaN NaN NaN
2020-01-31 0 0 0 0 0 0 0 NaN NaN NaN
2020-01-30 0 0 0 0 0 0 0 NaN NaN NaN
2020-01-29 0 0 0 0 0 0 0 NaN NaN NaN
2020-01-28 0 0 0 0 0 0 0 NaN NaN NaN
2020-01-27 0 0 0 0 0 0 0 NaN NaN NaN
2020-01-26 0 0 0 0 0 0 0 NaN NaN NaN
2020-01-25 0 0 0 0 0 0 0 NaN NaN NaN
2020-01-24 0 0 0 0 0 0 0 NaN NaN NaN
2020-01-23 0 0 0 0 0 0 0 NaN NaN NaN
2020-01-22 0 0 0 0 NA NA NA NaN NaN NA
  ## complete list of the latest counts per country (including 'World'),
  ## ordered by confirmed cases, without the date and ranking columns
  data.latest %>%
    select(-c(date, ranking)) %>%
    arrange(desc(confirmed)) %>%
    kable('pandoc', booktabs=TRUE, longtable=TRUE, row.names=TRUE,
          caption=paste0('Cases by Country (', max.date.txt, ')'),
          format.args=list(big.mark=',')) %>%
    kable_styling(font_size=7, latex_options=c('striped', 'hold_position', 'repeat_header'))
## Warning in kable_styling(., font_size = 7, latex_options = c("striped", : Please
## specify format in kable. kableExtra can customize either HTML or LaTeX outputs.
## See https://haozhu233.github.io/kableExtra/ for details.
Cases by Country (20 março 2020)
country confirmed new.confirmed remaining.confirmed recovered deaths new.deaths
1 World 272,166 29,458 173,611 87,256 11,299 1,432
2 China 81,250 94 6,731 71,266 3,253 4
3 Italy 47,021 5,986 38,549 4,440 4,032 627
4 Spain 20,410 2,447 17,779 1,588 1,043 213
5 Germany 19,848 4,528 19,601 180 67 23
6 Iran 19,644 1,237 11,466 6,745 1,433 149
7 US 19,100 5,423 18,856 0 244 44
8 France 12,726 1,779 12,264 12 450 207
9 Korea, South 8,652 87 7,018 1,540 94 3
10 Switzerland 5,294 1,219 5,225 15 54 13
11 United Kingdom 4,014 1,298 3,769 67 178 40
12 Netherlands 3,003 536 2,894 2 107 30
13 Austria 2,388 375 2,373 9 6 0
14 Belgium 2,257 462 2,219 1 37 16
15 Norway 1,914 168 1,906 1 7 0
16 Sweden 1,639 200 1,607 16 16 5
17 Denmark 1,337 112 1,327 1 9 3
18 Malaysia 1,030 130 940 87 3 1
19 Portugal 1,020 235 1,009 5 6 3
20 Japan 963 39 739 191 33 4
21 Canada 943 143 922 9 12 3
22 Czechia 833 139 829 4 0 0
23 Brazil 793 172 780 2 11 5
24 Australia 791 110 758 26 7 1
25 Cruise Ship 712 0 380 325 7 0
26 Israel 705 28 691 14 0 0
27 Ireland 683 126 675 5 3 0
28 Pakistan 501 47 485 13 3 1
29 Greece 495 77 470 19 6 0
30 Luxembourg 484 149 480 0 4 0
31 Qatar 470 10 460 10 0 0
32 Finland 450 50 440 10 0 0
33 Chile 434 196 428 6 0 0
34 Poland 425 70 419 1 5 0
35 Iceland 409 79 404 5 0 0
36 Singapore 385 40 261 124 0 0
37 Indonesia 369 58 322 15 32 7
38 Ecuador 367 168 362 0 5 2
39 Turkey 359 167 355 0 4 1
40 Saudi Arabia 344 70 336 8 0 0
41 Slovenia 341 55 340 0 1 0
42 Thailand 322 50 279 42 1 0
43 Romania 308 31 283 25 0 0
44 Bahrain 285 7 184 100 1 0
45 Egypt 285 29 238 39 8 2
46 Estonia 283 16 282 1 0 0
47 Russia 253 54 243 9 1 0
48 India 244 50 219 20 5 1
49 Peru 234 0 230 1 3 3
50 Philippines 230 13 204 8 18 1
51 Iraq 208 16 142 49 17 4
52 South Africa 202 52 202 0 0 0
53 Mexico 164 46 159 4 1 0
54 Lebanon 163 6 155 4 4 0
55 Kuwait 159 11 141 18 0 0
56 San Marino 144 25 126 4 14 3
57 United Arab Emirates 140 0 107 31 2 2
58 Panama 137 28 136 0 1 0
59 Slovakia 137 14 136 0 1 0
60 Armenia 136 21 135 1 0 0
61 Serbia 135 32 133 1 1 1
62 Taiwan* 135 27 107 26 2 1
63 Argentina 128 31 122 3 3 0
64 Colombia 128 26 127 1 0 0
65 Croatia 128 23 122 5 1 0
66 Bulgaria 127 33 124 0 3 0
67 Latvia 111 25 110 1 0 0
68 Uruguay 94 15 94 0 0 0
69 Vietnam 91 6 75 16 0 0
70 Algeria 90 3 47 32 11 2
71 Bosnia and Herzegovina 89 26 87 2 0 0
72 Costa Rica 89 20 88 0 1 0
73 Hungary 85 12 80 2 3 2
74 Jordan 85 16 84 1 0 0
75 Brunei 78 3 77 1 0 0
76 Morocco 77 14 73 1 3 1
77 Andorra 75 22 74 1 0 0
78 Sri Lanka 73 13 70 3 0 0
79 Dominican Republic 72 38 70 0 2 0
80 Albania 70 6 68 0 2 0
81 Belarus 69 18 64 5 0 0
82 Cyprus 67 0 67 0 0 0
83 North Macedonia 67 19 66 1 0 0
84 Moldova 66 17 64 1 1 0
85 Malta 64 11 62 2 0 0
86 Tunisia 54 15 53 0 1 0
87 Cambodia 51 14 50 1 0 0
88 Kazakhstan 49 5 46 0 3 3
89 Lithuania 49 13 48 1 0 0
90 Oman 48 0 36 12 0 0
91 Azerbaijan 44 0 37 6 1 0
92 Georgia 43 3 42 1 0 0
93 Venezuela 42 0 42 0 0 0
94 Burkina Faso 40 7 39 0 1 0
95 New Zealand 39 11 39 0 0 0
96 Senegal 38 7 36 2 0 0
97 Uzbekistan 33 10 33 0 0 0
98 Martinique 32 9 31 0 1 0
99 Ukraine 29 13 26 0 3 1
100 Liechtenstein 28 0 28 0 0 0
101 Afghanistan 24 2 23 1 0 0
102 Honduras 24 12 24 0 0 0
103 Bangladesh 20 3 16 3 1 0
104 Cameroon 20 7 20 0 0 0
105 Congo (Kinshasa) 18 4 18 0 0 0
106 Rwanda 17 9 17 0 0 0
107 Cuba 16 5 15 0 1 0
108 Ghana 16 5 16 0 0 0
109 Jamaica 16 1 13 2 1 0
110 Bolivia 15 3 15 0 0 0
111 Montenegro 14 11 14 0 0 0
112 Maldives 13 0 13 0 0 0
113 Paraguay 13 2 13 0 0 0
114 Guatemala 12 3 11 0 1 0
115 Mauritius 12 9 12 0 0 0
116 Nigeria 12 4 11 1 0 0
117 Monaco 11 4 11 0 0 0
118 Cote d’Ivoire 9 0 8 1 0 0
119 Ethiopia 9 3 9 0 0 0
120 Togo 9 8 8 1 0 0
121 Trinidad and Tobago 9 0 9 0 0 0
122 Guyana 7 0 6 0 1 0
123 Kenya 7 0 7 0 0 0
124 Seychelles 7 1 7 0 0 0
125 Equatorial Guinea 6 0 6 0 0 0
126 Kyrgyzstan 6 3 6 0 0 0
127 Mongolia 6 0 6 0 0 0
128 Tanzania 6 0 6 0 0 0
129 Barbados 5 0 5 0 0 0
130 Suriname 4 3 4 0 0 0
131 Bahamas, The 3 0 3 0 0 0
132 Central African Republic 3 2 3 0 0 0
133 Congo (Brazzaville) 3 0 3 0 0 0
134 Gabon 3 2 2 0 1 1
135 Madagascar 3 3 3 0 0 0
136 Namibia 3 0 3 0 0 0
137 Benin 2 0 2 0 0 0
138 Bhutan 2 1 2 0 0 0
139 Haiti 2 2 2 0 0 0
140 Kosovo 2 0 2 0 0 0
141 Liberia 2 0 2 0 0 0
142 Mauritania 2 0 2 0 0 0
143 Saint Lucia 2 0 2 0 0 0
144 Sudan 2 0 1 0 1 0
145 Zambia 2 0 2 0 0 0
146 Angola 1 1 1 0 0 0
147 Antigua and Barbuda 1 0 1 0 0 0
148 Cabo Verde 1 1 1 0 0 0
149 Chad 1 0 1 0 0 0
150 Djibouti 1 0 1 0 0 0
151 El Salvador 1 0 1 0 0 0
152 Eswatini 1 0 1 0 0 0
153 Fiji 1 0 1 0 0 0
154 Gambia, The 1 0 1 0 0 0
155 Guinea 1 0 1 0 0 0
156 Holy See 1 0 1 0 0 0
157 Nepal 1 0 0 1 0 0
158 Nicaragua 1 0 1 0 0 0
159 Niger 1 1 1 0 0 0
160 Papua New Guinea 1 1 1 0 0 0
161 Saint Vincent and the Grenadines 1 0 1 0 0 0
162 Somalia 1 0 1 0 0 0
163 Zimbabwe 1 1 1 0 0 0