4.1 Top 10 countries with most confirmed cases:
## Snapshot of the most recent date, with a dense rank on confirmed cases
## (rank 1 = largest count).
data.latest <- data %>%
  filter(date == max(date)) %>%
  select(country, date, confirmed, deaths, recovered, remaining.confirmed) %>%
  mutate(ranking = dense_rank(desc(confirmed)))

## Names of the top-ranked entries, best rank first. The cut-off is 12 rather
## than 10 because 'World' and 'Others' are counted among them.
top.countries <- data.latest %>%
  filter(ranking <= 12) %>%
  arrange(ranking) %>%
  pull(country) %>%
  as.character()

## Put 'Others' at the very end of the vector.
top.countries <- c(setdiff(top.countries, 'Others'), 'Others')
top.countries
## [1] "World" "Mainland China" "Italy" "South Korea"
## [5] "Iran" "France" "Germany" "Spain"
## [9] "US" "Japan" "Switzerland" "Others"

## Pie-chart data: drop rows without a country and the 'World' row, relabel
## everything ranked below the cut-off as 'Others', and order the factor levels
## like top.countries.
df <- data.latest %>%
  filter(!is.na(country), country != 'World') %>%
  mutate(
    country = ifelse(ranking <= 12, as.character(country), 'Others'),
    country = factor(country, levels = c(top.countries))
  )

## One row per (possibly merged) country with its total confirmed count.
df <- df %>%
  group_by(country) %>%
  summarise(confirmed = sum(confirmed))

## Percentage share (one decimal) and the text shown in the legend.
df <- df %>%
  mutate(
    per = round(100 * confirmed / sum(confirmed), 1),
    txt = paste0(country, ':', confirmed, '(', per, '%)')
  )

# pie(df$confirmed, labels=df$txt, cex=0.7)
## A single stacked bar drawn in polar coordinates gives the pie chart; the
## legend entries are replaced by the labels built above.
ggplot(df, aes(fill = country)) +
  geom_bar(aes(x = '', y = per), stat = 'identity') +
  coord_polar("y", start = 0) +
  xlab('') +
  ylab('Percentage (%)') +
  labs(title = "Top 10 Countries with Most Confirmed Cases") +
  scale_fill_discrete(name = 'Country', labels = df$txt)

## Convert from wide to long format (one row per country, date and case type)
## so the case types can be drawn as facets of an area plot.
data.long <- data %>%
  select(c(country, date, confirmed, remaining.confirmed, recovered, deaths)) %>%
  gather(key = type, value = count, -c(country, date))

## Set the factor levels so the case types are shown in this order.
data.long %<>%
  mutate(type = factor(type, c('confirmed', 'remaining.confirmed',
                               'recovered', 'deaths')))

## Cases by type, restricted to the top countries and ordered like
## top.countries. A plain `%>%` is used throughout the chain: the assignment
## pipe `%<>%` only makes sense as the first operator of a pipeline, and the
## result is already assigned to `df` with `<-`.
df <- data.long %>%
  filter(country %in% top.countries) %>%
  mutate(country = country %>% factor(levels = c(top.countries)))

## Stacked areas per country, one panel per case type; 'World' is left out of
## the stack.
df %>%
  filter(country != 'World') %>%
  ggplot(aes(x = date, y = count, fill = country)) +
  geom_area() +
  xlab('Date') + ylab('Count') +
  labs(title = 'Cases around the World') +
  theme(legend.title = element_blank()) +
  facet_wrap(~type, ncol = 2, scales = 'free_y')

## Same stacked-area view, this time leaving out both 'World' and
## 'Mainland China'.
ggplot(
  filter(df, !(country %in% c('World', 'Mainland China'))),
  aes(x = date, y = count, fill = country)
) +
  geom_area() +
  labs(title = 'Cases around the World (excl. China)') +
  xlab('Date') +
  ylab('Count') +
  facet_wrap(~type, ncol = 2, scales = 'free_y') +
  theme(legend.title = element_blank())

## If Australia is not among the top countries, add it and remove 'Others',
## then rebuild `df` for the updated country list.
if (!('Australia' %in% top.countries)) {
  top.countries %<>% setdiff('Others') %>% c('Australia')
  ## Plain `%>%` throughout: the assignment pipe `%<>%` is only meaningful as
  ## the first operator of a pipeline, and the result is assigned with `<-`.
  df <- data.long %>%
    filter(country %in% top.countries) %>%
    mutate(country = country %>% factor(levels = c(top.countries)))
}

## Cases by country: one panel per country, stacking every case type except
## the 'confirmed' total. The three colours are matched to the remaining types
## in factor-level order.
df %>%
  filter(type != 'confirmed') %>%
  ggplot(aes(x = date, y = count, fill = type)) +
  geom_area(alpha = 0.5) +
  xlab('Date') + ylab('Count') +
  labs(title = 'COVID-19 Cases by Country') +
  scale_fill_manual(values = c('red', 'green', 'black')) +
  theme(legend.title = element_blank(), legend.position = 'bottom') +
  facet_wrap(~country, ncol = 3, scales = 'free_y')

4.2 Current (or Remaining) Confirmed Cases:
# data %<>% filter(country=='Mainland China')
# data %<>% filter(country=='Australia')
## Keep only the 'World' rows. `data` itself is overwritten, so every plot
## below works on this subset.
data <- data %>% filter(country == 'World')
n <- nrow(data)

## Left panel: current (remaining) confirmed cases over time, shown as points
## with a smoothed trend.
plot1 <- ggplot(data, aes(x = date, y = remaining.confirmed)) +
  geom_point() +
  geom_smooth() +
  labs(title = 'Current Confirmed Cases') +
  xlab('Date') +
  ylab('Count')

## Right panel: the increase series.
## NOTE(review): the y variable is confirmed.inc while the title speaks of
## "Current Confirmed" -- confirm that this is the intended column.
plot2 <- ggplot(data, aes(x = date, y = confirmed.inc)) +
  geom_point() +
  geom_smooth() +
  labs(title = 'Increase in Current Confirmed') +
  xlab('Date') +
  ylab('Count')
# + ylim(0, 4500)

## Show both panels side by side.
grid.arrange(plot1, plot2, ncol = 2)

4.3 Deaths and Cured Cases:
## Four scatter plots, each with a smoothed line: cumulative deaths and
## recovered cases on top, their increases underneath.
plot1 <- ggplot(data, aes(x = date, y = deaths)) +
  geom_point() +
  geom_smooth() +
  labs(title = 'Deaths') +
  xlab('Date') +
  ylab('Count')

plot2 <- ggplot(data, aes(x = date, y = recovered)) +
  geom_point() +
  geom_smooth() +
  labs(title = 'Recovered Cases') +
  xlab('Date') +
  ylab('Count')

plot3 <- ggplot(data, aes(x = date, y = deaths.inc)) +
  geom_point() +
  geom_smooth() +
  labs(title = 'Increase in Deaths') +
  xlab('Date') +
  ylab('Count')

plot4 <- ggplot(data, aes(x = date, y = recovered.inc)) +
  geom_point() +
  geom_smooth() +
  labs(title = 'Increase in Recovered Cases') +
  xlab('Date') +
  ylab('Count')

## Arrange the four plots in a grid with two rows (two plots per row).
grid.arrange(plot1, plot2, plot3, plot4, nrow = 2)

4.4 Death Rates:
## Three death-rate series (upper bound, lower bound, daily) over the whole
## period. The colour aesthetic is mapped to constant strings so that each
## line gets its own legend entry.
plot1 <- ggplot(data, aes(x = date)) +
  geom_line(aes(y = rate.upper, colour = 'Upper bound')) +
  geom_line(aes(y = rate.lower, colour = 'Lower bound')) +
  geom_line(aes(y = rate.daily, colour = 'Daily')) +
  xlab('Date') + ylab('Death Rate (%)') + labs(title = 'Overall') +
  theme(legend.position = 'bottom', legend.title = element_blank()) +
  ylim(0, 90)

## Focus on the last two weeks: the final 15 rows of `data`.
## tail() selects the same rows as indexing with n - (14:0) when at least 15
## rows exist, and simply returns every row when there are fewer, instead of
## producing zero or negative indices.
## NOTE(review): assumes `data` is ordered by date -- confirm upstream.
plot2 <- ggplot(tail(data, 15), aes(x = date)) +
  geom_line(aes(y = rate.upper, colour = 'Upper bound')) +
  geom_line(aes(y = rate.lower, colour = 'Lower bound')) +
  geom_line(aes(y = rate.daily, colour = 'Daily')) +
  xlab('Date') + ylab('Death Rate (%)') + labs(title = 'Last two weeks') +
  theme(legend.position = 'bottom', legend.title = element_blank()) +
  ylim(0, 10)

## Show the overall and the zoomed-in view side by side.
grid.arrange(plot1, plot2, ncol = 2)
