Introduction

This file visualizes the latest COVID-19 situation worldwide. To better show how cases are distributed across regions, we plot points on a world map and use the size and color of each point to indicate the number of cases.



Visualization Step by step
1. Import all the relevant libraries

library(tidyverse)
## ── Attaching packages ────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ───────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(viridis)
## Loading required package: viridisLite
library(readr)
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# Set the session-wide `highcharter.theme` option to the hc_theme_smpl() theme,
# passing a tooltip setting of valueDecimals = 2 (presumably so tooltips show
# two decimal places -- confirm against the highcharter documentation).
options(highcharter.theme = hc_theme_smpl(tooltip = list(valueDecimals = 2)))
2. Data Import

-2.1 Read the latest COVID-19 data

# Confirmed-case time series: one row per location, one column per date.
Confirmed <- read_csv(
  file = "data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
)
## 
## ── Column specification ───────────────────────────────
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# Death time series: one row per location, one column per date.
Deaths <- read_csv(
  file = "data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
)
## 
## ── Column specification ───────────────────────────────
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# Recovered-case time series: one row per location, one column per date.
Recover <- read_csv(
  file = "data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
)
## 
## ── Column specification ───────────────────────────────
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.

3. Map Visualization

-3.1 Import and Generate World Map


# World outline used as the background layer of every map below.
world <- map_data("world")

# Preview the bare outline as grey polygons, one polygon per `group`.
ggplot(data = world, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "grey")

-3.2 Generate a map visualization of the latest confirmed cases worldwide

# Confirmed cases on 3/27/21 drawn over the world outline; both the size and
# the colour of each point encode the count at that location.
ggplot() +
  geom_polygon(
    data = world,
    mapping = aes(x = long, y = lat, group = group),
    fill = "grey"
  ) +
  geom_point(
    data = Confirmed,
    mapping = aes(x = Long, y = Lat, size = `3/27/21`, colour = `3/27/21`),
    alpha = 0.5
  ) +
  theme_void() +
  theme(legend.position = "bottom")
## Warning: Removed 1 rows containing missing values (geom_point).


-3.3 Generate latest Death Cases map visualization

# Death counts on 3/27/21 drawn over the world outline.
#
# Size and colour both use a log scale, so only rows with a strictly positive
# count and known coordinates are plotted: log(0) is -Inf and cannot be drawn.
# Each label names the bin that starts at the matching break; the bins do not
# overlap.
breaks <- c(1, 10, 100, 1000, 100000)
labels <- c("1-9", "10-99", "100-999", "1,000-99,999", "100,000+")
ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey"
  ) +
  geom_point(
    data = dplyr::filter(Deaths, `3/27/21` > 0, !is.na(Long), !is.na(Lat)),
    aes(x = Long, y = Lat, size = `3/27/21`, colour = `3/27/21`),
    alpha = 0.5
  ) +
  # Size and colour share the same title, breaks and labels so that ggplot2
  # merges them into a single legend.
  scale_size_continuous(
    name = "Deaths cases", trans = "log", range = c(1, 7),
    breaks = breaks, labels = labels
  ) +
  scale_colour_viridis_c(
    option = "inferno", direction = -1, name = "Deaths cases", trans = "log",
    breaks = breaks, labels = labels
  ) +
  guides(colour = guide_legend()) +
  # Longitude/latitude axes carry no information on this map.
  theme_void() +
  theme(legend.position = "bottom")
## Warning: Transformation introduced infinite values in discrete y-axis

## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 27 rows containing missing values (geom_point).

-3.4 Generate map visualization for latest Recovered cases

# Recovered counts on 3/27/21 drawn over the world outline.
#
# Size and colour both use a log scale, so only rows with a strictly positive
# count and known coordinates are plotted: log(0) is -Inf and cannot be drawn.
# Each label names the bin that starts at the matching break; the bins do not
# overlap.
breaks <- c(1, 10, 100, 1000, 100000)
labels <- c("1-9", "10-99", "100-999", "1,000-99,999", "100,000+")
ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey"
  ) +
  geom_point(
    data = dplyr::filter(Recover, `3/27/21` > 0, !is.na(Long), !is.na(Lat)),
    aes(x = Long, y = Lat, size = `3/27/21`, colour = `3/27/21`),
    alpha = 0.3
  ) +
  # Size and colour share the same title, breaks and labels so that ggplot2
  # merges them into a single legend.
  scale_size_continuous(
    name = "Recovered cases", trans = "log", range = c(1, 7),
    breaks = breaks, labels = labels
  ) +
  scale_colour_viridis_c(
    option = "inferno", direction = -1, name = "Recovered cases", trans = "log",
    breaks = breaks, labels = labels
  ) +
  guides(colour = guide_legend()) +
  # Longitude/latitude axes carry no information on this map.
  theme_void() +
  theme(legend.position = "bottom")
## Warning: Transformation introduced infinite values in discrete y-axis

## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 7 rows containing missing values (geom_point).

4. Interactive Visualization

-4.1 Data Processing to generate interactive visualization


-4.1.1 Data for interactive bar chart

# Data preparation for the interactive bar chart: one worldwide total per
# status, taken from the 3/27/21 column of each table.
status <- c("Confirmed", "Deaths", "Recovered")

sum_confirmed <- sum(Confirmed[["3/27/21"]])
sum_Deaths <- sum(Deaths[["3/27/21"]])
sum_Recover <- sum(Recover[["3/27/21"]])

number <- c(sum_confirmed, sum_Deaths, sum_Recover)
conclusion.data <- data.frame(status, number)
conclusion.data
##      status    number
## 1 Confirmed 126726672
## 2    Deaths   2777336
## 3 Recovered  71768317

-4.1.2 Data Preprocessing to generate interactive line chart visualization

#data preparation for the interactive line chart
# Data preparation for the interactive line chart: worldwide totals for each
# of the five most recent dates, rescaled per status (confirmed and recovered
# in units of 100,000; deaths in units of 1,000).
dates <- c("3/23/21", "3/24/21", "3/25/21", "3/26/21", "3/27/21")
date <- dates # same vector, kept under its original name

# Keep the country label plus the five date columns of each status table.
region_confirmed <- Confirmed %>%
  select("Country/Region", all_of(dates))
region_dead <- Deaths %>%
  select("Country/Region", all_of(dates))
region_recovered <- Recover %>%
  select("Country/Region", all_of(dates))

# Totals are looked up by date name rather than by column position, so they
# stay correct if the selected columns are ever reordered. unname() stops
# data.frame() below from using the date names as row names.
confirm_week <- unname(colSums(region_confirmed[dates])) / 100000
death_week <- unname(colSums(region_dead[dates])) / 1000
recover_week <- unname(colSums(region_recovered[dates])) / 100000

# One table per status.
confirm_flow <- data.frame(
  date = dates,
  confirm_week = confirm_week
)
death_flow <- data.frame(
  date = dates,
  death_week = death_week
)
recover_flow <- data.frame(
  date = dates,
  recover_week = recover_week
)

# Combined table consumed by the line chart; column names record the unit.
flow <- data.frame(
  date = dates,
  confirm_100000 = confirm_week,
  death_1000 = death_week,
  recover_100000 = recover_week
)

flow
##      date confirm_100000 death_1000 recover_100000
## 1 3/23/21       1242.046   2734.065       704.5102
## 2 3/24/21       1248.414   2743.729       707.8666
## 3 3/25/21       1254.917   2755.210       710.9848
## 4 3/26/21       1261.309   2767.546       714.6306
## 5 3/27/21       1267.267   2777.336       717.6832

-4.2 Interactive bar plot to visualize confirmed cases, death cases, and recovered cases as of the latest date

# Column chart of the three worldwide totals; each bar is coloured by its value.
cd <- hchart(
  conclusion.data,
  "column",
  hcaes(x = status, y = number, color = number)
)
cd

-4.3 Interactive line chart to observe the overall trend of confirmed cases, death cases, and recovered cases over the latest week

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Line chart with one trace per status, all read from the rescaled columns of
# `flow` and sharing the date axis.
p <- plot_ly(
  data = flow,
  x = ~date,
  y = ~confirm_100000,
  name = "confirm*100000",
  type = "scatter",
  mode = "lines+markers"
)
p <- add_trace(p, y = ~death_1000, name = "death*1000", mode = "lines+markers")
p <- add_trace(
  p,
  y = ~recover_100000, name = "recover*100000", mode = "lines+markers"
)
p

Write up

1. Data challenge and solution

Data challenge for map visualization:

Challenge: Due to the limited size of the map, it is difficult to display all the data points for every country under every status on one map simultaneously.

Solution: Separate the visualization into one map per status, which makes it easier to observe the distribution of cases worldwide.

Trulli
Data challenge for interactive visualization

Challenge: The scales of the data points differ between statuses, so it is hard to fit them into one chart.

Solution: Rescale the data for each status so that all the data points are on comparable scales (data standardization).

Trulli

2. Insights Found


1. Confirmed cases are mainly present in the Americas and Europe. The number of daily newly reported confirmed cases remains at a high level. Also, with the help of vaccination and mature treatments for COVID-19, the number of recovered cases also remains at a high level.
2. In the past week, the spread of COVID-19 has been quite stable rather than showing explosive growth. The number of newly confirmed cases is still higher than the number of newly recovered cases.