This file visualizes the latest worldwide COVID-19 situation. To better show how cases are distributed across regions, we plot the data on a world map and use the size and color of the points to indicate the counts.
Visualization Step by step
1. Import all the relevant libraries
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(viridis)
## Loading required package: viridisLite
library(readr)
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Default highcharter theme for every chart in this file: the "smpl" theme
# with tooltips set to 2 decimal places.
options(
  highcharter.theme = hc_theme_smpl(
    tooltip = list(valueDecimals = 2)
  )
)
2. Data Import -2.1 Read the latest COVID-19 data
# Global confirmed-case time series (one column per date).
Confirmed <- read_csv(
  "data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
)
##
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# Global death time series (one column per date).
Deaths <- read_csv(
  "data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
)
##
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# Global recovered-case time series (one column per date).
Recover <- read_csv(
  "data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
)
##
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
-3.1 Import and Generate World Map
# Polygon outlines of the world map, reused as the base layer of every map.
world <- map_data("world")

# Plain grey base map with no data points.
ggplot() +
  geom_polygon(
    data = world,
    mapping = aes(x = long, y = lat, group = group),
    fill = "grey"
  )
-3.2 Generate a map visualization of the latest confirmed cases worldwide
# Confirmed-case map: grey world polygons with one semi-transparent point per
# row of `Confirmed`, sized and coloured by the `3/27/21` column.
ggplot() +
  geom_polygon(
    data = world,
    mapping = aes(x = long, y = lat, group = group),
    fill = "grey"
  ) +
  geom_point(
    data = Confirmed,
    mapping = aes(x = Long, y = Lat, size = `3/27/21`, color = `3/27/21`),
    alpha = 0.5
  ) +
  theme_void() +
  theme(legend.position = "bottom")
## Warning: Removed 1 rows containing missing values (geom_point).
-3.3 Generate latest Death Cases map visualization
# Death-count map: grey world polygons with one point per row of `Deaths`,
# sized and coloured by the `3/27/21` column on a log scale.

# Legend break points and the count range each break stands for.
breaks <- c(1, 10, 100, 1000, 100000)
labels <- c("1-9", "10-99", "100-999", "1,000-99,999", "100,000+")

ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey"
  ) +
  # The default point shape has no fill, so only colour, size and alpha are
  # set here.
  # NOTE: non-positive counts cannot be log-transformed; ggplot2 drops those
  # rows with a warning.
  geom_point(
    data = Deaths,
    aes(x = Long, y = Lat, size = `3/27/21`, color = `3/27/21`),
    alpha = 0.5
  ) +
  # Both scales share the same name, breaks and labels so that, together with
  # guide_legend() for colour, ggplot2 merges them into a single legend.
  scale_size_continuous(
    name = "Deaths cases",
    trans = "log",
    range = c(1, 7),
    breaks = breaks,
    labels = labels
  ) +
  scale_colour_viridis_c(
    option = "inferno",
    direction = -1,
    name = "Deaths cases",
    trans = "log",
    breaks = breaks,
    labels = labels
  ) +
  guides(colour = guide_legend()) +
  theme(legend.position = "bottom")
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 27 rows containing missing values (geom_point).
-3.4 Generate map visualization for latest Recovered cases
# Recovered-count map: grey world polygons with one point per row of
# `Recover`, sized and coloured by the `3/27/21` column on a log scale.

# Legend break points and the count range each break stands for.
breaks <- c(1, 10, 100, 1000, 100000)
labels <- c("1-9", "10-99", "100-999", "1,000-99,999", "100,000+")

ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey"
  ) +
  # The default point shape has no fill, so only colour, size and alpha are
  # set here.
  # NOTE: non-positive counts cannot be log-transformed; ggplot2 drops those
  # rows with a warning.
  geom_point(
    data = Recover,
    aes(x = Long, y = Lat, size = `3/27/21`, color = `3/27/21`),
    alpha = 0.3
  ) +
  # Both scales share the same name, breaks and labels so that, together with
  # guide_legend() for colour, ggplot2 merges them into a single legend.
  scale_size_continuous(
    name = "Recovered cases",
    trans = "log",
    range = c(1, 7),
    breaks = breaks,
    labels = labels
  ) +
  scale_colour_viridis_c(
    option = "inferno",
    direction = -1,
    name = "Recovered cases",
    trans = "log",
    breaks = breaks,
    labels = labels
  ) +
  guides(colour = guide_legend()) +
  theme(legend.position = "bottom")
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 7 rows containing missing values (geom_point).
-4.1 Data Processing to generate interactive visualization
-4.1.1 Data for interactive bar chart
# Worldwide totals: sum the `3/27/21` column over every row of each table.
sum_confirmed <- sum(Confirmed[["3/27/21"]])
sum_Deaths <- sum(Deaths[["3/27/21"]])
sum_Recover <- sum(Recover[["3/27/21"]])

# One row per status, used as input for the interactive bar chart.
status <- c("Confirmed", "Deaths", "Recovered")
number <- c(sum_confirmed, sum_Deaths, sum_Recover)
conclusion.data <- data.frame(status = status, number = number)
conclusion.data
## status number
## 1 Confirmed 126726672
## 2 Deaths 2777336
## 3 Recovered 71768317
-4.1.2 Data Preprocessing to generate interactive line chart visualization
# Data preparation for the interactive line chart.

# The five date columns that are plotted, in chronological order.
dates <- c("3/23/21", "3/24/21", "3/25/21", "3/26/21", "3/27/21")
date <- dates

# Keep only the country column and the five date columns of each table.
region_confirmed <- select(Confirmed, all_of(c("Country/Region", dates)))
region_dead <- select(Deaths, all_of(c("Country/Region", dates)))
region_recovered <- select(Recover, all_of(c("Country/Region", dates)))

# Worldwide total per date. Confirmed and recovered are expressed in units of
# 100,000 cases and deaths in units of 1,000 so that the three series can
# share one y-axis.
confirm_week <- vapply(
  dates,
  function(day) sum(region_confirmed[[day]]),
  numeric(1),
  USE.NAMES = FALSE
) / 100000
death_week <- vapply(
  dates,
  function(day) sum(region_dead[[day]]),
  numeric(1),
  USE.NAMES = FALSE
) / 1000
recover_week <- vapply(
  dates,
  function(day) sum(region_recovered[[day]]),
  numeric(1),
  USE.NAMES = FALSE
) / 100000

# Per-status tables (date plus one rescaled series each).
confirm_flow <- data.frame(date = dates, confirm_week = confirm_week)
death_flow <- data.frame(date = dates, death_week = death_week)
recover_flow <- data.frame(date = dates, recover_week = recover_week)

# Combined table consumed by the line chart; column names carry the unit.
flow <- data.frame(
  date = dates,
  confirm_100000 = confirm_week,
  death_1000 = death_week,
  recover_100000 = recover_week
)
flow
## date confirm_100000 death_1000 recover_100000
## 1 3/23/21 1242.046 2734.065 704.5102
## 2 3/24/21 1248.414 2743.729 707.8666
## 3 3/25/21 1254.917 2755.210 710.9848
## 4 3/26/21 1261.309 2767.546 714.6306
## 5 3/27/21 1267.267 2777.336 717.6832
-4.2 Interactive bar plot to visualize the total confirmed cases, death cases and recovered cases on the latest date
# Interactive column chart: one bar per status, with bar height and colour
# both driven by the case total.
cd <- hchart(
  conclusion.data,
  "column",
  hcaes(x = status, y = number, color = number)
)
cd
-4.3 Interactive line chart to observe the overall trend of cumulative confirmed cases, death cases, and recovered cases over the latest five days (3/23/21 to 3/27/21)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Interactive line chart of the three rescaled series in `flow`, built one
# trace at a time.
p <- plot_ly(
  flow,
  x = ~date,
  y = ~confirm_100000,
  name = "confirm*100000",
  type = "scatter",
  mode = "lines+markers"
)
p <- add_trace(
  p,
  y = ~death_1000,
  name = "death*1000",
  mode = "lines+markers"
)
p <- add_trace(
  p,
  y = ~recover_100000,
  name = "recover*100000",
  mode = "lines+markers"
)
p
1. Data challenge and solution
Data challenge for map visualization: Challenge: due to the restricted map size, it is difficult to display all the data points for each country under each status on one map simultaneously.
Solution: separate the visualization, using one map per status, to make it easier to observe the distribution of cases worldwide.
Challenge: the scales of the data points differ between statuses, so it is hard to fit them into one chart.
Solution: rescale the data for each status (confirmed and recovered in units of 100,000, deaths in units of 1,000) so that all the data points are on a similar scale (a simple form of data standardization).
2.Insights Found
1. Confirmed cases were mainly added in the United States of America.
2. In the past week, the spread of COVID-19 has been quite stable rather than showing explosive growth.