# load libraries

library(readr) 
library(dplyr)
library(tidyr)
library(ggplot2)
library(glue)
library(plotly)
library(shiny)
library(shinydashboard)
library(flexdashboard)
library(tidyverse)
library(lubridate)
library(leaflet)

1 Introduction

This report uses day-by-day corona (COVID-19) data from 22-01-2020 until 04-03-2020, covering the number of confirmed cases, the number of deaths, and the number of recoveries across the world. The analysis has 3 research objectives: 1. To compare the spread of corona in China vs. non-China, along with the total number of recoveries and deaths. 2. To find the heal (recovery) rate and death rate of corona in each country. 3. To see the spread of corona across the world day by day.

2 1. Read Data

# Load the two inputs used by the rest of the report.

# Corona case table (CSV)
corona <- read_csv(file = "covid_19_5March.csv")

# World polygons (GeoJSON), parsed into an sp object
states <- geojsonio::geojson_read(x = "custom.geo.json", what = "sp")

3 2. Exploratory Data Analysis

# Inspect the column types of the raw table
str(corona)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 6880 obs. of  8 variables:
##  $ Province/State: chr  "Anhui" "Beijing" "Chongqing" "Fujian" ...
##  $ Country/Region: chr  "Mainland China" "Mainland China" "Mainland China" "Mainland China" ...
##  $ Lat           : num  31.8 40.2 30.1 26.1 36.1 ...
##  $ Long          : num  117 116 108 118 104 ...
##  $ Date          : chr  "1/22/20" "1/22/20" "1/22/20" "1/22/20" ...
##  $ Confirmed     : num  1 14 6 1 0 26 2 1 4 1 ...
##  $ Deaths        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Recovered     : num  0 0 0 0 0 0 0 0 0 0 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Province/State` = col_character(),
##   ..   `Country/Region` = col_character(),
##   ..   Lat = col_double(),
##   ..   Long = col_double(),
##   ..   Date = col_character(),
##   ..   Confirmed = col_double(),
##   ..   Deaths = col_double(),
##   ..   Recovered = col_double()
##   .. )
# Count the missing values in every column
corona %>%
  is.na() %>%
  colSums()
## Province/State Country/Region            Lat           Long           Date 
##           3354              0              0              0              0 
##      Confirmed         Deaths      Recovered 
##              0              0              0
# `Date` is read as text in month/day/year form; parse it into a Date
corona[["Date"]] <- mdy(corona[["Date"]])

# `Province/State` is missing in 3354 rows, so the column is dropped.
# `country` is the region name with "Mainland China" shortened to "China";
# `area` is a two-level factor splitting the rows into China / Not China.
corona1 <- corona %>%
  select(-`Province/State`) %>%
  mutate(
    country = if_else(
      `Country/Region` == "Mainland China",
      "China",
      `Country/Region`
    ),
    area = factor(if_else(country == "China", "China", "Not China"))
  )

# The case data is later left-joined onto the polygon data (`states`) by
# `name`, so country names that differ between the two sources are
# rewritten here to the spelling used by the polygons.
#
# "United Arab Emirates" is intentionally left untouched: the earlier mapping
# rewrote it to "United Arab Emirate" (missing the final "s"), which would
# prevent it from matching the polygon name.
# NOTE(review): assumes custom.geo.json uses Natural Earth style names
# ("United Arab Emirates", "Czech Rep.", ...) -- confirm with
# setdiff(unique(corona1$country), states$name) after running this step.
corona1 <- corona1 %>%
  mutate(country = case_when(country == "Czech Republic" ~ "Czech Rep.",
                             country == "Dominican Republic" ~ "Dominican Rep.",
                             country == "South Korea" ~ "Korea",
                             country == "UK" ~ "United Kingdom",
                             country == "US" ~ "United States",
                             TRUE ~ country),
         # factor copy of the harmonised name, used as the join key
         name = as.factor(country))

# Latest totals per country (joined onto the map polygons further below).
#
# Confirmed / Deaths / Recovered are cumulative (running totals per day), so
# adding them up over every date would count the same cases once per day.
# Keep only the most recent date, then add up the rows that share a country
# name (one row per province/state within a country).
corona2 <- corona1 %>%
  filter(Date == max(Date, na.rm = TRUE)) %>%
  group_by(name) %>%
  summarise(total_confirmed = sum(Confirmed),
            total_death = sum(Deaths),
            total_recovered = sum(Recovered)) %>%
  ungroup()


# Attach the per-country totals to the polygon attribute table, keyed on
# `name`; a left join keeps every polygon row, matched or not.
states@data <- states@data %>%
  left_join(corona2, by = "name")

# Colour scale for the map: confirmed counts bucketed at powers of ten
bins <- c(10^(0:4), Inf)
pal <- colorBin(
  palette = "YlOrRd",
  domain = states$total_confirmed,
  bins = bins
)

4 Data Analysis

4.1 Objective number 1

To compare the spread of corona in China vs. non-China, along with the total number of recoveries and deaths.

# Totals per date, summed over every row of `corona1`.
# NOTE(review): `area` (China / Not China) is not used in this aggregation,
# so the resulting series are overall totals, not a per-area comparison.
corona11 <- corona1 %>%
  group_by(Date) %>%
  summarise(
    total_confirmed = sum(Confirmed),
    total_death = sum(Deaths),
    total_recovered = sum(Recovered)
  ) %>%
  ungroup()

# One row per (Date, measure); `text` is the label shown in the plot tooltip
corona11_long <- corona11 %>%
  pivot_longer(cols = -Date) %>%
  mutate(text = glue(
    "Type Case: {name}
     Total Case = {value}"
  ))


# Line + point chart of the daily totals, one colour per measure.
# `text` is only consumed by ggplotly() for the hover tooltip.
# `show.legend` is a layer argument, not an aesthetic, so it is passed to
# geom_point() itself rather than inside aes().
plot1 <- ggplot(data = corona11_long, aes(x = Date, y = value)) +
  geom_line(aes(color = name)) +
  geom_point(aes(color = name, text = text), show.legend = TRUE) +
  labs(title = "Corona Case 2019-2020",
       x = "Date",
       y = "Total Case")

# Interactive version; the tooltip shows only the prepared `text` label
ggplotly(plot1, tooltip = "text")

4.2 Objective Number 2

To find the heal (recovery) rate and death rate of corona in each country.

# The corona data is cumulative, so only the last published date
# (2020-03-04) is needed to get each country's totals.
#
# Rows are first added up per country (a country can span several
# province/state rows) and the ratios are computed on those country totals;
# the country column is kept so the chart can show one bar per country.
corona2 <- corona1 %>%
  filter(Date == as.Date("2020-03-04")) %>%
  group_by(Date, country) %>%
  summarise(Confirmed = sum(Confirmed),
            Deaths = sum(Deaths),
            Recovered = sum(Recovered)) %>%
  ungroup() %>%
  # a country without confirmed cases has no defined ratio (0 / 0)
  filter(Confirmed > 0) %>%
  mutate(death_ratio = Deaths / Confirmed,
         heal_ratio = Recovered / Confirmed) %>%
  select(Date, country, heal_ratio, death_ratio)

# One row per (country, ratio type); `text` is the hover tooltip label
corona2_long <- pivot_longer(data = corona2,
                             cols = c(heal_ratio, death_ratio)) %>%
  mutate(ratio = name,
         text = glue(
    "Country: {country}
     Type Case: {ratio}
     Ratio = {round(value, 3)}"
  ))

# Horizontal stacked bars: one bar per country, filled by ratio type
plot2 <- ggplot(data = corona2_long,
                aes(x = country, y = value, text = text)) +
  geom_col(aes(fill = ratio), show.legend = TRUE) +
  coord_flip() +
  labs(title = "Death ratio vs Heal Ratio of COVID-19",
       subtitle = "Ratio by Country",
       x = "Country",
       y = "Case_ratio")

ggplotly(plot2, tooltip = "text")

4.3 Objective Number 3

To see the spread of corona across the world day by day.

# World map with each polygon filled according to its `total_confirmed`
# value (binned by `pal`), drawn over the default tile layer.
m <- leaflet(data = states) %>%
  addTiles() %>%
  setView(lng = 0, lat = 0, zoom = 1.5) %>%
  addPolygons(
    color = "white",
    weight = 2,
    opacity = 1,
    dashArray = "3",
    fillColor = ~pal(total_confirmed),
    fillOpacity = 0.7
  ) %>%
  addLegend(
    position = "bottomright",
    pal = pal,
    values = ~total_confirmed,
    title = "Total Corona Confirmed",
    opacity = 0.7
  )

# Render the map
m