Calling libs

library(tidyverse)
library(tidylog)
library(dplyr)
library(dslabs)
library(ggplot2)
library(plotly)

Reading data

# Load the pre-processed case counts. The columns used below are `date`,
# `country_region`, `deaths_num`, `confirmed_num` and `recovered_num`.
d <- read_csv(file = "processed.csv")

# Day-over-day growth of confirmed cases within each country, computed as
# (present / past) - 1. The result is only printed; `d` is left unchanged.
d %>%
  arrange(date) %>%
  group_by(country_region) %>%
  filter(confirmed_num > 0) %>%
  mutate(
    growth_confirmed = (confirmed_num / lag(confirmed_num)) - 1
  )
## # A tibble: 8,903 x 6
## # Groups:   country_region [185]
##    date       country_region deaths_num confirmed_num recovered_num
##    <date>     <chr>               <dbl>         <dbl>         <dbl>
##  1 2020-01-22 China                  17           548            28
##  2 2020-01-22 Japan                   0             2             0
##  3 2020-01-22 Korea, South            0             1             0
##  4 2020-01-22 Taiwan*                 0             1             0
##  5 2020-01-22 Thailand                0             2             0
##  6 2020-01-22 US                      0             1             0
##  7 2020-01-23 China                  18           643            30
##  8 2020-01-23 Japan                   0             2             0
##  9 2020-01-23 Korea, South            0             1             0
## 10 2020-01-23 Singapore               0             1             0
## # … with 8,893 more rows, and 1 more variable: growth_confirmed <dbl>

Gulf countries

# The six countries compared in the Gulf section.
g_countries <- c(
  "Saudi Arabia",
  "United Arab Emirates",
  "Bahrain",
  "Qatar",
  "Kuwait",
  "Oman"
)

# Row index of the most recent date in the data.
i_max <- which.max(d[["date"]])

d[["date"]][i_max]
## [1] "2020-04-20"

# Snapshot of the most recent date, restricted to the Gulf countries.
g_gulf <- d %>%
  filter(
    date == d[["date"]][i_max],
    country_region %in% g_countries
  )

Saudi Arabia has the highest number of deaths among the Gulf countries, but the number is reasonable given the size of the kingdom's population.

# Bar chart of deaths per Gulf country, ordered from most to fewest deaths.
ggplot(
  data = g_gulf,
  mapping = aes(
    x = reorder(country_region, -deaths_num),
    y = deaths_num
  )
) +
  geom_col() +
  labs(
    title = "Saudi Arabia has the highest death cases among gulf countries",
    x = "",
    y = "number of death cases"
  )

Countries with the highest numbers of confirmed cases and deaths

The US has the highest number of confirmed cases.

# Names of the ten countries with the most confirmed cases on the latest date.
# The snapshot is sorted directly; no reshaping is needed to rank countries.
# unique() is kept so a country can never appear twice in the result.
highest_confirmed_countries <-
  d %>%
  filter(date == d$date[i_max]) %>%
  arrange(desc(confirmed_num)) %>%
  pull(country_region) %>%
  unique() %>%
  head(10)

highest_confirmed_countries
##  [1] "US"             "Spain"          "Italy"          "France"        
##  [5] "Germany"        "United Kingdom" "Turkey"         "China"         
##  [9] "Iran"           "Russia"
# Bar chart of confirmed cases on the latest date for the countries named in
# `highest_confirmed_countries`, ordered from most to fewest cases.
# The snapshot rows are plotted as they are (no reshaping), so each row is
# drawn exactly once.
# NOTE: after this assignment `highest_confirmed_countries` holds the plot
# object, no longer the character vector of country names.
highest_confirmed_countries <-
  d %>%
  filter(
    date == d$date[i_max],
    country_region %in% highest_confirmed_countries
  ) %>%
  ggplot(aes(
    x = reorder(country_region, -confirmed_num),
    y = confirmed_num
  )) +
  geom_col(position = "dodge") +
  labs(
    x = "",
    y = "confirmed_num",
    title = " US has highest confirmed numbers"
  )

highest_confirmed_countries

  # Names of the ten countries with the most deaths on the latest date.
  # The snapshot is sorted directly; no reshaping is needed to rank countries.
  # unique() is kept so a country can never appear twice in the result.
  highest_deaths_countries <-
    d %>%
    filter(date == d$date[i_max]) %>%
    arrange(desc(deaths_num)) %>%
    pull(country_region) %>%
    unique() %>%
    head(10)

  highest_deaths_countries
##  [1] "US"             "Italy"          "Spain"          "France"        
##  [5] "United Kingdom" "Belgium"        "Iran"           "Germany"       
##  [9] "China"          "Netherlands"

The US has the highest number of deaths.

# Bar chart of deaths on the latest date for the countries named in
# `highest_deaths_countries`, ordered from most to fewest deaths.
# The snapshot rows are plotted as they are (no reshaping), so each row is
# drawn exactly once.
# NOTE: after this assignment `highest_deaths_countries` holds the plot
# object, no longer the character vector of country names.
highest_deaths_countries <-
  d %>%
  filter(
    date == d$date[i_max],
    country_region %in% highest_deaths_countries
  ) %>%
  ggplot(aes(
    x = reorder(country_region, -deaths_num),
    y = deaths_num
  )) +
  geom_col(position = "dodge") +
  labs(
    x = "",
    y = "deaths_num",
    title = " US has highest death numbers"
  )

highest_deaths_countries

# Names of the ten countries with the most recovered cases on the latest date.
# The snapshot is sorted directly; no reshaping is needed to rank countries.
# unique() is kept so a country can never appear twice in the result.
highest_recovered_countries <-
  d %>%
  filter(date == d$date[i_max]) %>%
  arrange(desc(recovered_num)) %>%
  pull(country_region) %>%
  unique() %>%
  head(10)

highest_recovered_countries
##  [1] "Germany"     "Spain"       "China"       "US"          "Iran"       
##  [6] "Italy"       "France"      "Brazil"      "Switzerland" "Turkey"

Germany has the highest number of recovered cases.

# Bar chart of recovered cases on the latest date for the countries named in
# `highest_recovered_countries`, ordered from most to fewest recoveries.
# The snapshot rows are plotted as they are (no reshaping), so each row is
# drawn exactly once.
# NOTE: after this assignment `highest_recovered_countries` holds the plot
# object, no longer the character vector of country names.
highest_recovered_countries <-
  d %>%
  filter(
    date == d$date[i_max],
    country_region %in% highest_recovered_countries
  ) %>%
  ggplot(aes(
    x = reorder(country_region, -recovered_num),
    y = recovered_num
  )) +
  geom_col(position = "dodge") +
  labs(
    x = "",
    y = "recovered_num",
    title = " Germany has highest recovery numbers"
  )

highest_recovered_countries

Number of deaths in Saudi Arabia compared with the top 10 countries

# Latest-date death counts for the ten countries with the most deaths.
deaths_top_countries <-
  d %>%
  filter(date == d$date[i_max]) %>%
  select(-confirmed_num, -recovered_num) %>%
  arrange(desc(deaths_num)) %>%
  head(10)

# Saudi Arabia's row for the same date; used both as an extra bar and as the
# highlight layer in the chart below.
Saudi <-
  d %>%
  filter(date == d$date[i_max], country_region == "Saudi Arabia") %>%
  select(-confirmed_num, -recovered_num)

# Append Saudi Arabia to the top ten. distinct() keeps the row from being
# counted twice (geom_col() stacks duplicate bars) should Saudi Arabia
# already be part of the top ten.
deaths_top_countries <-
  deaths_top_countries %>%
  bind_rows(Saudi) %>%
  distinct()

# Horizontal bar chart, countries ordered by death count.
deaths_top_countries %>%
  ggplot(aes(x = reorder(country_region, deaths_num), y = deaths_num)) +
  geom_col() +
  # Redraw Saudi Arabia with a contrasting fill so that its bar stands out;
  # without an explicit fill this layer would be indistinguishable from the
  # one underneath.
  geom_col(data = Saudi, fill = "firebrick") +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  # labs() names aesthetics, not screen axes: after coord_flip() the `y`
  # aesthetic (deaths) runs horizontally and `x` (country) vertically.
  labs(
    x = "",
    y = "number of death cases",
    title = "Saudi Arabia has the lowest number of death cases compared with other countries"
  )

Comparing Saudi Arabia with countries that have similar numbers of confirmed cases, deaths, and recoveries

Saudi Arabia has the third highest number of confirmed cases

# Countries whose confirmed count on the latest date lies strictly between
# 10,000 and 10,700.
similar_countries <- d %>%
  filter(
    date == d$date[i_max],
    confirmed_num < 10700,
    confirmed_num > 10000
  ) %>%
  pull(country_region)

# Confirmed cases over time for those countries, one colour per country.
# The name is reused: it now refers to the ggplot object rather than to the
# vector of country names.
similar_countries <- d %>%
  filter(country_region %in% similar_countries) %>%
  ggplot(aes(x = date, y = confirmed_num, col = country_region)) +
  geom_point()

# Interactive version of the plot built above.
ggplotly(p = similar_countries)

Saudi Arabia has the second highest number of death cases

# Countries with strictly between 100 and 130 deaths on the latest date,
# drawn as horizontal bars. The columns left after select() are pivoted into
# `case_type` / `value` pairs, and bars are ordered by `value`.
d %>%
  filter(
    date == d$date[i_max],
    deaths_num < 130,
    deaths_num > 100
  ) %>%
  arrange(desc(confirmed_num)) %>%
  select(-recovered_num, -confirmed_num) %>%
  pivot_longer(
    cols = -c(date, country_region),
    names_to = "case_type"
  ) %>%
  ggplot(aes(
    x = reorder(country_region, value),
    y = value,
    fill = case_type
  )) +
  geom_col(position = "dodge") +
  coord_flip()

Saudi Arabia has the fourth highest number of recovered cases.

# Countries with strictly between 1,480 and 2,000 recovered cases on the
# latest date, drawn as horizontal bars. The columns left after select() are
# pivoted into `case_type` / `value` pairs, and bars are ordered by `value`.
d %>%
  filter(
    date == d$date[i_max],
    recovered_num < 2000,
    recovered_num > 1480
  ) %>%
  arrange(desc(confirmed_num)) %>%
  select(-deaths_num, -confirmed_num) %>%
  pivot_longer(
    cols = -c(date, country_region),
    names_to = "case_type"
  ) %>%
  ggplot(aes(
    x = reorder(country_region, value),
    y = value,
    fill = case_type
  )) +
  geom_col(position = "dodge") +
  coord_flip()

# Confirmed-case trajectories aligned on the point where each country has at
# least 5 confirmed cases. `day_num` numbers each country's remaining rows in
# date order, starting at 1.
d_similar <-
  d %>%
  filter(confirmed_num >= 5) %>%
  arrange(date) %>%
  group_by(country_region) %>%
  mutate(day_num = row_number()) %>%
  # Drop the grouping so it does not leak into later operations.
  ungroup() %>%
  select(-deaths_num, -recovered_num)

# One line per country, with the country name placed at its latest-date point.
p_similar <-
  d_similar %>%
  ggplot(aes(x = day_num, y = confirmed_num, col = country_region)) +
  geom_line() +
  geom_text(
    data = d_similar %>% filter(date == d$date[i_max]),
    aes(label = country_region)
  ) +
  labs(
    # The series starts at 5 confirmed cases (the filter threshold), not at
    # the first confirmed case.
    x = "days since 5 confirmed cases"
  ) +
  theme(
    legend.position = "none"
  )

p_similar

# Pass the plot explicitly instead of relying on ggplot2::last_plot().
ggplotly(p_similar)
# Confirmed-case trajectories aligned on the point where each country has at
# least 60,000 confirmed cases. `day_num` numbers each country's remaining
# rows in date order, starting at 1.
d_all_100 <-
  d %>%
  filter(confirmed_num >= 60000) %>%
  arrange(date) %>%
  group_by(country_region) %>%
  mutate(day_num = row_number()) %>%
  # Drop the grouping so it does not leak into later operations.
  ungroup() %>%
  select(-deaths_num, -recovered_num)

# One line per country, with the country name placed at its latest-date point.
p_all_100 <-
  d_all_100 %>%
  ggplot(aes(x = day_num, y = confirmed_num, col = country_region)) +
  geom_line() +
  geom_text(
    data = d_all_100 %>% filter(date == d$date[i_max]),
    aes(label = country_region)
  ) +
  labs(
    # The series starts at 60,000 confirmed cases (the filter threshold),
    # not at the first confirmed case.
    x = "days since 60,000 confirmed cases"
  ) +
  theme(
    legend.position = "none"
  )

p_all_100

# Pass the plot explicitly instead of relying on ggplot2::last_plot().
ggplotly(p_all_100)
# Confirmed-case trajectories aligned on the point where each country has at
# least 100,000 confirmed cases. `day_num` numbers each country's remaining
# rows in date order, starting at 1.
d_all_1400 <-
  d %>%
  filter(confirmed_num >= 100000) %>%
  arrange(date) %>%
  group_by(country_region) %>%
  mutate(day_num = row_number()) %>%
  # Drop the grouping so it does not leak into later operations.
  ungroup() %>%
  select(-deaths_num, -recovered_num)

# One line per country, with the country name placed at its latest-date point.
p_all_1400 <-
  d_all_1400 %>%
  ggplot(aes(x = day_num, y = confirmed_num, col = country_region)) +
  geom_line() +
  geom_text(
    data = d_all_1400 %>% filter(date == d$date[i_max]),
    aes(label = country_region)
  ) +
  labs(
    # The series starts at 100,000 confirmed cases (the filter threshold),
    # not at the first confirmed case.
    x = "days since 100,000 confirmed cases"
  ) +
  theme(
    legend.position = "none"
  )

p_all_1400

# Pass the plot explicitly instead of relying on ggplot2::last_plot().
ggplotly(p_all_1400)