library(tidyverse)
library(tidylog)
library(dplyr)
library(dslabs)
library(ggplot2)
library(plotly)
# Load the data set and list the Gulf countries used in the first comparison.
d <- read.csv("processesd.csv")
g_countries <- c(
  "Saudi Arabia", "United Arab Emirates", "Bahrain",
  "Qatar", "Kuwait", "Oman"
)

# Row index of the most recent date. The column is parsed with as.Date()
# first: which.max() needs values that coerce to numbers, and since R 4.0.0
# read.csv() returns strings rather than factors, for which which.max() would
# return integer(0) and break every `date == d$date[i_max]` filter below.
# as.Date() accepts both strings and factors, so this works either way.
i_max <- which.max(as.Date(d$date, format = "%Y-%m-%d"))

# Most recent date in the data, still in the column's original type.
# (The output below was recorded from a run where `date` was read as a factor.)
d$date[i_max]
## [1] 2020-04-09
## 79 Levels: 2020-01-22 2020-01-23 2020-01-24 2020-01-25 ... 2020-04-09
# Rows for the Gulf countries on the most recent date, followed by a bar chart
# of their death counts ordered from highest to lowest.
g_gulf <- filter(
  d,
  date == d$date[i_max],
  country_region %in% g_countries
)
ggplot(
  g_gulf,
  aes(x = reorder(country_region, -deaths_num), y = deaths_num)
) +
  geom_col() +
  labs(
    title = "Saudi Arabia has the highest death cases among gulf countries",
    x = "",
    y = "number of death cases"
  )

## highest confirmed and deaths countries

# Ten countries with the most confirmed cases on the most recent date.
# The ranking only needs confirmed_num, so the rows are sorted directly; the
# previous pivot_longer() step merely duplicated each row before unique()
# collapsed the duplicates again.
highest_confirmed_countries <-
  d %>%
  filter(date == d$date[i_max]) %>%
  arrange(desc(confirmed_num)) %>%
  pull(country_region) %>%
  unique() %>%
  head(10)
highest_confirmed_countries
##  [1] US             Spain          Italy          France         Germany       
##  [6] China          Iran           United Kingdom Turkey         Belgium       
## 184 Levels: Afghanistan Albania Algeria Andorra Angola ... Zimbabwe
 # Bar chart of confirmed cases for the top-10 countries, highest first.
 # The rows are plotted as they are: reshaping to long format first repeated
 # every row once per remaining column, so each bar was drawn several times
 # on top of itself.
 d %>%
   filter(
     date == d$date[i_max],
     country_region %in% highest_confirmed_countries
   ) %>%
   ggplot(
     aes(x = reorder(country_region, -confirmed_num), y = confirmed_num)
   ) +
   # position = "dodge" is kept from the original so that rows sharing a
   # country are never summed into one bar.
   geom_col(position = "dodge") +
   labs(
     x = "",
     y = "confirmed_num",
     title = " US has highest confirmed numbers"
   )

  # Ten countries with the most deaths on the most recent date.
  # The ranking only needs deaths_num, so the rows are sorted directly; the
  # previous pivot_longer() step merely duplicated each row before unique()
  # collapsed the duplicates again.
  highest_deaths_countries <-
    d %>%
    filter(date == d$date[i_max]) %>%
    arrange(desc(deaths_num)) %>%
    pull(country_region) %>%
    unique() %>%
    head(10)
  highest_deaths_countries
##  [1] Italy          US             Spain          France         United Kingdom
##  [6] Iran           China          Germany        Belgium        Netherlands   
## 184 Levels: Afghanistan Albania Algeria Andorra Angola ... Zimbabwe
 # Bar chart of deaths for the top-10 countries, highest first.
 # The rows are plotted as they are: reshaping to long format first repeated
 # every row once per remaining column, so each bar was drawn several times
 # on top of itself.
 d %>%
   filter(
     date == d$date[i_max],
     country_region %in% highest_deaths_countries
   ) %>%
   ggplot(aes(x = reorder(country_region, -deaths_num), y = deaths_num)) +
   # position = "dodge" is kept from the original so that rows sharing a
   # country are never summed into one bar.
   geom_col(position = "dodge") +
   labs(
     x = "",
     y = "deaths_num",
     title = " Italy has highest death numbers"
   )

# Ten countries with the most recovered cases on the most recent date
# ("recorved_num" is the column's spelling in the data).
# The ranking only needs recorved_num, so the rows are sorted directly; the
# previous pivot_longer() step merely duplicated each row before unique()
# collapsed the duplicates again.
highest_recorved_countries <-
  d %>%
  filter(date == d$date[i_max]) %>%
  arrange(desc(recorved_num)) %>%
  pull(country_region) %>%
  unique() %>%
  head(10)
highest_recorved_countries
##  [1] China        Germany      Spain        Iran         Italy       
##  [6] US           France       Switzerland  Korea, South Austria     
## 184 Levels: Afghanistan Albania Algeria Andorra Angola ... Zimbabwe
# Bar chart of recovered cases for the top-10 countries, highest first.
# The rows are plotted as they are: reshaping to long format first repeated
# every row once per remaining column, so each bar was drawn several times
# on top of itself.
# NOTE(review): the plot is stored under the same name as the country vector
# it is filtered with, so from here on highest_recorved_countries holds a
# ggplot object instead of country names. The name is kept so that existing
# references keep working; consider a dedicated plot variable.
highest_recorved_countries <-
  d %>%
  filter(
    date == d$date[i_max],
    country_region %in% highest_recorved_countries
  ) %>%
  ggplot(aes(x = reorder(country_region, -recorved_num), y = recorved_num)) +
  # position = "dodge" is kept from the original so that rows sharing a
  # country are never summed into one bar.
  geom_col(position = "dodge") +
  labs(
    x = "",
    y = "recorved_num",
    title = " China has highest recorved numbers"
  )
highest_recorved_countries

## Number of deaths in Saudi Arabia compared to the top 10 countries

# The ten rows with the most deaths on the most recent date, without the
# confirmed and recovered columns.
deaths_top_countries <-
  d %>%
  filter(date == d$date[i_max]) %>%
  select(-c(confirmed_num, recorved_num)) %>%
  arrange(desc(deaths_num)) %>%
  head(n = 10)

# Saudi Arabia's row for the same date, reduced to the same columns.
Saudi <-
  d %>%
  filter(
    date == d$date[i_max],
    country_region == "Saudi Arabia"
  ) %>%
  select(-c(confirmed_num, recorved_num))

# Append Saudi Arabia below the top ten so both appear in one table.
deaths_top_countries <- rbind(deaths_top_countries, Saudi)


# Horizontal bar chart of deaths for the top-10 countries plus Saudi Arabia.
# labs() names aesthetics, not screen positions: after coord_flip() the x
# aesthetic (country) is drawn on the vertical axis and the y aesthetic
# (deaths) on the horizontal one, so the death-count label belongs to y.
deaths_top_countries %>%
  ggplot(aes(x = reorder(country_region, deaths_num), y = deaths_num)) +
  geom_col() +
  # Draws Saudi Arabia's bar a second time, on top of the first layer.
  geom_col(data = Saudi) +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  labs(
    x = "",
    y = "number of death cases",
    title = "Saudi Arabia has the lowest number of death cases compared with other countries"
  )

## Comparing Saudi Arabia to countries in the same range of number of deaths

## Saudi Arabia has the highest number of recovered cases.

# Countries whose confirmed count on the most recent date lies strictly
# between 3000 and 4000, drawn as horizontal grouped bars: one group per
# country, one bar per column other than X, date and country_region.
# NOTE(review): the heading above this chunk speaks of deaths, but the filter
# selects on confirmed_num; confirm which one is intended.
d %>%
  filter(
    date == d$date[i_max],
    confirmed_num < 4000 & confirmed_num > 3000
  ) %>%
  arrange(desc(confirmed_num)) %>%
  pivot_longer(
    -c(X, date, country_region),
    names_to = "case_type"
  ) %>%
  ggplot(
    aes(
      x = reorder(country_region, value),
      y = value,
      fill = case_type
    )
  ) +
  geom_col(position = "dodge") +
  coord_flip()

# Names of the countries whose confirmed count on the most recent date lies
# strictly between 3000 and 4000.
similar_countries <-
  d %>%
  filter(
    date == d$date[i_max],
    confirmed_num < 4000 & confirmed_num > 3000
  ) %>%
  pull(country_region)
# Confirmed cases over time for the similar countries, one colour per country.
d %>%
  filter(country_region %in% similar_countries) %>%
  ggplot(aes(x = date, y = confirmed_num, colour = country_region)) +
  geom_point()

# Called without arguments, ggplotly() converts the most recently created
# ggplot, i.e. the scatter plot built just above.
ggplotly()
# Confirmed-case history of the similar countries. day_num numbers each
# country's rows in date order, starting at 1 on its first day with at least
# one confirmed case.
d_similar <-
  d %>%
  filter(confirmed_num > 0) %>%
  arrange(date) %>%
  group_by(country_region) %>%
  # row_number() rather than 1:n(): 1:n() counts backwards (1, 0) when a
  # group is empty, whereas row_number() returns an empty vector.
  mutate(day_num = row_number()) %>%
  # Drop the grouping so later verbs on d_similar do not silently run
  # per country.
  ungroup() %>%
  filter(country_region %in% similar_countries) %>%
  select(-deaths_num, -recorved_num)


# Confirmed cases against day_num, one line per similar country; each line is
# labelled with its country name at the most recent date.
ggplot(
  d_similar,
  aes(x = day_num, y = confirmed_num, colour = country_region)
) +
  geom_line() +
  geom_text(
    aes(label = country_region),
    data = filter(d_similar, date == d$date[i_max])
  ) +
  labs(x = "days since confirmed case")

# Interactive version of the plot built just above (ggplotly() defaults to
# the most recently created ggplot).
ggplotly()
# Confirmed-case history of every country from the point it had at least 100
# confirmed cases. day_num numbers each country's rows in date order,
# starting at 1 on the first such day.
d_all_100 <-
  d %>%
  filter(confirmed_num >= 100) %>%
  arrange(date) %>%
  group_by(country_region) %>%
  # row_number() rather than 1:n(): 1:n() counts backwards (1, 0) when a
  # group is empty, whereas row_number() returns an empty vector.
  mutate(day_num = row_number()) %>%
  # Drop the grouping so later verbs on d_all_100 do not silently run
  # per country.
  ungroup() %>%
  select(-deaths_num, -recorved_num)

 # Confirmed cases against day_num for every country in d_all_100, one line
 # per country, labelled at the most recent date; the legend is hidden
 # because the lines are labelled directly.
 d_all_100 %>%
   ggplot(aes(x = day_num, y = confirmed_num, col = country_region)) +
   geom_line() +
   geom_text(
     data = d_all_100 %>% filter(date == d$date[i_max]),
     aes(label = country_region)
   ) +
   labs(
     # day_num starts on the first day with at least 100 confirmed cases
     # (see the filter that builds d_all_100), not on the first case.
     x = "days since reaching 100 confirmed cases"
   ) +
   theme(
     legend.position = "none"
   )

 # Interactive version of the plot built just above (ggplotly() defaults to
 # the most recently created ggplot).
 ggplotly()
# Confirmed-case history of every country from the point it had at least 1400
# confirmed cases. day_num numbers each country's rows in date order,
# starting at 1 on the first such day.
d_all_1400 <-
  d %>%
  filter(confirmed_num >= 1400) %>%
  arrange(date) %>%
  group_by(country_region) %>%
  # row_number() rather than 1:n(): 1:n() counts backwards (1, 0) when a
  # group is empty, whereas row_number() returns an empty vector.
  mutate(day_num = row_number()) %>%
  # Drop the grouping so later verbs on d_all_1400 do not silently run
  # per country.
  ungroup() %>%
  select(-deaths_num, -recorved_num)

# Confirmed cases against day_num for every country in d_all_1400, one line
# per country, labelled at the most recent date; the legend is hidden because
# the lines are labelled directly.
d_all_1400 %>%
  ggplot(aes(x = day_num, y = confirmed_num, col = country_region)) +
  geom_line() +
  geom_text(
    data = d_all_1400 %>% filter(date == d$date[i_max]),
    aes(label = country_region)
  ) +
  labs(
    # day_num starts on the first day with at least 1400 confirmed cases
    # (see the filter that builds d_all_1400), not on the first case.
    x = "days since reaching 1400 confirmed cases"
  ) +
  theme(
    legend.position = "none"
  )

# Interactive version of the plot built just above (ggplotly() defaults to
# the most recently created ggplot).
ggplotly()