library(tidyverse)
library(tidylog)
library(dplyr)
library(dslabs)
library(ggplot2)
library(plotly)
# Load the pre-processed case counts. The rest of the script uses the columns
# X, date, country_region, confirmed_num, deaths_num and recorved_num.
d <- read.csv("processesd.csv")

# Gulf countries compared in the first chart.
g_countries <- c(
  "Saudi Arabia", "United Arab Emirates", "Bahrain",
  "Qatar", "Kuwait", "Oman"
)

# Row index of the most recent date. The dates are parsed with as.Date()
# before ranking: since R 4.0 read.csv() returns the column as character, and
# which.max() cannot rank date strings (it only worked while the column was a
# factor). The column itself is left untouched, so d$date[i_max] keeps its
# original type for the `date == d$date[i_max]` filters below.
i_max <- which.max(as.Date(d$date))
d$date[i_max]
# Recorded output (the column was a factor when this was captured):
## [1] 2020-04-09
## 79 Levels: 2020-01-22 2020-01-23 2020-01-24 2020-01-25 ... 2020-04-09
# Latest-day snapshot restricted to the countries listed in g_countries.
g_gulf <- filter(
  d,
  date == d$date[i_max],
  country_region %in% g_countries
)

# Bar chart of deaths per Gulf country, tallest bar first.
ggplot(
  data = g_gulf,
  mapping = aes(x = reorder(country_region, -deaths_num), y = deaths_num)
) +
  geom_col() +
  labs(
    title = "Saudi Arabia has the highest death cases among gulf countries",
    y = "number of death cases",
    x = ""
  )
## highest confirmed and deaths countries

# Names of the ten countries with the most confirmed cases on the latest date.
# The snapshot is ranked directly; reshaping it to long format first only
# duplicated rows that unique() then had to remove again.
highest_confirmed_countries <-
  d %>%
  filter(date == d$date[i_max]) %>%
  arrange(desc(confirmed_num)) %>%
  pull(country_region) %>%
  unique() %>%
  head(10)
highest_confirmed_countries
## [1] US Spain Italy France Germany
## [6] China Iran United Kingdom Turkey Belgium
## 184 Levels: Afghanistan Albania Algeria Andorra Angola ... Zimbabwe
# Bar chart of confirmed cases for the ten countries selected above, tallest
# bar first. The data is plotted one row per country: the former pivot_longer()
# step duplicated every row, and position = "dodge" was only needed to stop
# those duplicates from stacking into double-height bars.
d %>%
  filter(
    date == d$date[i_max],
    country_region %in% highest_confirmed_countries
  ) %>%
  ggplot(aes(x = reorder(country_region, -confirmed_num), y = confirmed_num)) +
  geom_col() +
  labs(
    x = "",
    y = "confirmed_num",
    title = " US has highest confirmed numbers"
  )
# Names of the ten countries with the most deaths on the latest date.
# The snapshot is ranked directly; reshaping it to long format first only
# duplicated rows that unique() then had to remove again.
highest_deaths_countries <-
  d %>%
  filter(date == d$date[i_max]) %>%
  arrange(desc(deaths_num)) %>%
  pull(country_region) %>%
  unique() %>%
  head(10)
highest_deaths_countries
## [1] Italy US Spain France United Kingdom
## [6] Iran China Germany Belgium Netherlands
## 184 Levels: Afghanistan Albania Algeria Andorra Angola ... Zimbabwe
# Bar chart of deaths for the ten countries selected above, tallest bar first.
# The data is plotted one row per country: the former pivot_longer() step
# duplicated every row, and position = "dodge" was only needed to stop those
# duplicates from stacking into double-height bars.
d %>%
  filter(
    date == d$date[i_max],
    country_region %in% highest_deaths_countries
  ) %>%
  ggplot(aes(x = reorder(country_region, -deaths_num), y = deaths_num)) +
  geom_col() +
  labs(
    x = "",
    y = "deaths_num",
    title = " Italy has highest death numbers"
  )
# Names of the ten countries with the most recovered cases on the latest date
# ("recorved_num" is the column name as spelled in the data file).
# The snapshot is ranked directly; reshaping it to long format first only
# duplicated rows that unique() then had to remove again.
highest_recorved_countries <-
  d %>%
  filter(date == d$date[i_max]) %>%
  arrange(desc(recorved_num)) %>%
  pull(country_region) %>%
  unique() %>%
  head(10)
highest_recorved_countries
## [1] China Germany Spain Iran Italy
## [6] US France Switzerland Korea, South Austria
## 184 Levels: Afghanistan Albania Algeria Andorra Angola ... Zimbabwe
# Bar chart of recovered cases for the ten countries selected above, tallest
# bar first. The plot is printed directly, like the confirmed and deaths
# charts, instead of being assigned to highest_recorved_countries: that
# assignment replaced the vector of country names with a ggplot object.
# The redundant pivot_longer() / position = "dodge" pair is dropped as well,
# since it only duplicated each row and then hid the duplicate.
d %>%
  filter(
    date == d$date[i_max],
    country_region %in% highest_recorved_countries
  ) %>%
  ggplot(aes(x = reorder(country_region, -recorved_num), y = recorved_num)) +
  geom_col() +
  labs(
    x = "",
    y = "recorved_num",
    title = " China has highest recorved numbers"
  )
# Latest-day rows of the ten countries with the most deaths, keeping only the
# deaths count.
deaths_top_countries <-
  d %>%
  filter(date == d$date[i_max]) %>%
  select(-confirmed_num, -recorved_num) %>%
  arrange(desc(deaths_num)) %>%
  head(10)

# Saudi Arabia's latest-day row in the same shape, for comparison.
Saudi <-
  d %>%
  filter(date == d$date[i_max], country_region == "Saudi Arabia") %>%
  select(-confirmed_num, -recorved_num)

# Append Saudi Arabia to the top ten. distinct() guards against the row being
# added twice (and its bar stacking to double height) should Saudi Arabia
# already be part of the top ten.
deaths_top_countries <-
  deaths_top_countries %>%
  bind_rows(Saudi) %>%
  distinct()

# Horizontal bar chart, smallest count at the bottom.
deaths_top_countries %>%
  ggplot(aes(x = reorder(country_region, deaths_num), y = deaths_num)) +
  geom_col() +
  # Overlay layer for Saudi Arabia; it needs its own fill, otherwise it is
  # drawn in the same colour as the bar underneath and cannot be seen.
  geom_col(data = Saudi, fill = "firebrick") +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  # labs() names aesthetics, not screen positions: after coord_flip() the
  # y aesthetic (deaths) is the horizontal axis, so it carries the label.
  labs(
    x = "",
    y = "number of death cases",
    title = "Saudi Arabia has the lowest number of death cases compared with other countries"
  )
## Saudi Arabia has the highest number of recovered cases.
# Countries whose latest confirmed count lies strictly between 3000 and 4000:
# reshape the count columns to long format and draw them side by side per
# country as horizontal bars, one colour per case type.
d %>%
  filter(
    date == d$date[i_max],
    confirmed_num > 3000,
    confirmed_num < 4000
  ) %>%
  arrange(desc(confirmed_num)) %>%
  pivot_longer(
    cols = -c(X, date, country_region),
    names_to = "case_type"
  ) %>%
  ggplot(
    mapping = aes(
      x = reorder(country_region, value),
      y = value,
      fill = case_type
    )
  ) +
  geom_col(position = "dodge") +
  coord_flip()
# Names of the countries whose confirmed count on the latest date lies
# strictly between 3000 and 4000.
similar_countries <- d %>%
  filter(
    date == d$date[i_max],
    confirmed_num > 3000,
    confirmed_num < 4000
  ) %>%
  pull(country_region)
# Confirmed cases over time for the countries in similar_countries, one point
# colour per country.
ggplot(
  data = filter(d, country_region %in% similar_countries),
  mapping = aes(x = date, y = confirmed_num)
) +
  geom_point(mapping = aes(color = country_region))

# Interactive version of the plot just built (ggplotly() defaults to the
# most recent ggplot).
ggplotly()
# Confirmed-case history of the countries in similar_countries, with day_num
# numbering each country's rows from its first row with a confirmed case.
d_similar <-
  d %>%
  filter(confirmed_num > 0) %>%
  arrange(date) %>%
  group_by(country_region) %>%
  mutate(day_num = row_number()) %>%
  # Drop the grouping once the per-country counter exists, so later verbs on
  # d_similar are not silently applied per country.
  ungroup() %>%
  filter(country_region %in% similar_countries) %>%
  select(-deaths_num, -recorved_num)

# One line per country; each line is labelled at its latest-date point.
d_similar %>%
  ggplot(aes(x = day_num, y = confirmed_num, col = country_region)) +
  geom_line() +
  geom_text(
    data = d_similar %>% filter(date == d$date[i_max]),
    aes(label = country_region)
  ) +
  labs(
    x = "days since first confirmed case"
  )

# Interactive version of the plot just built.
ggplotly()
# Confirmed-case history of every country from the point it has at least 100
# confirmed cases; day_num numbers each country's rows from that point.
d_all_100 <-
  d %>%
  filter(confirmed_num >= 100) %>%
  arrange(date) %>%
  group_by(country_region) %>%
  mutate(day_num = row_number()) %>%
  # Drop the grouping once the per-country counter exists, so later verbs on
  # d_all_100 are not silently applied per country.
  ungroup() %>%
  select(-deaths_num, -recorved_num)

# One line per country, labelled at its latest-date point; the legend is
# hidden because the text labels already identify the lines.
d_all_100 %>%
  ggplot(aes(x = day_num, y = confirmed_num, col = country_region)) +
  geom_line() +
  geom_text(
    data = d_all_100 %>% filter(date == d$date[i_max]),
    aes(label = country_region)
  ) +
  labs(
    # day_num starts at the first row with >= 100 cases, not at the first case.
    x = "days since 100 confirmed cases"
  ) +
  theme(
    legend.position = "none"
  )

# Interactive version of the plot just built.
ggplotly()
# Confirmed-case history of every country from the point it has at least 1400
# confirmed cases; day_num numbers each country's rows from that point.
d_all_1400 <-
  d %>%
  filter(confirmed_num >= 1400) %>%
  arrange(date) %>%
  group_by(country_region) %>%
  mutate(day_num = row_number()) %>%
  # Drop the grouping once the per-country counter exists, so later verbs on
  # d_all_1400 are not silently applied per country.
  ungroup() %>%
  select(-deaths_num, -recorved_num)

# One line per country, labelled at its latest-date point; the legend is
# hidden because the text labels already identify the lines.
d_all_1400 %>%
  ggplot(aes(x = day_num, y = confirmed_num, col = country_region)) +
  geom_line() +
  geom_text(
    data = d_all_1400 %>% filter(date == d$date[i_max]),
    aes(label = country_region)
  ) +
  labs(
    # day_num starts at the first row with >= 1400 cases, not at the first case.
    x = "days since 1400 confirmed cases"
  ) +
  theme(
    legend.position = "none"
  )

# Interactive version of the plot just built.
ggplotly()