library(ggplot2)
library(dplyr)
library(Hmisc) # %nin%
# Read the raw daily table from the working directory (presumably the
# Our World in Data COVID-19 export, going by the file name) and print a
# column-by-column overview of it.
data <- read.csv(file = "owid-covid-data.csv")
dplyr::glimpse(data)
## Rows: 95,743
## Columns: 60
## $ iso_code <chr> "AFG", "AFG", "AFG", "AFG", "AFG~
## $ continent <chr> "Asia", "Asia", "Asia", "Asia", ~
## $ location <chr> "Afghanistan", "Afghanistan", "A~
## $ date <chr> "2020-02-24", "2020-02-25", "202~
## $ total_cases <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 4,~
## $ new_cases <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0,~
## $ new_cases_smoothed <dbl> NA, NA, NA, NA, NA, 0.143, 0.143~
## $ total_deaths <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_deaths <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_deaths_smoothed <dbl> NA, NA, NA, NA, NA, 0, 0, 0, 0, ~
## $ total_cases_per_million <dbl> 0.026, 0.026, 0.026, 0.026, 0.02~
## $ new_cases_per_million <dbl> 0.026, 0.000, 0.000, 0.000, 0.00~
## $ new_cases_smoothed_per_million <dbl> NA, NA, NA, NA, NA, 0.004, 0.004~
## $ total_deaths_per_million <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_deaths_per_million <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_deaths_smoothed_per_million <dbl> NA, NA, NA, NA, NA, 0, 0, 0, 0, ~
## $ reproduction_rate <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ icu_patients <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ icu_patients_per_million <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ hosp_patients <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ hosp_patients_per_million <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ weekly_icu_admissions <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ weekly_icu_admissions_per_million <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ weekly_hosp_admissions <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ weekly_hosp_admissions_per_million <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_tests <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ total_tests <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ total_tests_per_thousand <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_tests_per_thousand <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_tests_smoothed <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_tests_smoothed_per_thousand <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ positive_rate <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ tests_per_case <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ tests_units <chr> "", "", "", "", "", "", "", "", ~
## $ total_vaccinations <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ people_vaccinated <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ people_fully_vaccinated <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_vaccinations <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_vaccinations_smoothed <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ total_vaccinations_per_hundred <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ people_vaccinated_per_hundred <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ people_fully_vaccinated_per_hundred <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_vaccinations_smoothed_per_million <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ stringency_index <dbl> 8.33, 8.33, 8.33, 8.33, 8.33, 8.~
## $ population <dbl> 38928341, 38928341, 38928341, 38~
## $ population_density <dbl> 54.422, 54.422, 54.422, 54.422, ~
## $ median_age <dbl> 18.6, 18.6, 18.6, 18.6, 18.6, 18~
## $ aged_65_older <dbl> 2.581, 2.581, 2.581, 2.581, 2.58~
## $ aged_70_older <dbl> 1.337, 1.337, 1.337, 1.337, 1.33~
## $ gdp_per_capita <dbl> 1803.987, 1803.987, 1803.987, 18~
## $ extreme_poverty <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ cardiovasc_death_rate <dbl> 597.029, 597.029, 597.029, 597.0~
## $ diabetes_prevalence <dbl> 9.59, 9.59, 9.59, 9.59, 9.59, 9.~
## $ female_smokers <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ male_smokers <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ handwashing_facilities <dbl> 37.746, 37.746, 37.746, 37.746, ~
## $ hospital_beds_per_thousand <dbl> 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.~
## $ life_expectancy <dbl> 64.83, 64.83, 64.83, 64.83, 64.8~
## $ human_development_index <dbl> 0.511, 0.511, 0.511, 0.511, 0.51~
## $ excess_mortality <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
# Largest non-missing value of `x`, never below zero.
# Returns 0 when `x` holds no observed value at all, instead of the -Inf
# (plus a warning) that max(x, na.rm = TRUE) produces for an all-NA vector.
max_count <- function(x) {
  x <- x[!is.na(x)]
  if (length(x) == 0) {
    return(0)
  }
  max(max(x), 0)
}

# Collapse the daily records into one row per location:
#   continent - the location's continent (constant within a location)
#   pop       - largest reported population, NA when none is reported
#   cases     - largest reported total_cases
#   deaths    - largest reported total_deaths
#   vac1      - largest daily value of
#               people_vaccinated - people_fully_vaccinated
#   vac2      - largest reported people_fully_vaccinated
# Counts that were never reported (or would be negative) become 0.
# summarise() drops the single grouping level, so the result is ungrouped.
data <- data %>%
  group_by(location) %>%
  summarise(
    continent = max(continent),
    # Keep an unknown population as NA: a -Inf denominator would silently
    # turn every per-capita ratio into 0 further down.
    pop = if (all(is.na(population))) {
      NA_real_
    } else {
      max(population, na.rm = TRUE)
    },
    cases = max_count(total_cases),
    deaths = max_count(total_deaths),
    vac1 = max_count(people_vaccinated - people_fully_vaccinated),
    vac2 = max_count(people_fully_vaccinated)
  )
glimpse(data)
## Rows: 229
## Columns: 7
## $ location <chr> "Afghanistan", "Africa", "Albania", "Algeria", "Andorra", "A~
## $ continent <chr> "Asia", "", "Europe", "Africa", "Europe", "Africa", "North A~
## $ pop <dbl> 38928341, 1340598113, 2877800, 43851043, 77265, 32866268, 15~
## $ cases <dbl> 91458, 5057604, 132461, 133742, 13826, 36790, 0, 1263, 41454~
## $ deaths <dbl> 3612, 135003, 2453, 3579, 127, 836, 0, 42, 86029, 4488, 0, 7~
## $ vac1 <dbl> 393254, 19098256, 259335, 0, 22909, 546182, 5332, 29279, 970~
## $ vac2 <dbl> 177266, 11546311, 332173, 0, 10938, 447704, 5818, 23844, 333~
# Keep only individual countries/territories: drop the world, continent and
# other aggregate rows so they are not plotted as if they were countries.
# "Oceania" and "International" were missing from the original list.
# The `continent != ""` condition additionally removes any aggregate that is
# not listed by name; it assumes aggregate rows carry an empty continent
# (as the "Africa" row does in this data) -- TODO confirm against the
# dataset codebook if new location types appear.
data <- data %>%
  filter(
    !(location %in% c(
      "World",
      "Asia",
      "Europe",
      "North America",
      "European Union",
      "South America",
      "Africa",
      "Oceania",
      "International"
    )),
    continent != ""
  )
# Reference list: the 15 largest economies by GDP, in US$ trillions
# (the source and year of these figures are not recorded in this script).
#   United States:  20.933
#   China:          14.723
#   Japan:           5.049
#   Germany:         3.803
#   United Kingdom:  2.711
#   India:           2.709
#   France:          2.599
#   Italy:           1.885
#   Canada:          1.643
#   South Korea:     1.631
#   Russia:          1.474
#   Brazil:          1.434
#   Australia:       1.359
#   Spain:           1.278
#   Mexico:          1.076
#
# Locations to highlight and label in the chart. Each entry must match the
# `location` column exactly, hence "France" (the original "French" matched
# no row, so France was never highlighted).
names <- c(
  "Brazil",
  "United States",
  "Canada",
  "Mexico",
  "Germany",
  "United Kingdom",
  "France",
  "Italy",
  "Spain",
  "Russia",
  "India",
  "South Korea",
  "China",
  "Japan",
  "Australia"
)
# Colour of each legend group in the chart. The vector is named so that a
# manual colour scale matches colours to groups by name; unnamed values are
# assigned by the alphabetical position of the groups present in the data
# and would silently shift if a group appeared or disappeared.
# The name/colour pairs reproduce the positional assignment for the groups
# Asia, Europe, North America, Oceania, Others and South America.
colors <- c(
  "Asia" = "#F28B30",
  "Europe" = "#BF0A3A",
  "North America" = "#022873",
  "Oceania" = "#F23D6D",
  "Others" = "gray",
  "South America" = "#03A62C"
)
# Tag every location for the chart:
#   aux       - "a" for locations outside `names`, "z" for Brazil and "b"
#               for the remaining locations listed in `names`
#   Continent - legend group: the location's own continent when it is
#               listed in `names`, "Others" for everything else
data <- data %>%
  mutate(
    aux = case_when(
      !(location %in% names) ~ "a",
      location == "Brazil" ~ "z",
      TRUE ~ "b"
    ),
    Continent = if_else(aux == "a", "Others", continent)
  )
#library(scales)
# Bubble chart with one point per location:
#   x      - cases per inhabitant
#   y      - deaths per inhabitant
#   size   - fully vaccinated people per inhabitant
#   colour - legend group (`Continent`)
# Only the locations listed in `names` receive a text label.
data %>%
  # Sorting by `aux` ("a" < "b" < "z") puts the highlighted locations, and
  # Brazil last of all, at the end of the table so that (assuming points are
  # drawn in row order) they end up on top of the background bubbles.
  arrange(aux) %>%
  ggplot(aes(x = cases / pop, y = deaths / pop)) +
  geom_point(aes(size = vac2 / pop, color = Continent), alpha = 0.6) +
  scale_size(range = c(0.1, 35), name = "fully vaccinated") +
  scale_colour_manual(values = colors) +
  ggrepel::geom_text_repel(
    data = subset(data, location %in% names),
    aes(x = cases / pop, y = deaths / pop, label = location)
  ) +
  # NOTE: xlim()/ylim() set scale limits, so locations falling outside these
  # ranges are removed from the plot (with a warning), not just clipped.
  xlim(-0.012, 0.18) +
  ylim(-0.001, 0.0058) +
  theme_classic() +
  theme(legend.position = "top") +
  # "none" replaces the deprecated `size = FALSE` way of hiding a legend.
  guides(size = "none") +
  labs(title = "COVID-19 vaccinations of top 15 GDP countries")
