library(ggplot2)
library(dplyr)
library(Hmisc) # %nin% 
# Load the OWID COVID-19 CSV export; the path is relative to the working
# directory.
data <- read.csv(file = "owid-covid-data.csv")

# Print a compact, column-by-column overview of the raw table.
data %>% glimpse()
## Rows: 95,743
## Columns: 60
## $ iso_code                              <chr> "AFG", "AFG", "AFG", "AFG", "AFG~
## $ continent                             <chr> "Asia", "Asia", "Asia", "Asia", ~
## $ location                              <chr> "Afghanistan", "Afghanistan", "A~
## $ date                                  <chr> "2020-02-24", "2020-02-25", "202~
## $ total_cases                           <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 4,~
## $ new_cases                             <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0,~
## $ new_cases_smoothed                    <dbl> NA, NA, NA, NA, NA, 0.143, 0.143~
## $ total_deaths                          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_deaths                            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_deaths_smoothed                   <dbl> NA, NA, NA, NA, NA, 0, 0, 0, 0, ~
## $ total_cases_per_million               <dbl> 0.026, 0.026, 0.026, 0.026, 0.02~
## $ new_cases_per_million                 <dbl> 0.026, 0.000, 0.000, 0.000, 0.00~
## $ new_cases_smoothed_per_million        <dbl> NA, NA, NA, NA, NA, 0.004, 0.004~
## $ total_deaths_per_million              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_deaths_per_million                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_deaths_smoothed_per_million       <dbl> NA, NA, NA, NA, NA, 0, 0, 0, 0, ~
## $ reproduction_rate                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ icu_patients                          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ icu_patients_per_million              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ hosp_patients                         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ hosp_patients_per_million             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ weekly_icu_admissions                 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ weekly_icu_admissions_per_million     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ weekly_hosp_admissions                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ weekly_hosp_admissions_per_million    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_tests                             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ total_tests                           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ total_tests_per_thousand              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_tests_per_thousand                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_tests_smoothed                    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_tests_smoothed_per_thousand       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ positive_rate                         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ tests_per_case                        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ tests_units                           <chr> "", "", "", "", "", "", "", "", ~
## $ total_vaccinations                    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ people_vaccinated                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ people_fully_vaccinated               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_vaccinations                      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_vaccinations_smoothed             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ total_vaccinations_per_hundred        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ people_vaccinated_per_hundred         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ people_fully_vaccinated_per_hundred   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_vaccinations_smoothed_per_million <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ stringency_index                      <dbl> 8.33, 8.33, 8.33, 8.33, 8.33, 8.~
## $ population                            <dbl> 38928341, 38928341, 38928341, 38~
## $ population_density                    <dbl> 54.422, 54.422, 54.422, 54.422, ~
## $ median_age                            <dbl> 18.6, 18.6, 18.6, 18.6, 18.6, 18~
## $ aged_65_older                         <dbl> 2.581, 2.581, 2.581, 2.581, 2.58~
## $ aged_70_older                         <dbl> 1.337, 1.337, 1.337, 1.337, 1.33~
## $ gdp_per_capita                        <dbl> 1803.987, 1803.987, 1803.987, 18~
## $ extreme_poverty                       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ cardiovasc_death_rate                 <dbl> 597.029, 597.029, 597.029, 597.0~
## $ diabetes_prevalence                   <dbl> 9.59, 9.59, 9.59, 9.59, 9.59, 9.~
## $ female_smokers                        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ male_smokers                          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ handwashing_facilities                <dbl> 37.746, 37.746, 37.746, 37.746, ~
## $ hospital_beds_per_thousand            <dbl> 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.~
## $ life_expectancy                       <dbl> 64.83, 64.83, 64.83, 64.83, 64.8~
## $ human_development_index               <dbl> 0.511, 0.511, 0.511, 0.511, 0.51~
## $ excess_mortality                      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
# Collapse the daily records into one row per location.
#   continent : the location's continent label (max() over the strings)
#   pop       : largest reported population
#   cases     : largest cumulative case count
#   deaths    : largest cumulative death count
#   vac1      : largest observed gap between people_vaccinated and
#               people_fully_vaccinated
#   vac2      : largest count of fully vaccinated people
# max(..., na.rm = TRUE) yields -Inf (with a warning) when a location has no
# non-missing values, so every count that is not >= 0 is reset to 0 afterwards.
data <- data %>%
  group_by(location) %>%
  summarise(
    continent = max(continent),
    pop = max(population, na.rm = TRUE),
    cases = max(total_cases, na.rm = TRUE),
    deaths = max(total_deaths, na.rm = TRUE),
    vac1 = max(people_vaccinated - people_fully_vaccinated, na.rm = TRUE),
    vac2 = max(people_fully_vaccinated, na.rm = TRUE)
  ) %>%
  mutate(
    across(
      c(cases, deaths, vac1, vac2),
      ~ ifelse(!(.x >= 0), 0, .x)
    )
  )

# Inspect the aggregated table.
data %>% glimpse()
## Rows: 229
## Columns: 7
## $ location  <chr> "Afghanistan", "Africa", "Albania", "Algeria", "Andorra", "A~
## $ continent <chr> "Asia", "", "Europe", "Africa", "Europe", "Africa", "North A~
## $ pop       <dbl> 38928341, 1340598113, 2877800, 43851043, 77265, 32866268, 15~
## $ cases     <dbl> 91458, 5057604, 132461, 133742, 13826, 36790, 0, 1263, 41454~
## $ deaths    <dbl> 3612, 135003, 2453, 3579, 127, 836, 0, 42, 86029, 4488, 0, 7~
## $ vac1      <dbl> 393254, 19098256, 259335, 0, 22909, 546182, 5332, 29279, 970~
## $ vac2      <dbl> 177266, 11546311, 332173, 0, 10938, 447704, 5818, 23844, 333~


# Keep country-level rows only.
# The explicit name list drops the known world/continent/union aggregates;
# "Oceania" is added because every other continent was already listed
# (presumably an oversight -- harmless if no such row exists).
# As a second guard, rows with an empty `continent` are removed as well: the
# aggregated table shows aggregates such as "Africa" carrying continent == "",
# so this also catches aggregate rows that are not named here.
# NOTE(review): a genuine country with a blank continent would be dropped too;
# confirm against the dataset if that matters.
data <- data %>%
  filter(
    location %nin% c(
      "World",
      "Asia",
      "Europe",
      "North America",
      "European Union",
      "South America",
      "Africa",
      "Oceania"
    ),
    continent != ""
  )

# Reference GDP figures used to pick the highlighted countries:
# United States: US$ 20.933 trillion
# China: US$ 14.723 trillion
# Japan: US$ 5.049 trillion
# Germany: US$ 3.803 trillion
# United Kingdom: US$ 2.711 trillion
# India: US$ 2.709 trillion
# France: US$ 2.599 trillion
# Italy: US$ 1.885 trillion
# Canada: US$ 1.643 trillion
# South Korea: US$ 1.631 trillion
# Russia: US$ 1.474 trillion

# Brazil: US$ 1.434 trillion

# Australia: US$ 1.359 trillion
# Spain: US$ 1.278 trillion
# Mexico: US$ 1.076 trillion

# Locations to highlight and label in the plot. Entries must match the
# `location` column exactly; the original 'French' matched nothing, so France
# was silently left out of the highlighted set.
names <- c('Brazil',
          'United States',
          'Canada',
          'Mexico',
          'Germany',
          'United Kingdom',
          'France',
          'Italy',
          'Spain',
          'Russia',
          'India',
          'South Korea',
          'China',
          'Japan',
          'Australia')


# Manual palette for the `Continent` colour scale. The vector is unnamed, so
# the colours are assigned to the scale's levels by position.
# NOTE(review): with six levels in alphabetical order, "gray" (fifth) would
# land on "Others" -- confirm against the rendered legend.
colors <- c(
  "#F28B30", "#BF0A3A", "#022873",
  "#F23D6D", "gray", "#03A62C"
)

# aux       : sort key -- "a" for locations outside `names`, "b" for the
#             highlighted locations, "z" for Brazil.
# Continent : colour group -- the real continent for highlighted locations,
#             "Others" for everything else.
data <- data %>%
  mutate(
    aux = ifelse(
      !(location %in% names),
      "a",
      ifelse(location == "Brazil", "z", "b")
    ),
    Continent = ifelse(aux != "a", continent, "Others")
  )


#library(scales)

# Bubble chart of cumulative cases vs. deaths per capita.
#   x      : cases / pop
#   y      : deaths / pop
#   size   : fully vaccinated share (vac2 / pop)
#   colour : Continent ("Others" for non-highlighted locations)
# Rows are sorted by `aux` first so that the highlighted locations come after
# the "Others" points in the data, with Brazil last.
# Only the locations listed in `names` get a text label.
data %>%
  arrange(aux) %>%
  ggplot(aes(x = cases / pop, y = deaths / pop)) +
  geom_point(
    aes(size = vac2 / pop, color = Continent),
    alpha = 0.6
  ) +
  scale_size(range = c(.1, 35), name = "fully vaccinated") +
  scale_colour_manual(values = colors) +
  ggrepel::geom_text_repel(
    data = subset(data, location %in% names),
    aes(x = cases / pop, y = deaths / pop, label = location)
  ) +
  # Fixed axis limits; points outside them are dropped by ggplot2.
  xlim(-0.012, .18) +
  ylim(-0.001, .0058) +
  theme_classic() +
  theme(legend.position = "top") +
  # Hide the size legend; "none" replaces the deprecated `FALSE`.
  guides(size = "none") +
  labs(title = "COVID-19 vaccinations of top 15 GDP countries")