Bubble Chart

Hanna Rodrigues Ferreira

25 outubro 2021

data <- read.csv("owid-covid-data.csv")

data <- data %>% mutate(cases = total_cases, 
                        deaths = total_deaths,
                        vac1 = people_vaccinated,
                        vac2 = people_fully_vaccinated,
                        pop = population)

data <- data %>% select(continent,
                        location,
                        cases,
                        deaths,
                        vac1,
                        vac2,
                        date,
                        pop)

data <- data %>% filter(!(location %in% c("World",
                                         "Asia",
                                         "Europe",
                                         "North America",
                                         "European Union",
                                         "South America",
                                         "Africa",
                                         "Oceania",
                                         "International",
                                         "Northern Cyprus"))) #NA's

glimpse(data)
## Rows: 119,979
## Columns: 8
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, ~
## $ location  <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afghanis~
## $ cases     <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, ~
## $ deaths    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vac1      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vac2      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ date      <fct> 2020-02-24, 2020-02-25, 2020-02-26, 2020-02-27, 2020-02-28, ~
## $ pop       <dbl> 39835428, 39835428, 39835428, 39835428, 39835428, 39835428, ~
summary(data)
##          continent            location          cases              deaths      
##               :    0   Argentina  :   663   Min.   :       1   Min.   :     1  
##  Africa       :31837   Mexico     :   663   1st Qu.:    2034   1st Qu.:    68  
##  Asia         :29132   Peru       :   663   Median :   19328   Median :   560  
##  Europe       :29435   Thailand   :   660   Mean   :  500419   Mean   : 12536  
##  North America:16237   Taiwan     :   648   3rd Qu.:  187994   3rd Qu.:  4532  
##  Oceania      : 5974   South Korea:   643   Max.   :45444260   Max.   :735941  
##  South America: 7364   (Other)    :116039   NA's   :6524       NA's   :17295   
##       vac1                vac2                   date       
##  Min.   :0.000e+00   Min.   :1.000e+00   2021-06-21:   220  
##  1st Qu.:1.873e+05   1st Qu.:1.016e+05   2021-06-22:   220  
##  Median :1.123e+06   Median :7.931e+05   2021-06-23:   220  
##  Mean   :1.043e+07   Mean   :6.931e+06   2021-06-24:   220  
##  3rd Qu.:5.446e+06   3rd Qu.:4.312e+06   2021-06-25:   220  
##  Max.   :1.101e+09   Max.   :1.048e+09   2021-06-26:   220  
##  NA's   :93786       NA's   :96745       (Other)   :118659  
##       pop           
##  Min.   :4.700e+01  
##  1st Qu.:1.775e+06  
##  Median :8.715e+06  
##  Mean   :4.069e+07  
##  3rd Qu.:2.967e+07  
##  Max.   :1.444e+09  
## 
data <- data %>%
  mutate(date_aux = as.Date(date)) %>%
  filter(date_aux>"2020-01-01") %>%
  group_by(location, month(date_aux)) %>%
  filter(date_aux == max(date_aux))

data <- data %>%
    group_by(location) %>%
    fill(cases,
         deaths,
         vac1,
         vac2, .direction = c("down"))
summary(data)
##          continent                  location        cases         
##               :  0   Afghanistan        :  12   Min.   :       1  
##  Africa       :652   Albania            :  12   1st Qu.:   10718  
##  Asia         :580   Algeria            :  12   Median :   89960  
##  Europe       :600   Andorra            :  12   Mean   :  822547  
##  North America:369   Angola             :  12   3rd Qu.:  396547  
##  Oceania      :177   Antigua and Barbuda:  12   Max.   :45444260  
##  South America:147   (Other)            :2453   NA's   :226       
##      deaths            vac1                vac2                   date     
##  Min.   :     1   Min.   :        0   Min.   :        1   2021-06-30: 220  
##  1st Qu.:   179   1st Qu.:    73056   1st Qu.:    42842   2021-07-31: 220  
##  Median :  1513   Median :   658136   Median :   377890   2021-08-31: 219  
##  Mean   : 18573   Mean   :  8129208   Mean   :  5620520   2021-05-31: 217  
##  3rd Qu.:  8236   3rd Qu.:  3763073   3rd Qu.:  3014534   2021-09-30: 215  
##  Max.   :735941   Max.   :717676310   Max.   :304591540   2021-04-30: 214  
##  NA's   :339      NA's   :1149        NA's   :1350        (Other)   :1220  
##       pop               date_aux          month(date_aux) 
##  Min.   :4.700e+01   Min.   :2020-11-30   Min.   : 1.000  
##  1st Qu.:9.029e+05   1st Qu.:2021-02-28   1st Qu.: 4.000  
##  Median :6.908e+06   Median :2021-05-31   Median : 6.000  
##  Mean   :3.727e+07   Mean   :2021-05-19   Mean   : 6.463  
##  3rd Qu.:2.705e+07   3rd Qu.:2021-08-31   3rd Qu.: 9.000  
##  Max.   :1.444e+09   Max.   :2021-10-24   Max.   :12.000  
## 
data <- data %>%
        group_by(location) %>%
        mutate(cases = 100*replace_na(cases,0)/pop,
               deaths = 100*replace_na(deaths,0)/pop,
               vac1 = 100*replace_na(vac1,0)/pop,
               vac2 = 100*replace_na(vac2,0)/pop)

data<- data %>% mutate(date_num = as.numeric(date_aux))


data <- data %>% select(-pop)

data <- data %>%
    group_by(location) %>%
    fill(cases,
         deaths,
         vac1,
         vac2, .direction = c("down"))
summary(data)
##          continent                  location        cases        
##               :  0   Afghanistan        :  12   Min.   : 0.0000  
##  Africa       :652   Albania            :  12   1st Qu.: 0.1005  
##  Asia         :580   Algeria            :  12   Median : 1.0592  
##  Europe       :600   Andorra            :  12   Mean   : 2.8562  
##  North America:369   Angola             :  12   3rd Qu.: 4.6428  
##  Oceania      :177   Antigua and Barbuda:  12   Max.   :22.4380  
##  South America:147   (Other)            :2453                    
##      deaths              vac1               vac2                date     
##  Min.   :0.000000   Min.   :  0.0000   Min.   :  0.00   2021-06-30: 220  
##  1st Qu.:0.001168   1st Qu.:  0.0000   1st Qu.:  0.00   2021-07-31: 220  
##  Median :0.013201   Median :  0.3526   Median :  0.00   2021-08-31: 219  
##  Mean   :0.050683   Mean   : 14.8468   Mean   : 10.56   2021-05-31: 217  
##  3rd Qu.:0.079854   3rd Qu.: 23.2293   3rd Qu.: 10.36   2021-09-30: 215  
##  Max.   :0.599687   Max.   :120.3467   Max.   :118.14   2021-04-30: 214  
##                                                         (Other)   :1220  
##     date_aux          month(date_aux)     date_num    
##  Min.   :2020-11-30   Min.   : 1.000   Min.   :18596  
##  1st Qu.:2021-02-28   1st Qu.: 4.000   1st Qu.:18686  
##  Median :2021-05-31   Median : 6.000   Median :18778  
##  Mean   :2021-05-19   Mean   : 6.463   Mean   :18767  
##  3rd Qu.:2021-08-31   3rd Qu.: 9.000   3rd Qu.:18870  
##  Max.   :2021-10-24   Max.   :12.000   Max.   :18924  
## 
glimpse(data)
## Rows: 2,525
## Columns: 10
## Groups: location [223]
## $ continent         <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia~
## $ location          <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, ~
## $ cases             <dbl> 0.1160148, 0.1313655, 0.1381258, 0.1398604, 0.141718~
## $ deaths            <dbl> 0.004425709, 0.005495109, 0.006024788, 0.006132732, ~
## $ vac1              <dbl> 0.00000000, 0.00000000, 0.00000000, 0.02058469, 0.02~
## $ vac2              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.000000~
## $ date              <fct> 2020-11-30, 2020-12-31, 2021-01-31, 2021-02-28, 2021~
## $ date_aux          <date> 2020-11-30, 2020-12-31, 2021-01-31, 2021-02-28, 202~
## $ `month(date_aux)` <dbl> 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2,~
## $ date_num          <dbl> 18596, 18627, 18658, 18686, 18717, 18747, 18778, 188~
names <- c('Brazil',
           'United States',
           'Canada',
           'Mexico',
           'Germany',
           'United Kingdom',
           'French',
           'Italy',
           'Spain',
           'Russia',
           'India',
           'South Korea',
           'China',
           'Japan',
           'Australia')


colors <- c('gray',
            '#F28B30', # Asia (laranja)
            '#BF0A3A', # Europa (vermelho)
            '#022873', # Am?rica do norte (azul)
            '#F23D6D', # Oceania (rosa)    
            '#03A62C', # Am?rica do sul
            'yellow')    # Outros (cinza)
data <- mutate(data, Continent=ifelse(location %in% names,
                                      continent,
                                      as_factor("Others")))
p_animation <- data %>%
  ggplot(aes(x=cases,
             y=deaths,
             size=vac2)) +
  geom_point(aes(color=as_factor(Continent),
                 frame=date_num,
                 ids=location),alpha=0.6) +
  scale_x_continuous(limits = c(-1.5, 20)) +
  scale_y_continuous(limits = c(-0.09, .65)) +
  scale_size(range = c(.1, 25), name="fully vaccinated") +
  scale_colour_manual(values = colors) +
  theme_classic() +
  labs(title="COVID-19 vaccinations of top 15 GPD countries") +
  theme(legend.position="none")
## Warning: Ignoring unknown aesthetics: frame, ids
p_animation
## Warning: Removed 5 rows containing missing values (geom_point).

ggplotly(p_animation,tooltip = "text") %>% animation_opts(700,
                               redraw = FALSE,
                               mode = 'afterall') %>%
  animation_button(x = 1, xanchor = "right",
                   y = 0.05, yanchor = "bottom") %>% 
  animation_slider(hide = TRUE)
data <- data %>%
  group_by(location) %>% 
  filter(date_aux == max(date_aux))

p_static <- data %>%
  ggplot(aes(x=cases,
             y=deaths,
             size=vac2)) +
  geom_point(aes(color=as_factor(Continent),
                 ids=location),alpha=0.6) +
  scale_x_continuous(limits = c(-1.5, 20)) +
  scale_y_continuous(limits = c(-0.09, .65)) +
  scale_size(range = c(.1, 25), name="fully vaccinated") +
  scale_colour_manual(values = colors) +
  theme_classic() +
  labs(title="COVID-19 vaccinations of top 15 GPD countries") +
  theme(legend.position="none")
## Warning: Ignoring unknown aesthetics: ids
p_static
## Warning: Removed 2 rows containing missing values (geom_point).

ggplotly(p_static,tooltip = "text")