data <- read.csv("owid-covid-data.csv")
data <- data %>% mutate(cases = total_cases,
deaths = total_deaths,
vac1 = people_vaccinated,
vac2 = people_fully_vaccinated,
pop = population)
data <- data %>% select(continent,
location,
cases,
deaths,
vac1,
vac2,
date,
pop)
data <- data %>% filter(!(location %in% c("World",
"Asia",
"Europe",
"North America",
"European Union",
"South America",
"Africa",
"Oceania",
"International",
"Northern Cyprus"))) #NA's
glimpse(data)
## Rows: 119,979
## Columns: 8
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, ~
## $ location <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afghanis~
## $ cases <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, ~
## $ deaths <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vac1 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vac2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ date <fct> 2020-02-24, 2020-02-25, 2020-02-26, 2020-02-27, 2020-02-28, ~
## $ pop <dbl> 39835428, 39835428, 39835428, 39835428, 39835428, 39835428, ~
## continent location cases deaths
## : 0 Argentina : 663 Min. : 1 Min. : 1
## Africa :31837 Mexico : 663 1st Qu.: 2034 1st Qu.: 68
## Asia :29132 Peru : 663 Median : 19328 Median : 560
## Europe :29435 Thailand : 660 Mean : 500419 Mean : 12536
## North America:16237 Taiwan : 648 3rd Qu.: 187994 3rd Qu.: 4532
## Oceania : 5974 South Korea: 643 Max. :45444260 Max. :735941
## South America: 7364 (Other) :116039 NA's :6524 NA's :17295
## vac1 vac2 date
## Min. :0.000e+00 Min. :1.000e+00 2021-06-21: 220
## 1st Qu.:1.873e+05 1st Qu.:1.016e+05 2021-06-22: 220
## Median :1.123e+06 Median :7.931e+05 2021-06-23: 220
## Mean :1.043e+07 Mean :6.931e+06 2021-06-24: 220
## 3rd Qu.:5.446e+06 3rd Qu.:4.312e+06 2021-06-25: 220
## Max. :1.101e+09 Max. :1.048e+09 2021-06-26: 220
## NA's :93786 NA's :96745 (Other) :118659
## pop
## Min. :4.700e+01
## 1st Qu.:1.775e+06
## Median :8.715e+06
## Mean :4.069e+07
## 3rd Qu.:2.967e+07
## Max. :1.444e+09
##
data <- data %>%
mutate(date_aux = as.Date(date)) %>%
filter(date_aux>"2020-01-01") %>%
group_by(location, month(date_aux)) %>%
filter(date_aux == max(date_aux))
data <- data %>%
group_by(location) %>%
fill(cases,
deaths,
vac1,
vac2, .direction = c("down"))
## continent location cases
## : 0 Afghanistan : 12 Min. : 1
## Africa :652 Albania : 12 1st Qu.: 10718
## Asia :580 Algeria : 12 Median : 89960
## Europe :600 Andorra : 12 Mean : 822547
## North America:369 Angola : 12 3rd Qu.: 396547
## Oceania :177 Antigua and Barbuda: 12 Max. :45444260
## South America:147 (Other) :2453 NA's :226
## deaths vac1 vac2 date
## Min. : 1 Min. : 0 Min. : 1 2021-06-30: 220
## 1st Qu.: 179 1st Qu.: 73056 1st Qu.: 42842 2021-07-31: 220
## Median : 1513 Median : 658136 Median : 377890 2021-08-31: 219
## Mean : 18573 Mean : 8129208 Mean : 5620520 2021-05-31: 217
## 3rd Qu.: 8236 3rd Qu.: 3763073 3rd Qu.: 3014534 2021-09-30: 215
## Max. :735941 Max. :717676310 Max. :304591540 2021-04-30: 214
## NA's :339 NA's :1149 NA's :1350 (Other) :1220
## pop date_aux month(date_aux)
## Min. :4.700e+01 Min. :2020-11-30 Min. : 1.000
## 1st Qu.:9.029e+05 1st Qu.:2021-02-28 1st Qu.: 4.000
## Median :6.908e+06 Median :2021-05-31 Median : 6.000
## Mean :3.727e+07 Mean :2021-05-19 Mean : 6.463
## 3rd Qu.:2.705e+07 3rd Qu.:2021-08-31 3rd Qu.: 9.000
## Max. :1.444e+09 Max. :2021-10-24 Max. :12.000
##
data <- data %>%
group_by(location) %>%
mutate(cases = 100*replace_na(cases,0)/pop,
deaths = 100*replace_na(deaths,0)/pop,
vac1 = 100*replace_na(vac1,0)/pop,
vac2 = 100*replace_na(vac2,0)/pop)
data<- data %>% mutate(date_num = as.numeric(date_aux))
data <- data %>% select(-pop)
data <- data %>%
group_by(location) %>%
fill(cases,
deaths,
vac1,
vac2, .direction = c("down"))
## continent location cases
## : 0 Afghanistan : 12 Min. : 0.0000
## Africa :652 Albania : 12 1st Qu.: 0.1005
## Asia :580 Algeria : 12 Median : 1.0592
## Europe :600 Andorra : 12 Mean : 2.8562
## North America:369 Angola : 12 3rd Qu.: 4.6428
## Oceania :177 Antigua and Barbuda: 12 Max. :22.4380
## South America:147 (Other) :2453
## deaths vac1 vac2 date
## Min. :0.000000 Min. : 0.0000 Min. : 0.00 2021-06-30: 220
## 1st Qu.:0.001168 1st Qu.: 0.0000 1st Qu.: 0.00 2021-07-31: 220
## Median :0.013201 Median : 0.3526 Median : 0.00 2021-08-31: 219
## Mean :0.050683 Mean : 14.8468 Mean : 10.56 2021-05-31: 217
## 3rd Qu.:0.079854 3rd Qu.: 23.2293 3rd Qu.: 10.36 2021-09-30: 215
## Max. :0.599687 Max. :120.3467 Max. :118.14 2021-04-30: 214
## (Other) :1220
## date_aux month(date_aux) date_num
## Min. :2020-11-30 Min. : 1.000 Min. :18596
## 1st Qu.:2021-02-28 1st Qu.: 4.000 1st Qu.:18686
## Median :2021-05-31 Median : 6.000 Median :18778
## Mean :2021-05-19 Mean : 6.463 Mean :18767
## 3rd Qu.:2021-08-31 3rd Qu.: 9.000 3rd Qu.:18870
## Max. :2021-10-24 Max. :12.000 Max. :18924
##
## Rows: 2,525
## Columns: 10
## Groups: location [223]
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia~
## $ location <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, ~
## $ cases <dbl> 0.1160148, 0.1313655, 0.1381258, 0.1398604, 0.141718~
## $ deaths <dbl> 0.004425709, 0.005495109, 0.006024788, 0.006132732, ~
## $ vac1 <dbl> 0.00000000, 0.00000000, 0.00000000, 0.02058469, 0.02~
## $ vac2 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.000000~
## $ date <fct> 2020-11-30, 2020-12-31, 2021-01-31, 2021-02-28, 2021~
## $ date_aux <date> 2020-11-30, 2020-12-31, 2021-01-31, 2021-02-28, 202~
## $ `month(date_aux)` <dbl> 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2,~
## $ date_num <dbl> 18596, 18627, 18658, 18686, 18717, 18747, 18778, 188~
names <- c('Brazil',
'United States',
'Canada',
'Mexico',
'Germany',
'United Kingdom',
'French',
'Italy',
'Spain',
'Russia',
'India',
'South Korea',
'China',
'Japan',
'Australia')
colors <- c('gray',
'#F28B30', # Asia (laranja)
'#BF0A3A', # Europa (vermelho)
'#022873', # Am?rica do norte (azul)
'#F23D6D', # Oceania (rosa)
'#03A62C', # Am?rica do sul
'yellow') # Outros (cinza)
data <- mutate(data, Continent=ifelse(location %in% names,
continent,
as_factor("Others")))
p_animation <- data %>%
ggplot(aes(x=cases,
y=deaths,
size=vac2)) +
geom_point(aes(color=as_factor(Continent),
frame=date_num,
ids=location),alpha=0.6) +
scale_x_continuous(limits = c(-1.5, 20)) +
scale_y_continuous(limits = c(-0.09, .65)) +
scale_size(range = c(.1, 25), name="fully vaccinated") +
scale_colour_manual(values = colors) +
theme_classic() +
labs(title="COVID-19 vaccinations of top 15 GPD countries") +
theme(legend.position="none")
## Warning: Ignoring unknown aesthetics: frame, ids
## Warning: Removed 5 rows containing missing values (geom_point).

ggplotly(p_animation,tooltip = "text") %>% animation_opts(700,
redraw = FALSE,
mode = 'afterall') %>%
animation_button(x = 1, xanchor = "right",
y = 0.05, yanchor = "bottom") %>%
animation_slider(hide = TRUE)
data <- data %>%
group_by(location) %>%
filter(date_aux == max(date_aux))
p_static <- data %>%
ggplot(aes(x=cases,
y=deaths,
size=vac2)) +
geom_point(aes(color=as_factor(Continent),
ids=location),alpha=0.6) +
scale_x_continuous(limits = c(-1.5, 20)) +
scale_y_continuous(limits = c(-0.09, .65)) +
scale_size(range = c(.1, 25), name="fully vaccinated") +
scale_colour_manual(values = colors) +
theme_classic() +
labs(title="COVID-19 vaccinations of top 15 GPD countries") +
theme(legend.position="none")
## Warning: Ignoring unknown aesthetics: ids
## Warning: Removed 2 rows containing missing values (geom_point).

ggplotly(p_static,tooltip = "text")