if(!require("pacman")) install.packages("pacman")

pacman::p_load(tidyverse,ggplot2,ggpubr,ggrepel,ggpmisc,vroom,broom, kableExtra)

df <- vroom("https://covid.ourworldindata.org/data/owid-covid-data.csv")

df1 <- df %>% filter(date == max(date-1)) #%>% mutate_if(is.numeric, replace_na, 0)
formula <- y ~ poly(x, 2, raw = TRUE)

df2 <- df1 %>%  
  filter(continent %in% c("Europe", "Africa","South America"))

mod <- df2 %>%  
    lm(log(total_deaths_per_million)~ median_age + I(median_age^2),.) 

tidy(mod) %>% kable("pandoc")
term estimate std.error statistic p.value
(Intercept) -5.7166020 1.4501601 -3.942049 0.0001475
median_age 0.5672906 0.1044407 5.431699 0.0000004
I(median_age^2) -0.0066010 0.0016889 -3.908475 0.0001665
pred = predict(mod,df2)
cbind(location=df2$location,median_age=df2$median_age,total_deaths_per_million_log=log(df2$total_deaths_per_million),pred)%>% na.omit() %>% as.data.frame() %>% head(15)%>% kable("pandoc") 
location median_age total_deaths_per_million_log pred
1 Albania 38 6.08263434918395 6.30858088266708
2 Algeria 29.1 4.16439909217244 5.20175231842619
4 Angola 16.8 2.55962752931371 1.95081072263384
5 Argentina 31.9 6.9061880515225 5.66271331563301
6 Austria 44.4 6.64438070432314 6.45813154254812
7 Belarus 40.3 5.10264305722337 6.42457318548253
8 Belgium 41.8 7.46797088698953 6.46259457637511
9 Benin 18.8 1.33342087151282 2.61539994030243
10 Bolivia 25.4 6.70491912950478 4.43387097009135
11 Bosnia and Herzegovina 42.5 7.20376483152337 6.47017233507218
12 Botswana 25.8 3.40757698862619 4.52559850452802
13 Brazil 33.5 6.88171105337429 5.87964848179554
14 Bulgaria 44.7 7.09699286481782 6.45187369747282
15 Burkina Faso 17.6 1.53471436623816 2.22298338033159
16 Burundi 17.5 -1.78379129957888 2.1894238688945
cbind(location=df2$location,median_age=df2$median_age,total_deaths_per_million=df2$total_deaths_per_million,pred=exp(pred)) %>% na.omit()%>% as.data.frame() %>% head(15)%>% kable("pandoc") 
location median_age total_deaths_per_million pred
1 Albania 38 438.182 549.264924095499
2 Algeria 29.1 64.354 181.590167036453
4 Angola 16.8 12.931 7.03438820719837
5 Argentina 31.9 998.434 287.928825419431
6 Austria 44.4 768.454 637.868113005403
7 Belarus 40.3 164.456 616.817494690468
8 Belgium 41.8 1751.05 640.721302174819
9 Benin 18.8 3.794 13.672683541742
10 Bolivia 25.4 816.412 84.2569425583934
11 Bosnia and Herzegovina 42.5 1344.483 645.594976035522
12 Botswana 25.8 30.192 92.3511818920367
13 Brazil 33.5 974.292 357.683487350435
14 Bulgaria 44.7 1208.328 633.888896820653
15 Burkina Faso 17.6 4.64 9.23484085166474
16 Burundi 17.5 0.168 8.93006674312384
df2 %>%
  filter(continent != "NA") %>% 
  ggscatter(
    x = "median_age",
    y = "total_deaths_per_million",
    color = "continent",
    size = "gdp_per_capita",
    palette = "Dark2",
    shape = "continent") +
  stat_dens2d_labels(geom = "label_repel",
                     keep.fraction = 0.55, 
                     mapping = aes(x = median_age,
                                   y = total_deaths_per_million,
                                   label = location,
                                   col = continent), size=5, label.fill = NA) +
  theme_minimal(20) + 
  xlab("Idade mƩdia populacional") + 
  ylab("Mortes por milão de habitantes - (log2 scale)") +
  labs(title = paste("COVID-19 Mortes por Milhão vs. Idade média populacional",format(df1$date[1], "%d %b, %Y")),
       col = "Continente",
       shape = "Continente",
       size = "PIB per capita",
       caption = "Cid Póvoas @cidedson \n Fonte: COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University") + 
  theme(legend.text = element_text(size=20)) +
  stat_quadrant_counts(quadrants = 0L, label.x = "right", label.y = "bottom",
                       aes(label = sprintf("%i observaƧƵes", stat(count))), size=8) +
  guides(col = guide_legend(override.aes = list(size = 5)))+
  geom_smooth(method = "lm", formula = "y~x", se = T, col="red", fill="red", alpha=0.1, linetype=2) +
  geom_smooth(method = "lm", formula = formula, se = T, col="blue", fill='blue', alpha=0.1, linetype=2) +
  stat_cor(method = "spearman", size=6,
           aes(label = paste(r.label, ..p.label.., sep = "~`,`~")),col="red") +
  stat_poly_eq(aes(label =  paste(stat(eq.label), 
                                stat(adj.rr.label), sep = "*\", \"*")),
             formula = formula, parse = TRUE, size=6, label.y = 1, col="blue") +
    scale_y_continuous(labels = scales::comma_format(accuracy = 1),
                       breaks = 2^seq(0, 19, 0.5),
                       trans = "log") +
  scale_x_continuous(labels = scales::comma_format(accuracy = 1),
                     breaks = seq(0,50,2))

df1 %>% filter(continent %in% c("Europe","South America") 
               & population_density < 520
               ) %>% 
  mutate(continent = ifelse(continent == "Europe", "Europa","AmƩrica do Sul")) %>% 
  ggscatter(
    y = "population_density",
    x = "total_deaths_per_million",
    color = "continent",
    size = "new_deaths_per_million",
    palette = "Dark2",
    shape = "continent") +
  stat_dens2d_labels(geom = "label_repel",
                     keep.fraction = 0.55, 
                     mapping = aes(y = population_density,
                                   x = total_deaths_per_million,
                                   label = location,
                                   col = continent), size=8, label.fill = NA) +
  theme_minimal(20) + 
  ylab("Densidade Populacional (pessoa/km²) - (log2 scale)") + 
  xlab("Mortes por milão de habitantes - (log2 scale)") +
  labs(title = paste("COVID-19 Mortes por Milhão vs. Densidade Populacional",format(df1$date[1], "%d %b, %Y")),
       col = "Continente",
       shape = "Continente",
       size = "Novas mortes por milhão",
       caption = "Cid Póvoas @cidedson \n Fonte: COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University") + 
  scale_y_continuous(labels = scales::comma_format(accuracy = 1),
                     breaks = 2^seq(2, 19, 1),
                     trans = "log2") +
  scale_x_continuous(labels = scales::comma_format(accuracy = 1),
                     breaks = 2^seq(1,19,1),
                     trans = "log2") +
  theme(legend.text = element_text(size=20)) +
  stat_quadrant_counts(quadrants = 0L, label.x = "right", label.y = "bottom", 
                       aes(label = sprintf("%i observaƧƵes", stat(count))), size=8) +
  guides(col = guide_legend(override.aes = list(size = 5))) +
  geom_smooth(method = "lm", formula = "y~x", se = T, col="red", fill="red",alpha=0.1, linetype=2) +
  stat_cor(method = "spearman", size=6, 
           aes(label = paste(r.label,..p.label.., sep = "~`,`~")), col="red")