Covid

if(!require("pacman")) install.packages("pacman")

pacman::p_load(tidyverse,ggplot2,ggpubr,ggrepel,ggpmisc,vroom,broom, kableExtra)

df <- vroom("https://covid.ourworldindata.org/data/owid-covid-data.csv")

df1 <- df %>% filter(date == max(date-1)) #%>% mutate_if(is.numeric, replace_na, 0)

formula <- y ~ poly(x, 2, raw = TRUE)

df2 <- df1 %>%  
  filter(continent %in% c("Europe", "Africa","South America"))

mod <- df2 %>%  
    lm(log(total_deaths_per_million)~ median_age + I(median_age^2),.) 

tidy(mod) %>% kable("pandoc")

term	estimate	std.error	statistic	p.value
(Intercept)	-5.7166020	1.4501601	-3.942049	0.0001475
median_age	0.5672906	0.1044407	5.431699	0.0000004
I(median_age^2)	-0.0066010	0.0016889	-3.908475	0.0001665

pred = predict(mod,df2)
cbind(location=df2$location,median_age=df2$median_age,total_deaths_per_million_log=log(df2$total_deaths_per_million),pred)%>% na.omit() %>% as.data.frame() %>% head(15)%>% kable("pandoc")

	location	median_age	total_deaths_per_million_log	pred
1	Albania	38	6.08263434918395	6.30858088266708
2	Algeria	29.1	4.16439909217244	5.20175231842619
4	Angola	16.8	2.55962752931371	1.95081072263384
5	Argentina	31.9	6.9061880515225	5.66271331563301
6	Austria	44.4	6.64438070432314	6.45813154254812
7	Belarus	40.3	5.10264305722337	6.42457318548253
8	Belgium	41.8	7.46797088698953	6.46259457637511
9	Benin	18.8	1.33342087151282	2.61539994030243
10	Bolivia	25.4	6.70491912950478	4.43387097009135
11	Bosnia and Herzegovina	42.5	7.20376483152337	6.47017233507218
12	Botswana	25.8	3.40757698862619	4.52559850452802
13	Brazil	33.5	6.88171105337429	5.87964848179554
14	Bulgaria	44.7	7.09699286481782	6.45187369747282
15	Burkina Faso	17.6	1.53471436623816	2.22298338033159
16	Burundi	17.5	-1.78379129957888	2.1894238688945

cbind(location=df2$location,median_age=df2$median_age,total_deaths_per_million=df2$total_deaths_per_million,pred=exp(pred)) %>% na.omit()%>% as.data.frame() %>% head(15)%>% kable("pandoc")

	location	median_age	total_deaths_per_million	pred
1	Albania	38	438.182	549.264924095499
2	Algeria	29.1	64.354	181.590167036453
4	Angola	16.8	12.931	7.03438820719837
5	Argentina	31.9	998.434	287.928825419431
6	Austria	44.4	768.454	637.868113005403
7	Belarus	40.3	164.456	616.817494690468
8	Belgium	41.8	1751.05	640.721302174819
9	Benin	18.8	3.794	13.672683541742
10	Bolivia	25.4	816.412	84.2569425583934
11	Bosnia and Herzegovina	42.5	1344.483	645.594976035522
12	Botswana	25.8	30.192	92.3511818920367
13	Brazil	33.5	974.292	357.683487350435
14	Bulgaria	44.7	1208.328	633.888896820653
15	Burkina Faso	17.6	4.64	9.23484085166474
16	Burundi	17.5	0.168	8.93006674312384

df2 %>%
  filter(continent != "NA") %>% 
  ggscatter(
    x = "median_age",
    y = "total_deaths_per_million",
    color = "continent",
    size = "gdp_per_capita",
    palette = "Dark2",
    shape = "continent") +
  stat_dens2d_labels(geom = "label_repel",
                     keep.fraction = 0.55, 
                     mapping = aes(x = median_age,
                                   y = total_deaths_per_million,
                                   label = location,
                                   col = continent), size=5, label.fill = NA) +
  theme_minimal(20) + 
  xlab("Idade média populacional") + 
  ylab("Mortes por milão de habitantes - (log2 scale)") +
  labs(title = paste("COVID-19 Mortes por Milhão vs. Idade média populacional",format(df1$date[1], "%d %b, %Y")),
       col = "Continente",
       shape = "Continente",
       size = "PIB per capita",
       caption = "Cid Póvoas @cidedson \n Fonte: COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University") + 
  theme(legend.text = element_text(size=20)) +
  stat_quadrant_counts(quadrants = 0L, label.x = "right", label.y = "bottom",
                       aes(label = sprintf("%i observações", stat(count))), size=8) +
  guides(col = guide_legend(override.aes = list(size = 5)))+
  geom_smooth(method = "lm", formula = "y~x", se = T, col="red", fill="red", alpha=0.1, linetype=2) +
  geom_smooth(method = "lm", formula = formula, se = T, col="blue", fill='blue', alpha=0.1, linetype=2) +
  stat_cor(method = "spearman", size=6,
           aes(label = paste(r.label, ..p.label.., sep = "~`,`~")),col="red") +
  stat_poly_eq(aes(label =  paste(stat(eq.label), 
                                stat(adj.rr.label), sep = "*\", \"*")),
             formula = formula, parse = TRUE, size=6, label.y = 1, col="blue") +
    scale_y_continuous(labels = scales::comma_format(accuracy = 1),
                       breaks = 2^seq(0, 19, 0.5),
                       trans = "log") +
  scale_x_continuous(labels = scales::comma_format(accuracy = 1),
                     breaks = seq(0,50,2))

df1 %>% filter(continent %in% c("Europe","South America") 
               & population_density < 520
               ) %>% 
  mutate(continent = ifelse(continent == "Europe", "Europa","América do Sul")) %>% 
  ggscatter(
    y = "population_density",
    x = "total_deaths_per_million",
    color = "continent",
    size = "new_deaths_per_million",
    palette = "Dark2",
    shape = "continent") +
  stat_dens2d_labels(geom = "label_repel",
                     keep.fraction = 0.55, 
                     mapping = aes(y = population_density,
                                   x = total_deaths_per_million,
                                   label = location,
                                   col = continent), size=8, label.fill = NA) +
  theme_minimal(20) + 
  ylab("Densidade Populacional (pessoa/km²) - (log2 scale)") + 
  xlab("Mortes por milão de habitantes - (log2 scale)") +
  labs(title = paste("COVID-19 Mortes por Milhão vs. Densidade Populacional",format(df1$date[1], "%d %b, %Y")),
       col = "Continente",
       shape = "Continente",
       size = "Novas mortes por milhão",
       caption = "Cid Póvoas @cidedson \n Fonte: COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University") + 
  scale_y_continuous(labels = scales::comma_format(accuracy = 1),
                     breaks = 2^seq(2, 19, 1),
                     trans = "log2") +
  scale_x_continuous(labels = scales::comma_format(accuracy = 1),
                     breaks = 2^seq(1,19,1),
                     trans = "log2") +
  theme(legend.text = element_text(size=20)) +
  stat_quadrant_counts(quadrants = 0L, label.x = "right", label.y = "bottom", 
                       aes(label = sprintf("%i observações", stat(count))), size=8) +
  guides(col = guide_legend(override.aes = list(size = 5))) +
  geom_smooth(method = "lm", formula = "y~x", se = T, col="red", fill="red",alpha=0.1, linetype=2) +
  stat_cor(method = "spearman", size=6, 
           aes(label = paste(r.label,..p.label.., sep = "~`,`~")), col="red")

Covid

Cid Edson Póvoas

2021-01-15