if(!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse,ggplot2,ggpubr,ggrepel,ggpmisc,vroom,broom, kableExtra)
df <- vroom("https://covid.ourworldindata.org/data/owid-covid-data.csv")
df1 <- df %>% filter(date == max(date-1)) #%>% mutate_if(is.numeric, replace_na, 0)
formula <- y ~ poly(x, 2, raw = TRUE)
df2 <- df1 %>%
filter(continent %in% c("Europe", "Africa","South America"))
mod <- df2 %>%
lm(log(total_deaths_per_million)~ median_age + I(median_age^2),.)
tidy(mod) %>% kable("pandoc")
| (Intercept) |
-5.7166020 |
1.4501601 |
-3.942049 |
0.0001475 |
| median_age |
0.5672906 |
0.1044407 |
5.431699 |
0.0000004 |
| I(median_age^2) |
-0.0066010 |
0.0016889 |
-3.908475 |
0.0001665 |
pred = predict(mod,df2)
cbind(location=df2$location,median_age=df2$median_age,total_deaths_per_million_log=log(df2$total_deaths_per_million),pred)%>% na.omit() %>% as.data.frame() %>% head(15)%>% kable("pandoc")
| 1 |
Albania |
38 |
6.08263434918395 |
6.30858088266708 |
| 2 |
Algeria |
29.1 |
4.16439909217244 |
5.20175231842619 |
| 4 |
Angola |
16.8 |
2.55962752931371 |
1.95081072263384 |
| 5 |
Argentina |
31.9 |
6.9061880515225 |
5.66271331563301 |
| 6 |
Austria |
44.4 |
6.64438070432314 |
6.45813154254812 |
| 7 |
Belarus |
40.3 |
5.10264305722337 |
6.42457318548253 |
| 8 |
Belgium |
41.8 |
7.46797088698953 |
6.46259457637511 |
| 9 |
Benin |
18.8 |
1.33342087151282 |
2.61539994030243 |
| 10 |
Bolivia |
25.4 |
6.70491912950478 |
4.43387097009135 |
| 11 |
Bosnia and Herzegovina |
42.5 |
7.20376483152337 |
6.47017233507218 |
| 12 |
Botswana |
25.8 |
3.40757698862619 |
4.52559850452802 |
| 13 |
Brazil |
33.5 |
6.88171105337429 |
5.87964848179554 |
| 14 |
Bulgaria |
44.7 |
7.09699286481782 |
6.45187369747282 |
| 15 |
Burkina Faso |
17.6 |
1.53471436623816 |
2.22298338033159 |
| 16 |
Burundi |
17.5 |
-1.78379129957888 |
2.1894238688945 |
cbind(location=df2$location,median_age=df2$median_age,total_deaths_per_million=df2$total_deaths_per_million,pred=exp(pred)) %>% na.omit()%>% as.data.frame() %>% head(15)%>% kable("pandoc")
| 1 |
Albania |
38 |
438.182 |
549.264924095499 |
| 2 |
Algeria |
29.1 |
64.354 |
181.590167036453 |
| 4 |
Angola |
16.8 |
12.931 |
7.03438820719837 |
| 5 |
Argentina |
31.9 |
998.434 |
287.928825419431 |
| 6 |
Austria |
44.4 |
768.454 |
637.868113005403 |
| 7 |
Belarus |
40.3 |
164.456 |
616.817494690468 |
| 8 |
Belgium |
41.8 |
1751.05 |
640.721302174819 |
| 9 |
Benin |
18.8 |
3.794 |
13.672683541742 |
| 10 |
Bolivia |
25.4 |
816.412 |
84.2569425583934 |
| 11 |
Bosnia and Herzegovina |
42.5 |
1344.483 |
645.594976035522 |
| 12 |
Botswana |
25.8 |
30.192 |
92.3511818920367 |
| 13 |
Brazil |
33.5 |
974.292 |
357.683487350435 |
| 14 |
Bulgaria |
44.7 |
1208.328 |
633.888896820653 |
| 15 |
Burkina Faso |
17.6 |
4.64 |
9.23484085166474 |
| 16 |
Burundi |
17.5 |
0.168 |
8.93006674312384 |
df2 %>%
filter(continent != "NA") %>%
ggscatter(
x = "median_age",
y = "total_deaths_per_million",
color = "continent",
size = "gdp_per_capita",
palette = "Dark2",
shape = "continent") +
stat_dens2d_labels(geom = "label_repel",
keep.fraction = 0.55,
mapping = aes(x = median_age,
y = total_deaths_per_million,
label = location,
col = continent), size=5, label.fill = NA) +
theme_minimal(20) +
xlab("Idade mƩdia populacional") +
ylab("Mortes por milão de habitantes - (log2 scale)") +
labs(title = paste("COVID-19 Mortes por Milhão vs. Idade média populacional",format(df1$date[1], "%d %b, %Y")),
col = "Continente",
shape = "Continente",
size = "PIB per capita",
caption = "Cid Póvoas @cidedson \n Fonte: COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University") +
theme(legend.text = element_text(size=20)) +
stat_quadrant_counts(quadrants = 0L, label.x = "right", label.y = "bottom",
aes(label = sprintf("%i observaƧƵes", stat(count))), size=8) +
guides(col = guide_legend(override.aes = list(size = 5)))+
geom_smooth(method = "lm", formula = "y~x", se = T, col="red", fill="red", alpha=0.1, linetype=2) +
geom_smooth(method = "lm", formula = formula, se = T, col="blue", fill='blue', alpha=0.1, linetype=2) +
stat_cor(method = "spearman", size=6,
aes(label = paste(r.label, ..p.label.., sep = "~`,`~")),col="red") +
stat_poly_eq(aes(label = paste(stat(eq.label),
stat(adj.rr.label), sep = "*\", \"*")),
formula = formula, parse = TRUE, size=6, label.y = 1, col="blue") +
scale_y_continuous(labels = scales::comma_format(accuracy = 1),
breaks = 2^seq(0, 19, 0.5),
trans = "log") +
scale_x_continuous(labels = scales::comma_format(accuracy = 1),
breaks = seq(0,50,2))

df1 %>% filter(continent %in% c("Europe","South America")
& population_density < 520
) %>%
mutate(continent = ifelse(continent == "Europe", "Europa","AmƩrica do Sul")) %>%
ggscatter(
y = "population_density",
x = "total_deaths_per_million",
color = "continent",
size = "new_deaths_per_million",
palette = "Dark2",
shape = "continent") +
stat_dens2d_labels(geom = "label_repel",
keep.fraction = 0.55,
mapping = aes(y = population_density,
x = total_deaths_per_million,
label = location,
col = continent), size=8, label.fill = NA) +
theme_minimal(20) +
ylab("Densidade Populacional (pessoa/km²) - (log2 scale)") +
xlab("Mortes por milão de habitantes - (log2 scale)") +
labs(title = paste("COVID-19 Mortes por Milhão vs. Densidade Populacional",format(df1$date[1], "%d %b, %Y")),
col = "Continente",
shape = "Continente",
size = "Novas mortes por milhão",
caption = "Cid Póvoas @cidedson \n Fonte: COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University") +
scale_y_continuous(labels = scales::comma_format(accuracy = 1),
breaks = 2^seq(2, 19, 1),
trans = "log2") +
scale_x_continuous(labels = scales::comma_format(accuracy = 1),
breaks = 2^seq(1,19,1),
trans = "log2") +
theme(legend.text = element_text(size=20)) +
stat_quadrant_counts(quadrants = 0L, label.x = "right", label.y = "bottom",
aes(label = sprintf("%i observaƧƵes", stat(count))), size=8) +
guides(col = guide_legend(override.aes = list(size = 5))) +
geom_smooth(method = "lm", formula = "y~x", se = T, col="red", fill="red",alpha=0.1, linetype=2) +
stat_cor(method = "spearman", size=6,
aes(label = paste(r.label,..p.label.., sep = "~`,`~")), col="red")
