1 Cargar datos limpios

# Read the cleaned dataset from its Parquet file into `df`.
ruta_parquet <- "covid_owid_limpio.parquet"
df <- read_parquet(file = ruta_parquet)

# Print a per-column summary (missingness, ranges, quantiles) of `df`.
# Called directly on `df` (not piped) so the summary is labelled "df".
skim(df)
Data summary
Name df
Number of rows 429435
Number of columns 68
_______________________
Column type frequency:
character 4
Date 1
numeric 63
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
iso_code 0 1.00 3 8 0 255 0
continent 26525 0.94 4 13 0 6 0
pais 0 1.00 4 32 0 255 0
tests_units 322647 0.25 13 15 0 4 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date 0 1 2020-01-01 2024-08-14 2022-04-20 1688

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
casos_totales 17631 0.96 7365292.35 4.477582e+07 0.00 6280.75 63653.00 758272.00 7.758668e+08 ▇▁▁▁▁
casos_nuevos 19276 0.96 8017.36 2.296649e+05 0.00 0.00 0.00 0.00 4.423623e+07 ▇▁▁▁▁
new_cases_smoothed 20506 0.95 8041.03 8.661611e+04 0.00 0.00 12.00 313.29 6.319461e+06 ▇▁▁▁▁
muertes_totales 17631 0.96 81259.57 4.411901e+05 0.00 43.00 799.00 9574.00 7.057132e+06 ▇▁▁▁▁
muertes_nuevas 18827 0.96 71.85 1.368320e+03 0.00 0.00 0.00 0.00 1.037190e+05 ▇▁▁▁▁
new_deaths_smoothed 20057 0.95 72.06 5.136400e+02 0.00 0.00 0.00 3.14 1.481700e+04 ▇▁▁▁▁
total_cases_per_million 17631 0.96 112096.20 1.622404e+05 0.00 1916.10 29145.48 156770.19 7.635986e+05 ▇▁▁▁▁
new_cases_per_million 19276 0.96 122.36 1.508780e+03 0.00 0.00 0.00 0.00 2.417582e+05 ▇▁▁▁▁
new_cases_smoothed_per_million 20506 0.95 122.71 5.597000e+02 0.00 0.00 2.79 56.25 3.453689e+04 ▇▁▁▁▁
total_deaths_per_million 17631 0.96 835.51 1.134930e+03 0.00 24.57 295.09 1283.82 6.601110e+03 ▇▂▁▁▁
new_deaths_per_million 18827 0.96 0.76 6.980000e+00 0.00 0.00 0.00 0.00 8.936600e+02 ▇▁▁▁▁
new_deaths_smoothed_per_million 20057 0.95 0.76 2.550000e+00 0.00 0.00 0.00 0.36 1.276600e+02 ▇▁▁▁▁
reproduction_rate 244618 0.43 0.91 4.000000e-01 -0.07 0.72 0.95 1.14 5.870000e+00 ▇▃▁▁▁
icu_patients 390319 0.09 660.97 2.139620e+03 0.00 21.00 90.00 413.00 2.889100e+04 ▇▁▁▁▁
icu_patients_per_million 390319 0.09 15.66 2.279000e+01 0.00 2.33 6.43 18.78 1.806800e+02 ▇▁▁▁▁
hosp_patients 388779 0.09 3911.74 9.845750e+03 0.00 186.00 776.00 3051.00 1.544970e+05 ▇▁▁▁▁
hosp_patients_per_million 388779 0.09 125.99 1.511600e+02 0.00 31.00 74.24 159.76 1.526850e+03 ▇▁▁▁▁
weekly_icu_admissions 418442 0.03 317.89 5.144100e+02 0.00 17.00 92.00 353.00 4.838000e+03 ▇▁▁▁▁
weekly_icu_admissions_per_million 418442 0.03 9.67 1.357000e+01 0.00 1.55 4.64 12.65 2.249800e+02 ▇▁▁▁▁
weekly_hosp_admissions 404938 0.06 4291.72 1.091962e+04 0.00 223.00 864.00 3893.00 1.539770e+05 ▇▁▁▁▁
weekly_hosp_admissions_per_million 404938 0.06 82.62 8.840000e+01 0.00 23.73 56.28 110.00 7.170800e+02 ▇▁▁▁▁
total_tests 350048 0.18 21104573.94 8.409869e+07 0.00 364654.00 2067330.00 10248451.50 9.214000e+09 ▇▁▁▁▁
new_tests 354032 0.18 67285.41 2.477340e+05 1.00 2244.00 8783.00 37229.00 3.585563e+07 ▇▁▁▁▁
total_tests_per_thousand 350048 0.18 924.25 2.195430e+03 0.00 43.58 234.14 894.38 3.292583e+04 ▇▁▁▁▁
new_tests_per_thousand 354032 0.18 3.27 9.030000e+00 0.00 0.29 0.97 2.91 5.310600e+02 ▇▁▁▁▁
new_tests_smoothed 325470 0.24 142178.36 1.138215e+06 0.00 1486.00 6570.00 32205.00 1.476998e+07 ▇▁▁▁▁
new_tests_smoothed_per_thousand 325470 0.24 2.83 7.310000e+00 0.00 0.20 0.85 2.58 1.476000e+02 ▇▁▁▁▁
positive_rate 333508 0.22 0.10 1.200000e-01 0.00 0.02 0.06 0.14 1.000000e+00 ▇▁▁▁▁
tests_per_case 335087 0.22 2403.63 3.344366e+04 1.00 7.10 17.50 54.60 1.023632e+06 ▇▁▁▁▁
vacunas_totales 344018 0.20 561697983.43 1.842160e+09 0.00 1970788.00 14394348.00 116197175.00 1.357877e+10 ▇▁▁▁▁
vacunados_una_dosis 348303 0.19 248706410.74 8.006461e+08 0.00 1050009.25 6901087.50 50932952.00 5.631264e+09 ▇▁▁▁▁
vacunados_completos 351374 0.18 228663910.07 7.403763e+08 1.00 964400.00 6191345.00 47731850.00 5.177943e+09 ▇▁▁▁▁
refuerzos 375835 0.12 150581058.90 4.360697e+08 1.00 602282.00 5765440.00 40190716.25 2.817381e+09 ▇▁▁▁▁
new_vaccinations 358464 0.17 739864.03 3.183064e+06 0.00 2010.00 20531.00 173611.50 4.967320e+07 ▇▁▁▁▁
new_vaccinations_smoothed 234406 0.45 283875.82 1.922352e+06 0.00 279.00 3871.00 31803.00 4.369181e+07 ▇▁▁▁▁
total_vaccinations_per_hundred 344018 0.20 124.28 8.510000e+01 0.00 44.77 130.55 194.99 4.102300e+02 ▇▆▆▁▁
people_vaccinated_per_hundred 348303 0.19 53.50 2.938000e+01 0.00 27.88 64.30 77.78 1.290700e+02 ▆▃▇▆▁
people_fully_vaccinated_per_hundred 351374 0.18 48.68 2.904000e+01 0.00 21.22 57.92 73.61 1.268900e+02 ▆▃▇▅▁
total_boosters_per_hundred 375835 0.12 36.30 3.022000e+01 0.00 5.92 35.91 57.62 1.504700e+02 ▇▆▃▁▁
new_vaccinations_smoothed_per_million 234406 0.45 1851.48 3.117830e+03 0.00 106.00 605.00 2402.00 1.171130e+05 ▇▁▁▁▁
new_people_vaccinated_smoothed 237258 0.45 106070.70 7.866884e+05 0.00 43.00 771.00 9307.00 2.107127e+07 ▇▁▁▁▁
new_people_vaccinated_smoothed_per_hundred 237258 0.45 0.07 1.800000e-01 0.00 0.00 0.01 0.07 1.171000e+01 ▇▁▁▁▁
stringency_index 233245 0.46 42.88 2.487000e+01 0.00 22.22 42.85 62.04 1.000000e+02 ▇▆▇▆▂
population_density 68943 0.84 394.07 1.785450e+03 0.14 37.73 88.12 222.87 2.054677e+04 ▇▁▁▁▁
median_age 94772 0.78 30.46 9.090000e+00 15.10 22.20 29.70 38.70 4.820000e+01 ▇▆▇▆▆
aged_65_older 106165 0.75 8.68 6.090000e+00 1.14 3.53 6.29 13.93 2.705000e+01 ▇▃▂▂▁
aged_70_older 98120 0.77 5.49 4.140000e+00 0.53 2.06 3.87 8.64 1.849000e+01 ▇▃▂▂▁
gdp_per_capita 101143 0.76 18904.18 1.982958e+04 661.24 4227.63 12294.88 27216.44 1.169356e+05 ▇▂▁▁▁
extreme_poverty 217439 0.49 13.92 2.007000e+01 0.10 0.60 2.50 21.40 7.760000e+01 ▇▂▁▁▁
cardiovasc_death_rate 100570 0.77 264.64 1.207600e+02 79.37 175.70 245.46 333.44 7.244200e+02 ▇▇▃▁▁
diabetes_prevalence 83524 0.81 8.56 4.930000e+00 0.99 5.35 7.20 10.79 3.053000e+01 ▇▇▂▁▁
female_smokers 182270 0.58 10.77 1.076000e+01 0.10 1.90 6.30 19.30 4.400000e+01 ▇▂▂▁▁
male_smokers 185618 0.57 33.10 1.385000e+01 7.70 22.60 33.10 41.50 7.810000e+01 ▆▇▆▂▁
handwashing_facilities 267694 0.38 50.65 3.191000e+01 1.19 20.86 49.54 82.50 1.000000e+02 ▇▅▅▅▇
hospital_beds_per_thousand 138746 0.68 3.11 2.550000e+00 0.10 1.30 2.50 4.21 1.380000e+01 ▇▃▂▁▁
life_expectancy 39136 0.91 73.70 7.390000e+00 53.28 69.50 75.05 79.46 8.675000e+01 ▁▃▅▇▅
human_development_index 110308 0.74 0.72 1.500000e-01 0.39 0.60 0.74 0.83 9.600000e-01 ▂▅▅▇▆
population 0 1.00 152033640.40 6.975408e+08 47.00 523798.00 6336393.00 32969520.00 7.975105e+09 ▇▁▁▁▁
excess_mortality_cumulative_absolute 416024 0.03 56047.65 1.568691e+05 -37726.10 176.50 6815.20 39128.04 1.349776e+06 ▇▁▁▁▁
excess_mortality_cumulative 416024 0.03 9.77 1.204000e+01 -44.23 2.06 8.13 15.16 7.808000e+01 ▁▃▇▁▁
excess_mortality 416024 0.03 10.93 2.456000e+01 -95.92 -1.50 5.66 15.57 3.782200e+02 ▃▇▁▁▁
excess_mortality_cumulative_per_million 416024 0.03 1772.67 1.991890e+03 -2936.45 116.88 1270.80 2883.02 1.029352e+04 ▁▇▃▁▁
casos_por_vacuna 355903 0.17 42.78 8.187780e+03 0.00 0.00 0.00 0.00 2.100420e+06 ▇▁▁▁▁

2 Dosis de refuerzo aplicadas globalmente

# Plot booster doses over time at the global level.
#
# `df` contains aggregate rows (e.g. "World", "Asia", income groups) next to
# individual countries, so summing `refuerzos` over every row of a date counts
# the same doses several times, and the total jumps depending on which
# entities happened to report that day. The "World" series is used instead.
# NOTE(review): assumes `refuerzos` is a cumulative count and that the "World"
# rows carry it -- confirm against the cleaning script.
df %>%
  # Keep only reported values so the line is not broken by missing days.
  filter(pais == "World", !is.na(refuerzos)) %>%
  ggplot(aes(date, refuerzos)) +
  geom_line(color = "darkgreen") +
  labs(title = "Dosis de refuerzo aplicadas globalmente",
       x = "Fecha", y = "Cantidad de dosis")

3 Casos nuevos vs vacunación completa (países seleccionados)

# Countries to compare. The names must match the values stored in `pais`,
# which are in English (e.g. "World", "Malaysia", "Lithuania"), so Brazil has
# to be spelled "Brazil"; "Brasil" matches no row and silently drops the
# country from the plot.
paises <- c("Paraguay", "Argentina", "Brazil", "Chile")

# One panel per country with new cases and fully vaccinated people over time.
# NOTE(review): both series share one y axis per panel although their
# magnitudes differ a lot; consider per-capita values or separate panels.
df %>%
  filter(pais %in% paises) %>%
  ggplot(aes(date)) +
  geom_line(aes(y = casos_nuevos, color = "Casos nuevos")) +
  # Vaccination figures are missing on most days; drop the NAs for this layer
  # only so the reported points are joined instead of leaving gaps.
  geom_line(
    data = ~ dplyr::filter(.x, !is.na(vacunados_completos)),
    aes(y = vacunados_completos, color = "Vacunados completos")
  ) +
  facet_wrap(~pais, scales = "free_y") +
  labs(title = "Casos vs vacunación completa",
       x = "Fecha", color = "Indicador")

4 Top 10 países por vacunación completa

# Top 10 countries by number of fully vaccinated people.
#
# Filtering on the single latest date of the whole dataset keeps only the few
# entities that reported on that day (mostly aggregates, with no case or death
# figures). Instead, take the most recent non-missing value of each indicator
# per country and rank those.
df %>%
  # Drop aggregate rows such as "World", continents and income groups.
  # NOTE(review): assumes aggregates are exactly the rows with a missing
  # `continent` -- confirm against the cleaning script.
  filter(!is.na(continent)) %>%
  # Sort by date so that "last" means "most recent" within each country.
  arrange(date) %>%
  group_by(pais) %>%
  summarise(
    across(
      c(vacunados_completos, casos_totales, muertes_totales),
      # Latest reported value; NA when the country never reported it.
      ~ dplyr::last(.x[!is.na(.x)])
    )
  ) %>%
  # `desc()` sorts NA last, so countries without data cannot reach the top.
  arrange(desc(vacunados_completos)) %>%
  slice_head(n = 10) %>%
  select(pais, vacunados_completos, casos_totales, muertes_totales) %>%
  knitr::kable()
pais vacunados_completos casos_totales muertes_totales
World 5177942957 NA NA
Asia 3462095463 NA NA
Upper-middle-income countries 1990653301 NA NA
High-income countries 929255961 NA NA
Europe 493751304 NA NA
European Union (27) 327967426 NA NA
Malaysia 27551144 NA NA
Lithuania 1881106 NA NA

5 Metodología

El análisis se basa en datos proporcionados por Our World in Data, una fuente de datos abiertos ampliamente reconocida. Se aplicaron los siguientes pasos metodológicos:

  1. Obtención del dataset: Descarga directa desde GitHub en formato CSV.
  2. Preparación de datos:
    • Limpieza de columnas (nombres, duplicados y valores faltantes).
    • Conversión de fechas y selección de variables clave.
    • Creación de variables derivadas, como casos por vacuna.
    • Guardado en formato Parquet para eficiencia de almacenamiento.
  3. Análisis exploratorio:
    • Evolución temporal de las dosis de refuerzo aplicadas a nivel global.
    • Comparación entre países seleccionados (Paraguay, Argentina, Brasil y Chile) en términos de nuevos casos y vacunación completa.
    • Ranking de países con mayor cantidad de personas con vacunación completa.

Se utilizaron los paquetes tidyverse (que incluye dplyr y ggplot2), lubridate, arrow, skimr y knitr a lo largo de todo el proceso.

6 Conclusiones

Como trabajo futuro, este análisis puede extenderse con un análisis por continente, tasas ajustadas por población y la relación con otras variables sociales o económicas.