Packages

pacman::p_load(tidyverse)

Dataset

Fuente del dataset: https://github.com/jorgeperezrojas/covid19-data

# Download the regional case table (one row per region, one column per date).
chile <- readr::read_csv(file = "https://bit.ly/chilecovid19")

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   region = col_character()
## )

## See spec(...) for full column specifications.

Ver los datos

# Preview the first 10 rows of the wide table.
chile %>% head(n = 10)

## # A tibble: 10 x 52
##    codigo region `03/07/2020` `03/08/2020` `03/09/2020` `03/10/2020`
##     <dbl> <chr>         <dbl>        <dbl>        <dbl>        <dbl>
##  1     15 Arica…            0            0            0            0
##  2      1 Tarap…            0            0            0            0
##  3      2 Antof…            0            0            0            0
##  4      3 Ataca…            0            0            0            0
##  5      4 Coqui…            0            0            0            0
##  6      5 Valpa…            0            0            0            0
##  7     13 Metro…            4            5            5           10
##  8      6 O’Hig…            0            0            0            0
##  9      7 Maule             2            3            5            5
## 10     16 Ñuble             0            0            0            0
## # … with 46 more variables: `03/11/2020` <dbl>, `03/12/2020` <dbl>,
## #   `03/13/2020` <dbl>, `03/14/2020` <dbl>, `03/15/2020` <dbl>,
## #   `03/16/2020` <dbl>, `03/17/2020` <dbl>, `03/18/2020` <dbl>,
## #   `03/19/2020` <dbl>, `03/20/2020` <dbl>, `03/21/2020` <dbl>,
## #   `03/22/2020` <dbl>, `03/23/2020` <dbl>, `03/24/2020` <dbl>,
## #   `03/25/2020` <dbl>, `03/26/2020` <dbl>, `03/27/2020` <dbl>,
## #   `03/28/2020` <dbl>, `03/29/2020` <dbl>, `03/30/2020` <dbl>,
## #   `03/31/2020` <dbl>, `04/01/2020` <dbl>, `04/02/2020` <dbl>,
## #   `04/03/2020` <dbl>, `04/04/2020` <dbl>, `04/05/2020` <dbl>,
## #   `04/06/2020` <dbl>, `04/07/2020` <dbl>, `04/08/2020` <dbl>,
## #   `04/09/2020` <dbl>, `04/10/2020` <dbl>, `04/11/2020` <dbl>,
## #   `04/12/2020` <dbl>, `04/13/2020` <dbl>, `04/14/2020` <dbl>,
## #   `04/15/2020` <dbl>, `04/16/2020` <dbl>, `04/17/2020` <dbl>,
## #   `04/18/2020` <dbl>, `04/19/2020` <dbl>, `04/20/2020` <dbl>,
## #   `04/21/2020` <dbl>, `04/22/2020` <dbl>, `04/23/2020` <dbl>,
## #   `04/24/2020` <dbl>, `04/25/2020` <dbl>

FCT_INORDER Ordeno las regiones

# Count how many rows each region name has.
table(chile[["region"]])

## 
##        Antofagasta          Araucanía Arica y Parinacota            Atacama 
##                  1                  1                  1                  1 
##              Aysén             Biobío           Coquimbo          Los Lagos 
##                  1                  1                  1                  1 
##           Los Ríos         Magallanes              Maule      Metropolitana 
##                  1                  1                  1                  1 
##              Ñuble          O’Higgins           Tarapacá         Valparaíso 
##                  1                  1                  1                  1

# Turn `region` into a factor whose levels follow the order of first
# appearance in the data instead of alphabetical order.
chile <- mutate(chile, region = forcats::fct_inorder(region))

PIVOT_LONGER Reformatear los datos

# Reshape to long format: every column whose name contains "/" (the date
# columns) becomes a (fecha, casos) pair, then the date strings are parsed as
# month/day/year.
chile_long <- pivot_longer(
  chile,
  cols = matches("/"),
  names_to = "fecha",
  values_to = "casos"
)
chile_long <- mutate(chile_long, fecha = lubridate::mdy(fecha))

HEAD Ver en formato long

# Preview the long-format table.
chile_long %>% head()

## # A tibble: 6 x 4
##   codigo region             fecha      casos
##    <dbl> <fct>              <date>     <dbl>
## 1     15 Arica y Parinacota 2020-03-07     0
## 2     15 Arica y Parinacota 2020-03-08     0
## 3     15 Arica y Parinacota 2020-03-09     0
## 4     15 Arica y Parinacota 2020-03-10     0
## 5     15 Arica y Parinacota 2020-03-11     0
## 6     15 Arica y Parinacota 2020-03-12     0

Gráficos

GROUP BY and SUMMARISE Total por fecha absoluto

# Sum the cases of all regions for each date and draw the total as a line on a
# linear y axis.
ggplot(
  data = summarise(group_by(chile_long, fecha), total = sum(casos)),
  mapping = aes(x = fecha, y = total)
) +
  geom_line()

## SCALE LOG Total por fecha log10

# Same total per date as the previous plot, drawn on a log10 y axis.
ggplot(
  data = summarise(group_by(chile_long, fecha), total = sum(casos)),
  mapping = aes(x = fecha, y = total)
) +
  geom_line() +
  scale_y_log10() +
  ylab("log 10 total")

Por región

Todos juntos absoluto

# One line per region, all in the same panel, linear y axis.
ggplot(chile_long, aes(fecha, casos, group = region)) +
  geom_line()

COLOR con colores absoluto

# One line per region, coloured by region, linear y axis.
ggplot(
  chile_long,
  aes(fecha, casos, group = region, color = region)
) +
  geom_line()

con colores log 10

# One line per region, coloured by region, log10 y axis.
ggplot(
  chile_long,
  aes(fecha, casos, group = region, color = region)
) +
  geom_line() +
  scale_y_log10() +
  ylab("log 10 casos")

## Warning: Transformation introduced infinite values in continuous y-axis

GGHIGHLIGHT Solución con gghighlight

pacman::p_load(gghighlight)

# Highlight a single region (codigo 14) on the log10 plot.
# The predicate is row-wise (one value per row, not one per group), so grouped
# evaluation is switched off explicitly. Otherwise gghighlight first tries
# group_by(), fails, warns, and falls back to this same ungrouped filter.
chile_long %>%
  ggplot(aes(x = fecha, y = casos,
             group = region, color = region)) +
  geom_line() +
  scale_y_log10() +
  gghighlight::gghighlight(codigo == 14, use_group_by = FALSE)

## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...

## label_key: region

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

# Highlight three regions (codigos 5, 12 and 14) on the log10 plot.
# The predicate is row-wise (one value per row, not one per group), so grouped
# evaluation is switched off explicitly. Otherwise gghighlight first tries
# group_by(), fails, warns, and falls back to this same ungrouped filter.
chile_long %>%
  ggplot(aes(x = fecha, y = casos,
             group = region, color = region)) +
  geom_line() +
  scale_y_log10() +
  gghighlight::gghighlight(codigo %in% c(5, 12, 14), use_group_by = FALSE)

## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...

## label_key: region

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

FACET Solución con facet absoluto

# One panel per region, linear y axis shared across panels.
ggplot(chile_long, aes(fecha, casos, group = region)) +
  geom_line() +
  facet_wrap(vars(region))

Solución con facet log10

# One panel per region on a log10 y axis.
# Panels are keyed by `region` (not the numeric `codigo`) so the strip labels
# show region names, matching the other faceted plots in this document.
chile_long %>%
  ggplot(aes(x = fecha, y = casos, group = region)) +
  geom_line() +
  facet_wrap(~region) +
  scale_y_log10() +
  labs(y = "log 10 casos")

## Warning: Transformation introduced infinite values in continuous y-axis

FILTER facet log10

# Keep only the regions with codigo 5, 12 or 14 and draw one log10 panel each.
# Panels are keyed by `region` (not the numeric `codigo`) so the strip labels
# show region names, matching the other faceted plots in this document.
chile_long %>%
  filter(codigo %in% c(5, 12, 14)) %>%
  ggplot(aes(x = fecha, y = casos, group = region)) +
  geom_line() +
  facet_wrap(~region) +
  scale_y_log10()

## Warning: Transformation introduced infinite values in continuous y-axis

LAG Ver casos nuevos

Tabla casos nuevos

# Table of cumulative cases and daily new cases.
# The lag is taken within each region, ordered by date, so a region's first
# day is not differenced against the last day of the region stored above it.
# The first day of every region is therefore NA.
chile_long %>%
  group_by(region) %>%
  mutate(casos_nuevos = casos - lag(casos, order_by = fecha)) %>%
  ungroup() %>%
  # Stack both measures so the clamp below applies to each of them.
  pivot_longer(casos:casos_nuevos,
               names_to = "item",
               values_to = "casos") %>%
  # Negative values are replaced by 0; NA stays NA.
  mutate(casos = if_else(casos < 0, 0, casos)) %>%
  pivot_wider(names_from = item,
              values_from = casos)

## # A tibble: 800 x 5
##    codigo region             fecha      casos casos_nuevos
##     <dbl> <fct>              <date>     <dbl>        <dbl>
##  1     15 Arica y Parinacota 2020-03-07     0           NA
##  2     15 Arica y Parinacota 2020-03-08     0            0
##  3     15 Arica y Parinacota 2020-03-09     0            0
##  4     15 Arica y Parinacota 2020-03-10     0            0
##  5     15 Arica y Parinacota 2020-03-11     0            0
##  6     15 Arica y Parinacota 2020-03-12     0            0
##  7     15 Arica y Parinacota 2020-03-13     0            0
##  8     15 Arica y Parinacota 2020-03-14     0            0
##  9     15 Arica y Parinacota 2020-03-15     0            0
## 10     15 Arica y Parinacota 2020-03-16     0            0
## # … with 790 more rows

Gráficos casos y casos nuevos, total de todas las regiones, log10

# Total cumulative cases and total new cases per date (all regions summed),
# one coloured line per measure, log10 y axis.
# The lag is taken within each region, ordered by date, so a region's first
# day is not differenced against the last day of the region stored above it.
chile_long %>%
  group_by(region) %>%
  mutate(casos_nuevos = casos - lag(casos, order_by = fecha)) %>%
  ungroup() %>%
  pivot_longer(casos:casos_nuevos,
               names_to = "item",
               values_to = "casos") %>%
  # Negative values are replaced by 0; NA stays NA.
  mutate(casos = if_else(casos < 0, 0, casos)) %>%
  group_by(fecha, item) %>%
  summarise(sum = sum(casos)) %>%
  # filter(codigo != 13) %>%  # with an absolute scale this turns into a mess
  ggplot(aes(x = fecha, y = sum, color = item)) +
  geom_line() +
  scale_y_log10()

## Warning: Removed 1 row(s) containing missing values (geom_path).

Gráficos casos y casos nuevos facet log10

# Cumulative and new cases per region: one panel per region, one coloured line
# per measure, log10 y axis.
# The lag is taken within each region, ordered by date, so a region's first
# day is not differenced against the last day of the region stored above it.
chile_long %>%
  group_by(region) %>%
  mutate(casos_nuevos = casos - lag(casos, order_by = fecha)) %>%
  ungroup() %>%
  pivot_longer(casos:casos_nuevos,
               names_to = "item",
               values_to = "casos") %>%
  # Negative values are replaced by 0; NA stays NA.
  mutate(casos = if_else(casos < 0, 0, casos)) %>%
  # filter(codigo != 13) %>%  # with an absolute scale this turns into a mess
  ggplot(aes(x = fecha, y = casos, group = item, color = item)) +
  geom_line() +
  scale_y_log10() +
  facet_wrap( ~ region)

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 1 row(s) containing missing values (geom_path).

filter solo casos nuevos facet log10

# New cases only: one panel per region, log10 y axis.
# The lag is taken within each region, ordered by date, so a region's first
# day is not differenced against the last day of the region stored above it.
chile_long %>%
  group_by(region) %>%
  mutate(casos_nuevos = casos - lag(casos, order_by = fecha)) %>%
  ungroup() %>%
  pivot_longer(casos:casos_nuevos,
               names_to = "item",
               values_to = "casos") %>%
  # Negative values are replaced by 0; NA stays NA.
  mutate(casos = if_else(casos < 0, 0, casos)) %>%
  filter(item == "casos_nuevos") %>%
  # filter(codigo != 13) %>%  # with an absolute scale this turns into a mess
  ggplot(aes(x = fecha, y = casos, group = item, color = item)) +
  geom_line() +
  scale_y_log10() +
  facet_wrap( ~ region)

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 1 row(s) containing missing values (geom_path).

Avanzado, por size y color

# Cumulative cases per region (log10 y axis, one panel per region), with line
# width mapped to new cases and line colour to log10 of new cases.
# The lag is taken within each region, ordered by date, so a region's first
# day is not differenced against the last day of the region stored above it.
chile_long %>%
  group_by(region) %>%
  mutate(casos_nuevos = casos - lag(casos, order_by = fecha)) %>%
  ungroup() %>%
  # Negative values are replaced by 0; NA stays NA.
  mutate(casos_nuevos = if_else(casos_nuevos < 0, 0, casos_nuevos)) %>%
  # arrange(desc(casos_nuevos))
  # filter(codigo != 13) %>%  # with an absolute scale this turns into a mess
  ggplot(aes(x = fecha, y = casos,
             size = casos_nuevos, color = log10(casos_nuevos))) +
  scale_color_distiller(palette = "Spectral") +
  geom_line() +
  scale_y_log10() +
  facet_wrap( ~ region) +
  theme_minimal()

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 1 row(s) containing missing values (geom_path).

Peaks y valleys

pacman::p_load(ggpmisc)

# Total new cases per date (all regions summed) as a single line.
# The lag is taken within each region, ordered by date, so a region's first
# day is not differenced against the last day of the region stored above it.
chile_long %>%
  group_by(region) %>%
  mutate(casos_nuevos = casos - lag(casos, order_by = fecha)) %>%
  ungroup() %>%
  # Negative values are replaced by 0; NA stays NA.
  mutate(casos_nuevos = if_else(casos_nuevos < 0, 0, casos_nuevos)) %>%
  group_by(fecha) %>%
  summarise(nuevos_casos = sum(casos_nuevos)) %>%
  ggplot(aes(x = fecha, y = nuevos_casos)) +
  geom_line()

## Warning: Removed 1 row(s) containing missing values (geom_path).

# Total new cases per date with peaks marked in red and valleys in blue, each
# labelled with its date formatted as day/month.
# The lag is taken within each region, ordered by date, so a region's first
# day is not differenced against the last day of the region stored above it.
chile_long %>%
  group_by(region) %>%
  mutate(casos_nuevos = casos - lag(casos, order_by = fecha)) %>%
  ungroup() %>%
  # Negative values are replaced by 0; NA stays NA.
  mutate(casos_nuevos = if_else(casos_nuevos < 0, 0, casos_nuevos)) %>%
  # The date is converted to POSIXct before plotting; the peak/valley labels
  # below use a date-time format string.
  mutate(fecha = as.POSIXct(fecha, format = "%Y-%m-%d")) %>%
  group_by(fecha) %>%
  summarise(nuevos_casos = sum(casos_nuevos)) %>%
  ggplot(aes(x = fecha, y = nuevos_casos)) +
  geom_line() +
  stat_peaks(colour = "red") +
  stat_valleys(colour = "blue") +
  stat_peaks(geom = "text", colour = "red", size = 3,
             vjust = -0.5, x.label.fmt = "%d/%m") +
  stat_valleys(geom = "text", colour = "blue", size = 3,
               vjust = 1.5, x.label.fmt = "%d/%m")

## Warning: Removed 1 rows containing non-finite values (stat_peaks).

## Warning: Removed 1 rows containing non-finite values (stat_valleys).

## Warning: Removed 1 rows containing non-finite values (stat_peaks).

## Warning: Removed 1 rows containing non-finite values (stat_valleys).

## Warning: Removed 1 row(s) containing missing values (geom_path).

MAPAS (SF)

Fuente: https://arcruz0.github.io/libroadp/mapas.html

Nota: en Ubuntu

sudo apt install libudunits2-dev libgdal-dev

pacman::p_load(sf)

# Read the regional boundaries from the local "Regiones/" directory.
chile_regiones <- sf::st_read(dsn = "Regiones/")

## Reading layer `Regional' from data source `/home/sergio/Dropbox/Public/Practico R/Chile_COVID-SEIR-model-Stockholm/Regiones' using driver `ESRI Shapefile'
## Simple feature collection with 17 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -12184470 ymin: -7554436 xmax: -7393642 ymax: -1978920
## projected CRS:  WGS 84 / Pseudo-Mercator

# Print the spatial table to inspect its fields and geometry column.
chile_regiones

## Simple feature collection with 17 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -12184470 ymin: -7554436 xmax: -7393642 ymax: -1978920
## projected CRS:  WGS 84 / Pseudo-Mercator
## First 10 features:
##    objectid cir_sena codregion   area_km   st_area_sh st_length_
## 1      1084        1        15  16866.82  18868687744   750529.6
## 2      1085        2         1  42284.57  48306372203  1213713.1
## 3      1086        3         2 126071.43 150845155633  2516112.0
## 4      1087       15        12 133053.14 358131609833 90498303.6
## 5      1088       14        11 106703.38 224274263072 41444810.6
## 6      1089        4         3  75661.25  96439063562  2401740.7
## 7      1090        5         4  40575.90  54980818749  2065933.1
## 8      1091        6         5  16322.97  23014748571  1679609.2
## 9      1092        7        13  15392.03  22252038246  1064252.6
## 10     1093       13        10  48408.37  87718341940  7874157.9
##                                       Region                       geometry
## 1               Región de Arica y Parinacota MULTIPOLYGON (((-7727277 -1...
## 2                         Región de Tarapacá MULTIPOLYGON (((-7810214 -2...
## 3                      Región de Antofagasta MULTIPOLYGON (((-7870471 -2...
## 4   Región de Magallanes y Antártica Chilena MULTIPOLYGON (((-7494058 -7...
## 5  Región de Aysén del Gral.Ibañez del Campo MULTIPOLYGON (((-8413518 -6...
## 6                          Región de Atacama MULTIPOLYGON (((-7932748 -3...
## 7                         Región de Coquimbo MULTIPOLYGON (((-7963269 -3...
## 8                       Región de Valparaíso MULTIPOLYGON (((-8991646 -3...
## 9           Región Metropolitana de Santiago MULTIPOLYGON (((-7873737 -3...
## 10                       Región de Los Lagos MULTIPOLYGON (((-8331768 -5...

# Show the classes of the object returned by st_read().
class(chile_regiones)

## [1] "sf"         "data.frame"

# Draw the region polygons with default styling.
ggplot(chile_regiones) +
  geom_sf()

Genero centroides

# Attach each region's centroid: `centroid` holds the result of st_centroid()
# on the region geometry, `coords` the result of st_coordinates() on that
# centroid, and `coords_x` / `coords_y` its first and second element as plain
# numbers.
chile_regiones <- chile_regiones %>%
  mutate(centroid = map(geometry, st_centroid)) %>%
  mutate(coords = map(centroid, st_coordinates)) %>%
  mutate(
    coords_x = map_dbl(coords, 1),
    coords_y = map_dbl(coords, 2)
  )
head(chile_regiones)

## Simple feature collection with 6 features and 11 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -8429326 ymin: -7554436 xmax: -7393642 ymax: -1978920
## projected CRS:  WGS 84 / Pseudo-Mercator
##   objectid cir_sena codregion   area_km   st_area_sh st_length_
## 1     1084        1        15  16866.82  18868687744   750529.6
## 2     1085        2         1  42284.57  48306372203  1213713.1
## 3     1086        3         2 126071.43 150845155633  2516112.0
## 4     1087       15        12 133053.14 358131609833 90498303.6
## 5     1088       14        11 106703.38 224274263072 41444810.6
## 6     1089        4         3  75661.25  96439063562  2401740.7
##                                      Region                       geometry
## 1              Región de Arica y Parinacota MULTIPOLYGON (((-7727277 -1...
## 2                        Región de Tarapacá MULTIPOLYGON (((-7810214 -2...
## 3                     Región de Antofagasta MULTIPOLYGON (((-7870471 -2...
## 4  Región de Magallanes y Antártica Chilena MULTIPOLYGON (((-7494058 -7...
## 5 Región de Aysén del Gral.Ibañez del Campo MULTIPOLYGON (((-8413518 -6...
## 6                         Región de Atacama MULTIPOLYGON (((-7932748 -3...
##             centroid             coords coords_x coords_y
## 1 -7751017, -2095926 -7751017, -2095926 -7751017 -2095926
## 2 -7724801, -2299133 -7724801, -2299133 -7724801 -2299133
## 3 -7694562, -2698980 -7694562, -2698980 -7694562 -2698980
## 4 -7996566, -6900069 -7996566, -6900069 -7996566 -6900069
## 5 -8155537, -5855922 -8155537, -5855922 -8155537 -5855922
## 6 -7782781, -3174675 -7782781, -3174675 -7782781 -3174675

pacman::p_load(ggrepel)

# Region polygons with a repelled text label placed at each centroid.
ggplot(chile_regiones) +
  geom_sf() +
  geom_text_repel(
    # note: the label comes from the `Region` column (capital R)
    aes(x = coords_x, y = coords_y, label = Region),
    size = 1.5,
    min.segment.length = 0
  )

# List the region names present in the shapefile.
table(chile_regiones[["Region"]])

## 
##                     Región de Antofagasta 
##                                         1 
##              Región de Arica y Parinacota 
##                                         1 
##                         Región de Atacama 
##                                         1 
## Región de Aysén del Gral.Ibañez del Campo 
##                                         1 
##                        Región de Coquimbo 
##                                         1 
##                    Región de La Araucanía 
##                                         1 
##                       Región de Los Lagos 
##                                         1 
##                        Región de Los Ríos 
##                                         1 
##  Región de Magallanes y Antártica Chilena 
##                                         1 
##                           Región de Ñuble 
##                                         1 
##                        Región de Tarapacá 
##                                         1 
##                      Región de Valparaíso 
##                                         1 
##                        Región del Bío-Bío 
##                                         1 
##  Región del Libertador Bernardo O'Higgins 
##                                         1 
##                          Región del Maule 
##                                         1 
##          Región Metropolitana de Santiago 
##                                         1 
##                         Zona sin demarcar 
##                                         1

# Look at the last rows of the long table to see the most recent dates.
chile_long %>% tail()

## # A tibble: 6 x 4
##   codigo region     fecha      casos
##    <dbl> <fct>      <date>     <dbl>
## 1     12 Magallanes 2020-04-20   566
## 2     12 Magallanes 2020-04-21   593
## 3     12 Magallanes 2020-04-22   605
## 4     12 Magallanes 2020-04-23   615
## 5     12 Magallanes 2020-04-24   624
## 6     12 Magallanes 2020-04-25   655

Selecciono el último día

# Keep only the rows of the most recent date in the data.
chile_casos_ultimo <- filter(chile_long, fecha == max(fecha))

Creo un código en común para ambas bases de datos

# Copy `codregion` into `codigo` so both tables share a join key.
chile_regiones <- mutate(chile_regiones, codigo = codregion)

Elimino la zona sin demarcar

# Drop the feature whose code is 0.
chile_regiones <- filter(chile_regiones, codigo != 0)

Uno las dos bases de datos

# Add the latest case counts to the spatial table, matching on `codigo`.
chile_regiones <- chile_regiones %>%
  left_join(chile_casos_ultimo, by = "codigo")

Llenamos

# Choropleth filled by the raw case count.
ggplot(chile_regiones) +
  geom_sf(mapping = aes(fill = casos))

# Same map filled by log10 of the case count.
ggplot(chile_regiones) +
  geom_sf(mapping = aes(fill = log10(casos)))

# Same log10 map using the "Spectral" palette.
ggplot(chile_regiones) +
  geom_sf(mapping = aes(fill = log10(casos))) +
  scale_fill_distiller(palette = "Spectral")

Mapa por 100.000 habitantes

Base de datos con habitantes por región

# Read the population per region code from a local CSV file.
chile_habitantes <- readr::read_csv(file = "chile_habitantes.csv")

## Parsed with column specification:
## cols(
##   codigo = col_double(),
##   poblacion = col_double()
## )

# Show up to 20 rows of the population table.
chile_habitantes %>% head(n = 20)

## # A tibble: 16 x 2
##    codigo poblacion
##     <dbl>     <dbl>
##  1     15     25110
##  2      1    382773
##  3      2    691854
##  4      3    691854
##  5      4    836096
##  6      5   1960170
##  7     13   8125072
##  8      6    991063
##  9      7   1131939
## 10     16    511551
## 11      8   1663696
## 12      9   1014343
## 13     14    405835
## 14     10    891440
## 15     11    107297
## 16     12    178362

# Add the population column to the spatial table, matching on `codigo`.
chile_regiones <- chile_regiones %>%
  left_join(chile_habitantes, by = "codigo")

Creo una tasa por 100000 habitantes

# Cases per 100,000 inhabitants: cases divided by population, times 100000.
chile_regiones <- mutate(
  chile_regiones,
  tasa_por_100000 = (casos / poblacion) * 100000
)

# Choropleth filled by the rate per 100,000 inhabitants.
ggplot(chile_regiones) +
  geom_sf(mapping = aes(fill = tasa_por_100000)) +
  scale_fill_distiller(palette = "Spectral") +
  labs(title = "Tasa de casos por 100.000 habitantes") +
  labs(fill = "Tasa por 100.000 hab")

# Choropleth filled by log10 of the rate per 100,000 inhabitants.
# The title and legend say "log10 of the rate": the logarithm is applied to
# the rate itself, not to the 100.000 denominator.
ggplot(data = chile_regiones) +
  geom_sf(aes(fill = log10(tasa_por_100000))) +
  scale_fill_distiller(palette = "Spectral") +
  labs(title = "log10 de la tasa de casos por 100.000 habitantes",
       fill = "log10(tasa por 100.000 hab)")

Graficando los datos de COVID19 en Chile

Sergio Uribe

2020-04-26