library(readr)
## Warning: package 'readr' was built under R version 4.0.4
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v dplyr   1.0.5
## v tibble  3.1.1     v stringr 1.4.0
## v tidyr   1.1.3     v forcats 0.5.1
## v purrr   0.3.4
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.4
## Warning: package 'purrr' was built under R version 4.0.4
## Warning: package 'dplyr' was built under R version 4.0.4
## Warning: package 'stringr' was built under R version 4.0.4
## Warning: package 'forcats' was built under R version 4.0.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the positive-case records once and keep them for the analysis below.
# The file was previously parsed twice (once just to print it, once to assign
# it); with ~2 million rows that doubles the load time for no benefit.
# NOTE(review): FECHA_CORTE and FECHA_RESULTADO are parsed as doubles that
# look like yyyymmdd (e.g. 20210616) -- convert before any date arithmetic.
positivos_covid <- read_csv2("data/positivos_covid.csv")
## i Using '\',\'' as decimal and '\'.\'' as grouping mark. Use `read_delim()` for more control.
## 
## -- Column specification --------------------------------------------------------
## cols(
##   FECHA_CORTE = col_double(),
##   UUID = col_character(),
##   DEPARTAMENTO = col_character(),
##   PROVINCIA = col_character(),
##   DISTRITO = col_character(),
##   METODODX = col_character(),
##   EDAD = col_double(),
##   SEXO = col_character(),
##   FECHA_RESULTADO = col_double()
## )

# Preview the loaded table.
positivos_covid
## # A tibble: 2,019,716 x 9
##    FECHA_CORTE UUID       DEPARTAMENTO PROVINCIA DISTRITO   METODODX  EDAD SEXO 
##          <dbl> <chr>      <chr>        <chr>     <chr>      <chr>    <dbl> <chr>
##  1    20210616 7320cabdc~ LIMA         LIMA      LIMA       PR          35 FEME~
##  2    20210616 e81602051~ LIMA         LIMA      PACHACAMAC PR          36 FEME~
##  3    20210616 cecdbf100~ LIMA         LIMA      LIMA       PR          36 FEME~
##  4    20210616 71ecb6bcc~ LIMA         LIMA      LIMA       PR          37 FEME~
##  5    20210616 566af4276~ LIMA         LIMA      LIMA       PR          37 FEME~
##  6    20210616 027561e9d~ LIMA         LIMA      PACHACAMAC PR          38 FEME~
##  7    20210616 f016889b9~ LIMA         LIMA      PACHACAMAC PR          38 FEME~
##  8    20210616 971f8e129~ LIMA         LIMA      CARABAYLLO PR          35 FEME~
##  9    20210616 bc45b71b0~ LIMA         LIMA      LIMA       PR          35 FEME~
## 10    20210616 0e2a1928d~ LIMA         LIMA      SAN JUAN ~ PR          35 FEME~
## # ... with 2,019,706 more rows, and 1 more variable: FECHA_RESULTADO <dbl>
# Horizontal bar chart: number of rows (positive cases) per province within
# the PIURA department.
# ggplot2 orders a discrete axis by factor level, not by row order, so
# arrange() had no effect on the bars. Reordering the factor by case count
# is what actually sorts them (largest bar ends up at the top).
positivos_covid %>%
  filter(DEPARTAMENTO == "PIURA") %>%
  count(PROVINCIA, name = "total_casos") %>%
  mutate(PROVINCIA = fct_reorder(PROVINCIA, total_casos)) %>%
  ggplot(aes(total_casos, PROVINCIA, fill = PROVINCIA)) +
  geom_col()

# Vertical bar chart: number of rows (positive cases) per district within
# the TALARA province.
# ggplot2 orders a discrete axis by factor level, not by row order, so
# arrange() had no effect on the bars. Reordering the factor in descending
# case count sorts them from the largest (left) to the smallest (right).
positivos_covid %>%
  filter(PROVINCIA == "TALARA") %>%
  count(DISTRITO, name = "total_casos") %>%
  mutate(DISTRITO = fct_reorder(DISTRITO, total_casos, .desc = TRUE)) %>%
  ggplot(aes(DISTRITO, total_casos, fill = DISTRITO)) +
  geom_col()

library(readxl)
## Warning: package 'readxl' was built under R version 4.0.4
# Read the raw IDH 2019 workbook once, keep it, then preview it (it was
# previously read twice: once to print and once to assign).
# NOTE(review): the preview shows title/header/blank rows above the data, so
# most columns come in auto-named (...2, ...3, ...) and typed as character;
# this object needs cleaning before it can be used for analysis.
IDH_2019 <- read_xlsx("data/IDH 2019.xlsx")
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
IDH_2019
## # A tibble: 2,303 x 17
##    `índice de Desa~ ...2  ...3  ...4  ...5  ...6  `2019` ...8  ...9  ...10 ...11
##    <chr>            <chr> <chr> <lgl> <lgl> <chr> <chr>  <chr> <chr> <chr> <lgl>
##  1 <NA>             <NA>  <NA>  NA    NA    <NA>  <NA>   <NA>  <NA>  <NA>  NA   
##  2 UBIGEO           DEPA~ <NA>  NA    NA    Pobl~ Esper~ Pobl~ Años~ Ingr~ NA   
##  3 <NA>             <NA>  Prov~ NA    NA    <NA>  <NA>   <NA>  <NA>  <NA>  NA   
##  4 <NA>             <NA>  Dist~ NA    NA    <NA>  <NA>   <NA>  <NA>  <NA>  NA   
##  5 <NA>             <NA>  <NA>  NA    NA    <NA>  <NA>   <NA>  <NA>  <NA>  NA   
##  6 <NA>             <NA>  <NA>  NA    NA    <NA>  <NA>   <NA>  <NA>  <NA>  NA   
##  7 000000           PERÚ  <NA>  NA    NA    3129~ 75.42~ 67.6~ 9.13~ 1032~ NA   
##  8 <NA>             <NA>  <NA>  NA    NA    <NA>  <NA>   <NA>  <NA>  <NA>  NA   
##  9 010000           AMAZ~ <NA>  NA    NA    3974~ 68.94~ 45.4~ 6.46~ 669.~ NA   
## 10 010100           <NA>  Chac~ NA    NA    5580~ 72.31~ 67.0~ 8.18~ 944.~ NA   
## # ... with 2,293 more rows, and 6 more variables: ...12 <chr>, ...13 <chr>,
## #   ...14 <chr>, ...15 <chr>, ...16 <chr>, ...17 <chr>
# Read the district-level IDH 2019 workbook once, keep it, then preview it
# (it was previously read twice: once to print and once to assign).
# The second column has no header in the sheet, so readxl names it `...2`.
idh_distritos_2019 <- read_xlsx("data/idh_distritos_2019.xlsx")
## New names:
## * `` -> ...2
idh_distritos_2019
## # A tibble: 1,874 x 15
##    UBIGEO  ...2 Distrito    habitantes ranking_hab   IDH ranking_IDH  años
##    <chr>  <dbl> <chr>            <dbl>       <dbl> <dbl>       <dbl> <dbl>
##  1 010101     1 Chachapoyas     33038.         171 0.642         125  72.2
##  2 010102     2 Asuncion          267.        1861 0.423         765  71.4
##  3 010103     3 Balsas           1467.        1443 0.315        1355  68.6
##  4 010104     4 Cheto             585.        1749 0.346        1159  77.5
##  5 010105     5 Chiliquin         391.        1829 0.275        1563  72.5
##  6 010106     6 Chuquibamba      1789.        1365 0.269        1593  67.0
##  7 010107     7 Granada           337.        1844 0.358        1091  66.2
##  8 010108     8 Huancas          1457.        1448 0.415         806  73.1
##  9 010109     9 La Jalca         4279.         899 0.295        1459  73.1
## 10 010110    10 Leimebamba       3855.         963 0.399         881  70.3
## # ... with 1,864 more rows, and 7 more variables: ranking_años <dbl>,
## #   edu_sec_porc <dbl>, edu_ranking <dbl>, años_edu <dbl>,
## #   ranking_años_edu <dbl>, ing_fam_pc <dbl>, ranking_ing <dbl>

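# Example (illustrative, not run): split `ubigeo` at character positions
# 2 and 4 into department, province and district codes, then keep one
# province.
#
# ejemplo_ubigeo %>%
#   separate(ubigeo, into = c("cod_dep", "cod_prov", "cod_dis"), sep = c(2, 4)) %>%
#   filter(cod_dep == "01", cod_prov == "01")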

# IDH and population for the six districts named below. The district name is
# upper-cased and stored as DISTRITO so it lines up with the DISTRITO column
# of the case data. Result columns, in order: IDH, DISTRITO, habitantes.
idh_talara <- idh_distritos_2019 %>%
  filter(
    Distrito %in% c(
      "El Alto", "La Brea", "Lobitos",
      "Los Organos", "Mancora", "Pariñas"
    )
  ) %>%
  select(IDH, DISTRITO = Distrito, habitantes) %>%
  mutate(DISTRITO = str_to_upper(DISTRITO))
# Number of rows in positivos_covid per district of the TALARA province
# (one row per district, count stored in total_casos).
covid_talara <- positivos_covid %>%
  filter(PROVINCIA == "TALARA") %>%
  count(DISTRITO, name = "total_casos")
# Attach IDH and population to the per-district case counts, then express
# cases as a percentage of inhabitants (porccasosporhab).
# The join key is stated explicitly: a natural join would silently change
# its keys if the two tables ever shared another column name.
talara <- covid_talara %>%
  left_join(idh_talara, by = "DISTRITO") %>%
  mutate(porccasosporhab = total_casos / habitantes * 100)
talara
## # A tibble: 6 x 5
##   DISTRITO    total_casos   IDH habitantes porccasosporhab
##   <chr>             <int> <dbl>      <dbl>           <dbl>
## 1 EL ALTO             289 0.575      7348.            3.93
## 2 LA BREA             515 0.588     10993.            4.68
## 3 LOBITOS              65 0.592      1553.            4.19
## 4 LOS ORGANOS         518 0.563      9570.            5.41
## 5 MANCORA             690 0.561     14045.            4.91
## 6 PARIÑAS            3223 0.590     89997.            3.58
# Scatter plot of IDH (x) against cases per 100 inhabitants (y), one point
# per district in `talara`.
ggplot(talara, aes(x = IDH, y = porccasosporhab)) +
  geom_point()

# Scatter plot of IDH (x) against cases per 100 inhabitants (y), with point
# size showing population and each point labelled with its district.
# `size` is mapped on the point layer only: set globally it was inherited by
# geom_text(), which scaled the label text by population as well. Labels are
# drawn after the points and shifted upwards so the points do not cover them.
talara %>%
  ggplot(aes(IDH, porccasosporhab)) +
  geom_point(aes(size = habitantes)) +
  geom_text(aes(label = DISTRITO), vjust = -1)