library(readr)
## Warning: package 'readr' was built under R version 4.0.4
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v dplyr 1.0.5
## v tibble 3.1.1 v stringr 1.4.0
## v tidyr 1.1.3 v forcats 0.5.1
## v purrr 0.3.4
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.4
## Warning: package 'purrr' was built under R version 4.0.4
## Warning: package 'dplyr' was built under R version 4.0.4
## Warning: package 'stringr' was built under R version 4.0.4
## Warning: package 'forcats' was built under R version 4.0.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the COVID-19 positives file and let the result print; nothing is stored.
read_csv2(file = "data/positivos_covid.csv")
## i Using '\',\'' as decimal and '\'.\'' as grouping mark. Use `read_delim()` for more control.
##
## -- Column specification --------------------------------------------------------
## cols(
## FECHA_CORTE = col_double(),
## UUID = col_character(),
## DEPARTAMENTO = col_character(),
## PROVINCIA = col_character(),
## DISTRITO = col_character(),
## METODODX = col_character(),
## EDAD = col_double(),
## SEXO = col_character(),
## FECHA_RESULTADO = col_double()
## )
## # A tibble: 2,019,716 x 9
## FECHA_CORTE UUID DEPARTAMENTO PROVINCIA DISTRITO METODODX EDAD SEXO
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 20210616 7320cabdc~ LIMA LIMA LIMA PR 35 FEME~
## 2 20210616 e81602051~ LIMA LIMA PACHACAMAC PR 36 FEME~
## 3 20210616 cecdbf100~ LIMA LIMA LIMA PR 36 FEME~
## 4 20210616 71ecb6bcc~ LIMA LIMA LIMA PR 37 FEME~
## 5 20210616 566af4276~ LIMA LIMA LIMA PR 37 FEME~
## 6 20210616 027561e9d~ LIMA LIMA PACHACAMAC PR 38 FEME~
## 7 20210616 f016889b9~ LIMA LIMA PACHACAMAC PR 38 FEME~
## 8 20210616 971f8e129~ LIMA LIMA CARABAYLLO PR 35 FEME~
## 9 20210616 bc45b71b0~ LIMA LIMA LIMA PR 35 FEME~
## 10 20210616 0e2a1928d~ LIMA LIMA SAN JUAN ~ PR 35 FEME~
## # ... with 2,019,706 more rows, and 1 more variable: FECHA_RESULTADO <dbl>
# Keep the case-level table in memory; every later step starts from it.
positivos_covid <- read_csv2(file = "data/positivos_covid.csv")
## i Using '\',\'' as decimal and '\'.\'' as grouping mark. Use `read_delim()` for more control.
##
## -- Column specification --------------------------------------------------------
## cols(
## FECHA_CORTE = col_double(),
## UUID = col_character(),
## DEPARTAMENTO = col_character(),
## PROVINCIA = col_character(),
## DISTRITO = col_character(),
## METODODX = col_character(),
## EDAD = col_double(),
## SEXO = col_character(),
## FECHA_RESULTADO = col_double()
## )
# Show the loaded case table (relies on top-level auto-printing).
positivos_covid
## # A tibble: 2,019,716 x 9
## FECHA_CORTE UUID DEPARTAMENTO PROVINCIA DISTRITO METODODX EDAD SEXO
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 20210616 7320cabdc~ LIMA LIMA LIMA PR 35 FEME~
## 2 20210616 e81602051~ LIMA LIMA PACHACAMAC PR 36 FEME~
## 3 20210616 cecdbf100~ LIMA LIMA LIMA PR 36 FEME~
## 4 20210616 71ecb6bcc~ LIMA LIMA LIMA PR 37 FEME~
## 5 20210616 566af4276~ LIMA LIMA LIMA PR 37 FEME~
## 6 20210616 027561e9d~ LIMA LIMA PACHACAMAC PR 38 FEME~
## 7 20210616 f016889b9~ LIMA LIMA PACHACAMAC PR 38 FEME~
## 8 20210616 971f8e129~ LIMA LIMA CARABAYLLO PR 35 FEME~
## 9 20210616 bc45b71b0~ LIMA LIMA LIMA PR 35 FEME~
## 10 20210616 0e2a1928d~ LIMA LIMA SAN JUAN ~ PR 35 FEME~
## # ... with 2,019,706 more rows, and 1 more variable: FECHA_RESULTADO <dbl>
# Confirmed cases per province of the PIURA department, drawn as horizontal
# bars ranked by case count.
positivos_covid %>%
  filter(DEPARTAMENTO == "PIURA") %>%
  count(PROVINCIA, name = "total_casos") %>%
  # ggplot ignores row order on a character axis (it falls back to
  # alphabetical order), so the ranking is stored in the factor levels instead
  # of relying on arrange().
  mutate(PROVINCIA = fct_reorder(PROVINCIA, total_casos)) %>%
  ggplot(aes(total_casos, PROVINCIA, fill = PROVINCIA)) +
  geom_col()
# Confirmed cases per district of the TALARA province, drawn as vertical bars
# from the largest count to the smallest.
positivos_covid %>%
  filter(PROVINCIA == "TALARA") %>%
  count(DISTRITO, name = "total_casos") %>%
  # ggplot ignores row order on a character axis, so the descending ranking is
  # stored in the factor levels instead of relying on arrange().
  mutate(DISTRITO = fct_reorder(DISTRITO, total_casos, .desc = TRUE)) %>%
  ggplot(aes(DISTRITO, total_casos, fill = DISTRITO)) +
  geom_col()
library(readxl)
## Warning: package 'readxl' was built under R version 4.0.4
# Read the IDH 2019 workbook and let the result print; nothing is stored.
read_xlsx(path = "data/IDH 2019.xlsx")
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
## # A tibble: 2,303 x 17
## `índice de Desa~ ...2 ...3 ...4 ...5 ...6 `2019` ...8 ...9 ...10 ...11
## <chr> <chr> <chr> <lgl> <lgl> <chr> <chr> <chr> <chr> <chr> <lgl>
## 1 <NA> <NA> <NA> NA NA <NA> <NA> <NA> <NA> <NA> NA
## 2 UBIGEO DEPA~ <NA> NA NA Pobl~ Esper~ Pobl~ Años~ Ingr~ NA
## 3 <NA> <NA> Prov~ NA NA <NA> <NA> <NA> <NA> <NA> NA
## 4 <NA> <NA> Dist~ NA NA <NA> <NA> <NA> <NA> <NA> NA
## 5 <NA> <NA> <NA> NA NA <NA> <NA> <NA> <NA> <NA> NA
## 6 <NA> <NA> <NA> NA NA <NA> <NA> <NA> <NA> <NA> NA
## 7 000000 PERÚ <NA> NA NA 3129~ 75.42~ 67.6~ 9.13~ 1032~ NA
## 8 <NA> <NA> <NA> NA NA <NA> <NA> <NA> <NA> <NA> NA
## 9 010000 AMAZ~ <NA> NA NA 3974~ 68.94~ 45.4~ 6.46~ 669.~ NA
## 10 010100 <NA> Chac~ NA NA 5580~ 72.31~ 67.0~ 8.18~ 944.~ NA
## # ... with 2,293 more rows, and 6 more variables: ...12 <chr>, ...13 <chr>,
## # ...14 <chr>, ...15 <chr>, ...16 <chr>, ...17 <chr>
# Keep the IDH 2019 workbook exactly as read.
# NOTE(review): this object is not used again in the visible part of the
# script — confirm whether it is still needed.
IDH_2019 <- read_xlsx(path = "data/IDH 2019.xlsx")
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
# Read the district-level IDH workbook and let the result print; nothing is
# stored.
read_xlsx(path = "data/idh_distritos_2019.xlsx")
## New names:
## * `` -> ...2
## # A tibble: 1,874 x 15
## UBIGEO ...2 Distrito habitantes ranking_hab IDH ranking_IDH años
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 010101 1 Chachapoyas 33038. 171 0.642 125 72.2
## 2 010102 2 Asuncion 267. 1861 0.423 765 71.4
## 3 010103 3 Balsas 1467. 1443 0.315 1355 68.6
## 4 010104 4 Cheto 585. 1749 0.346 1159 77.5
## 5 010105 5 Chiliquin 391. 1829 0.275 1563 72.5
## 6 010106 6 Chuquibamba 1789. 1365 0.269 1593 67.0
## 7 010107 7 Granada 337. 1844 0.358 1091 66.2
## 8 010108 8 Huancas 1457. 1448 0.415 806 73.1
## 9 010109 9 La Jalca 4279. 899 0.295 1459 73.1
## 10 010110 10 Leimebamba 3855. 963 0.399 881 70.3
## # ... with 1,864 more rows, and 7 more variables: ranking_años <dbl>,
## # edu_sec_porc <dbl>, edu_ranking <dbl>, años_edu <dbl>,
## # ranking_años_edu <dbl>, ing_fam_pc <dbl>, ranking_ing <dbl>
# Keep the district-level IDH table; idh_talara is derived from it below.
idh_distritos_2019 <- read_xlsx(path = "data/idh_distritos_2019.xlsx")
## New names:
## * `` -> ...2
# Example: split the `ubigeo` code by position (characters 1-2, 3-4 and the
# remainder) into cod_dep / cod_prov / cod_dis, then keep the rows whose first
# two pieces are both "01".
# NOTE(review): `ejemplo_ubigeo` is not defined in the visible part of this
# script — confirm it exists before this statement runs.
ejemplo_ubigeo %>%
  separate(
    ubigeo,
    into = c("cod_dep", "cod_prov", "cod_dis"),
    sep = c(2, 4)
  ) %>%
  filter(cod_dep == "01", cod_prov == "01")
# IDH and population for the six districts listed below, with the district
# name upper-cased into a DISTRITO column so it can be matched against the
# DISTRITO column of the case data.
# NOTE(review): rows are picked by district name only; this presumes none of
# these names occurs elsewhere in the table — confirm.
distritos_talara <- c(
  "El Alto", "La Brea", "Lobitos", "Los Organos", "Mancora", "Pariñas"
)
idh_talara <- idh_distritos_2019 %>%
  filter(Distrito %in% distritos_talara) %>%
  transmute(IDH, DISTRITO = str_to_upper(Distrito), habitantes)
# Number of positive cases per district within the TALARA province.
covid_talara <- positivos_covid %>%
  filter(PROVINCIA == "TALARA") %>%
  count(DISTRITO, name = "total_casos")
# Attach IDH and population to the per-district case counts and express the
# cases as a percentage of the inhabitants.
talara <- covid_talara %>%
  # Name the key explicitly so the join cannot silently change if the two
  # tables ever come to share another column name.
  left_join(idh_talara, by = "DISTRITO") %>%
  mutate(porccasosporhab = total_casos / habitantes * 100)
## Joining, by = "DISTRITO"
# Show the joined per-district table (relies on top-level auto-printing).
talara
## # A tibble: 6 x 5
## DISTRITO total_casos IDH habitantes porccasosporhab
## <chr> <int> <dbl> <dbl> <dbl>
## 1 EL ALTO 289 0.575 7348. 3.93
## 2 LA BREA 515 0.588 10993. 4.68
## 3 LOBITOS 65 0.592 1553. 4.19
## 4 LOS ORGANOS 518 0.563 9570. 5.41
## 5 MANCORA 690 0.561 14045. 4.91
## 6 PARIÑAS 3223 0.590 89997. 3.58
# Scatter plot: IDH on x against cases per 100 inhabitants on y, one point per
# district.
ggplot(data = talara, mapping = aes(x = IDH, y = porccasosporhab)) +
  geom_point()
# Same scatter plot with district names as labels; the size aesthetic is set
# plot-wide, so both the labels and the points scale with `habitantes`.
ggplot(
  data = talara,
  mapping = aes(x = IDH, y = porccasosporhab, size = habitantes)
) +
  geom_text(mapping = aes(label = DISTRITO)) +
  geom_point()