library(rvest)
# Scrape the per-district results table of the 2022 Lima municipal elections
# from the Spanish Wikipedia article.
link <- "https://es.wikipedia.org/wiki/Elecciones_municipales_de_Lima_de_2022#Resultados_por_distrito"
# The XPath is positional (9th table inside the content div), so any edit to
# the article that adds or removes a table can make it match nothing.
path <- '//*[@id="mw-content-text"]/div[1]/table[9]'
scraped_tables <- read_html(link) %>%
  html_elements(xpath = path) %>%
  html_table()
# Fail with an explicit message instead of "subscript out of bounds" when the
# page layout changed and the XPath no longer selects a table.
if (length(scraped_tables) == 0) {
  stop("No table matched XPath '", path, "' at ", link, call. = FALSE)
}
dataWS <- scraped_tables[[1]]
head(dataWS)
## # A tibble: 6 × 17
## Distrito RP RP PP PP SP SP FE FE APP APP JP
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Distrito "" "" "" "" "" "" "" "" "" "" ""
## 2 Distrito "V" "%" "V" "%" "V" "%" "V" "%" "V" "%" "V"
## 3 Ancón "3,725" "13.2… "9,3… "33.… "5,9… "21.… "2,0… "7.2… "3,5… "12.… "1,6…
## 4 Ate "57,374" "17.4… "98,… "29.… "52,… "15.… "27,… "8.5… "26,… "7.9… "25,…
## 5 Barranco "11,604" "36.9… "5,7… "18.… "6,4… "20.… "2,7… "8.8… "2,1… "6.9… "1,6…
## 6 Breña "22,721" "31.1… "18,… "25.… "14,… "19.… "8,2… "11.… "2,4… "3.3… "3,8…
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>
library(readxl)
# Load the residuosPeru workbook (first sheet, default options).
# readxl's "New names" messages show that columns 2-15 had empty header cells
# and were auto-named `...2` to `...15`; proper names are assigned later in
# the residuosPeru cleaning step.
residuosPeru <- read_excel("residuosPeru.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
# Open the data viewer only in an interactive session; View() needs a GUI or
# display and can fail when the script is run with Rscript or knitted.
if (interactive()) {
  View(residuosPeru)
}
library(openxlsx)
# Load Lima2022.xlsx with openxlsx; detectDates = TRUE asks the reader to
# convert date-formatted cells to Date values.
Lima2022 <- read.xlsx("Lima2022.xlsx", detectDates = TRUE)
# Inspect the raw structure: the sheet's title became the first column name and
# the subtitle / period / "Distrito" header cells were read as data rows, so
# the table needs cleaning before use.
str(Lima2022)
## 'data.frame': 55 obs. of 3 variables:
## $ TASA.DE.DENUNCIAS.POR.COMISION.DE.DELITOS,SEGUN.DISTRITO: chr "(Tasa por cada 10 000 habitantes)" NA "Distrito" "150101 LIMA" ...
## $ X2 : chr NA "Per_xDBE4_o" NA NA ...
## $ X3 : chr NA "2022" NA "642,7" ...
# Open the data viewer only in an interactive session; View() needs a GUI or
# display and can fail when the script is run with Rscript or knitted.
if (interactive()) {
  View(Lima2022)
}
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Clean dataWS: the first two rows of the scraped table are leftovers of the
# multi-row HTML header (a blank row and the "V" / "%" sub-header), not data.
dataWS <- dataWS[-seq_len(2), ]
str(dataWS)
## tibble [44 × 17] (S3: tbl_df/tbl/data.frame)
## $ Distrito: chr [1:44] "Ancón" "Ate" "Barranco" "Breña" ...
## $ RP : chr [1:44] "3,725" "57,374" "11,604" "22,721" ...
## $ RP : chr [1:44] "13.28" "17.49" "36.92" "31.15" ...
## $ PP : chr [1:44] "9,332" "98,373" "5,766" "18,676" ...
## $ PP : chr [1:44] "33.27" "29.99" "18.34" "25.61" ...
## $ SP : chr [1:44] "5,987" "52,069" "6,401" "14,022" ...
## $ SP : chr [1:44] "21.35" "15.87" "20.37" "19.23" ...
## $ FE : chr [1:44] "2,043" "27,911" "2,772" "8,240" ...
## $ FE : chr [1:44] "7.28" "8.51" "8.82" "11.30" ...
## $ APP : chr [1:44] "3,587" "26,140" "2,193" "2,473" ...
## $ APP : chr [1:44] "12.79" "7.97" "6.98" "3.39" ...
## $ JP : chr [1:44] "1,679" "25,113" "1,628" "3,813" ...
## $ JP : chr [1:44] "5.99" "7.66" "5.18" "5.23" ...
## $ AvP : chr [1:44] "1,228" "32,851" "845" "2,435" ...
## $ AvP : chr [1:44] "4.38" "10.01" "2.69" "3.34" ...
## $ PL : chr [1:44] "465" "8,191" "223" "552" ...
## $ PL : chr [1:44] "1.67" "2.50" "0.71" "0.76" ...
# Preview the first rows of dataWS after dropping the header rows.
head(dataWS)
## # A tibble: 6 × 17
## Distrito RP RP PP PP SP SP FE FE APP APP JP
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Ancón 3,725 13.28 9,332 33.27 5,987 21.35 2,043 7.28 3,587 12.79 1,679
## 2 Ate 57,374 17.49 98,373 29.99 52,0… 15.87 27,9… 8.51 26,1… 7.97 25,1…
## 3 Barranco 11,604 36.92 5,766 18.34 6,401 20.37 2,772 8.82 2,193 6.98 1,628
## 4 Breña 22,721 31.15 18,676 25.61 14,0… 19.23 8,240 11.30 2,473 3.39 3,813
## 5 Carabayllo 30,418 19.24 43,980 27.82 33,3… 21.12 15,7… 9.94 16,4… 10.40 11,6…
## 6 Chaclacayo 8,111 28.42 6,491 22.74 5,360 18.78 3,014 10.56 1,301 4.56 1,733
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>
# Keep only Distrito plus the RP, PP and SP vote counts.
# Each party name occurs twice in the scraped header (votes first, then the
# percentage), so selecting by name is ambiguous. match() pins the first
# occurrence of each name explicitly, which is the vote-count column.
keep_cols <- match(c("Distrito", "RP", "PP", "SP"), names(dataWS))
# Stop early if the scraped table no longer has one of the expected columns.
stopifnot(!anyNA(keep_cols))
dataWS <- dataWS[, keep_cols]
str(dataWS)
## tibble [44 × 4] (S3: tbl_df/tbl/data.frame)
## $ Distrito: chr [1:44] "Ancón" "Ate" "Barranco" "Breña" ...
## $ RP : chr [1:44] "3,725" "57,374" "11,604" "22,721" ...
## $ PP : chr [1:44] "9,332" "98,373" "5,766" "18,676" ...
## $ SP : chr [1:44] "5,987" "52,069" "6,401" "14,022" ...
# Preview the reduced table (district plus the three vote-count columns).
head(dataWS)
## # A tibble: 6 × 4
## Distrito RP PP SP
## <chr> <chr> <chr> <chr>
## 1 Ancón 3,725 9,332 5,987
## 2 Ate 57,374 98,373 52,069
## 3 Barranco 11,604 5,766 6,401
## 4 Breña 22,721 18,676 14,022
## 5 Carabayllo 30,418 43,980 33,394
## 6 Chaclacayo 8,111 6,491 5,360
# Clean Lima2022.
# The first three rows hold the subtitle, the period row and the "Distrito"
# header cell that were read as data; drop them.
Lima2022 <- Lima2022[-seq_len(3), ]
# Drop column X2 with a logical mask. The previous `-which(names == "X2")`
# form selects ZERO columns when no column is called X2, silently emptying the
# data frame.
Lima2022 <- Lima2022[, names(Lima2022) != "X2", drop = FALSE]
# Drop the last nine rows (presumably footnotes / non-district rows below the
# table -- confirm against the workbook).
Lima2022 <- head(Lima2022, -9)
# Exactly two columns must remain for the renaming below to be meaningful.
stopifnot(ncol(Lima2022) == 2)
names(Lima2022) <- c("Distrito", "PorcentajeDelito")
str(Lima2022)
## 'data.frame': 43 obs. of 2 variables:
## $ Distrito : chr "150101 LIMA" "150102 ANCON" "150103 ATE" "150104 BARRANCO" ...
## $ PorcentajeDelito: chr "642,7" "113,6" "100,8" "553,7" ...
# Preview the cleaned Lima2022 table (district label and its rate column).
head(Lima2022)
## Distrito PorcentajeDelito
## 4 150101 LIMA 642,7
## 5 150102 ANCON 113,6
## 6 150103 ATE 100,8
## 7 150104 BARRANCO 553,7
## 8 150105 BREс 218,6
## 9 150106 CARABAYLLO 111,4
# Clean residuosPeru: discard the first row and give the 15 columns their
# proper names. The district column is called "Distrito" (not upper case) so
# that it shares its name with the other two tables.
residuosPeru <- setNames(
  residuosPeru[-1, ],
  c(
    "FECHA_CORTE", "N_SEC", "UBIGEO", "REG_NAT", "DEPARTAMENTO",
    "PROVINCIA", "Distrito", "POB_TOTAL", "POB_URBANA", "POB_RURAL",
    "GPC_DOM", "QRESIDUOS_DOM", "QRESIDUOS_NO_DOM", "QRESIDUOSMUN",
    "PERIODO"
  )
)
str(residuosPeru)
## tibble [14,978 × 15] (S3: tbl_df/tbl/data.frame)
## $ FECHA_CORTE : chr [1:14978] "20230614" "20230614" "20230614" "20230614" ...
## $ N_SEC : chr [1:14978] "1" "2" "3" "4" ...
## $ UBIGEO : chr [1:14978] "10101" "10102" "10103" "10104" ...
## $ REG_NAT : chr [1:14978] "SELVA" "SELVA" "SIERRA" "SIERRA" ...
## $ DEPARTAMENTO : chr [1:14978] "AMAZONAS" "AMAZONAS" "AMAZONAS" "AMAZONAS" ...
## $ PROVINCIA : chr [1:14978] "CHACHAPOYAS" "CHACHAPOYAS" "CHACHAPOYAS" "CHACHAPOYAS" ...
## $ Distrito : chr [1:14978] "CHACHAPOYAS" "ASUNCION" "BALSAS" "CHETO" ...
## $ POB_TOTAL : chr [1:14978] "28423" "291" "1615" "597" ...
## $ POB_URBANA : chr [1:14978] "27548" "151" "299" "388" ...
## $ POB_RURAL : chr [1:14978] "875" "140" "1316" "209" ...
## $ GPC_DOM : chr [1:14978] "0.48" "0.61" "0.45" "0.45" ...
## $ QRESIDUOS_DOM : chr [1:14978] "4857.5" "33.56" "48.96" "63.59" ...
## $ QRESIDUOS_NO_DOM: chr [1:14978] "2081.78" "14.38" "20.98" "27.25" ...
## $ QRESIDUOSMUN : chr [1:14978] "6939.28" "47.95" "69.95" "90.84" ...
## $ PERIODO : chr [1:14978] "2014" "2014" "2014" "2014" ...
# Preview the first rows of residuosPeru after renaming the columns.
head(residuosPeru)
## # A tibble: 6 × 15
## FECHA_CORTE N_SEC UBIGEO REG_NAT DEPARTAMENTO PROVINCIA Distrito POB_TOTAL
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 20230614 1 10101 SELVA AMAZONAS CHACHAPOYAS CHACHAPOY… 28423
## 2 20230614 2 10102 SELVA AMAZONAS CHACHAPOYAS ASUNCION 291
## 3 20230614 3 10103 SIERRA AMAZONAS CHACHAPOYAS BALSAS 1615
## 4 20230614 4 10104 SIERRA AMAZONAS CHACHAPOYAS CHETO 597
## 5 20230614 5 10105 SIERRA AMAZONAS CHACHAPOYAS CHILIQUIN 737
## 6 20230614 6 10106 SIERRA AMAZONAS CHACHAPOYAS CHUQUIBAM… 2096
## # ℹ 7 more variables: POB_URBANA <chr>, POB_RURAL <chr>, GPC_DOM <chr>,
## # QRESIDUOS_DOM <chr>, QRESIDUOS_NO_DOM <chr>, QRESIDUOSMUN <chr>,
## # PERIODO <chr>
# Join the three data sets into merged_data (one row per Lima2022 district).
#
# The raw district keys are not comparable, so joining on them matches nothing
# and every joined column comes back NA:
#   * Lima2022$Distrito     is "150102 ANCON"  (UBIGEO code + upper-case name)
#   * residuosPeru$Distrito is a bare upper-case name, for the whole country
#   * dataWS$Distrito       is title case with accents
# Fix: join Lima2022 to residuosPeru on the UBIGEO code, and to dataWS on a
# normalized (upper-case, accent-free) district name.
#
# NOTE: the str()/head() output recorded below this block was captured from
# the original all-NA join and must be regenerated.

# Upper-case, strip Spanish accents and collapse whitespace so names coming
# from different sources compare equal.
normalize_district <- function(x) {
  x <- chartr(
    "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00c1\u00c9\u00cd\u00d3\u00da\u00dc\u00d1",
    "aeiouunAEIOUUN",
    x
  )
  toupper(gsub("\\s+", " ", trimws(x)))
}

# Split "150102 ANCON" into the UBIGEO code and the district name.
lima_keyed <- Lima2022 %>%
  mutate(
    UBIGEO = sub("^\\s*(\\d+)\\s.*$", "\\1", Distrito),
    Distrito = normalize_district(sub("^\\s*\\d+\\s+", "", Distrito))
  )

# NOTE(review): residuosPeru appears to hold several PERIODO values per
# district; only the latest one is kept so the result stays at one row per
# district -- confirm this is the intended period. PERIODO is character, so
# max() is lexicographic, which is correct for 4-digit years.
residuos_latest <- residuosPeru %>%
  filter(PERIODO == max(PERIODO, na.rm = TRUE)) %>%
  mutate(
    UBIGEO = trimws(UBIGEO),
    Distrito_residuos = normalize_district(Distrito)
  ) %>%
  select(-Distrito)

votes_keyed <- dataWS %>%
  mutate(Distrito = normalize_district(Distrito))

merged_data <- lima_keyed %>%
  left_join(residuos_latest, by = "UBIGEO") %>%
  # Prefer the residuosPeru spelling of the name when the code matched: some
  # Lima2022 names are mis-encoded in the workbook.
  mutate(Distrito = coalesce(Distrito_residuos, Distrito)) %>%
  select(-Distrito_residuos) %>%
  # Keep the original column order (UBIGEO right after N_SEC).
  relocate(UBIGEO, .after = N_SEC) %>%
  left_join(votes_keyed, by = "Distrito")

# Surface districts that still found no election results (e.g. a name spelled
# differently on Wikipedia) instead of leaving silent NAs.
unmatched_districts <- merged_data$Distrito[is.na(merged_data$RP)]
if (length(unmatched_districts) > 0) {
  warning(
    "No election results matched for: ",
    paste(unmatched_districts, collapse = ", "),
    call. = FALSE
  )
}
str(merged_data)
## 'data.frame': 43 obs. of 19 variables:
## $ Distrito : chr "150101 LIMA" "150102 ANCON" "150103 ATE" "150104 BARRANCO" ...
## $ PorcentajeDelito: chr "642,7" "113,6" "100,8" "553,7" ...
## $ FECHA_CORTE : chr NA NA NA NA ...
## $ N_SEC : chr NA NA NA NA ...
## $ UBIGEO : chr NA NA NA NA ...
## $ REG_NAT : chr NA NA NA NA ...
## $ DEPARTAMENTO : chr NA NA NA NA ...
## $ PROVINCIA : chr NA NA NA NA ...
## $ POB_TOTAL : chr NA NA NA NA ...
## $ POB_URBANA : chr NA NA NA NA ...
## $ POB_RURAL : chr NA NA NA NA ...
## $ GPC_DOM : chr NA NA NA NA ...
## $ QRESIDUOS_DOM : chr NA NA NA NA ...
## $ QRESIDUOS_NO_DOM: chr NA NA NA NA ...
## $ QRESIDUOSMUN : chr NA NA NA NA ...
## $ PERIODO : chr NA NA NA NA ...
## $ RP : chr NA NA NA NA ...
## $ PP : chr NA NA NA NA ...
## $ SP : chr NA NA NA NA ...
# Preview the first rows of the merged table.
head(merged_data)
## Distrito PorcentajeDelito FECHA_CORTE N_SEC UBIGEO REG_NAT
## 1 150101 LIMA 642,7 <NA> <NA> <NA> <NA>
## 2 150102 ANCON 113,6 <NA> <NA> <NA> <NA>
## 3 150103 ATE 100,8 <NA> <NA> <NA> <NA>
## 4 150104 BARRANCO 553,7 <NA> <NA> <NA> <NA>
## 5 150105 BREс 218,6 <NA> <NA> <NA> <NA>
## 6 150106 CARABAYLLO 111,4 <NA> <NA> <NA> <NA>
## DEPARTAMENTO PROVINCIA POB_TOTAL POB_URBANA POB_RURAL GPC_DOM QRESIDUOS_DOM
## 1 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 3 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## QRESIDUOS_NO_DOM QRESIDUOSMUN PERIODO RP PP SP
## 1 <NA> <NA> <NA> <NA> <NA> <NA>
## 2 <NA> <NA> <NA> <NA> <NA> <NA>
## 3 <NA> <NA> <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA> <NA> <NA>
library(stats)