library(rio)
# Read the "Lima2022.xlsx" workbook (via rio's import()) into `lima22`.
lima22 <- import(file = "Lima2022.xlsx")
## New names:
## • `` -> `...2`
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Split the combined `Distrito` field into a numeric code and a district name,
# then keep only the columns used afterwards (Tasa, ubigeo, distri).
#   ubigeo: every digit found in `Distrito`, concatenated and parsed as a number.
#   distri: the text of `Distrito` with digits and punctuation removed.
# The name pattern keeps ANY letter ([:alpha:]) plus whitespace. The previous
# ASCII-only pattern "[^A-Za-z]" also deleted accented letters, "ñ" and the
# spaces between words (e.g. "Breña" became "Brea"), corrupting the names.
# Runs of whitespace are collapsed to one space and the ends are trimmed.
# NOTE(review): names now keep spaces and accents; confirm that any later join
# on this column does not rely on the old stripped form.
lima22 <- lima22 %>%
  mutate(
    ubigeo = as.numeric(gsub("[^0-9]", "", Distrito)),
    distri = trimws(
      gsub("[[:space:]]+", " ", gsub("[^[:alpha:][:space:]]", "", Distrito))
    )
  ) %>%
  select(Tasa, ubigeo, distri)
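# Web scraping. XPath noted for the target table:
#   /html/body/div[2]/div/div[3]/main/div[4]/div[3]/div[1]/table[9]
# NOTE(review): this differs from the `path` value used below (main/div[3]
# instead of main/div[4], and no trailing /tbody) - confirm which is current.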
library(rvest)
library(rvest)
# Download the Wikipedia page at `link` and parse the table found at `path`
# into `dataWS`.
link <- "https://es.wikipedia.org/wiki/Elecciones_municipales_de_Lima_de_2022"
# Absolute XPath to the <tbody> of the wanted table. It is tied to the exact
# page layout, so it can stop matching whenever the page structure changes.
path <- "/html/body/div[2]/div/div[3]/main/div[3]/div[3]/div[1]/table[9]/tbody"
dataWS <- local({
  nodos <- html_elements(read_html(link), xpath = path)
  # Fail with an explicit message instead of an opaque "subscript out of
  # bounds" error when the XPath no longer matches anything.
  if (length(nodos) == 0L) {
    stop(
      "No node matched the XPath in `path`; the page layout may have changed.",
      call. = FALSE
    )
  }
  # html_table() returns one table per matched node; keep the first.
  html_table(nodos)[[1]]
})
# Preview the first rows (printed when run interactively or knitted).
head(dataWS)
## # A tibble: 6 × 17
## Distrito RP RP PP PP SP SP FE FE APP APP JP
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Distrito "" "" "" "" "" "" "" "" "" "" ""
## 2 Distrito "V" "%" "V" "%" "V" "%" "V" "%" "V" "%" "V"
## 3 Ancón "3,725" "13.2… "9,3… "33.… "5,9… "21.… "2,0… "7.2… "3,5… "12.… "1,6…
## 4 Ate "57,374" "17.4… "98,… "29.… "52,… "15.… "27,… "8.5… "26,… "7.9… "25,…
## 5 Barranco "11,604" "36.9… "5,7… "18.… "6,4… "20.… "2,7… "8.8… "2,1… "6.9… "1,6…
## 6 Breña "22,721" "31.1… "18,… "25.… "14,… "19.… "8,2… "11.… "2,4… "3.3… "3,8…
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>
# Drop the first two rows: in the preview above they repeat the header
# ("Distrito", "V", "%") rather than holding district data.
dataWS <- dataWS[-(1:2), ]
# Drop row 44 of the remaining table (row 46 of the scraped one).
# NOTE(review): presumably a totals/summary row - verify against the page.
dataWS <- dataWS[-44, ]
# Read the "residuosPeru.xlsx" workbook (via rio's import()) into `residuos`.
residuos <- import(file = "residuosPeru.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
# Use the first data row as the column names (the imported names were mostly
# auto-generated placeholders such as `...2`).
names(residuos) <- as.character(residuos[1, ])
# Remove that header row from the data and renumber the rows from 1.
residuos <- residuos[-1, ]
rownames(residuos) <- NULL
# Keep only the rows whose PROVINCIA column equals "LIMA".
residuos <- subset(residuos, PROVINCIA == "LIMA")
# Give the district column the same name, "DISTRITO", in both tables.
# Base-R renaming is used on purpose: `dataWS` carries duplicated column names
# (see the preview above), which some renaming helpers may reject.
names(dataWS)[names(dataWS) == "Distrito"] <- "DISTRITO"
names(lima22)[names(lima22) == "distri"] <- "DISTRITO"
# Reduce `residuos` to exactly one row per DISTRITO.
# The surviving row is drawn at random, so the RNG seed is fixed first;
# without it each run could keep a different row and nothing computed from
# `residuos` afterwards would be reproducible. The seed value is arbitrary.
# NOTE(review): if the rows of a district differ in a meaningful way, a
# deterministic rule (e.g. a specific period) would be preferable - confirm.
set.seed(2022)
residuos <- residuos %>%
  group_by(DISTRITO) %>%
  slice_sample(n = 1) %>%
  ungroup()