# EP2---RL.knit

library(rio)
# Load the Lima 2022 workbook from the working directory into `lima22`.
lima22 <- import(file = "Lima2022.xlsx")

## New names:
## • `` -> `...2`

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Split the combined `Distrito` field into its numeric code and its name,
# then keep only the columns used afterwards (Tasa, ubigeo, distri).
#
# ubigeo: every digit found in `Distrito`, converted to a number.
# distri: the district name. The previous pattern "[^A-Za-z]" also deleted
#         spaces and non-ASCII letters, turning e.g. "SAN JUAN DE LURIGANCHO"
#         into "SANJUANDELURIGANCHO" and "BREÑA" into "BREA". Here every
#         Unicode letter (\p{L}) and the spaces between words are preserved;
#         repeated spaces are collapsed and the ends are trimmed.
# NOTE(review): assumes `Distrito` holds a code plus a name in one string —
# confirm against the workbook.
lima22 <- lima22 %>%
  mutate(
    ubigeo = as.numeric(gsub("[^0-9]", "", Distrito)),
    distri = gsub("[^\\p{L} ]", "", Distrito, perl = TRUE),
    distri = trimws(gsub(" +", " ", distri))
  ) %>%
  select(Tasa, ubigeo, distri)

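# Web scraping. XPath copied from the browser for the target table:
#   /html/body/div[2]/div/div[3]/main/div[4]/div[3]/div[1]/table[9]
# NOTE: the path actually used in the code below has main/div[3], not main/div[4].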

library(rvest)
library(rvest)

# Fetch the Wikipedia article on the 2022 Lima municipal elections and pull
# out the table located by an absolute XPath. `html_table()` returns a list
# with one entry per matched node; only the first entry is kept.
link <- "https://es.wikipedia.org/wiki/Elecciones_municipales_de_Lima_de_2022"
path <- "/html/body/div[2]/div/div[3]/main/div[3]/div[3]/div[1]/table[9]/tbody"
dataWS <- html_table(
  html_nodes(read_html(link), xpath = path)
)[[1]]
# Preview the first rows to check what was scraped.
head(dataWS)

## # A tibble: 6 × 17
##   Distrito RP       RP     PP    PP    SP    SP    FE    FE    APP   APP   JP   
##   <chr>    <chr>    <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Distrito ""       ""     ""    ""    ""    ""    ""    ""    ""    ""    ""   
## 2 Distrito "V"      "%"    "V"   "%"   "V"   "%"   "V"   "%"   "V"   "%"   "V"  
## 3 Ancón    "3,725"  "13.2… "9,3… "33.… "5,9… "21.… "2,0… "7.2… "3,5… "12.… "1,6…
## 4 Ate      "57,374" "17.4… "98,… "29.… "52,… "15.… "27,… "8.5… "26,… "7.9… "25,…
## 5 Barranco "11,604" "36.9… "5,7… "18.… "6,4… "20.… "2,7… "8.8… "2,1… "6.9… "1,6…
## 6 Breña    "22,721" "31.1… "18,… "25.… "14,… "19.… "8,2… "11.… "2,4… "3.3… "3,8…
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>

# The first two scraped rows only repeat the table header ("Distrito",
# "V" / "%"), as the preview above shows, so they are removed.
dataWS <- dataWS[-seq_len(2), ]

# Remove what is now row 44.
# NOTE(review): presumably a totals/summary row after the districts — confirm.
dataWS <- dataWS[-44, ]

# Load the waste workbook from the working directory into `residuos`.
residuos <- import(file = "residuosPeru.xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`

# The real header sits in the first data row (the import auto-named the
# columns `...2`, `...3`, ...), so promote that row to column names.
names(residuos) <- residuos[1, ]

# Discard the row that was just used as the header.
residuos <- residuos[-1, ]

# Reset row names so numbering starts at 1 again.
row.names(residuos) <- NULL

# Keep only the records whose PROVINCIA is exactly "LIMA".
residuos <- subset(residuos, PROVINCIA == "LIMA")

# Give the district column the same name, DISTRITO, in the scraped table
# and in lima22, matching the name used in `residuos`.
names(dataWS) <- replace(
  names(dataWS), names(dataWS) == "Distrito", "DISTRITO"
)

names(lima22) <- replace(
  names(lima22), names(lima22) == "distri", "DISTRITO"
)

# Reduce `residuos` to one row per district by drawing a single random row
# within each DISTRITO group, then drop the grouping.
#
# slice_sample() draws from R's random number generator. Without a fixed
# seed, a different row could be kept on every run, so results were not
# reproducible. The seed is set immediately before sampling so the same
# rows are chosen each time the script is run.
set.seed(2022)
residuos <- residuos %>%
  group_by(DISTRITO) %>%
  slice_sample(n = 1) %>%
  ungroup()