# iuvghv

# Read the residuosPeru workbook into `residuos` and inspect it.
library(readxl)

residuos <- read_excel(path = "residuosPeru (1).xlsx")
View(residuos)  # open the table in the data viewer
str(residuos)   # print column names and types

## tibble [14,978 × 15] (S3: tbl_df/tbl/data.frame)
##  $ FECHA_CORTE     : num [1:14978] 20230614 20230614 20230614 20230614 20230614 ...
##  $ N_SEC           : num [1:14978] 1 2 3 4 5 6 7 8 9 10 ...
##  $ UBIGEO          : num [1:14978] 10101 10102 10103 10104 10105 ...
##  $ REG_NAT         : chr [1:14978] "SELVA" "SELVA" "SIERRA" "SIERRA" ...
##  $ DEPARTAMENTO    : chr [1:14978] "AMAZONAS" "AMAZONAS" "AMAZONAS" "AMAZONAS" ...
##  $ PROVINCIA       : chr [1:14978] "CHACHAPOYAS" "CHACHAPOYAS" "CHACHAPOYAS" "CHACHAPOYAS" ...
##  $ DISTRITO        : chr [1:14978] "CHACHAPOYAS" "ASUNCION" "BALSAS" "CHETO" ...
##  $ POB_TOTAL       : num [1:14978] 28423 291 1615 597 737 ...
##  $ POB_URBANA      : num [1:14978] 27548 151 299 388 197 ...
##  $ POB_RURAL       : num [1:14978] 875 140 1316 209 540 ...
##  $ GPC_DOM         : num [1:14978] 0.48 0.61 0.45 0.45 0.45 0.45 0.61 0.45 0.61 0.61 ...
##  $ QRESIDUOS_DOM   : num [1:14978] 4857.5 33.6 49 63.6 32.4 ...
##  $ QRESIDUOS_NO_DOM: num [1:14978] 2081.8 14.4 21 27.2 13.9 ...
##  $ QRESIDUOS_MUN   : num [1:14978] 6939.3 48 70 90.8 46.3 ...
##  $ PERIODO         : num [1:14978] 2014 2014 2014 2014 2014 ...

# Read the lima2022d workbook into `lima` and inspect it.
library(readxl)

lima <- read_excel(path = "lima2022d.xlsx")
View(lima)  # open the table in the data viewer
str(lima)   # print column names and types

## tibble [43 × 2] (S3: tbl_df/tbl/data.frame)
##  $ DISTRITO: chr [1:43] "150101 LIMA" "150102 ANCON" "150103 ATE" "150104 BARRANCO" ...
##  $ 2022    : chr [1:43] "642,7" "113,6" "100,8" "553,7" ...

#install.packages("rvest")
library(rvest)

## Warning: package 'rvest' was built under R version 4.3.3

# Download the Wikipedia article, parse every HTML table on the page,
# keep the tenth one and save it to disk as CSV.
url <- "https://es.wikipedia.org/wiki/Elecciones_municipales_de_Lima_de_2022"
webpage <- url |> read_html()
tables <- webpage |> html_table()

# NOTE(review): the table is picked by position; if the article layout
# changes, index 10 may point at a different table -- confirm.
df <- tables[[10]]
write.csv(x = df, file = "Resultados_por_distrito.csv", row.names = FALSE)
library(readr)

## 
## Attaching package: 'readr'

## The following object is masked from 'package:rvest':
## 
##     guess_encoding

# Re-read the saved CSV. The header contains each party code twice, so
# readr repairs the names (e.g. `RP` becomes `RP...2` and `RP...3`).
Resultados <- read_csv(file = "Resultados_por_distrito.csv")

## New names:
## • `RP` -> `RP...2`
## • `RP` -> `RP...3`
## • `PP` -> `PP...4`
## • `PP` -> `PP...5`
## • `SP` -> `SP...6`
## • `SP` -> `SP...7`
## • `FE` -> `FE...8`
## • `FE` -> `FE...9`
## • `APP` -> `APP...10`
## • `APP` -> `APP...11`
## • `JP` -> `JP...12`
## • `JP` -> `JP...13`
## • `AvP` -> `AvP...14`
## • `AvP` -> `AvP...15`
## • `PL` -> `PL...16`
## • `PL` -> `PL...17`

## Rows: 46 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): Distrito, RP...2, RP...3, PP...4, PP...5, SP...6, SP...7, FE...8, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

  # Drop the first two rows of the scraped table.
  # NOTE(review): presumably sub-header rows from the HTML table -- confirm.
  resultadosnuevos <- Resultados[-(1:2), ]

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Rename the key column so it is spelled `DISTRITO`, like the key column
# in `residuos` and `lima`.
resultadosnuevos <- rename(resultadosnuevos, DISTRITO = Distrito)

#install.packages("dplyr")
library(dplyr)
# Build the district-level table: non-domestic waste for Lima province in
# 2021 (`residuos`), the `2022` column of `lima`, and the scraped
# per-district results (`resultadosnuevos`), joined on DISTRITO.

# Keep only Lima-province rows for 2021 and the two columns needed.
residuosfilra <- residuos |>
  filter(PROVINCIA == "LIMA", PERIODO == 2021) |>
  select(DISTRITO, QRESIDUOS_NO_DOM)

# Replace the "BREc" spelling in the lima workbook.
# NOTE(review): the replacement is mixed-case while the other lima keys are
# upper-case; confirm it matches the spelling used in the tables it is
# merged with.
lima$DISTRITO <- gsub("BREc", "Breña", lima$DISTRITO)

# Normalise the join key BEFORE merging. `lima$DISTRITO` carries a numeric
# code prefix ("150101 LIMA") that `residuos$DISTRITO` does not have, so
# merging first (as the script previously did) matched no rows at all and
# left the final complete-cases table empty.
residuosfilra$DISTRITO <- sub("^\\d+ ", "", residuosfilra$DISTRITO)
lima$DISTRITO <- sub("^\\d+ ", "", lima$DISTRITO)

# Decimal comma -> decimal point. The column is still character afterwards.
lima$`2022` <- gsub(",", ".", lima$`2022`)

# NOTE(review): row removed by position; confirm row 44 is still the row
# that should be discarded.
resultadosnuevos <- resultadosnuevos[-44, ]

# Full outer join of waste and lima data on the cleaned district name.
dataframe_final1 <- merge(residuosfilra, lima, by = "DISTRITO", all = TRUE)

# Equivalent to the two successive `[-5, ]` drops in the earlier version
# (removes rows 5 and 6).
# NOTE(review): these positions were chosen against the earlier merge
# output; now that keys match, the rows at 5 and 6 differ -- re-check which
# rows are meant, ideally selecting them by DISTRITO instead of by index.
dataframe_final1 <- dataframe_final1[-c(5, 6), ]

# Keep every row of the scraped results, attaching waste/lima data where the
# district name matches.
# NOTE(review): merge() compares keys exactly (case- and accent-sensitive);
# confirm `resultadosnuevos$DISTRITO` uses the same spelling as the others.
dataframe_final2 <- merge(
  dataframe_final1,
  resultadosnuevos,
  by = "DISTRITO",
  all.y = TRUE
)

# Final table: only districts with no missing value in any column.
dataframe_final <- dataframe_final2[complete.cases(dataframe_final2), ]

# iuvghv

# 2024-05-25