library(readxl)
# Load the waste (residuos) workbook into a tibble.
residuos <- read_excel("residuosPeru (1).xlsx")
# View() opens a data viewer, which is only meaningful in an interactive
# session; skip it when the script is run with Rscript or knitted.
if (interactive()) {
  View(residuos)
}
# Print the column names and types for a quick sanity check.
str(residuos)
## tibble [14,978 × 15] (S3: tbl_df/tbl/data.frame)
## $ FECHA_CORTE : num [1:14978] 20230614 20230614 20230614 20230614 20230614 ...
## $ N_SEC : num [1:14978] 1 2 3 4 5 6 7 8 9 10 ...
## $ UBIGEO : num [1:14978] 10101 10102 10103 10104 10105 ...
## $ REG_NAT : chr [1:14978] "SELVA" "SELVA" "SIERRA" "SIERRA" ...
## $ DEPARTAMENTO : chr [1:14978] "AMAZONAS" "AMAZONAS" "AMAZONAS" "AMAZONAS" ...
## $ PROVINCIA : chr [1:14978] "CHACHAPOYAS" "CHACHAPOYAS" "CHACHAPOYAS" "CHACHAPOYAS" ...
## $ DISTRITO : chr [1:14978] "CHACHAPOYAS" "ASUNCION" "BALSAS" "CHETO" ...
## $ POB_TOTAL : num [1:14978] 28423 291 1615 597 737 ...
## $ POB_URBANA : num [1:14978] 27548 151 299 388 197 ...
## $ POB_RURAL : num [1:14978] 875 140 1316 209 540 ...
## $ GPC_DOM : num [1:14978] 0.48 0.61 0.45 0.45 0.45 0.45 0.61 0.45 0.61 0.61 ...
## $ QRESIDUOS_DOM : num [1:14978] 4857.5 33.6 49 63.6 32.4 ...
## $ QRESIDUOS_NO_DOM: num [1:14978] 2081.8 14.4 21 27.2 13.9 ...
## $ QRESIDUOS_MUN : num [1:14978] 6939.3 48 70 90.8 46.3 ...
## $ PERIODO : num [1:14978] 2014 2014 2014 2014 2014 ...
library(readxl)
# Load the Lima district workbook (columns DISTRITO and `2022`) into a tibble.
lima <- read_excel("lima2022d.xlsx")
# View() opens a data viewer, which is only meaningful in an interactive
# session; skip it when the script is run with Rscript or knitted.
if (interactive()) {
  View(lima)
}
# Print the column names and types for a quick sanity check.
str(lima)
## tibble [43 × 2] (S3: tbl_df/tbl/data.frame)
## $ DISTRITO: chr [1:43] "150101 LIMA" "150102 ANCON" "150103 ATE" "150104 BARRANCO" ...
## $ 2022 : chr [1:43] "642,7" "113,6" "100,8" "553,7" ...
#install.packages("rvest")
library(rvest)
## Warning: package 'rvest' was built under R version 4.3.3
# Scrape the Spanish Wikipedia page on the 2022 Lima municipal elections and
# save one of its HTML tables to disk as CSV.
url <- "https://es.wikipedia.org/wiki/Elecciones_municipales_de_Lima_de_2022"
webpage <- rvest::read_html(url)
# html_table() returns one data frame per <table> element found on the page.
tables <- rvest::html_table(webpage)
# NOTE(review): the table is picked by position (10th on the page); this will
# silently select a different table if the page layout changes.
df <- tables[[10L]]
utils::write.csv(
  x = df,
  file = "Resultados_por_distrito.csv",
  row.names = FALSE
)
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:rvest':
##
## guess_encoding
# Read the scraped table back from disk. The CSV header contains repeated
# column names, so readr renames them by position (e.g. `RP...2`, `RP...3`).
Resultados <- readr::read_csv(file = "Resultados_por_distrito.csv")
## New names:
## • `RP` -> `RP...2`
## • `RP` -> `RP...3`
## • `PP` -> `PP...4`
## • `PP` -> `PP...5`
## • `SP` -> `SP...6`
## • `SP` -> `SP...7`
## • `FE` -> `FE...8`
## • `FE` -> `FE...9`
## • `APP` -> `APP...10`
## • `APP` -> `APP...11`
## • `JP` -> `JP...12`
## • `JP` -> `JP...13`
## • `AvP` -> `AvP...14`
## • `AvP` -> `AvP...15`
## • `PL` -> `PL...16`
## • `PL` -> `PL...17`
## Rows: 46 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): Distrito, RP...2, RP...3, PP...4, PP...5, SP...6, SP...7, FE...8, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Discard the first two rows of the scraped table.
# NOTE(review): presumably leftover header rows from the HTML table - confirm.
resultadosnuevos <- Resultados[-(1:2), ]
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Rename the key column so it is spelled the same (DISTRITO) as in the other
# data sets it will be merged with.
resultadosnuevos <- rename(resultadosnuevos, DISTRITO = Distrito)
#install.packages("dplyr")
library(dplyr)
# Build the district-level table for Lima by joining three sources on
# DISTRITO: the waste data (residuos), the Lima workbook (lima) and the
# scraped election results (resultadosnuevos).

# Keep only Lima province, period 2021, and the two columns needed.
residuosfilra <- residuos |>
  filter(PROVINCIA == "LIMA", PERIODO == 2021) |>
  select(DISTRITO, QRESIDUOS_NO_DOM)

# Normalise the join key BEFORE merging. `lima$DISTRITO` carries a leading
# numeric code (e.g. "150101 LIMA") while `residuos` holds bare names; if the
# code is stripped only after the merges, no key can match and the final
# complete-cases table ends up empty.
residuosfilra$DISTRITO <- gsub("^\\d+ ", "", residuosfilra$DISTRITO)
lima$DISTRITO <- gsub("^\\d+ ", "", lima$DISTRITO)
# NOTE(review): merge() matches keys exactly (case-sensitive). This replacement
# is mixed case while the other names shown for `lima` are upper case - verify
# that it produces the spelling used by the other two tables.
lima$DISTRITO <- gsub("BREc", "Breña", lima$DISTRITO)

# Swap the decimal comma for a dot ("642,7" -> "642.7"). The column is still
# character afterwards; convert with as.numeric() where numbers are needed.
lima$`2022` <- gsub(",", ".", lima$`2022`)

# Drop row 44 of the scraped results.
# NOTE(review): positional removal - presumably a totals/footer row; confirm.
resultadosnuevos <- resultadosnuevos[-44, ]

# Full outer join of waste data and the Lima workbook.
dataframe_final1 <- merge(residuosfilra, lima, by = "DISTRITO", all = TRUE)
# Remove rows 5 and 6 (same effect as dropping row 5 twice).
# NOTE(review): merge() sorts by key, so which districts sit at these positions
# depends on the data; re-check them now that the keys are normalised first.
dataframe_final1 <- dataframe_final1[-c(5, 6), ]

# Keep every district present in the election results.
dataframe_final2 <- merge(
  dataframe_final1, resultadosnuevos,
  by = "DISTRITO", all.y = TRUE
)

# Final table: only rows with no missing value in any column.
dataframe_final <- dataframe_final2[complete.cases(dataframe_final2), ]