# Parte 2 del examen

# Load the CSV export of the Lima 2022 workbook. The first line of the file is
# a title, so readr reports the two blank header cells as `...2` and `...3`.
library(readr)
datali1 <- "Lima2022.xlsx - data.csv" |>
  read_csv()

## New names:
## Rows: 60 Columns: 3
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): TASA DE DENUNCIAS POR COMISION DE DELITOS,SEGUN DISTRITO, ...2 num (1):
## ...3
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...2`
## • `` -> `...3`

# Open the raw import in the data viewer only for interactive sessions;
# `View()` needs a GUI viewer that a non-interactive run may not have.
if (interactive()) {
  View(datali1)
}

# Row 4 of the raw import carries the column titles. The rows above it hold
# only the sheet title, blanks and the period label, so they are dropped
# together with the header row itself.
fila_encabezado <- 4L

# Flatten the one-row tibble into a plain character vector. Assigning the
# tibble row directly coerces each cell with as.character(), which turns the
# blank cells into the column names NA and "NA".
encabezado <- vapply(
  datali1[fila_encabezado, ],
  \(celda) as.character(celda)[1],
  character(1),
  USE.NAMES = FALSE
)

# Blank title cells fall back to the names readr generated on import.
sin_titulo <- is.na(encabezado) | !nzchar(encabezado)
encabezado[sin_titulo] <- colnames(datali1)[sin_titulo]

datos <- datali1[-seq_len(fila_encabezado), ]
colnames(datos) <- encabezado
print(datos)

## # A tibble: 59 × 3
##    Distrito                          ``     `NA`
##    <chr>                             <chr> <dbl>
##  1 (Tasa por cada 10 000 habitantes) <NA>     NA
##  2 <NA>                              <NA>     NA
##  3 <NA>                              Per?o  2022
##  4 150101 LIMA                       <NA>   6427
##  5 150102 ANCON                      <NA>   1136
##  6 150103 ATE                        <NA>   1008
##  7 150104 BARRANCO                   <NA>   5537
##  8 150105 BREс                       <NA>   2186
##  9 150106 CARABAYLLO                 <NA>   1114
## 10 150107 CHACLACAYO                 <NA>   1559
## # ℹ 49 more rows

# Keep only the rows that have both a district label (column 1) and a reported
# value (column 3). The middle column is blank on the district rows shown in
# the printed preview, so running complete.cases() over the whole table would
# discard those rows as well.
dostos_faltantes <- datos[complete.cases(datos[[1]], datos[[3]]), ]

# Load the solid-waste workbook. Only the first header cell is filled, so
# readxl auto-names the remaining columns `...2` to `...15`.
library(readxl)
residuosPeru <- read_excel(path = "residuosPeru.xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`

View(residuosPeru)

# The first imported row holds the real column titles: keep it aside as
# `encabezado`, then label the remaining rows with it in a single step.
encabezado <- residuosPeru[1, ]
datos1 <- setNames(residuosPeru[-1, ], encabezado)
print(datos1)

## # A tibble: 14,978 × 15
##    FECHA_CORTE N_SEC UBIGEO REG_NAT DEPARTAMENTO PROVINCIA   DISTRITO  POB_TOTAL
##    <chr>       <chr> <chr>  <chr>   <chr>        <chr>       <chr>     <chr>    
##  1 20230614    1     10101  SELVA   AMAZONAS     CHACHAPOYAS CHACHAPO… 28423    
##  2 20230614    2     10102  SELVA   AMAZONAS     CHACHAPOYAS ASUNCION  291      
##  3 20230614    3     10103  SIERRA  AMAZONAS     CHACHAPOYAS BALSAS    1615     
##  4 20230614    4     10104  SIERRA  AMAZONAS     CHACHAPOYAS CHETO     597      
##  5 20230614    5     10105  SIERRA  AMAZONAS     CHACHAPOYAS CHILIQUIN 737      
##  6 20230614    6     10106  SIERRA  AMAZONAS     CHACHAPOYAS CHUQUIBA… 2096     
##  7 20230614    7     10107  SELVA   AMAZONAS     CHACHAPOYAS GRANADA   395      
##  8 20230614    8     10108  SIERRA  AMAZONAS     CHACHAPOYAS HUANCAS   1240     
##  9 20230614    9     10109  SELVA   AMAZONAS     CHACHAPOYAS LA JALCA  5551     
## 10 20230614    10    10110  SELVA   AMAZONAS     CHACHAPOYAS LEIMEBAM… 4200     
## # ℹ 14,968 more rows
## # ℹ 7 more variables: POB_URBANA <chr>, POB_RURAL <chr>, GPC_DOM <chr>,
## #   QRESIDUOS_DOM <chr>, QRESIDUOS_NO_DOM <chr>, QRESIDUOS_MUN <chr>,
## #   PERIODO <chr>

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Give the district column the same name the other tables use, so that the
# later merges can join on "Distrito".
datos1 <- rename(datos1, Distrito = DISTRITO)

#install.packages("rvest")
#install.packages("tidyverse")

library(rvest)

## Warning: package 'rvest' was built under R version 4.3.3

## 
## Attaching package: 'rvest'

## The following object is masked from 'package:readr':
## 
##     guess_encoding

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()         masks stats::filter()
## ✖ rvest::guess_encoding() masks readr::guess_encoding()
## ✖ dplyr::lag()            masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Download the Spanish Wikipedia article on the 2022 Lima municipal elections
# and parse every HTML table it contains.
url <- "https://es.wikipedia.org/wiki/Elecciones_municipales_de_Lima_de_2022"
webpage <- read_html(url)
tables <- html_table(webpage)

# The table exported below is the 10th one on the page. That position depends
# on the page layout, so stop with a clear message when it no longer holds
# instead of failing with a subscript error or exporting some other table.
indice_tabla <- 10L
if (length(tables) < indice_tabla) {
  stop(
    "Expected at least ", indice_tabla, " tables at ", url,
    " but found ", length(tables), ".",
    call. = FALSE
  )
}
df <- tables[[indice_tabla]]
if (!"Distrito" %in% names(df)) {
  stop(
    "Table ", indice_tabla, " at ", url, " has no 'Distrito' column; ",
    "the page layout may have changed.",
    call. = FALSE
  )
}

# Export to CSV
write.csv(df, "Resultados_por_distrito.csv", row.names = FALSE)

# Read the export back with readr, which makes the repeated party column
# names (RP, PP, ...) unique by appending each column's position.
library(readr)
Resultados_por_distrito <- read_csv("Resultados_por_distrito.csv")

## New names:
## Rows: 46 Columns: 17
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (17): Distrito, RP...2, RP...3, PP...4, PP...5, SP...6, SP...7, FE...8, ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `RP` -> `RP...2`
## • `RP` -> `RP...3`
## • `PP` -> `PP...4`
## • `PP` -> `PP...5`
## • `SP` -> `SP...6`
## • `SP` -> `SP...7`
## • `FE` -> `FE...8`
## • `FE` -> `FE...9`
## • `APP` -> `APP...10`
## • `APP` -> `APP...11`
## • `JP` -> `JP...12`
## • `JP` -> `JP...13`
## • `AvP` -> `AvP...14`
## • `AvP` -> `AvP...15`
## • `PL` -> `PL...16`
## • `PL` -> `PL...17`

# Open the scraped results in the data viewer only for interactive sessions;
# `View()` needs a GUI viewer that a non-interactive run may not have.
if (interactive()) {
  View(Resultados_por_distrito)
}

# Build a comparable join key from a district label: drop a leading numeric
# code (the crime table stores values such as "150101 LIMA"), replace accented
# Spanish letters with their plain form, collapse whitespace and upper-case.
# NOTE(review): the scraped table presumably spells districts in mixed case
# with accents -- confirm against the actual values.
clave_distrito <- function(x) {
  x <- sub("^[[:space:]]*[0-9]+[[:space:]]+", "", as.character(x))
  x <- chartr(
    paste0(
      "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1",
      "\u00c1\u00c9\u00cd\u00d3\u00da\u00dc\u00d1"
    ),
    "aeiouunAEIOUUN",
    x
  )
  toupper(trimws(gsub("[[:space:]]+", " ", x)))
}

# Keep only rows with a district label (column 1) and a value (column 3). The
# middle column is blank on the district rows shown in the printed preview, so
# complete.cases() over every column would drop those rows too.
datosRD <- datos[complete.cases(datos[[1]], datos[[3]]), ]

# Join on the normalised key. Plain data frames are used for the copies so
# the key column can be overwritten regardless of the other column names.
resultados_clave <- as.data.frame(Resultados_por_distrito)
resultados_clave[["Distrito"]] <-
  clave_distrito(resultados_clave[["Distrito"]])

# Use the filtered rows so title and blank rows cannot take part in the join.
denuncias_clave <- as.data.frame(datosRD)
denuncias_clave[["Distrito"]] <- clave_distrito(denuncias_clave[["Distrito"]])

merged_df <- merge(resultados_clave, denuncias_clave, by = "Distrito")

# The waste table is nationwide, so joining on the district name alone can
# match same-named districts elsewhere in the country. Restrict it to Lima
# province when the scope columns are present.
# NOTE(review): assumes those rows are coded DEPARTAMENTO == "LIMA" and
# PROVINCIA == "LIMA" -- confirm against the data.
residuos_clave <- as.data.frame(datos1)
columnas_ambito <- c("DEPARTAMENTO", "PROVINCIA")
if (all(columnas_ambito %in% names(residuos_clave))) {
  en_lima <- toupper(residuos_clave[["DEPARTAMENTO"]]) == "LIMA" &
    toupper(residuos_clave[["PROVINCIA"]]) == "LIMA"
  residuos_clave <- residuos_clave[which(en_lima), , drop = FALSE]
}
residuos_clave[["Distrito"]] <- clave_distrito(residuos_clave[["Distrito"]])

merged_df2 <- merge(merged_df, residuos_clave, by = "Distrito")

# Parte 2 del examen

# 2024-05-15