library(rio)
# Load the Lima 2022 spreadsheet into a data frame via rio.
lima22 <- import("Lima2022.xlsx")
## New names:
## • `` -> `...2`
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Split the combined `Distrito` field into its two parts:
#   ubigeo - every digit found in the field, parsed as a number
#   distri - every unaccented Latin letter (A-Z, a-z) found in the field
# and keep only the rate plus those two derived columns.
lima22 <- lima22 %>%
  mutate(
    ubigeo = as.numeric(gsub("[^0-9]", "", Distrito)),
    distri = gsub("[^A-Za-z]", "", Distrito)
  ) %>%
  select(Tasa, ubigeo, distri)

#Scrapeo /html/body/div[2]/div/div[3]/main/div[4]/div[3]/div[1]/table[9]

library(rvest)
library(rvest)

# Wikipedia article on the 2022 Lima municipal elections.
link <- "https://es.wikipedia.org/wiki/Elecciones_municipales_de_Lima_de_2022"
# Absolute XPath to the <tbody> of the ninth table in the article body.
# NOTE(review): an absolute path like this breaks whenever the page layout
# changes - confirm it still points at the per-district results table.
path <- '/html/body/div[2]/div/div[3]/main/div[3]/div[3]/div[1]/table[9]/tbody'
# Download the page, pick the matching node(s), parse them as tables and keep
# the first one.
dataWS <- html_table(html_nodes(read_html(link), xpath = path))[[1]]
head(dataWS)
## # A tibble: 6 × 17
##   Distrito RP       RP     PP    PP    SP    SP    FE    FE    APP   APP   JP   
##   <chr>    <chr>    <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Distrito ""       ""     ""    ""    ""    ""    ""    ""    ""    ""    ""   
## 2 Distrito "V"      "%"    "V"   "%"   "V"   "%"   "V"   "%"   "V"   "%"   "V"  
## 3 Ancón    "3,725"  "13.2… "9,3… "33.… "5,9… "21.… "2,0… "7.2… "3,5… "12.… "1,6…
## 4 Ate      "57,374" "17.4… "98,… "29.… "52,… "15.… "27,… "8.5… "26,… "7.9… "25,…
## 5 Barranco "11,604" "36.9… "5,7… "18.… "6,4… "20.… "2,7… "8.8… "2,1… "6.9… "1,6…
## 6 Breña    "22,721" "31.1… "18,… "25.… "14,… "19.… "8,2… "11.… "2,4… "3.3… "3,8…
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>
# The first two rows are header residue from the scraped table (party names
# and the "V" / "%" sub-header), not districts.
dataWS <- dataWS[-(1:2), ]
# Drop row 44 of what remains.
# NOTE(review): presumably a totals/footer row - confirm against the page.
dataWS <- dataWS[-44, ]
# Load the solid-waste spreadsheet; its real header sits in the first data row
# (the import reports auto-generated column names).
residuos <- import("residuosPeru.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
# Promote the first data row to column names, then discard that row and
# reset the row names so numbering starts again at 1.
names(residuos) <- residuos[1, ]
residuos <- residuos[-1, ]
rownames(residuos) <- NULL

# Keep only the records whose PROVINCIA is "LIMA".
residuos <- subset(residuos, PROVINCIA == "LIMA")

# Use the same key column name, DISTRITO, in the other two tables so the
# three sources can later be merged on it.
names(dataWS)[names(dataWS) == "Distrito"] <- "DISTRITO"
names(lima22)[names(lima22) == "distri"] <- "DISTRITO"
# Reduce `residuos` to exactly one record per district.
# The previous implementation drew a random row per district
# (`slice_sample(n = 1)`) without a seed, so every run could feed a different
# value into the model. Pick the most recent PERIODO instead, which makes the
# result deterministic; `with_ties = FALSE` guarantees a single row even if
# two records share the same period.
# NOTE(review): assumes the latest period is the one wanted - confirm.
residuos <- residuos %>%
  group_by(DISTRITO) %>%
  slice_max(as.numeric(PERIODO), n = 1, with_ties = FALSE) %>%
  ungroup()
# Transliterate UTF-8 text to plain ASCII (e.g. accented vowels to their base
# letter). The exact output for non-ASCII characters depends on the platform's
# iconv implementation.
#   x: character vector encoded as UTF-8.
# Returns a character vector of the same length.
remove_accents <- function(x) iconv(x, "UTF-8", "ASCII//TRANSLIT")
# Strip accents from the scraped district names.
dataWS[["DISTRITO"]] <- remove_accents(dataWS[["DISTRITO"]])
# Upper-case every element of a character vector (thin wrapper over toupper).
#   x: character vector.
# Returns the upper-cased vector.
to_uppercase <- function(x) toupper(x)
# Upper-case the scraped district names.
dataWS[["DISTRITO"]] <- to_uppercase(dataWS[["DISTRITO"]])
# Delete every character that is not an unaccented Latin letter (A-Z, a-z):
# spaces, digits, punctuation and accented letters are all removed.
#   x: character vector.
# Returns the filtered character vector.
remove_non_letters <- function(x) {
  gsub(pattern = "[^A-Za-z]", replacement = "", x = x)
}
# Keep only plain letters in the scraped district names.
dataWS[["DISTRITO"]] <- remove_non_letters(dataWS[["DISTRITO"]])
# Shorten the first occurrence of "LURIGANCHOCHOSICA" to "LURIGANCHO" in each
# element; elements without that text are returned unchanged.
#   x: character vector of normalised district names.
replace_value1 <- function(x) {
  sub(pattern = "LURIGANCHOCHOSICA", replacement = "LURIGANCHO", x = x)
}
# Align the Lurigancho spelling in the scraped table.
dataWS[["DISTRITO"]] <- replace_value1(dataWS[["DISTRITO"]])
# Keep only plain letters in the waste table's district names.
residuos[["DISTRITO"]] <- remove_non_letters(residuos[["DISTRITO"]])
# The waste quantity was read as text: parse it and round to two decimals.
residuos[["QRESIDUOS_NO_DOM"]] <- round(
  as.numeric(residuos[["QRESIDUOS_NO_DOM"]]),
  digits = 2
)
str(residuos)
## tibble [43 × 15] (S3: tbl_df/tbl/data.frame)
##  $ FECHA_CORTE     : chr [1:43] "20230614" "20230614" "20230614" "20230614" ...
##  $ N_SEC           : chr [1:43] "6874" "1264" "5002" "10625" ...
##  $ UBIGEO          : chr [1:43] "150102" "150103" "150104" "150105" ...
##  $ REG_NAT         : chr [1:43] "COSTA" "COSTA" "COSTA" "COSTA" ...
##  $ DEPARTAMENTO    : chr [1:43] "LIMA" "LIMA" "LIMA" "LIMA" ...
##  $ PROVINCIA       : chr [1:43] "LIMA" "LIMA" "LIMA" "LIMA" ...
##  $ DISTRITO        : chr [1:43] "ANCON" "ATE" "BARRANCO" "BREA" ...
##  $ POB_TOTAL       : chr [1:43] "41474" "611082" "29482" "92153" ...
##  $ POB_URBANA      : chr [1:43] "41474" "611082" "29482" "92153" ...
##  $ POB_RURAL       : chr [1:43] "0" "0" "0" "0" ...
##  $ GPC_DOM         : chr [1:43] "0.68" "0.57" "0.78" "0.74" ...
##  $ QRESIDUOS_DOM   : chr [1:43] "10293.85" "127750" "8415.049999999999" "24890.53" ...
##  $ QRESIDUOS_NO_DOM: num [1:43] 4412 54750 3606 10667 31726 ...
##  $ QRESIDUOS_MUN   : chr [1:43] "14705.5" "182500" "12021.5" "35557.89" ...
##  $ PERIODO         : chr [1:43] "2017" "2014" "2016" "2019" ...
# Keep only the merge key and the single variable needed from each table.
residuos <- select(residuos, DISTRITO, QRESIDUOS_NO_DOM)
lima22 <- select(lima22, DISTRITO, Tasa)
# Rewrite the first occurrence of "MAGDALENAVIEJA" as "PUEBLOLIBRE" in each
# element; other elements are returned unchanged.
#   x: character vector of normalised district names.
replace_value2 <- function(x) {
  sub(pattern = "MAGDALENAVIEJA", replacement = "PUEBLOLIBRE", x = x)
}
# Rename Magdalena Vieja to Pueblo Libre in the waste table.
residuos[["DISTRITO"]] <- replace_value2(residuos[["DISTRITO"]])
# Repair the truncated district name "BRE" by turning it into "BRENA".
# The pattern is anchored to the whole string: an unanchored "BRE" also matches
# inside other names (e.g. "PUEBLOLIBRE" became "PUEBLOLIBRENA"), corrupting
# districts that were already correct.
#   x: character vector of normalised district names.
# Returns x with elements equal to "BRE" replaced by "BRENA"; every other
# element (including NA) is returned unchanged.
replace_value3 <- function(x) {
  sub("^BRE$", "BRENA", x)
}
# Apply the "BRE" -> "BRENA" correction to the lima22 district names.
lima22[["DISTRITO"]] <- replace_value3(lima22[["DISTRITO"]])
# Rewrite the first occurrence of "BREA" as "BRENA" in each element; other
# elements are returned unchanged.
#   x: character vector of normalised district names.
replace_value4 <- function(x) {
  sub(pattern = "BREA", replacement = "BRENA", x = x)
}
# Apply the "BREA" -> "BRENA" correction to the waste table's district names.
residuos[["DISTRITO"]] <- replace_value4(residuos[["DISTRITO"]])
# Rewrite the first occurrence of "PUEBLOLIBRENA" as "PUEBLOLIBRE" in each
# element; other elements are returned unchanged.
#   x: character vector of normalised district names.
replace_value5 <- function(x) {
  sub(pattern = "PUEBLOLIBRENA", replacement = "PUEBLOLIBRE", x = x)
}
# Restore "PUEBLOLIBRE" in lima22 if it was turned into "PUEBLOLIBRENA".
lima22[["DISTRITO"]] <- replace_value5(lima22[["DISTRITO"]])
# Full outer join of the three sources on DISTRITO (waste data, scraped
# election results, lima22 rate), then keep only the modelling columns.
dataFinal <- residuos %>%
  merge(dataWS, by = "DISTRITO", all = TRUE) %>%
  merge(lima22, by = "DISTRITO", all = TRUE) %>%
  select(DISTRITO, Tasa, RP, PP, SP, QRESIDUOS_NO_DOM)
# Turn RP / PP / SP into winner indicators: 1 for the party (or tied parties)
# with the highest value in the district, 0 otherwise.
#
# The scraped columns are character strings such as "11,604". Taking max() of
# strings compares them lexicographically ("6,4.." > "11,604"), which flagged
# the wrong winner whenever the figures had different digit counts. Parse the
# strings to numbers first: everything except digits and the decimal point is
# removed, so thousands separators and any "%" sign are ignored.
cols_to_compare <- dataFinal[, c("RP", "PP", "SP")]
cols_to_compare[] <- lapply(
  cols_to_compare,
  function(v) as.numeric(gsub("[^0-9.]", "", v))
)
# Row-wise numeric maximum; NA only when all three values are missing.
max_values <- do.call(
  pmax,
  c(unname(as.list(cols_to_compare)), list(na.rm = TRUE))
)
# Exact equality is safe here: the maximum is one of the compared values.
# Missing values are flagged 0 rather than propagating NA.
cols_to_compare[] <- lapply(
  cols_to_compare,
  function(v) as.integer(!is.na(v) & v == max_values)
)
dataFinal[, c("RP", "PP", "SP")] <- cols_to_compare
# Model specification: RP indicator explained by non-domestic waste and Tasa.
h1 <- RP ~ QRESIDUOS_NO_DOM + Tasa
str(dataFinal)
## 'data.frame':    43 obs. of  6 variables:
##  $ DISTRITO        : chr  "ANCON" "ATE" "BARRANCO" "BRENA" ...
##  $ Tasa            : chr  "113,6" "100,8" "553,7" "218,6" ...
##  $ RP              : chr  "0" "0" "0" "1" ...
##  $ PP              : chr  "1" "1" "0" "0" ...
##  $ SP              : chr  "0" "0" "1" "0" ...
##  $ QRESIDUOS_NO_DOM: num  4412 54750 3606 10667 31726 ...
# Tasa uses a decimal comma ("113,6"): swap it for a point and parse as number.
dataFinal$Tasa <- as.numeric(gsub(",", ".", dataFinal$Tasa, fixed = TRUE))
str(dataFinal)
## 'data.frame':    43 obs. of  6 variables:
##  $ DISTRITO        : chr  "ANCON" "ATE" "BARRANCO" "BRENA" ...
##  $ Tasa            : num  114 101 554 219 111 ...
##  $ RP              : chr  "0" "0" "0" "1" ...
##  $ PP              : chr  "1" "1" "0" "0" ...
##  $ SP              : chr  "0" "0" "1" "0" ...
##  $ QRESIDUOS_NO_DOM: num  4412 54750 3606 10667 31726 ...
# glm() with a binomial family takes a two-level factor as the response.
dataFinal[["RP"]] <- as.factor(dataFinal[["RP"]])
# Logistic regression of the RP indicator on the predictors declared in h1.
rlog <- glm(formula = h1, family = binomial, data = dataFinal)
summary(rlog)
## 
## Call:
## glm(formula = h1, family = binomial, data = dataFinal)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)
## (Intercept)      -7.359e-01  6.668e-01  -1.104    0.270
## QRESIDUOS_NO_DOM -1.023e-05  1.557e-05  -0.657    0.511
## Tasa              1.724e-03  2.780e-03   0.620    0.535
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 55.618  on 42  degrees of freedom
## Residual deviance: 54.828  on 40  degrees of freedom
## AIC: 60.828
## 
## Number of Fisher Scoring iterations: 4
library(modelsummary)
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
## 
## Change the default backend persistently:
## 
##   config_modelsummary(factory_default = 'gt')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
# Regression table for the logistic model; coefficients are exponentiated, so
# the table reports odds ratios.
modelrl <- list("Gana RP (I)" = rlog)
modelsummary(
  modelrl,
  title = "Regresión Logística",
  exponentiate = TRUE,
  stars = TRUE,
  output = "kableExtra"
)
Regresión Logística
 Gana RP (I)
(Intercept) 0.479
(0.319)
QRESIDUOS_NO_DOM 1.000
(0.000)
Tasa 1.002
(0.003)
Num.Obs. 43
AIC 60.8
BIC 66.1
Log.Lik. -27.414
F 0.373
RMSE 0.47
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
library(margins)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Average marginal effects (AME) of each predictor in the logistic model,
# rendered as a full-width styled table.
marginalsData <- summary(margins(rlog))
kableExtra::kable_styling(
  kable(marginalsData, caption = "Efectos Marginales Promedio (AME)- Modelo I"),
  full_width = TRUE
)
Efectos Marginales Promedio (AME)- Modelo I
factor AME SE z p lower upper
QRESIDUOS_NO_DOM -0.0000023 0.0000034 -0.6674578 0.5044798 -0.0000090 4.40e-06
Tasa 0.0003846 0.0006099 0.6306042 0.5282994 -0.0008108 1.58e-03

Como los intervalos de confianza de ambos efectos marginales incluyen el 0, se evidencia que ninguna de las variables tiene un efecto estadísticamente significativo. Asimismo, el modelo en general tiene un p-valor de 0.3, el cual es mucho mayor a 0.05, por lo que el modelo no es estadísticamente significativo. Sin embargo, esto no permite descartar totalmente que exista un efecto (?)