Estadística

library(rio)

# Waste records, read from the local Excel workbook through rio.
data1 <- import("residuosPeru.xlsx")

# Lima 2022 workbook, read directly with readxl.
data2 <- readxl::read_excel("Lima2022.xlsx")

## New names:
## • `` -> `...2`

library(rvest)

# Scrape the per-district results table from the Spanish Wikipedia article on
# the 2022 Lima municipal elections.
link <- "https://es.wikipedia.org/wiki/Elecciones_municipales_de_Lima_de_2022"
# XPath of the target node: the ninth <table> under the first <div> of the
# element with id "mw-content-text".
path <- '//*[@id="mw-content-text"]/div[1]/table[9]'
# html_table() yields one table per matched node; keep the first one.
data3 <- (
  read_html(link) %>%
    html_nodes(xpath = path) %>%
    html_table()
)[[1]]
head(data3)

## # A tibble: 6 × 17
##   Distrito RP       RP     PP    PP    SP    SP    FE    FE    APP   APP   JP   
##   <chr>    <chr>    <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Distrito ""       ""     ""    ""    ""    ""    ""    ""    ""    ""    ""   
## 2 Distrito "V"      "%"    "V"   "%"   "V"   "%"   "V"   "%"   "V"   "%"   "V"  
## 3 Ancón    "3,725"  "13.2… "9,3… "33.… "5,9… "21.… "2,0… "7.2… "3,5… "12.… "1,6…
## 4 Ate      "57,374" "17.4… "98,… "29.… "52,… "15.… "27,… "8.5… "26,… "7.9… "25,…
## 5 Barranco "11,604" "36.9… "5,7… "18.… "6,4… "20.… "2,7… "8.8… "2,1… "6.9… "1,6…
## 6 Breña    "22,721" "31.1… "18,… "25.… "14,… "19.… "8,2… "11.… "2,4… "3.3… "3,8…
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Keep the rows for Lima province, drop rows that are exact duplicates of an
# earlier row, then keep only the 2021 period.
data1 <- data1 %>%
  filter(PROVINCIA == "LIMA") %>%
  filter(!duplicated(.)) %>%
  filter(PERIODO == 2021)

# Drop columns 1-6, 8-12, 14 and 15 by position; every other column is kept.
data1 <- data1[, -c(1:6, 8:12, 14, 15)]

# The first two rows of the scraped table repeat header information (see the
# printed preview: an empty row and the "V" / "%" row), so remove both.
data3 <- data3[-c(1, 2), ]

# Drop columns 2, 4, 6 and 8-17 by position. Per the printed preview this
# leaves Distrito plus the "%" column of RP, PP and SP.
data3 <- data3[, -c(2, 4, 6, 8:17)]

# Upper-case the district names and list them to inspect the spelling.
data3$Distrito <- toupper(data3$Distrito)
table(data3$Distrito)

## 
##                   ANCÓN                     ATE                BARRANCO 
##                       1                       1                       1 
##                   BREÑA              CARABAYLLO              CHACLACAYO 
##                       1                       1                       1 
##              CHORRILLOS             CIENEGUILLA                   COMAS 
##                       1                       1                       1 
##             EL AGUSTINO           INDEPENDENCIA             JESÚS MARÍA 
##                       1                       1                       1 
##               LA MOLINA             LA VICTORIA                    LIMA 
##                       1                       1                       1 
##                   LINCE              LOS OLIVOS      LURIGANCHO-CHOSICA 
##                       1                       1                       1 
##                   LURÍN       MAGDALENA DEL MAR              MIRAFLORES 
##                       1                       1                       1 
##              PACHACÁMAC                PUCUSANA            PUEBLO LIBRE 
##                       1                       1                       1 
##           PUENTE PIEDRA           PUNTA HERMOSA             PUNTA NEGRA 
##                       1                       1                       1 
##                   RÍMAC             SAN BARTOLO               SAN BORJA 
##                       1                       1                       1 
##              SAN ISIDRO  SAN JUAN DE LURIGANCHO  SAN JUAN DE MIRAFLORES 
##                       1                       1                       1 
##                SAN LUIS    SAN MARTÍN DE PORRES              SAN MIGUEL 
##                       1                       1                       1 
##             SANTA ANITA     SANTA MARÍA DEL MAR              SANTA ROSA 
##                       1                       1                       1 
##       SANTIAGO DE SURCO               SURQUILLO                   TOTAL 
##                       1                       1                       1 
##       VILLA EL SALVADOR VILLA MARÍA DEL TRIUNFO 
##                       1                       1

# Normalize the district names of the scraped table (presumably so they match
# the spelling used in the other two tables when merging -- confirm).
#
# Only the accented vowels are replaced. "Ñ" is left untouched, so BREÑA needs
# no manual patch afterwards, and the result no longer depends on how the
# platform's iconv transliterates non-ASCII characters.
data3$Distrito <- chartr("ÁÉÍÓÚ", "AEIOU", data3$Distrito)

# Rename by value instead of by row position, so the fix keeps working if the
# scraped table gains, loses or reorders rows.
data3$Distrito[data3$Distrito %in% "LURIGANCHO-CHOSICA"] <- "LURIGANCHO"

# Remove the second column of data2 (the unnamed column that was auto-named
# `...2` when the workbook was read).
data2 <- data2[, -2]

# Strip the leading run of digits and the single space that follows it from
# each district label.
data2$Distrito <- sub("^[0-9]+ ", "", data2$Distrito)

# The first remaining column of data1 becomes the merge key.
names(data1)[1] <- "Distrito"

# Full outer join of data1 and data2 on Distrito: unmatched rows from either
# side are kept and padded with NA.
TOTADATA <- merge(x = data1, y = data2, by = "Distrito", all = TRUE)

# Inner join with the election results: only districts present in both remain.
TOTAL <- merge(x = TOTADATA, y = data3, by = "Distrito")

library(dplyr)

# Convert a vote-share string (e.g. "13.25" or "13.25 %") into a number.
# Anything that is still not a plain number becomes NA with a warning, so bad
# cells are visible instead of being compared as text.
parse_share <- function(x) {
  as.numeric(gsub("[%[:space:]]", "", as.character(x)))
}

# Victoria_RP is 1 when RP's share is strictly greater than both PP's and SP's,
# and 0 otherwise.
# RP, PP and SP are character columns, so they must be parsed before comparing:
# on raw strings `>` and pmax() work lexicographically (e.g. "9.5" > "13.2"),
# which mislabels districts.
TOTAL <- TOTAL %>%
  mutate(
    Victoria_RP = ifelse(
      parse_share(RP) > pmax(parse_share(PP), parse_share(SP)),
      1,
      0
    )
  )

# Keep only the rows with no missing value in any column.
TOTAL1 <- na.omit(TOTAL)

# Denuncias is stored as text with a decimal comma (e.g. "110,1"). If it stays
# character, the model functions treat it as a factor with one level per
# district: the fit is saturated (0 residual degrees of freedom) and
# QRESIDUOS_NO_DOM cannot be estimated. Convert it to numeric first; the guard
# makes the step a no-op when the column is already numeric.
if (!is.numeric(TOTAL1$Denuncias)) {
  TOTAL1$Denuncias <- as.numeric(
    sub(",", ".", as.character(TOTAL1$Denuncias), fixed = TRUE)
  )
}

# Model specification: RP victory explained by Denuncias and QRESIDUOS_NO_DOM.
modelo1 <- formula(Victoria_RP ~ Denuncias + QRESIDUOS_NO_DOM)

# Logistic regression (binomial family, default logit link).
# NOTE: any summary rendered before this fix reflects the saturated factor
# model and must be regenerated.
rlog1 <- glm(modelo1, data = TOTAL1, family = binomial)
summary(rlog1)

## 
## Call:
## glm(formula = modelo1, family = binomial, data = TOTAL1)
## 
## Coefficients: (1 not defined because of singularities)
##                    Estimate Std. Error z value Pr(>|z|)
## (Intercept)      -2.657e+01  3.561e+05       0        1
## Denuncias110,1    5.313e+01  5.036e+05       0        1
## Denuncias111,4   -1.430e-13  5.036e+05       0        1
## Denuncias113,6   -3.291e-13  5.036e+05       0        1
## Denuncias120,2   -2.475e-13  5.036e+05       0        1
## Denuncias121,9    5.313e+01  5.036e+05       0        1
## Denuncias123,8    5.313e+01  5.036e+05       0        1
## Denuncias124,0    5.313e+01  5.036e+05       0        1
## Denuncias126,6   -2.251e-13  5.036e+05       0        1
## Denuncias127,2   -1.874e-13  5.036e+05       0        1
## Denuncias133,8   -2.227e-13  5.036e+05       0        1
## Denuncias148,0    5.313e+01  5.036e+05       0        1
## Denuncias151,0    5.313e+01  5.036e+05       0        1
## Denuncias153,1    5.313e+01  5.036e+05       0        1
## Denuncias153,9   -1.277e-13  5.036e+05       0        1
## Denuncias154,4    5.313e+01  5.036e+05       0        1
## Denuncias155,9    5.313e+01  5.036e+05       0        1
## Denuncias160,3   -1.579e-13  5.036e+05       0        1
## Denuncias160,5   -1.909e-13  5.036e+05       0        1
## Denuncias165,0   -1.331e-13  5.036e+05       0        1
## Denuncias169,8    5.313e+01  5.036e+05       0        1
## Denuncias170,4   -2.141e-13  5.036e+05       0        1
## Denuncias188,2   -1.465e-13  5.036e+05       0        1
## Denuncias200,4    5.313e+01  5.036e+05       0        1
## Denuncias210,3    5.313e+01  5.036e+05       0        1
## Denuncias215,1   -1.366e-13  5.036e+05       0        1
## Denuncias218,6    5.313e+01  5.036e+05       0        1
## Denuncias227,1   -2.113e-13  5.036e+05       0        1
## Denuncias251,3   -1.686e-13  5.036e+05       0        1
## Denuncias261,6    5.313e+01  5.036e+05       0        1
## Denuncias268,8    5.313e+01  5.036e+05       0        1
## Denuncias278,8    5.313e+01  5.036e+05       0        1
## Denuncias291,8   -1.551e-13  5.036e+05       0        1
## Denuncias389,8   -2.417e-13  5.036e+05       0        1
## Denuncias553,7    5.313e+01  5.036e+05       0        1
## Denuncias63,2    -1.922e-13  5.036e+05       0        1
## Denuncias642,7    5.313e+01  5.036e+05       0        1
## Denuncias78,9    -1.572e-13  5.036e+05       0        1
## Denuncias84,6    -1.047e-13  5.036e+05       0        1
## Denuncias90,1    -1.923e-13  5.036e+05       0        1
## Denuncias90,5     5.313e+01  5.036e+05       0        1
## Denuncias99,2    -2.389e-13  5.036e+05       0        1
## QRESIDUOS_NO_DOM         NA         NA      NA       NA
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 5.7843e+01  on 41  degrees of freedom
## Residual deviance: 2.4367e-10  on  0  degrees of freedom
## AIC: 84
## 
## Number of Fisher Scoring iterations: 25

# Ordinary least squares fit of the same specification (modelo1) on TOTAL1.
# Denuncias must be numeric: as a character column with decimal commas it is
# expanded into one dummy per district, which leaves 0 residual degrees of
# freedom (all residuals 0, NaN standard errors). The guard makes the
# conversion a no-op when the column is already numeric.
if (!is.numeric(TOTAL1$Denuncias)) {
  TOTAL1$Denuncias <- as.numeric(
    sub(",", ".", as.character(TOTAL1$Denuncias), fixed = TRUE)
  )
}

# NOTE: any summary rendered before this fix reflects the saturated factor
# model and must be regenerated.
reg2 <- lm(modelo1, data = TOTAL1)
summary(reg2)

## 
## Call:
## lm(formula = modelo1, data = TOTAL1)
## 
## Residuals:
## ALL 42 residuals are 0: no residual degrees of freedom!
## 
## Coefficients: (1 not defined because of singularities)
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)       2.484e-16        NaN     NaN      NaN
## Denuncias110,1    1.000e+00        NaN     NaN      NaN
## Denuncias111,4    1.380e-15        NaN     NaN      NaN
## Denuncias113,6    3.840e-15        NaN     NaN      NaN
## Denuncias120,2   -2.879e-15        NaN     NaN      NaN
## Denuncias121,9    1.000e+00        NaN     NaN      NaN
## Denuncias123,8    1.000e+00        NaN     NaN      NaN
## Denuncias124,0    1.000e+00        NaN     NaN      NaN
## Denuncias126,6   -9.404e-16        NaN     NaN      NaN
## Denuncias127,2   -6.849e-16        NaN     NaN      NaN
## Denuncias133,8   -1.726e-16        NaN     NaN      NaN
## Denuncias148,0    1.000e+00        NaN     NaN      NaN
## Denuncias151,0    1.000e+00        NaN     NaN      NaN
## Denuncias153,1    1.000e+00        NaN     NaN      NaN
## Denuncias153,9   -7.062e-16        NaN     NaN      NaN
## Denuncias154,4    1.000e+00        NaN     NaN      NaN
## Denuncias155,9    1.000e+00        NaN     NaN      NaN
## Denuncias160,3    1.500e-16        NaN     NaN      NaN
## Denuncias160,5   -8.495e-16        NaN     NaN      NaN
## Denuncias165,0    1.772e-16        NaN     NaN      NaN
## Denuncias169,8    1.000e+00        NaN     NaN      NaN
## Denuncias170,4   -1.191e-16        NaN     NaN      NaN
## Denuncias188,2   -5.617e-16        NaN     NaN      NaN
## Denuncias200,4    1.000e+00        NaN     NaN      NaN
## Denuncias210,3    1.000e+00        NaN     NaN      NaN
## Denuncias215,1   -7.497e-16        NaN     NaN      NaN
## Denuncias218,6    1.000e+00        NaN     NaN      NaN
## Denuncias227,1   -5.055e-16        NaN     NaN      NaN
## Denuncias251,3   -5.746e-17        NaN     NaN      NaN
## Denuncias261,6    1.000e+00        NaN     NaN      NaN
## Denuncias268,8    1.000e+00        NaN     NaN      NaN
## Denuncias278,8    1.000e+00        NaN     NaN      NaN
## Denuncias291,8   -1.248e-15        NaN     NaN      NaN
## Denuncias389,8   -9.801e-16        NaN     NaN      NaN
## Denuncias553,7    1.000e+00        NaN     NaN      NaN
## Denuncias63,2    -5.638e-16        NaN     NaN      NaN
## Denuncias642,7    1.000e+00        NaN     NaN      NaN
## Denuncias78,9    -4.742e-16        NaN     NaN      NaN
## Denuncias84,6    -1.666e-15        NaN     NaN      NaN
## Denuncias90,1    -7.051e-16        NaN     NaN      NaN
## Denuncias90,5     1.000e+00        NaN     NaN      NaN
## Denuncias99,2    -9.421e-16        NaN     NaN      NaN
## QRESIDUOS_NO_DOM         NA         NA      NA       NA
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 41 and 0 DF,  p-value: NA

Estadística

:)

2024-05-25