PARCIAL-SEGUNDA-PARTE.knit

library(rio)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#LIMPIEZA DE LA PRIMERA BASE DE DATOS: LIMA2022

# Read the "Lima2022.xlsx" workbook into a data frame via rio::import().
data_denuncias <- import("Lima2022.xlsx")

## New names:
## • `` -> `...2`

# Discard the second column (the unnamed one that the import step auto-named
# `...2`), then show which column names remain.
data_denuncias <- data_denuncias[, -2]

print(colnames(data_denuncias))

## [1] "Distrito" "2022"

library(dplyr)

# Give the `2022` column a syntactic, self-describing name.
data_denuncias <- rename(data_denuncias, DENUNCIAS_2022 = "2022")

#LIMPIEZA DE LA SEGUNDA BASE DE DATOS: RESIDUOS PERU

# Read the "residuosPeru.xlsx" workbook into a data frame via rio::import().
datos_residuos <- import("residuosPeru.xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`

# Keep the rows whose sixth column (renamed PROVINCIA further down) equals
# "LIMA", then retain only the first 43 of those rows.
# NOTE(review): the later str() output shows PERIODO values of "2014" for the
# leading rows, so the first 43 matches presumably belong to a single period;
# confirm that this is the period the analysis is meant to use.
residuos_lima <- datos_residuos %>%
  filter(.data[["...6"]] == "LIMA") %>%
  slice(seq_len(43))

print(colnames(residuos_lima))

##  [1] "Residuos municipales generados anualmente"
##  [2] "...2"                                     
##  [3] "...3"                                     
##  [4] "...4"                                     
##  [5] "...5"                                     
##  [6] "...6"                                     
##  [7] "...7"                                     
##  [8] "...8"                                     
##  [9] "...9"                                     
## [10] "...10"                                    
## [11] "...11"                                    
## [12] "...12"                                    
## [13] "...13"                                    
## [14] "...14"                                    
## [15] "...15"

# Replace the placeholder column names produced by the import with descriptive
# ones. The lookup vector is written as new_name = "old_name".
residuos_lima <- rename(
  residuos_lima,
  all_of(c(
    FECHA_CORTE = "Residuos municipales generados anualmente",
    N_SEC = "...2",
    UBIGEO = "...3",
    REG_NAT = "...4",
    DEPARTAMENTO = "...5",
    PROVINCIA = "...6",
    DISTRITO1 = "...7",
    POB_TOTAL = "...8",
    POB_URBANA = "...9",
    POB_RURAL = "...10",
    GPC_DOM = "...11",
    QRESIDUOS_DOM = "...12",
    QRESIDUOS_NO_DOM = "...13",
    QRESIDUOS_MUN = "...14",
    PERIODO = "...15"
  ))
)

print(colnames(residuos_lima))

##  [1] "FECHA_CORTE"      "N_SEC"            "UBIGEO"           "REG_NAT"         
##  [5] "DEPARTAMENTO"     "PROVINCIA"        "DISTRITO1"        "POB_TOTAL"       
##  [9] "POB_URBANA"       "POB_RURAL"        "GPC_DOM"          "QRESIDUOS_DOM"   
## [13] "QRESIDUOS_NO_DOM" "QRESIDUOS_MUN"    "PERIODO"

#TERCERA BASE DE DATOS

# Read the "datos_votos.xlsx" workbook and list its column names.
data_votos <- import("datos_votos.xlsx")

print(colnames(data_votos))

##  [1] "Column1"  "Column2"  "Column3"  "Column4"  "Column5"  "Column6" 
##  [7] "Column7"  "Column8"  "Column9"  "Column10" "Column11" "Column12"
## [13] "Column13" "Column14" "Column15" "Column16" "Column17"

# Rename Column1..Column17: the first column becomes DISTRITOVOTO and the
# remaining sixteen become VOTOS_<party>/PORC_<party> pairs, in party order
# RP, PP, SP, FE, APP, JP, AVP, PL.
# outer() builds a 2 x 8 matrix of names; as.vector() reads it column by
# column, which yields VOTOS_RP, PORC_RP, VOTOS_PP, PORC_PP, ...
data_votos <- data_votos %>%
  rename(all_of(setNames(
    paste0("Column", 1:17),
    c(
      "DISTRITOVOTO",
      as.vector(outer(
        c("VOTOS_", "PORC_"),
        c("RP", "PP", "SP", "FE", "APP", "JP", "AVP", "PL"),
        paste0
      ))
    )
  )))

print(colnames(data_votos))

##  [1] "DISTRITOVOTO" "VOTOS_RP"     "PORC_RP"      "VOTOS_PP"     "PORC_PP"     
##  [6] "VOTOS_SP"     "PORC_SP"      "VOTOS_FE"     "PORC_FE"      "VOTOS_APP"   
## [11] "PORC_APP"     "VOTOS_JP"     "PORC_JP"      "VOTOS_AVP"    "PORC_AVP"    
## [16] "VOTOS_PL"     "PORC_PL"

# Convert every column except the district name to numeric, treating "," as
# the decimal mark. Uses mutate(across()) in place of the superseded
# mutate_at().
# NOTE(review): the later str() output shows VOTOS_* values such as 62.33 next
# to 845, which suggests the vote counts used "." as a thousands separator and
# were read as decimals. Confirm against the spreadsheet before using the
# VOTOS_* columns.
data_votos <- data_votos %>%
  mutate(across(
    -DISTRITOVOTO,
    function(x) as.numeric(gsub(",", ".", x, fixed = TRUE))
  ))

# Manually reorder the vote rows so they line up, position by position, with
# the other two tables (they are combined column-wise further down).

# Step 1: move the row currently at position 15 to the top, leaving the
# relative order of all other rows untouched.
data_votos <- data_votos[c(15, seq_len(nrow(data_votos))[-15]), ]

# Step 2: rotate rows 21-24 by one place. The old row 24 goes to position 21
# and the old rows 21-23 each shift down one slot.
temp <- data_votos[24, ]

data_votos[22:24, ] <- data_votos[21:23, ]

data_votos[21, ] <- temp

#ANALISIS DE LA DATA

# Paste the three tables side by side. bind_cols() matches rows purely by
# position, so all three inputs must already be in the same district order.
data_unida <- bind_cols(data_denuncias, residuos_lima, data_votos)

# Drop the 7th and 8th columns of the combined table (DEPARTAMENTO and
# PROVINCIA, given the two leading columns contributed by data_denuncias).
data_unida <- data_unida[-(7:8)]

# Visual alignment check: print the three district-name columns together.
print(data_unida[c(1, 7, 16)])

##                   Distrito               DISTRITO1            DISTRITOVOTO
## 1                     LIMA                    LIMA                    Lima
## 2                    ANCON                   ANCON                   Ancón
## 3                      ATE                     ATE                     Ate
## 4                 BARRANCO                BARRANCO                Barranco
## 5                     BREс                   BREÑA                   Breña
## 6               CARABAYLLO              CARABAYLLO              Carabayllo
## 7               CHACLACAYO              CHACLACAYO              Chaclacayo
## 8               CHORRILLOS              CHORRILLOS              Chorrillos
## 9              CIENEGUILLA             CIENEGUILLA             Cieneguilla
## 10                   COMAS                   COMAS                   Comas
## 11             EL AGUSTINO             EL AGUSTINO             El Agustino
## 12           INDEPENDENCIA           INDEPENDENCIA           Independencia
## 13             JESUS MARIA             JESUS MARIA             Jesús María
## 14               LA MOLINA               LA MOLINA               La Molina
## 15             LA VICTORIA             LA VICTORIA             La Victoria
## 16                   LINCE                   LINCE                   Lince
## 17              LOS OLIVOS              LOS OLIVOS              Los Olivos
## 18              LURIGANCHO              LURIGANCHO      Lurigancho-Chosica
## 19                   LURIN                   LURIN                   Lurín
## 20       MAGDALENA DEL MAR       MAGDALENA DEL MAR       Magdalena del Mar
## 21            PUEBLO LIBRE         MAGDALENA VIEJA            Pueblo Libre
## 22              MIRAFLORES              MIRAFLORES              Miraflores
## 23              PACHACAMAC              PACHACAMAC              Pachacámac
## 24                PUCUSANA                PUCUSANA                Pucusana
## 25           PUENTE PIEDRA           PUENTE PIEDRA           Puente Piedra
## 26           PUNTA HERMOSA           PUNTA HERMOSA           Punta Hermosa
## 27             PUNTA NEGRA             PUNTA NEGRA             Punta Negra
## 28                   RIMAC                   RIMAC                   Rímac
## 29             SAN BARTOLO             SAN BARTOLO             San Bartolo
## 30               SAN BORJA               SAN BORJA               San Borja
## 31              SAN ISIDRO              SAN ISIDRO              San Isidro
## 32  SAN JUAN DE LURIGANCHO  SAN JUAN DE LURIGANCHO  San Juan de Lurigancho
## 33  SAN JUAN DE MIRAFLORES  SAN JUAN DE MIRAFLORES  San Juan de Miraflores
## 34                SAN LUIS                SAN LUIS                San Luis
## 35    SAN MARTIN DE PORRES    SAN MARTIN DE PORRES    San Martín de Porres
## 36              SAN MIGUEL              SAN MIGUEL              San Miguel
## 37             SANTA ANITA             SANTA ANITA             Santa Anita
## 38     SANTA MARIA DEL MAR     SANTA MARIA DEL MAR     Santa María del Mar
## 39              SANTA ROSA              SANTA ROSA              Santa Rosa
## 40       SANTIAGO DE SURCO       SANTIAGO DE SURCO       Santiago de Surco
## 41               SURQUILLO               SURQUILLO               Surquillo
## 42       VILLA EL SALVADOR       VILLA EL SALVADOR       Villa El Salvador
## 43 VILLA MARIA DEL TRIUNFO VILLA MARIA DEL TRIUNFO Villa María del Triunfo

# Remove the two redundant district-name columns (positions 7 and 16, i.e.
# DISTRITO1 and DISTRITOVOTO in the comparison printed above) and inspect the
# resulting structure.
data_unida <- data_unida[-c(7, 16)]

str(data_unida)

## 'data.frame':    43 obs. of  30 variables:
##  $ Distrito        : chr  "LIMA" "ANCON" "ATE" "BARRANCO" ...
##  $ DENUNCIAS_2022  : num  643 114 101 554 219 ...
##  $ FECHA_CORTE     : chr  "20230614" "20230614" "20230614" "20230614" ...
##  $ N_SEC           : chr  "1262" "1263" "1264" "1265" ...
##  $ UBIGEO          : chr  "150101" "150102" "150103" "150104" ...
##  $ REG_NAT         : chr  "COSTA" "COSTA" "COSTA" "COSTA" ...
##  $ POB_TOTAL       : chr  "276857" "38482" "611082" "30641" ...
##  $ POB_URBANA      : chr  "276857" "38482" "611082" "30641" ...
##  $ POB_RURAL       : chr  "0" "0" "0" "0" ...
##  $ GPC_DOM         : chr  "1.59" "0.51" "0.56999999999999995" "1.25" ...
##  $ QRESIDUOS_DOM   : chr  "161045.5" "7223.3" "127750" "13999.3" ...
##  $ QRESIDUOS_NO_DOM: chr  "69019.5" "3095.7" "54750" "5999.7" ...
##  $ QRESIDUOS_MUN   : chr  "230065" "10319" "182500" "19999" ...
##  $ PERIODO         : chr  "2014" "2014" "2014" "2014" ...
##  $ VOTOS_RP        : num  62.33 3.73 57.37 11.6 22.72 ...
##  $ PORC_RP         : num  29.2 13.3 17.5 36.9 31.1 ...
##  $ VOTOS_PP        : num  52.98 9.33 98.37 5.77 18.68 ...
##  $ PORC_PP         : num  24.9 33.3 30 18.3 25.6 ...
##  $ VOTOS_SP        : num  43.61 5.99 52.07 6.4 14.02 ...
##  $ PORC_SP         : num  20.5 21.4 15.9 20.4 19.2 ...
##  $ VOTOS_FE        : num  25.79 2.04 27.91 2.77 8.24 ...
##  $ PORC_FE         : num  12.1 7.28 8.51 8.82 11.3 ...
##  $ VOTOS_APP       : num  7.68 3.59 26.14 2.19 2.47 ...
##  $ PORC_APP        : num  3.6 12.79 7.97 6.98 3.39 ...
##  $ VOTOS_JP        : num  13.13 1.68 25.11 1.63 3.81 ...
##  $ PORC_JP         : num  6.16 5.99 7.66 5.18 5.23 7.37 6.07 5.61 6.8 7.17 ...
##  $ VOTOS_AVP       : num  4.51 1.23 32.85 845 2.44 ...
##  $ PORC_AVP        : num  2.12 4.38 10.01 2.69 3.34 ...
##  $ VOTOS_PL        : num  3.1 465 8.19 223 552 ...
##  $ PORC_PL         : num  1.46 1.67 2.5 0.71 0.76 2.3 3 1.09 1.62 1.44 ...

#PREGUNTA UNO

# Seed the RNG (no random call is visible in this part of the script).
set.seed(2019)

# Binary outcome: 1 when RP's percentage is strictly greater than both PP's
# and SP's percentage in the district, 0 otherwise.
# NOTE(review): RP is compared only with PP and SP; the FE, APP, JP, AVP and
# PL percentages are not considered. Confirm that this is the intended
# definition of "RP wins".
data_unida$RP_GANA <- with(
  data_unida,
  as.numeric(PORC_RP > PORC_PP & PORC_RP > PORC_SP)
)

data_unida$RP_GANA

##  [1] 1 0 0 1 1 0 1 1 0 0 0 0 1 1 1 1 0 0 0 1 1 1 0 0 0 1 1 0 0 1 1 0 0 1 0 1 0 1
## [39] 0 1 1 0 0

#caso de exito es 1 y el caso de no exito es 0

# QRESIDUOS_NO_DOM was imported as character; make it numeric so it can be
# used as a predictor.
data_unida$QRESIDUOS_NO_DOM <- as.numeric(data_unida[["QRESIDUOS_NO_DOM"]])

# Logistic regression of the RP_GANA indicator on non-household waste and
# 2022 complaints.
h1 <- RP_GANA ~ QRESIDUOS_NO_DOM + DENUNCIAS_2022

rlog1 <- glm(h1, data = data_unida, family = binomial)

# Named list so the table column carries a readable model label.
modelrl <- list("RP_gana (I)" = rlog1)

library(modelsummary)

## Version 2.0.0 of `modelsummary`, to be released soon, will introduce a
##   breaking change: The default table-drawing package will be `tinytable`
##   instead of `kableExtra`. All currently supported table-drawing packages
##   will continue to be supported for the foreseeable future, including
##   `kableExtra`, `gt`, `huxtable`, `flextable, and `DT`.
##   
##   You can always call the `config_modelsummary()` function to change the
##   default table-drawing package in persistent fashion. To try `tinytable`
##   now:
##   
##   config_modelsummary(factory_default = 'tinytable')
##   
##   To set the default back to `kableExtra`:
##   
##   config_modelsummary(factory_default = 'kableExtra')

# Render the logistic model as a kableExtra table with significance stars.
modelsummary(
  modelrl,
  output = "kableExtra",
  stars = TRUE,
  title = "Regresión Logística"
)

Regresión Logística
	RP_gana (I)
(Intercept)	-1.789+
	(1.041)
QRESIDUOS_NO_DOM	0.000+
	(0.000)
DENUNCIAS_2022	0.015*
	(0.006)
Num.Obs.	43
AIC	50.6
BIC	55.9
Log.Lik.	-22.318
F	3.674
RMSE	0.42
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001

#PREGUNTA2

# Linear model specification: RP vote percentage explained by non-household
# waste and 2022 complaints.
modelo2 <- PORC_RP ~ QRESIDUOS_NO_DOM + DENUNCIAS_2022

print(modelo2)

## PORC_RP ~ QRESIDUOS_NO_DOM + DENUNCIAS_2022

# Fit the linear model by ordinary least squares and show the full summary.
reg2 <- lm(formula = modelo2, data = data_unida)
summary(reg2)

## 
## Call:
## lm(formula = modelo2, data = data_unida)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.274  -6.968  -3.174   4.480  32.712 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       2.533e+01  3.427e+00   7.390  5.4e-09 ***
## QRESIDUOS_NO_DOM -1.124e-04  9.947e-05  -1.130   0.2650    
## DENUNCIAS_2022    2.861e-02  1.188e-02   2.409   0.0207 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.01 on 40 degrees of freedom
## Multiple R-squared:  0.139,  Adjusted R-squared:  0.09595 
## F-statistic: 3.229 on 2 and 40 DF,  p-value: 0.05013

library(modelsummary)
# Wrap the fitted linear model in a named list so the table column is labelled,
# then render it as a kableExtra table with significance stars.
model2 <- list("Porcentaje Renovacion Popular (Ii)" = reg2)
modelsummary(
  model2,
  output = "kableExtra",
  stars = TRUE,
  title = "Regresion: modelo 2"
)

Regresion: modelo 2
	Porcentaje Renovacion Popular (Ii)
(Intercept)	25.327***
	(3.427)
QRESIDUOS_NO_DOM	0.000
	(0.000)
DENUNCIAS_2022	0.029*
	(0.012)
Num.Obs.	43
R2	0.139
R2 Adj.	0.096
AIC	340.7
BIC	347.8
Log.Lik.	-166.361
F	3.229
RMSE	11.59
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

# Breusch-Pagan test for heteroskedasticity on the linear model, shown as a
# one-row table (statistic, degrees of freedom, p-value).
resBP <- bptest(reg2)
# check.names = FALSE keeps the "p-value" header as written; by default
# data.frame() rewrites it to the syntactic name "p.value".
data.frame(
  BP = resBP$statistic,
  df = resBP$parameter,
  `p-value` = resBP$p.value,
  check.names = FALSE
) %>%
  kable(caption = resBP$method) %>%
  kable_styling(full_width = FALSE)

studentized Breusch-Pagan test
	BP	df	p.value
BP	1.217374	2	0.5440648

library(DescTools)

## 
## Attaching package: 'DescTools'

## The following objects are masked from 'package:modelsummary':
## 
##     Format, Mean, Median, N, SD, Var

# Variance inflation factors of the linear model's predictors, rendered as a
# compact one-column table.
VIF(reg2) %>%
  kable(
    caption = "Evaluando Multicolinealidad usando VIF (Variance Inflation Factors)",
    col.names = "VIF"
  ) %>%
  kable_styling(full_width = FALSE)

Evaluando Multicolinealidad usando VIF (Variance Inflation Factors)
	VIF
QRESIDUOS_NO_DOM	1.018987
DENUNCIAS_2022	1.018987