Ejercicio con las variables en porcentaje

library(rio)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the three input tables. Judging by the file names these are votes,
# education counts and death counts — TODO confirm against the data source.
dataVotos <- import("DataV.csv")
dataE <- import("dataEDU.csv")
dataFC <- import("Fc.csv")

# Left-join dataFC onto dataE by UBIGEO, then drop every row that still has
# a missing value in any column (unmatched UBIGEOs are therefore discarded).
temp1 <- merge(dataE, dataFC, by = "UBIGEO", all.x = TRUE) %>%
  na.omit()

# Collapse to one row per department by summing every numeric column.
# The anonymous function replaces the `...` forwarding of `across()`, which is
# deprecated since dplyr 1.1.0.
data_dep <- temp1 %>%
  group_by(DEPARTAMENTO) %>%
  summarise(
    across(where(is.numeric), \(x) sum(x, na.rm = TRUE)),
    .groups = "drop"
  )
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(where(is.numeric), sum, na.rm = TRUE)`.
## ℹ In group 1: `DEPARTAMENTO = "AMAZONAS"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))
# Remove the 19th row of the department table.
# NOTE(review): this is a positional drop; which department sits in row 19
# depends on the grouping order — confirm and consider filtering by name.
data_dep <- data_dep[-19, ]

# Preliminary left join of the department table with the vote table.
# NOTE(review): dataVotos is still character-typed here and dataFinal is
# rebuilt by the later merge on data_unida, so this result appears unused.
dataFinal <- merge(data_dep, dataVotos, by = "DEPARTAMENTO", all.x = TRUE)

# Inspect the column types of the vote table.
str(dataVotos)
## 'data.frame':    25 obs. of  5 variables:
##  $ Castillo     : chr  "34 464" "110 620" "88 812" "256 224" ...
##  $ Fujimori     : chr  "17 815" "67 394" "10 879" "40 216" ...
##  $ Participación: chr  "184 057" "613 850" "219 260" "902 243" ...
##  $ Electores    : chr  "306 186" "886 265" "316 000" "1 145 268" ...
##  $ DEPARTAMENTO : chr  "AMAZONAS" "ANCASH" "APURIMAC" "AREQUIPA" ...
# Convert every column except DEPARTAMENTO to numeric after stripping the
# space characters used as thousands separators. Values that still fail to
# parse become NA (with a coercion warning).
dataVotos <- mutate(
  dataVotos,
  across(
    !DEPARTAMENTO,
    \(col) as.numeric(gsub(" ", "", col, fixed = TRUE))
  )
)
## Warning: There were 4 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(...)`.
## Caused by warning:
## ! NAs introducidos por coerción
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 3 remaining warnings.
# Overwrite the four vote columns of row 15 with hard-coded totals.
# NOTE(review): row 15 is presumably the row whose values became NA in the
# numeric conversion — confirm the row and the source of these figures.
dataVotos[15, c("Electores", "Participación", "Fujimori", "Castillo")] <-
  list(8322644, 6206220, 754216, 416743)
# Relabel "LIMA METROPOLITANA" as "LIMA" so both rows share one key.
data_dep$DEPARTAMENTO[data_dep$DEPARTAMENTO %in% "LIMA METROPOLITANA"] <- "LIMA"

# Re-aggregate by department so the rows that now share a name are summed
# into a single row. The anonymous function replaces the `...` forwarding of
# `across()`, which is deprecated since dplyr 1.1.0.
data_unida <- data_dep %>%
  group_by(DEPARTAMENTO) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE))) %>%
  ungroup()

-Data final

# Combinar los dataframes uno por uno
# Left-join the aggregated department table with the cleaned vote table and
# derive every percentage used by the models. Columns are created in the same
# order as before so the resulting data frame layout is unchanged.
dataFinal <- merge(data_unida, dataVotos, by = "DEPARTAMENTO", all.x = TRUE) %>%
  mutate(
    # Vote percentages are computed over Electores, not over Participación.
    porc_Castillo = (Castillo / Electores) * 100,
    porc_Fujimori = (Fujimori / Electores) * 100,
    # Education: share with / without higher education.
    total_educacion = total_superior + total_no_superior,
    porc_superior = (total_superior / total_educacion) * 100,
    porc_no_superior = (total_no_superior / total_educacion) * 100,
    # Literacy: share that can / cannot read.
    total_alfabetismo = total_lee + total_no_lee,
    porc_lee = (total_lee / total_alfabetismo) * 100,
    porc_no_lee = (total_no_lee / total_alfabetismo) * 100,
    # Deaths expressed as a percentage of Electores.
    porc_fallecidos = (total_fallecidos / Electores) * 100
  )

ANÁLISIS

  • Las variables independientes consideradas fueron:

Porcentaje de personas que no saben leer

Porcentaje de personas sin educación superior

Porcentaje de fallecidos durante la pandemia

Poisson

library(modelsummary)
## Warning: package 'modelsummary' was built under R version 4.4.2
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
# Model 1: Castillo vote percentage explained by the "negative" indicators
# (cannot read, no higher education) plus the death percentage.
h1 <- porc_Castillo ~ porc_no_lee + porc_no_superior + porc_fallecidos

# Poisson GLM with log link and log(Electores) as offset.
# NOTE(review): the response is a non-integer percentage, which is why
# dpois() warns and the AIC is Inf — confirm whether the raw count
# (Castillo) was the intended response for a model with this offset.
rp1 <- glm(
  formula = h1,
  family = poisson(link = "log"),
  data = dataFinal,
  offset = log(Electores) # exposure
)
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.255903
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.481594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.105063
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.372405
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.515097
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.066724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.093410
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.645326
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.645611
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.924952
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.693603
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.377151
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.318710
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.495377
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.007339
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.237544
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.551355
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.690356
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.035409
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.086333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.693376
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.529128
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.801035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.537733
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.755512
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.255903
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.481594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.105063
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.372405
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.515097
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.066724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.093410
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.645326
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.645611
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.924952
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.693603
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.377151
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.318710
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.495377
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.007339
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.237544
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.551355
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.690356
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.035409
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.086333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.693376
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.529128
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.801035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.537733
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.755512
# Print the coefficient table, deviances and AIC of the first Poisson fit.
summary(rp1)
## 
## Call:
## glm(formula = h1, family = poisson(link = "log"), data = dataFinal, 
##     offset = log(Electores))
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -1.72696    2.82012  -0.612   0.5403    
## porc_no_lee      -0.02835    0.01754  -1.616   0.1061    
## porc_no_superior -0.06125    0.02931  -2.089   0.0367 *  
## porc_fallecidos  -3.63251    0.42606  -8.526   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 636.39  on 24  degrees of freedom
## Residual deviance: 413.13  on 21  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 6

Exponenciamos para interpretar

#cbind(exp(coef(rp1)),exp(confint(rp1)))

Primeras conclusiones:

La variable más significativa es “porc_fallecidos”. Tiene una relación inversa con el voto a Pedro Castillo, lo que significa que, a mayor porcentaje de fallecidos, menor es el número de votos a Castillo. La variable “porc_no_superior” también muestra una relación negativa, pero con menos fuerza, mientras que “porc_no_lee” no tiene un impacto significativo.

Dispersión de la 1ra Poisson

# Over y underdispersión: under → quasi poisson ; over → quasi y binomial negativa

library(magrittr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.4.2
## 
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# One-sided dispersion tests on the first Poisson fit; each flag is TRUE when
# the corresponding alternative is supported at the 5% level.
overdispersion <- AER::dispersiontest(rp1, alternative = "greater")$p.value < 0.05
underdispersion <- AER::dispersiontest(rp1, alternative = "less")$p.value < 0.05

# Two-row table (row names come from the variable names) rendered with kable.
testResult <- as.data.frame(rbind(overdispersion, underdispersion))
colnames(testResult) <- "Es probable?"
kableExtra::kable_styling(kable(testResult, caption = "Test de Equidispersión"))
Test de Equidispersión
Es probable?
overdispersion TRUE
underdispersion FALSE

Regresión Quasipoisson

# Quasi-Poisson regression: same formula and offset as rp1, but the
# dispersion parameter is estimated instead of fixed at 1.
rqp <- glm(
  formula = h1,
  family = quasipoisson(link = "log"),
  data = dataFinal,
  offset = log(Electores)
)
summary(rqp)
## 
## Call:
## glm(formula = h1, family = quasipoisson(link = "log"), data = dataFinal, 
##     offset = log(Electores))
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -1.72696   18.35060  -0.094    0.926
## porc_no_lee      -0.02835    0.11414  -0.248    0.806
## porc_no_superior -0.06125    0.19074  -0.321    0.751
## porc_fallecidos  -3.63251    2.77241  -1.310    0.204
## 
## (Dispersion parameter for quasipoisson family taken to be 42.34134)
## 
##     Null deviance: 636.39  on 24  degrees of freedom
## Residual deviance: 413.13  on 21  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 6

Entonces..

Conclusión: Esto significa que porc_no_lee, porc_no_superior y porc_fallecidos no tienen un efecto estadísticamente significativo sobre el voto a Pedro Castillo en este caso.

Regresión Binomial Negativa:

# Negative binomial regression.
# glm.nb() is called without an offset argument here, so the exposure
# log(Electores) is written into the formula itself.
h1off <- porc_Castillo ~ porc_no_lee + porc_no_superior + porc_fallecidos +
  offset(log(Electores))

rbn <- MASS::glm.nb(formula = h1off, data = dataFinal)
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.255903
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.481594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.105063
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.372405
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.515097
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.066724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.093410
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.645326
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.645611
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.924952
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.693603
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.377151
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.318710
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.495377
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.007339
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.237544
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.551355
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.690356
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.035409
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.086333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.693376
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.529128
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.801035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.537733
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.755512
# Print coefficients, deviances, AIC and the estimated theta of the first
# negative binomial fit.
summary(rbn)
## 
## Call:
## MASS::glm.nb(formula = h1off, data = dataFinal, init.theta = 1.071585369, 
##     link = log)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)   
## (Intercept)       9.20460    8.43520   1.091  0.27518   
## porc_no_lee      -0.06026    0.05736  -1.050  0.29349   
## porc_no_superior -0.16084    0.08731  -1.842  0.06544 . 
## porc_fallecidos  -3.72114    1.27723  -2.913  0.00357 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.0716) family taken to be 1)
## 
##     Null deviance: 34.181  on 24  degrees of freedom
## Residual deviance: 28.105  on 21  degrees of freedom
## AIC: 213.86
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.072 
##           Std. Err.:  0.287 
## 
##  2 x log-likelihood:  -203.862

Conclusión 1:

La variable más significativa es porc_fallecidos, ya que tiene un p-value inferior a 0.05. Esto implica que la proporción de fallecidos tiene una relación inversa y estadísticamente significativa con el voto a Pedro Castillo: a medida que aumenta la proporción de fallecidos, disminuye el número de votos a Castillo.

Poisson 2

Se volvió a correr la regresión Poisson, pero usando las variables positivas:

porc_lee = personas que saben leer

porc_superior = personas con educación superior

porc_fallecidos = fallecidos por COVID (igual que antes)

# Model 2: same response, but with the complementary "positive" indicators
# (can read, has higher education) plus the death percentage.
h2 <- porc_Castillo ~ porc_lee + porc_superior + porc_fallecidos

# Poisson GLM with log link and log(Electores) as offset.
# NOTE(review): as in the first model, the response is a non-integer
# percentage, hence the dpois() warnings — confirm the intended response.
rp2 <- glm(
  formula = h2,
  family = poisson(link = "log"),
  data = dataFinal,
  offset = log(Electores) # exposure
)
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.255903
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.481594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.105063
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.372405
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.515097
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.066724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.093410
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.645326
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.645611
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.924952
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.693603
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.377151
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.318710
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.495377
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.007339
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.237544
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.551355
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.690356
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.035409
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.086333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.693376
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.529128
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.801035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.537733
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.755512
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.255903
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.481594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.105063
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.372405
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.515097
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.066724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.093410
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.645326
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.645611
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.924952
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.693603
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.377151
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.318710
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.495377
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.007339
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.237544
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.551355
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.690356
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.035409
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.086333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.693376
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.529128
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.801035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.537733
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.755512
# Print the coefficient table, deviances and AIC of the second Poisson fit.
summary(rp2)
## 
## Call:
## glm(formula = h2, family = poisson(link = "log"), data = dataFinal, 
##     offset = log(Electores))
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -10.68645    0.86176 -12.401   <2e-16 ***
## porc_lee          0.02835    0.01754   1.616   0.1061    
## porc_superior     0.06125    0.02931   2.089   0.0367 *  
## porc_fallecidos  -3.63251    0.42606  -8.526   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 636.39  on 24  degrees of freedom
## Residual deviance: 413.13  on 21  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 6

La variable más significativa es porc_fallecidos, que tiene un p-value extremadamente bajo (<2e-16), lo que implica que la proporción de fallecidos tiene una relación inversa y altamente significativa con el voto a Pedro Castillo: a medida que aumenta la proporción de fallecidos, disminuye el número de votos a Pedro Castillo. La variable porc_superior también es significativa, pero con un p-value de 0.0367, lo que sugiere que la educación superior tiene una relación directa con el voto a Pedro Castillo.

Dispersión de la 2da Poisson

# Over- and underdispersion: under -> quasi-Poisson; over -> quasi-Poisson
# and negative binomial.

# One-sided dispersion tests on the second Poisson fit; each flag is TRUE
# when the corresponding alternative is supported at the 5% level.
overdispersion <- AER::dispersiontest(rp2, alternative = "greater")$p.value < 0.05
underdispersion <- AER::dispersiontest(rp2, alternative = "less")$p.value < 0.05

# Two-row table (row names come from the variable names) rendered with kable.
testResult <- as.data.frame(rbind(overdispersion, underdispersion))
colnames(testResult) <- "Es probable?"
kableExtra::kable_styling(kable(testResult, caption = "Test de Equidispersión"))
Test de Equidispersión
Es probable?
overdispersion TRUE
underdispersion FALSE

Regresión Quasipoisson 2

# Quasi-Poisson regression for the second specification: same formula and
# offset as rp2, with the dispersion parameter estimated from the data.
rqp2 <- glm(
  formula = h2,
  family = quasipoisson(link = "log"),
  data = dataFinal,
  offset = log(Electores)
)
summary(rqp2)
## 
## Call:
## glm(formula = h2, family = quasipoisson(link = "log"), data = dataFinal, 
##     offset = log(Electores))
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     -10.68645    5.60748  -1.906   0.0705 .
## porc_lee          0.02835    0.11414   0.248   0.8063  
## porc_superior     0.06125    0.19074   0.321   0.7513  
## porc_fallecidos  -3.63251    2.77241  -1.310   0.2043  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 42.34134)
## 
##     Null deviance: 636.39  on 24  degrees of freedom
## Residual deviance: 413.13  on 21  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 6

Ninguna de las variables (porc_lee, porc_superior, porc_fallecidos) es estadísticamente significativa. Esto sugiere que en este modelo específico, las proporciones de personas que saben leer, tienen educación superior, o la proporción de fallecidos no tienen un impacto significativo en el número de votos a Pedro Castillo, en comparación con otros factores en el análisis.

Regresión Binomial Negativa 2:

# Negative binomial regression for the second specification.
# glm.nb() is called without an offset argument here, so the exposure
# log(Electores) is written into the formula itself.
h2off <- porc_Castillo ~ porc_lee + porc_superior + porc_fallecidos +
  offset(log(Electores))

rbn2 <- MASS::glm.nb(formula = h2off, data = dataFinal)
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.255903
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.481594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.105063
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.372405
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.515097
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.066724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.093410
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.645326
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.645611
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.924952
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.693603
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.377151
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.318710
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.495377
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.007339
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.237544
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.551355
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.690356
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.035409
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.086333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.693376
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.529128
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.801035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.537733
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.755512
# Print coefficients, deviances, AIC and the estimated theta of the second
# negative binomial fit.
summary(rbn2)
## 
## Call:
## MASS::glm.nb(formula = h2off, data = dataFinal, init.theta = 1.071585369, 
##     link = log)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -12.90556    2.85120  -4.526    6e-06 ***
## porc_lee          0.06026    0.05736   1.050  0.29349    
## porc_superior     0.16084    0.08731   1.842  0.06544 .  
## porc_fallecidos  -3.72114    1.27723  -2.913  0.00357 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.0716) family taken to be 1)
## 
##     Null deviance: 34.181  on 24  degrees of freedom
## Residual deviance: 28.105  on 21  degrees of freedom
## AIC: 213.86
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.072 
##           Std. Err.:  0.287 
## 
##  2 x log-likelihood:  -203.862

Conclusión 2:

El porc_fallecidos es la variable más significativa, ya que tiene un p-value muy bajo y su coeficiente es negativo, lo que indica que a mayor proporción de fallecidos, menor es el número de votos a Pedro Castillo, de forma inversamente proporcional. Por otro lado, porc_superior muestra una tendencia de aumento en los votos a Castillo, pero la relación no es estadísticamente significativa debido a su p-value marginalmente mayor a 0.05. La variable porc_lee no tiene un impacto significativo en el modelo.

DATA –> https://github.com/yharaaa/Data-ejercicio-11-04


Construcción de variables independientes (VI)

Para analizar la relación entre el nivel educativo y el comportamiento electoral, se construyeron variables agregadas a partir de los registros individuales de la Encuesta Nacional de Hogares (ENAHO). Las variables clave utilizadas fueron P301A (nivel educativo alcanzado) y P302 (si la persona sabe leer y escribir).

A partir de estas variables se generaron las siguientes agrupaciones:

1. Educación superior
Se construyó la variable total_superior, que agrupa a todas las personas que han culminado al menos un nivel de educación superior. Esto incluye los siguientes códigos de la variable P301A:
- 8: Superior no universitaria completa
- 10: Superior universitaria completa
- 11: Maestría o Doctorado

2. Sin educación superior
La variable total_no_superior incluye a todas las personas que no tienen estudios superiores completos. Se agruparon los siguientes códigos de P301A:
- 1 a 7 y 9: desde sin nivel hasta educación superior incompleta (no universitaria o universitaria)

3. Sabe leer y escribir
La variable total_lee corresponde a todas las personas que respondieron “Sí” (código 1) en la variable P302.

4. No sabe leer y escribir
La variable total_no_lee incluye a quienes respondieron “No” (código 2) en la variable P302.

Los valores perdidos (9) fueron tratados como casos faltantes y no considerados en el análisis.

Finalmente, estas variables fueron agregadas a nivel departamental para que pudieran ser usadas como predictores en los modelos de regresión.


Fuentes:

Fallecidos: https://www.datosabiertos.gob.pe/dataset/fallecidos-por-covid-19-ministerio-de-salud-minsa/resource/4b7636f3-5f0c-4404-8526

Educación ENAHO: https://proyectos.inei.gob.pe/microdatos/

Votos wikipedia: https://es.wikipedia.org/wiki/Elecciones_generales_de_Per%C3%BA_de_2021

# Export the final modelling table as CSV without the row-name column.
# NOTE(review): the destination is an absolute path on one user's machine;
# the script will fail elsewhere unless that directory exists.
write.csv(
  x = dataFinal,
  file = "C:/Users/YHARA/OneDrive/Desktop/R/dataFinal.csv",
  row.names = FALSE
)