library(rio)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the three input tables from the working directory:
#   DataV.csv   - per-department vote figures (Castillo, Fujimori,
#                 Participación, Electores)
#   dataEDU.csv - UBIGEO-keyed table (presumably the education counts; confirm)
#   Fc.csv      - UBIGEO-keyed table (presumably the deaths counts; confirm)
dataVotos <- rio::import(file = "DataV.csv")
dataE <- rio::import(file = "dataEDU.csv")
dataFC <- rio::import(file = "Fc.csv")
# Join the two UBIGEO-keyed tables, keeping every row of dataE.
temp1 <- merge(dataE, dataFC, by = "UBIGEO", all.x = TRUE)

# Drop every row with a missing value. This also removes the dataE rows that
# found no match in dataFC, since the left join filled them with NA.
temp1 <- na.omit(temp1)

# Aggregate to department level: sum every numeric column within each
# DEPARTAMENTO. The anonymous function replaces `across(..., sum, na.rm = TRUE)`,
# whose `...` argument is deprecated as of dplyr 1.1.0.
# NOTE(review): where(is.numeric) also sums identifier columns if they were
# imported as numbers (e.g. UBIGEO) - confirm those sums are never used.
data_dep <- temp1 %>%
  group_by(DEPARTAMENTO) %>%
  summarise(
    across(where(is.numeric), \(x) sum(x, na.rm = TRUE)),
    .groups = "drop"
  )
# Remove the 19th row of the department table.
# NOTE(review): the row is selected by position, so it silently targets a
# different department if the input data or the sort order changes. Consider
# filtering on the DEPARTAMENTO value instead - confirm which department this
# is meant to exclude.
data_dep <- data_dep[-19, ]

# The merge with dataVotos that used to sit here was removed: it ran while the
# vote columns were still character strings and its result was overwritten by
# the later merge of data_unida with dataVotos before ever being used.

# Inspect the raw vote table: all vote columns arrive as character strings.
str(dataVotos)
## 'data.frame': 25 obs. of 5 variables:
## $ Castillo : chr "34 464" "110 620" "88 812" "256 224" ...
## $ Fujimori : chr "17 815" "67 394" "10 879" "40 216" ...
## $ Participación: chr "184 057" "613 850" "219 260" "902 243" ...
## $ Electores : chr "306 186" "886 265" "316 000" "1 145 268" ...
## $ DEPARTAMENTO : chr "AMAZONAS" "ANCASH" "APURIMAC" "AREQUIPA" ...
# Turn the vote columns from text such as "1 145 268" into numbers: delete the
# plain-space thousands separators, then coerce. Any cell that still is not a
# valid number after that becomes NA (as.numeric() warns when this happens).
dataVotos <- mutate(
  dataVotos,
  across(
    .cols = -DEPARTAMENTO, # every column except the department name
    .fns = \(col) as.numeric(gsub(" ", "", col))
  )
)
## Warning: There were 4 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(...)`.
## Caused by warning:
## ! NAs introducidos por coerción
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 3 remaining warnings.
# Overwrite the four vote figures of row 15 with hand-entered values.
# NOTE(review): presumably this repairs the cells that the numeric conversion
# turned into NA, and row 15 is presumably the LIMA row - confirm both. The
# row is addressed by position, so a reordered input file would patch the
# wrong department; selecting by DEPARTAMENTO would be safer.
dataVotos[["Castillo"]][15] <- 416743
dataVotos[["Fujimori"]][15] <- 754216
dataVotos[["Participación"]][15] <- 6206220
dataVotos[["Electores"]][15] <- 8322644
# Fold "LIMA METROPOLITANA" into "LIMA" so both rows share one department key.
is_lima_metro <- data_dep$DEPARTAMENTO == "LIMA METROPOLITANA"
data_dep$DEPARTAMENTO[is_lima_metro] <- "LIMA"

# Re-aggregate so the two LIMA rows collapse into a single one (every numeric
# column is summed per department). The anonymous function replaces the
# `across(..., sum, na.rm = TRUE)` form deprecated as of dplyr 1.1.0.
data_unida <- data_dep %>%
  group_by(DEPARTAMENTO) %>%
  summarise(
    across(where(is.numeric), \(x) sum(x, na.rm = TRUE)),
    .groups = "drop"
  )
-Data final
# Build the analysis table: attach the vote figures to the department-level
# aggregates (left join on DEPARTAMENTO, keeping every row of data_unida) and
# derive the percentage variables used by the models.
# `all.x = TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
# NOTE(review): the vote and deaths percentages use Electores (not
# Participación) as the denominator - confirm this is intended.
dataFinal <- merge(data_unida, dataVotos, by = "DEPARTAMENTO", all.x = TRUE) %>%
  mutate(
    # Vote shares, as a percentage of Electores
    porc_Castillo = (Castillo / Electores) * 100,
    porc_Fujimori = (Fujimori / Electores) * 100,
    # Education: share with / without higher education
    total_educacion = total_superior + total_no_superior,
    porc_superior = (total_superior / total_educacion) * 100,
    porc_no_superior = (total_no_superior / total_educacion) * 100,
    # Literacy: share that can / cannot read
    total_alfabetismo = total_lee + total_no_lee,
    porc_lee = (total_lee / total_alfabetismo) * 100,
    porc_no_lee = (total_no_lee / total_alfabetismo) * 100,
    # Deaths, as a percentage of Electores
    porc_fallecidos = (total_fallecidos / Electores) * 100
  )
Porcentaje de personas que no saben leer
Porcentaje de personas sin educación superior
Porcentaje de fallecidos durante la pandemia
library(modelsummary)
## Warning: package 'modelsummary' was built under R version 4.4.2
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
# Model 1: Poisson regression (log link) of Castillo's vote percentage on the
# "negative" predictors, with log(Electores) as exposure offset.
# NOTE(review): porc_Castillo is a percentage already divided by Electores, so
# the response is non-integer (hence the `non-integer x` warnings from dpois)
# and the electorate enters twice, once in the response and once in the
# offset. Modelling the raw count `Castillo` with this same offset may be what
# was intended - confirm before interpreting the coefficients.
h1 <- porc_Castillo ~ porc_no_lee + porc_no_superior + porc_fallecidos
rp1 <- glm(
  formula = h1,
  family = poisson(link = "log"),
  data = dataFinal,
  offset = log(Electores) # exposure
)
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.255903
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.481594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.105063
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.372405
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.515097
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.066724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.093410
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.645326
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.645611
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.924952
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.693603
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.377151
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.318710
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.495377
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.007339
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.237544
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.551355
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.690356
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.035409
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.086333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.693376
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.529128
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.801035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.537733
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.755512
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.255903
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.481594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.105063
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.372405
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.515097
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.066724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.093410
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.645326
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.645611
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.924952
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.693603
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.377151
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.318710
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.495377
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.007339
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.237544
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.551355
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.690356
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.035409
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.086333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.693376
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.529128
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.801035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.537733
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.755512
# Print coefficients, deviances and AIC of the Poisson fit. The AIC is
# reported as Inf because dpois() assigns zero probability to the non-integer
# response values.
summary(rp1)
##
## Call:
## glm(formula = h1, family = poisson(link = "log"), data = dataFinal,
## offset = log(Electores))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.72696 2.82012 -0.612 0.5403
## porc_no_lee -0.02835 0.01754 -1.616 0.1061
## porc_no_superior -0.06125 0.02931 -2.089 0.0367 *
## porc_fallecidos -3.63251 0.42606 -8.526 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 636.39 on 24 degrees of freedom
## Residual deviance: 413.13 on 21 degrees of freedom
## AIC: Inf
##
## Number of Fisher Scoring iterations: 6
Exponenciamos para interpretar
#cbind(exp(coef(rp1)),exp(confint(rp1)))
Primeras conclusiones:
La variable más significativa es “porc_fallecidos”. Tiene una relación inversa con el voto a Pedro Castillo, lo que significa que, a mayor porcentaje de fallecidos, menor es el número de votos a Castillo. La variable “porc_no_superior” también muestra una relación negativa, pero con menos fuerza, mientras que “porc_no_lee” no tiene un efecto estadísticamente significativo.
# Over y underdispersión: under → quasipoisson; over → quasipoisson y binomial negativa
library(magrittr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.4.2
##
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Equidispersion check on the Poisson fit rp1 with AER::dispersiontest():
# each flag is TRUE when the corresponding one-sided test rejects at the 5%
# level.
p_over <- AER::dispersiontest(rp1, alternative = "greater")$p.value
p_under <- AER::dispersiontest(rp1, alternative = "less")$p.value
overdispersion <- p_over < 0.05
underdispersion <- p_under < 0.05

# One row per flag, a single column headed "Es probable?", rendered as a
# styled table.
testResult <- as.data.frame(rbind(overdispersion, underdispersion))
names(testResult) <- "Es probable?"
kableExtra::kable_styling(
  kable(testResult, caption = "Test de Equidispersión")
)
| Es probable? | |
|---|---|
| overdispersion | TRUE |
| underdispersion | FALSE |
# Quasi-Poisson regression: same formula, offset and link as rp1, but the
# dispersion parameter is estimated from the data instead of being fixed at 1,
# which changes the standard errors and not the coefficient estimates.
rqp <- glm(
  formula = h1,
  family = quasipoisson(link = "log"),
  data = dataFinal,
  offset = log(Electores)
)
summary(rqp)
##
## Call:
## glm(formula = h1, family = quasipoisson(link = "log"), data = dataFinal,
## offset = log(Electores))
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.72696 18.35060 -0.094 0.926
## porc_no_lee -0.02835 0.11414 -0.248 0.806
## porc_no_superior -0.06125 0.19074 -0.321 0.751
## porc_fallecidos -3.63251 2.77241 -1.310 0.204
##
## (Dispersion parameter for quasipoisson family taken to be 42.34134)
##
## Null deviance: 636.39 on 24 degrees of freedom
## Residual deviance: 413.13 on 21 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
Entonces..
Conclusión: Esto significa que porc_no_lee, porc_no_superior y porc_fallecidos no tienen un efecto estadísticamente significativo sobre el voto a Pedro Castillo en este caso.
# Negative binomial regression.
# MASS::glm.nb() is called without an `offset` argument here, so the exposure
# is written into the formula as offset(log(Electores)); the predictors are
# the same as in h1.
# NOTE(review): the response is still the non-integer percentage
# porc_Castillo (see the `non-integer x` warnings) - confirm that a count
# model on a percentage is intended.
h1off <- porc_Castillo ~ porc_no_lee + porc_no_superior + porc_fallecidos +
  offset(log(Electores))
rbn <- MASS::glm.nb(formula = h1off, data = dataFinal)
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.255903
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.481594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.105063
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.372405
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.515097
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.066724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.093410
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.645326
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.645611
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.924952
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.693603
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.377151
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.318710
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.495377
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.007339
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.237544
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.551355
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.690356
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.035409
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.086333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.693376
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.529128
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.801035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.537733
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.755512
# Print coefficients, deviances, AIC and the estimated theta of the negative
# binomial fit.
summary(rbn)
##
## Call:
## MASS::glm.nb(formula = h1off, data = dataFinal, init.theta = 1.071585369,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 9.20460 8.43520 1.091 0.27518
## porc_no_lee -0.06026 0.05736 -1.050 0.29349
## porc_no_superior -0.16084 0.08731 -1.842 0.06544 .
## porc_fallecidos -3.72114 1.27723 -2.913 0.00357 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.0716) family taken to be 1)
##
## Null deviance: 34.181 on 24 degrees of freedom
## Residual deviance: 28.105 on 21 degrees of freedom
## AIC: 213.86
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.072
## Std. Err.: 0.287
##
## 2 x log-likelihood: -203.862
La variable más significativa es porc_fallecidos, ya que tiene un p-value inferior a 0.05. Esto implica que la proporción de fallecidos tiene una relación inversa y estadísticamente significativa con el voto a Pedro Castillo: a medida que aumenta la proporción de fallecidos, disminuye el número de votos a Castillo.
Se volvió a correr la regresión Poisson, pero usando las variables positivas:
porc_lee = personas que saben leer
porc_superior = personas con educación superior
porc_fallecidos = fallecidos por COVID (igual que antes)
# Model 2: the same Poisson specification as rp1, now with the "positive"
# predictors porc_lee and porc_superior.
# NOTE(review): these two are the complements (100 - x) of porc_no_lee and
# porc_no_superior, so this fit is a reparameterisation of rp1: the slopes
# only change sign, the intercept shifts, and the deviance is identical. It
# adds no independent evidence. The percentage-response caveat of rp1 applies
# here as well.
h2 <- porc_Castillo ~ porc_lee + porc_superior + porc_fallecidos
rp2 <- glm(
  formula = h2,
  family = poisson(link = "log"),
  data = dataFinal,
  offset = log(Electores) # exposure
)
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.255903
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.481594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.105063
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.372405
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.515097
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.066724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.093410
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.645326
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.645611
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.924952
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.693603
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.377151
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.318710
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.495377
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.007339
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.237544
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.551355
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.690356
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.035409
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.086333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.693376
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.529128
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.801035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.537733
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.755512
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.255903
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.481594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.105063
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.372405
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.515097
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.066724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.093410
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.645326
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.645611
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.924952
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.693603
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.377151
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.318710
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.495377
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.007339
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.237544
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.551355
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.690356
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.035409
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.086333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.693376
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.529128
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.801035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.537733
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.755512
# Print coefficients, deviances and AIC of the second Poisson fit. As with
# rp1, the AIC is Inf because the response values are not integers.
summary(rp2)
##
## Call:
## glm(formula = h2, family = poisson(link = "log"), data = dataFinal,
## offset = log(Electores))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.68645 0.86176 -12.401 <2e-16 ***
## porc_lee 0.02835 0.01754 1.616 0.1061
## porc_superior 0.06125 0.02931 2.089 0.0367 *
## porc_fallecidos -3.63251 0.42606 -8.526 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 636.39 on 24 degrees of freedom
## Residual deviance: 413.13 on 21 degrees of freedom
## AIC: Inf
##
## Number of Fisher Scoring iterations: 6
La variable más significativa es porc_fallecidos, que tiene un p-value extremadamente bajo (<2e-16), lo que implica que la proporción de fallecidos tiene una relación inversa y altamente significativa con el voto a Pedro Castillo: a medida que aumenta la proporción de fallecidos, disminuye el número de votos a Pedro Castillo. La variable porc_superior también es significativa, pero con un p-value de 0.0367, lo que sugiere que la educación superior tiene una relación directa con el voto a Pedro Castillo.
# Over y underdispersión: under → quasipoisson; over → quasipoisson y binomial negativa
# Equidispersion check on the Poisson fit rp2 with AER::dispersiontest():
# each flag is TRUE when the corresponding one-sided test rejects at the 5%
# level. The flags and testResult from the rp1 check are overwritten.
p_over <- AER::dispersiontest(rp2, alternative = "greater")$p.value
p_under <- AER::dispersiontest(rp2, alternative = "less")$p.value
overdispersion <- p_over < 0.05
underdispersion <- p_under < 0.05

# One row per flag, a single column headed "Es probable?", rendered as a
# styled table.
testResult <- as.data.frame(rbind(overdispersion, underdispersion))
names(testResult) <- "Es probable?"
kableExtra::kable_styling(
  kable(testResult, caption = "Test de Equidispersión")
)
| Es probable? | |
|---|---|
| overdispersion | TRUE |
| underdispersion | FALSE |
# Quasi-Poisson regression for the second specification: same formula, offset
# and link as rp2, with the dispersion parameter estimated from the data
# instead of fixed at 1.
rqp2 <- glm(
  formula = h2,
  family = quasipoisson(link = "log"),
  data = dataFinal,
  offset = log(Electores)
)
summary(rqp2)
##
## Call:
## glm(formula = h2, family = quasipoisson(link = "log"), data = dataFinal,
## offset = log(Electores))
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.68645 5.60748 -1.906 0.0705 .
## porc_lee 0.02835 0.11414 0.248 0.8063
## porc_superior 0.06125 0.19074 0.321 0.7513
## porc_fallecidos -3.63251 2.77241 -1.310 0.2043
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 42.34134)
##
## Null deviance: 636.39 on 24 degrees of freedom
## Residual deviance: 413.13 on 21 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
Ninguna de las variables (porc_lee, porc_superior, porc_fallecidos) es estadísticamente significativa. Esto sugiere que en este modelo específico, las proporciones de personas que saben leer, tienen educación superior, o la proporción de fallecidos no tienen un impacto significativo en el número de votos a Pedro Castillo, en comparación con otros factores en el análisis.
# Negative binomial regression for the second specification.
# The exposure is written into the formula as offset(log(Electores)); the
# predictors are the same as in h2.
# NOTE(review): the response is still the non-integer percentage
# porc_Castillo - confirm that a count model on a percentage is intended.
h2off <- porc_Castillo ~ porc_lee + porc_superior + porc_fallecidos +
  offset(log(Electores))
rbn2 <- MASS::glm.nb(formula = h2off, data = dataFinal)
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.255903
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.481594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.105063
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.372405
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.515097
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.066724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.093410
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.645326
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.645611
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.924952
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.693603
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.377151
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.318710
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.495377
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.007339
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.237544
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.551355
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.690356
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.035409
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.086333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.693376
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.529128
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.801035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.537733
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.755512
# Print coefficients, deviances, AIC and the estimated theta of the second
# negative binomial fit.
summary(rbn2)
##
## Call:
## MASS::glm.nb(formula = h2off, data = dataFinal, init.theta = 1.071585369,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.90556 2.85120 -4.526 6e-06 ***
## porc_lee 0.06026 0.05736 1.050 0.29349
## porc_superior 0.16084 0.08731 1.842 0.06544 .
## porc_fallecidos -3.72114 1.27723 -2.913 0.00357 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.0716) family taken to be 1)
##
## Null deviance: 34.181 on 24 degrees of freedom
## Residual deviance: 28.105 on 21 degrees of freedom
## AIC: 213.86
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.072
## Std. Err.: 0.287
##
## 2 x log-likelihood: -203.862
La variable porc_fallecidos es la más significativa, ya que tiene un p-value muy bajo y su coeficiente es negativo, lo que indica una relación inversa: a mayor proporción de fallecidos, menor es el número de votos a Pedro Castillo. Por otro lado, porc_superior muestra una tendencia de aumento en los votos a Castillo, pero la relación no es estadísticamente significativa debido a su p-value marginalmente mayor a 0.05. La variable porc_lee no tiene un impacto significativo en el modelo.
DATA –> https://github.com/yharaaa/Data-ejercicio-11-04
Construcción de variables independientes (VI)
Para analizar la relación entre el nivel educativo y el comportamiento electoral, se construyeron variables agregadas a partir de los registros individuales de la Encuesta Nacional de Hogares (ENAHO). Las variables clave utilizadas fueron P301A (nivel educativo alcanzado) y P302 (sabe leer y escribir).
A partir de estas variables se generaron las siguientes agrupaciones:
1. Educación superior
Se construyó la variable total_superior, que agrupa a todas
las personas que han culminado al menos un nivel de educación superior.
Esto incluye los siguientes códigos de la variable P301A:
- 8: Superior no universitaria completa
- 10: Superior universitaria completa
- 11: Maestría o Doctorado
2. Sin educación superior
La variable total_no_superior incluye a todas las personas
que no tienen estudios superiores completos. Se agruparon los siguientes
códigos de P301A:
- 1 a 7 y 9: desde sin nivel
hasta educación superior incompleta (no universitaria o
universitaria)
3. Sabe leer y escribir
La variable total_lee corresponde a todas las personas que
respondieron “Sí” (código 1) en la variable P302.
4. No sabe leer y escribir
La variable total_no_lee incluye a quienes respondieron
“No” (código 2) en la variable P302.
Los valores perdidos (9) fueron tratados como casos
faltantes y no considerados en el análisis.
Finalmente, estas variables fueron agregadas a nivel departamental para que pudieran ser usadas como predictores en los modelos de regresión.
Fuentes:
Educación ENAHO: https://proyectos.inei.gob.pe/microdatos/
Votos wikipedia: https://es.wikipedia.org/wiki/Elecciones_generales_de_Per%C3%BA_de_2021
# Export the final analysis table as CSV, without the row-name column.
# NOTE(review): the destination is an absolute path on one user's machine, so
# this line fails anywhere else - consider a path relative to the project.
write.csv(
  x = dataFinal,
  file = "C:/Users/YHARA/OneDrive/Desktop/R/dataFinal.csv",
  row.names = FALSE
)