1406

# NOTE(review): hard-coded absolute working directory; the relative file names
# passed to import() and write_xlsx() in this script resolve against it.
setwd("C:/Trabajo grupal Tucaloza y Valentini/F00017014-Latinobarometro_2023_Rdata_v1_0 (1)")

# Load the survey data set into `data1`

library(rio)
data1 = import("Latinobarometro2023.rdata")

# Recode the country identifier as an unordered factor with readable names.
# NOTE(review): labels are paired with the sorted levels by position only;
# confirm against the codebook that the codes sort in this country order.
data1$idenpa <- as.factor(data1$idenpa)
data1$idenpa <- factor(
  data1$idenpa,
  levels = levels(data1$idenpa),
  labels = c(
    "Argentina", "Bolivia", "Brasil", "Colombia", "Costa Rica", "Chile",
    "Ecuador", "El Salvador", "Guatemala", "Honduras", "México", "Nicaragua",
    "Panamá", "Paraguay", "Perú", "Uruguay", "Venezuela"
  ),
  ordered = FALSE
)

Filtramos para tener solamente los datos de PERÚ

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Keep only the rows whose country label is "Perú".
peru <- filter(data1, idenpa == "Perú")

# Normalise an upper-case interview-number column to "numentre", which is the
# name the later column selections in this script use. The previous
# replacement ("NumEntre") produced a name none of those selections match.
# The pattern is anchored so no other column name is touched.
names(peru) <- sub("^NUMENTRE$", "numentre", names(peru))

Ahora: Renombramos las variables y nos quedamos con los datos de nuestro interés

# INDEPENDENT VARIABLES: satisfaction with democracy

names(peru) <- gsub("P11STGBS.A", "satis_dem", names(peru))

table(peru$satis_dem)

## 
##  -2  -1   1   2   3   4 
##   4  12  46  49 577 512

library(dplyr)
# Keep only the rows whose satis_dem code is 1, 2, 3 or 4.
peru <- filter(peru, satis_dem %in% 1:4)

table(peru$satis_dem)

## 
##   1   2   3   4 
##  46  49 577 512

Creencias

# Only the "P10STGB" part of the name is replaced; the filter below uses the
# resulting column name, creenciasS.
names(peru) <- gsub("P10STGB", "creencias", names(peru))

library(dplyr)
# Keep only the rows whose belief code is 1, 2 or 3.
peru <- filter(peru, creenciasS %in% 1:3)

# CONTROL VARIABLES: ethnicity

# The pattern is anchored so that only the column named exactly "S7" is
# renamed. An unanchored "S7" would also rewrite any other column whose name
# merely contains those two characters.
names(peru) <- sub("^S7$", "etnia", names(peru))

library(dplyr)
# Keep only the rows whose ethnicity code is 2, 3, 4 or 6.
peru <- peru %>%
  filter(etnia %in% c(2, 3, 4, 6))
table(peru$etnia)

## 
##   2   3   4   6 
##  30  80 857  61

# SOCIAL CLASS - STRATUM

# The pattern is anchored so that only the column named exactly "S2" is
# renamed. An unanchored "S2" would also rewrite any other column whose name
# merely contains those two characters.
names(peru) <- sub("^S2$", "estrato", names(peru))

library(dplyr)
# Keep only the rows whose stratum code is 1 to 5.
peru <- peru %>%
  filter(estrato %in% c(1, 2, 3, 4, 5))
table(peru$estrato)

## 
##   1   2   3   4   5 
##  22  58 428 332 171

# POLITICAL PARTY - FAMPART
# This variable has many categories, so further down it is collapsed into
# 1 = identifies with a political party, 0 = identifies with none.

names(peru) <- gsub("fampart", "partido_pol", names(peru))

table(peru$partido_pol)

## 
##   0  20  30  40  50  60  70  80  98 
## 821  15   8  34  29  89  11   2   2

library(dplyr)
# Keep only the rows whose party code is one of the listed values.
peru <- filter(peru, partido_pol %in% c(0, seq(20, 80, by = 10), 98))

table(peru$partido_pol)

## 
##   0  20  30  40  50  60  70  80  98 
## 821  15   8  34  29  89  11   2   2

Ya que esta variable tiene muchas categorías, haremos una condición: si pertenece a un partido político será 1; de lo contrario, será 0.

# Collapse to a binary indicator: code 0 stays 0, every other code becomes 1.
peru$partido_pol <- ifelse(peru$partido_pol == 0, 0, 1)
table(peru$partido_pol)

## 
##   0   1 
## 821 190

# DEPENDENT VARIABLE - TRUST IN INSTITUTIONS
# Give the four P13ST items used below readable names.

names(peru) <- gsub("P13ST.D", "Congreso", names(peru))

names(peru) <- gsub("P13ST.E", "Gobierno", names(peru))

names(peru) <- gsub("P13ST.G", "Partidos_Politicos", names(peru))

names(peru) <- gsub("P13ST.I", "Presidente", names(peru))

# MISSING DATA

# Count the NA cells across the whole data frame and report the total.
total_na <- sum(is.na(peru))
print(sprintf("Total de valores NA: %d", total_na))

## [1] "Total de valores NA: 0"

table(peru$Congreso)

## 
##  -2   1   2   3   4 
##   1  11  42 248 709

library(dplyr)
# Keep only the rows whose Congreso code is 1, 2, 3 or 4.
peru <- filter(peru, Congreso %in% 1:4)
table(peru$Congreso)

## 
##   1   2   3   4 
##  11  42 248 709

table(peru$Gobierno)

## 
##  -1   1   2   3   4 
##   1  16  80 298 615

library(dplyr)
# Keep only the rows whose Gobierno code is 1, 2, 3 or 4.
peru <- filter(peru, Gobierno %in% 1:4)
table(peru$Gobierno)

## 
##   1   2   3   4 
##  16  80 298 615

table(peru$Partidos_Politicos)

## 
##  -1   1   2   3   4 
##   1  19  61 266 662

library(dplyr)
# Keep only the rows whose Partidos_Politicos code is 1, 2, 3 or 4.
peru <- filter(peru, Partidos_Politicos %in% 1:4)
table(peru$Partidos_Politicos)

## 
##   1   2   3   4 
##  19  61 266 662

table(peru$Presidente)

## 
##  -1   1   2   3   4 
##   2  17 110 266 613

library(dplyr)
# Keep only the rows whose Presidente code is 1, 2, 3 or 4.
peru <- filter(peru, Presidente %in% 1:4)
table(peru$Presidente)

## 
##   1   2   3   4 
##  17 110 266 613

library(writexl)

# Analysis subset: interview id, the two predictors, the three controls and
# the four trust items.
MIPAIS <- peru[c(
  "numentre", "satis_dem", "creenciasS", "etnia", "estrato", "partido_pol",
  "Congreso", "Gobierno", "Partidos_Politicos", "Presidente"
)]

#EFA 2

names(MIPAIS)

##  [1] "numentre"           "satis_dem"          "creenciasS"        
##  [4] "etnia"              "estrato"            "partido_pol"       
##  [7] "Congreso"           "Gobierno"           "Partidos_Politicos"
## [10] "Presidente"

# Drop the id, predictor and control columns so that DF holds only the
# remaining columns of MIPAIS (the trust items used in the factor analysis).
dontselect <- c(
  "numentre", "satis_dem", "creenciasS",
  "etnia", "estrato", "partido_pol"
)
select <- setdiff(names(MIPAIS), dontselect)
DF <- MIPAIS[select]

# We will use:
library(magrittr)
# Show the first ten rows as a paged table.
rmarkdown::paged_table(head(DF, 10))

# Compute the correlations between all the variables

# DF holds only the four trust items, so every column is coerced in one pass.
DF[] <- lapply(DF, as.numeric)

library(polycor)

# NOTE(review): with all-numeric columns hetcor() reports Pearson
# correlations; confirm that is what is wanted for these 1-4 coded items.
corMatrix <- polycor::hetcor(DF)$correlations

round(corMatrix, 2)

##                    Congreso Gobierno Partidos_Politicos Presidente
## Congreso               1.00     0.59               0.46       0.45
## Gobierno               0.59     1.00               0.45       0.64
## Partidos_Politicos     0.46     0.45               1.00       0.37
## Presidente             0.45     0.64               0.37       1.00

library(ggcorrplot)

## Loading required package: ggplot2

ggcorrplot(corMatrix)

library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## The following object is masked from 'package:polycor':
## 
##     polyserial

psych::KMO(corMatrix)

## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.75
## MSA for each item = 
##           Congreso           Gobierno Partidos_Politicos         Presidente 
##               0.78               0.70               0.84               0.74

cortest.bartlett(corMatrix,n=nrow(DF))$p.value>0.05

## [1] FALSE

library(matrixcalc)

is.singular.matrix(corMatrix)

## [1] FALSE

fa.parallel(DF, fa = 'fa',correct = T,plot = F)

## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

library(GPArotation)

## 
## Attaching package: 'GPArotation'

## The following objects are masked from 'package:psych':
## 
##     equamax, varimin

# Single-factor minres solution on the trust items, then print its loadings.
# NOTE(review): the parallel analysis output printed earlier suggests 2
# factors while this fit requests 1; confirm the choice is deliberate.
resfa <- fa(
  DF,
  nfactors = 1,
  fm = "minres",
  rotate = "varimax", # oblimin?
  cor = "mixed"
)
print(resfa$loadings)

## 
## Loadings:
##                    MR1  
## Congreso           0.811
## Gobierno           0.923
## Partidos_Politicos 0.676
## Presidente         0.773
## 
##                  MR1
## SS loadings    2.562
## Proportion Var 0.641

sort(resfa$communality)

## Partidos_Politicos         Presidente           Congreso           Gobierno 
##          0.4568818          0.5971540          0.6571722          0.8510519

# Preview of the first factor scores (kept for inspection only).
regresFactors <- head(as.data.frame(resfa$scores))

# resfa$scores is a one-column matrix. Assigning it directly embeds a matrix
# inside the data frame, which prints under its own "MR1" header instead of
# the column name. Store a plain numeric vector so the column behaves like
# any other.
MIPAIS$confianza_instituciones <- as.numeric(resfa$scores[, 1])

head(MIPAIS)

##       numentre satis_dem creenciasS etnia estrato partido_pol Congreso Gobierno
## 15605      541         3          1     4       3           0        3        3
## 15606      904         3          1     4       3           1        3        3
## 15607      853         4          2     4       5           0        4        4
## 15609       32         4          1     3       4           0        4        4
## 15610      854         4          1     4       5           1        4        4
## 15611       94         4          1     4       3           0        4        4
##       Partidos_Politicos Presidente        MR1
## 15605                  3          3 -0.9947508
## 15606                  3          3 -0.9947508
## 15607                  4          4  0.8399561
## 15609                  4          4  0.8399561
## 15610                  4          4  0.8399561
## 15611                  4          4  0.8399561

# Shorten the factor-score column name.
names(MIPAIS) <- gsub("confianza_instituciones", "conf_inst", names(MIPAIS))

# Rescale trust in institutions

# Observed minimum and maximum of the factor score.
range(MIPAIS$conf_inst, na.rm = TRUE)

## [1] -4.6641645  0.8399561

# Split the factor score into 10 intervals and store the interval number
# (1 to 10) as a numeric column; the minimum value is included in interval 1.
# NOTE(review): higher intervals correspond to higher raw item codes; confirm
# in the codebook whether a higher code means more or less trust.
MIPAIS$confianza <- as.numeric(
  cut(MIPAIS$conf_inst, breaks = 10, labels = FALSE, include.lowest = TRUE)
)

# Check the first records to make sure the recoding worked
head(MIPAIS[, c("conf_inst", "confianza")])

##              MR1 confianza
## 15605 -0.9947508         7
## 15606 -0.9947508         7
## 15607  0.8399561        10
## 15609  0.8399561        10
## 15610  0.8399561        10
## 15611  0.8399561        10

table(MIPAIS$confianza)

## 
##   1   2   3   4   5   6   7   8   9  10 
##   5   1   5  22  26  37 121 105 151 533

# Binary flag stored as text: "1" when the 1-10 score is 7 or more, "0"
# otherwise.

MIPAIS$confia <- ifelse(MIPAIS$confianza < 7, "0", "1")

head(MIPAIS[, c("confianza", "confia")])

##       confianza confia
## 15605         7      1
## 15606         7      1
## 15607        10      1
## 15609        10      1
## 15610        10      1
## 15611        10      1

library(writexl); write_xlsx(MIPAIS, "MiPais-Completo.xlsx")

library(writexl)

# Final data set: id, predictors, controls, the 1-10 score and the binary flag.
DATAFINAL <- MIPAIS[c(
  "numentre", "satis_dem", "creenciasS", "etnia", "estrato", "partido_pol",
  "confianza", "confia"
)]

Descargamos nuestra data final para realizar nuestras regresiones

write_xlsx(DATAFINAL, "perufinal.xlsx")

#REGRESIÓN LINEAL MULTIPLE

library(rio)
PERU= import("perufinal.xlsx")

str(PERU)

## 'data.frame':    1006 obs. of  8 variables:
##  $ numentre   : num  541 904 853 32 854 ...
##  $ satis_dem  : num  3 3 4 4 4 4 3 3 2 4 ...
##  $ creenciasS : num  1 1 2 1 1 1 2 3 1 1 ...
##  $ etnia      : num  4 4 4 3 4 4 3 4 4 4 ...
##  $ estrato    : num  3 3 5 4 5 3 4 4 3 4 ...
##  $ partido_pol: num  0 1 0 0 1 0 0 0 0 1 ...
##  $ confianza  : num  7 7 10 10 10 10 7 6 7 10 ...
##  $ confia     : chr  "1" "1" "1" "1" ...

#Hacemos un summary de nuestra variable de interes “Confianza”

summary(PERU$confianza)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   8.000  10.000   8.785  10.000  10.000

library(DescTools)

## 
## Attaching package: 'DescTools'

## The following objects are masked from 'package:psych':
## 
##     AUC, ICC, SD

# Descriptive statistics for the trust score: the summary() values plus the
# standard deviation, skewness, kurtosis and coefficient of variation.
allStats <- with(PERU, c(
  summary(confianza),
  sd = sd(confianza),
  skew = Skew(confianza),
  kurt = Kurt(confianza),
  cv = CoefVar(confianza)
))
allStats

##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max.         sd 
##  1.0000000  8.0000000 10.0000000  8.7852883 10.0000000 10.0000000  1.7056016 
##       skew       kurt         cv 
## -1.6113072  2.5934197  0.1941429

library(ggplot2)

# Density-scaled histogram of the trust score, overlaid with a normal curve
# that uses the mean and standard deviation stored in allStats.
base <- ggplot(PERU, aes(x = confianza))
histogram <- base +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 10,
    fill = "pink",
    colour = 1
  ) +
  stat_function(
    fun = dnorm,
    args = list(mean = allStats["Mean"], sd = allStats["sd"]),
    col = "red"
  )

histogram

#PARA CORRELACIÓN, VERIFICAMOS QUE NUESTRAS VARIABLES SEAN NUMERICAS

#SATISFACCIÓN DE LA DEMOCRACIA

PERU$satis_dem=as.numeric(PERU$satis_dem)
class(PERU$satis_dem)

## [1] "numeric"

#HACEMOS EL CORTEST

library(ggplot2)
cor_test_result <- cor.test(PERU$confianza, PERU$satis_dem)
cor_test_result

## 
##  Pearson's product-moment correlation
## 
## data:  PERU$confianza and PERU$satis_dem
## t = 12.425, df = 1004, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3102457 0.4174425
## sample estimates:
##       cor 
## 0.3650534

#CREENCIAS

PERU$creenciasS = as.numeric(PERU$creenciasS) #Ya es numérica

#realizamos el cortest 
library(ggplot2)
cor_test_result2 <- cor.test(PERU$confianza, PERU$creenciasS)
cor_test_result2

## 
##  Pearson's product-moment correlation
## 
## data:  PERU$confianza and PERU$creenciasS
## t = 3.4981, df = 1004, p-value = 0.0004891
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04825259 0.17038541
## sample estimates:
##       cor 
## 0.1097332

library(corrplot)

## corrplot 0.92 loaded

# Select the specific columns
selected_columns <- PERU[, c("satis_dem", "confianza", "creenciasS")]

# Keep only the numeric columns. vapply() guarantees a logical mask whatever
# the input, and drop = FALSE keeps a data frame even when a single column
# survives, so cor() always receives tabular input.
selected_columns <- selected_columns[
  ,
  vapply(selected_columns, is.numeric, logical(1)),
  drop = FALSE
]

# Correlation matrix of the selected columns, using complete rows only
cor_matrix <- cor(selected_columns, use = "complete.obs")

# Show the correlation matrix
print(cor_matrix)

##            satis_dem confianza creenciasS
## satis_dem  1.0000000 0.3650534  0.1224376
## confianza  0.3650534 1.0000000  0.1097332
## creenciasS 0.1224376 0.1097332  1.0000000

# Opcional: Visualiza la matriz de correlación usando corrplot
corrplot(cor_matrix, method = "color", tl.col = "black", tl.srt = 45)

#GRAFICOS DE CORRELACIÓN CON ANOVA

Tenemos 1 variable dependiente, que es CONFIANZA EN LAS INSTITUCIONES, y las dos variables independientes, CREENCIAS y SATIS_DEM, son CATEGÓRICAS. En ese caso utilizaremos una prueba de ANOVA para ver la diferencia de medias entre los grupos.

# ANOVA for satisfaction with democracy.
# The predictor is wrapped in factor() so that aov() compares the mean trust
# score across the satisfaction categories. Left as a numeric column it is
# fitted as a single linear slope with 1 degree of freedom, which is not a
# comparison of group means.
anova_satis_dem <- aov(confianza ~ factor(satis_dem), data = PERU)
summary(anova_satis_dem)

##               Df Sum Sq Mean Sq F value Pr(>F)    
## satis_dem      1  389.6   389.6   154.4 <2e-16 ***
## Residuals   1004 2534.0     2.5                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# ANOVA for beliefs.
# The predictor is wrapped in factor() so that aov() compares the mean trust
# score across the belief categories. Left as a numeric column it is fitted
# as a single linear slope with 1 degree of freedom, which is not a
# comparison of group means.
anova_creenciasS <- aov(confianza ~ factor(creenciasS), data = PERU)
summary(anova_creenciasS)

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## creenciasS     1   35.2   35.20   12.24 0.000489 ***
## Residuals   1004 2888.4    2.88                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#PARA GRAFICOS

# Mean trust score and its standard error within each category
df_summary_satis <- summarise(
  group_by(PERU, satis_dem),
  mean_confianza = mean(confianza, na.rm = TRUE),
  se_confianza = sd(confianza, na.rm = TRUE) / sqrt(n())
)

df_summary_creencias <- summarise(
  group_by(PERU, creenciasS),
  mean_confianza = mean(confianza, na.rm = TRUE),
  se_confianza = sd(confianza, na.rm = TRUE) / sqrt(n())
)

# Bar chart for satisfaction with democracy: one bar per category, error bars
# of one standard error, y axis from 0 to 10% above the tallest error bar.
ggplot(df_summary_satis, aes(x = satis_dem, y = mean_confianza)) +
  geom_col(fill = "skyblue") +
  geom_errorbar(
    aes(
      ymin = mean_confianza - se_confianza,
      ymax = mean_confianza + se_confianza
    ),
    width = 0.2
  ) +
  theme_minimal() +
  labs(
    title = "Media de la Confianza por Satisfacción con la Democracia",
    x = "Satisfacción con la Democracia",
    y = "Media de la Confianza"
  ) +
  scale_y_continuous(
    limits = c(
      0,
      1.1 * max(df_summary_satis$mean_confianza + df_summary_satis$se_confianza)
    )
  )

# Bar chart for beliefs, built the same way
ggplot(df_summary_creencias, aes(x = creenciasS, y = mean_confianza)) +
  geom_col(fill = "pink") +
  geom_errorbar(
    aes(
      ymin = mean_confianza - se_confianza,
      ymax = mean_confianza + se_confianza
    ),
    width = 0.2
  ) +
  theme_minimal() +
  labs(
    title = "Media de la Confianza por Creencias",
    x = "Creencias",
    y = "Media de la Confianza"
  ) +
  scale_y_continuous(
    limits = c(
      0,
      1.1 * max(
        df_summary_creencias$mean_confianza + df_summary_creencias$se_confianza
      )
    )
  )

# Hypothesised model: trust score explained by satisfaction with democracy,
# ethnicity, stratum, party identification and beliefs.
# NOTE(review): the predictors are still numeric columns at this point, so
# this first fit enters each of them as a single linear term.
modelo1 <- confianza ~ satis_dem + etnia + estrato + partido_pol + creenciasS

reg1 <- lm(modelo1, data = PERU)

#LINEALIDAD

# linea roja debe tender a horizontal
plot(reg1, 1)

#HOMOCEDASTICIDAD

# linea roja debe tender a horizontal
plot(reg1, 3)

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

# null: modelo homocedastico
resBP=bptest(reg1)
data.frame(list('BP'=resBP$statistic,
             'df'=resBP$parameter,
             "p-value"=resBP$p.value))

##          BP df      p.value
## BP 57.79099  5 3.473757e-11

#NORMALIDAD DE RESIDUOS

# puntos cerca a la diagonal?
plot(reg1, 2)

#NULL: Datos se distribuyen de manera normal
resSW=shapiro.test(reg1$residuals)
data.frame(list('SW'=resSW$statistic,
             "p-value"=resSW$p.value))

##         SW      p.value
## W 0.898673 3.125903e-25

#NO MULTICOLINEALIDAD

library(DescTools)
# > 5 es problematico
VIF(reg1)

##   satis_dem       etnia     estrato partido_pol  creenciasS 
##    1.032962    1.001280    1.012560    1.013156    1.020213

#VALORES INFLUYENTES

plot(reg1, 5)

checkReg2=as.data.frame(influence.measures(reg1)$is.inf)
checkReg2[checkReg2$cook.d & checkReg2$hat,c('cook.d','hat')]

## [1] cook.d hat   
## <0 rows> (or 0-length row.names)

Lo que queremos ver en esta regresión es qué tanto influyen las variables independientes en la confianza en las instituciones. Entonces tomaremos satis_dem, creenciasS, etnia, estrato y partido_pol.

str(PERU)

## 'data.frame':    1006 obs. of  8 variables:
##  $ numentre   : num  541 904 853 32 854 ...
##  $ satis_dem  : num  3 3 4 4 4 4 3 3 2 4 ...
##  $ creenciasS : num  1 1 2 1 1 1 2 3 1 1 ...
##  $ etnia      : num  4 4 4 3 4 4 3 4 4 4 ...
##  $ estrato    : num  3 3 5 4 5 3 4 4 3 4 ...
##  $ partido_pol: num  0 1 0 0 1 0 0 0 0 1 ...
##  $ confianza  : num  7 7 10 10 10 10 7 6 7 10 ...
##  $ confia     : chr  "1" "1" "1" "1" ...

# Drop incomplete rows, then convert the predictors, the controls and the
# binary trust flag to factors.
PERU <- PERU[complete.cases(PERU), ]

seleccion <- c(
  "satis_dem", "creenciasS", "etnia",
  "estrato", "partido_pol", "confia"
)
PERU[seleccion] <- lapply(PERU[seleccion], as.factor)

# Summary of every column except the first one (the interview number).
peruStats <- summary(PERU[-1])
peruStats

##  satis_dem creenciasS etnia   estrato partido_pol   confianza      confia 
##  1: 40     1:542      2: 29   1: 22   0:816       Min.   : 1.000   0: 96  
##  2: 43     2:181      3: 77   2: 58   1:190       1st Qu.: 8.000   1:910  
##  3:505     3:283      4:839   3:427               Median :10.000          
##  4:418                6: 61   4:329               Mean   : 8.785          
##                               5:170               3rd Qu.:10.000          
##                                                   Max.   :10.000

reg1=lm(modelo1,data=PERU)
summary(reg1)

## 
## Call:
## lm(formula = modelo1, data = PERU)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6057 -0.7557  0.4547  0.9859  3.9543 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.29898    0.48945  12.869  < 2e-16 ***
## satis_dem2    1.11424    0.35044   3.180  0.00152 ** 
## satis_dem3    1.78623    0.26359   6.777 2.11e-11 ***
## satis_dem4    2.54067    0.26630   9.540  < 2e-16 ***
## etnia3       -0.04476    0.34650  -0.129  0.89723    
## etnia4       -0.25325    0.29869  -0.848  0.39672    
## etnia6       -0.37025    0.35650  -1.039  0.29925    
## estrato2      0.42585    0.39641   1.074  0.28296    
## estrato3      0.60415    0.34846   1.734  0.08327 .  
## estrato4      0.73248    0.35156   2.084  0.03746 *  
## estrato5      1.01927    0.35963   2.834  0.00469 ** 
## partido_pol1 -0.30476    0.12819  -2.377  0.01763 *  
## creenciasS2   0.13631    0.13590   1.003  0.31610    
## creenciasS3   0.22642    0.11698   1.936  0.05321 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.575 on 992 degrees of freedom
## Multiple R-squared:  0.1579, Adjusted R-squared:  0.1468 
## F-statistic: 14.31 on 13 and 992 DF,  p-value: < 2.2e-16

library(ggplot2)
library(broom)
library(dotwhisker)

model_tidy <- tidy(reg1)

dwplot(model_tidy) +
  theme_minimal() +
  labs(title = "Coeficientes del Modelo de Regresión",
       x = "Estimación",
       y = "Predictor")

#REGRESIÓN LOGÍSTICA

#DICOTOMIZANDO LA SATISFACCIÓN A SATISFECHO Y NO SATISFECHO

library(dplyr)

# Binary satisfaction indicator: 1 for satis_dem codes 3 and 4, 0 otherwise.
# NOTE(review): confirm in the codebook which end of the 1-4 scale means
# "satisfied" before reading this as satisfied = 1.
PERU <- mutate(PERU, satisdem_dic = as.numeric(satis_dem %in% c(3, 4)))

# Look at the first records to verify
head(PERU)

##   numentre satis_dem creenciasS etnia estrato partido_pol confianza confia
## 1      541         3          1     4       3           0         7      1
## 2      904         3          1     4       3           1         7      1
## 3      853         4          2     4       5           0        10      1
## 4       32         4          1     3       4           0        10      1
## 5      854         4          1     4       5           1        10      1
## 6       94         4          1     4       3           0        10      1
##   satisdem_dic
## 1            1
## 2            1
## 3            1
## 4            1
## 5            1
## 6            1

table(PERU$satisdem_dic)

## 
##   0   1 
##  83 923

#DICOTOMIZANDO ESTRATO A CLASE ALTA Y CLASE BAJA Si es Clase Alta = 1 y si es Clase Baja=0

table(PERU$estrato)

## 
##   1   2   3   4   5 
##  22  58 427 329 170

# Binary class indicator: 1 for estrato codes 1 and 2, 0 for every other code.
PERU <- mutate(PERU, estrato_dic = as.numeric(estrato %in% c(1, 2)))
table(PERU$estrato_dic)

## 
##   0   1 
## 926  80

#DICOTOMIZANDO CREENCIAS Si cree que la democracia es la mejor forma de gobierno será 1 de lo contrario será 0

table(PERU$creenciasS)

## 
##   1   2   3 
## 542 181 283

# Binary belief indicator: 1 for creenciasS code 1, 0 for every other code.
PERU <- mutate(PERU, creencias_dic = as.numeric(creenciasS %in% c(1)))
table(PERU$creencias_dic)

## 
##   0   1 
## 464 542

#ETNIA LA DEJAREMOS COMO TAL

table(PERU$etnia)

## 
##   2   3   4   6 
##  29  77 839  61

PERU$etnia = as.factor(PERU$etnia)

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%()       masks ggplot2::%+%()
## ✖ psych::alpha()     masks ggplot2::alpha()
## ✖ tidyr::extract()   masks magrittr::extract()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(car)

## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## The following object is masked from 'package:DescTools':
## 
##     Recode
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## The following object is masked from 'package:dplyr':
## 
##     recode

#CORRELACION ENTRE LAS VARIABLES

#HAREMOS CHI CUADRADO

# Convert the variables to factors
PERU[c("confia", "satisdem_dic", "creencias_dic")] <- lapply(
  PERU[c("confia", "satisdem_dic", "creencias_dic")],
  as.factor
)

# Contingency table (trust flag in rows, dichotomised satisfaction in columns)
# and chi-squared test for satisfaction with democracy
tabla_satisfaccion <- table(PERU$confia, PERU$satisdem_dic)
chi_satisfaccion <- chisq.test(tabla_satisfaccion)
chi_satisfaccion

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tabla_satisfaccion
## X-squared = 58.318, df = 1, p-value = 2.229e-14

# Tabla de contingencia y prueba de Chi-cuadrado para creencias
tabla_creencias <- table(PERU$confia, PERU$creencias_dic)
chi_creencias <- chisq.test(tabla_creencias)
chi_creencias

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tabla_creencias
## X-squared = 6.4288, df = 1, p-value = 0.01123

# Resultados para satisfacción con la democracia
print(chi_satisfaccion)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tabla_satisfaccion
## X-squared = 58.318, df = 1, p-value = 2.229e-14

# Resultados para creencias
print(chi_creencias)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tabla_creencias
## X-squared = 6.4288, df = 1, p-value = 0.01123

Interpretamos -> Rechazamos la hipótesis nula (no hay asociación) y concluimos que existe una asociación significativa entre las variables. GRAFICAMOS…

# Turn the contingency table into a long data frame.
# tabla_satisfaccion was built as table(confia, satisdem_dic), so its first
# dimension is the trust flag and its second is satisfaction. The columns are
# named in that order; naming them the other way round mislabels the x axis
# and the legend of the chart below.
df_satis_dem <- as.data.frame(tabla_satisfaccion)
colnames(df_satis_dem) <- c("Confianza", "Satisfaccion", "Frecuencia")

# Stacked bar chart: one bar per satisfaction group, filled by trust flag
ggplot(df_satis_dem, aes(x = Satisfaccion, y = Frecuencia, fill = Confianza)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(title = "Distribución de Confianza por Satisfacción con la Democracia",
       x = "Satisfacción con la Democracia",
       y = "Frecuencia") +
  scale_fill_manual(values = c("red", "skyblue")) # Adjust colours if needed

# Turn the contingency table into a long data frame.
# tabla_creencias was built as table(confia, creencias_dic), so its first
# dimension is the trust flag and its second is the belief indicator. The
# columns are named in that order; naming them the other way round mislabels
# the x axis and the legend of the chart below.
df_creencias <- as.data.frame(tabla_creencias)
colnames(df_creencias) <- c("Confianza", "Creencias", "Frecuencia")

# Stacked bar chart: one bar per belief group, filled by trust flag
ggplot(df_creencias, aes(x = Creencias, y = Frecuencia, fill = Confianza)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(title = "Distribución de Confianza por Creencias",
       x = "Creencias",
       y = "Frecuencia") +
  scale_fill_manual(values = c("red", "skyblue")) # Adjust colours if needed

library(car)
modelo <- glm(confia ~ satisdem_dic + creencias_dic + estrato_dic + partido_pol + etnia, 
              data = PERU, family = binomial)

# Resumen del modelo
summary(modelo)

## 
## Call:
## glm(formula = confia ~ satisdem_dic + creencias_dic + estrato_dic + 
##     partido_pol + etnia, family = binomial, data = PERU)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     1.28926    0.70961   1.817   0.0692 .  
## satisdem_dic1   1.74946    0.27164   6.440 1.19e-10 ***
## creencias_dic1 -0.51558    0.23513  -2.193   0.0283 *  
## estrato_dic    -0.37807    0.35136  -1.076   0.2819    
## partido_pol1   -0.44057    0.25622  -1.720   0.0855 .  
## etnia3          0.82849    0.88494   0.936   0.3492    
## etnia4         -0.08836    0.65895  -0.134   0.8933    
## etnia6         -0.62450    0.74287  -0.841   0.4005    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 633.62  on 1005  degrees of freedom
## Residual deviance: 576.68  on  998  degrees of freedom
## AIC: 592.68
## 
## Number of Fisher Scoring iterations: 6

exp(coef(modelo))

##    (Intercept)  satisdem_dic1 creencias_dic1    estrato_dic   partido_pol1 
##      3.6300860      5.7515002      0.5971561      0.6851843      0.6436711 
##         etnia3         etnia4         etnia6 
##      2.2898589      0.9154348      0.5355268

#PARA LA ETNIA

dep=PERU$confia # a la fila
ind=PERU$etnia # a la columna

volsexTable=table(dep,ind,dnn = c('confia','etnia'))
library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

### suma por fila y columna
addmargins(volsexTable)%>%
    kable(caption = "Tabla de Contingencia: 'Confía' y 'Etnia'")%>%
    kableExtra::kable_styling(full_width = F)

Tabla de Contingencia: ‘Confía’ y ‘Etnia’
	2	3	4	6	Sum
0	3	3	80	10	96
1	26	74	759	51	910
Sum	29	77	839	61	1006

2: BLACK 3: INDIGENOUS 4: MESTIZO 6: WHITE

#EFECTOS MARGINALES

library(margins)
library(kableExtra)
marginalsData=summary(margins(modelo))
marginalsData%>% kable(caption = "Efectos Marginales Promedio (AME)") %>%kableExtra::kable_styling(full_width = T)

Efectos Marginales Promedio (AME)
factor	AME	SE	z	p	lower	upper
creencias_dic1	-0.0399902	0.0177454	-2.2535533	0.0242243	-0.0747705	-0.0052099
estrato_dic	-0.0300457	0.0279291	-1.0757842	0.2820238	-0.0847858	0.0246944
etnia3	0.0462369	0.0543686	0.8504334	0.3950842	-0.0603237	0.1527975
etnia4	-0.0068898	0.0498555	-0.1381959	0.8900856	-0.1046048	0.0908251
etnia6	-0.0591058	0.0645573	-0.9155552	0.3599002	-0.1856357	0.0674242
partido_pol1	-0.0384315	0.0243821	-1.5762134	0.1149766	-0.0862196	0.0093567
satisdem_dic1	0.2328349	0.0503720	4.6223065	0.0000038	0.1341075	0.3315622

library(ggplot2)
base= ggplot(marginalsData,aes(x=factor, y=AME)) + geom_point()
base +  geom_errorbar(aes(ymin=lower, ymax=upper))

#SI EL INTERVALO DE CONFIANZA DE UN COVARIADO INCLUYE A CERO, SU EFECTO NO SERÁ SIGNIFICATIVO

#CLUSTERIZACIÓN

1406

Valentina Nauchi

2024-06-14