# ============================================================
# ANÁLISIS: PERCEPCIÓN SOBRE TRABAJO DE LA MUJER
# Base: Woman Work Perc.xlsx
# Estudiante: Nina María Sánchez Ramírez
# Tutor: Carlos Eduardo Alonso-Malaver
# Actividades:
# 1. Tablas univariadas
# 2. Tablas bivariadas
# 3. Análisis de Correspondencias Múltiples MCA
# ============================================================

# ============================================================
# 1. INSTALACIÓN Y CARGA DE PAQUETES
# ============================================================

paquetes <- c(
  "readxl", "dplyr", "janitor", "FactoMineR",
  "factoextra", "ggplot2", "openxlsx"
)

paquetes_no_instalados <- paquetes[!(paquetes %in% installed.packages()[, "Package"])]

if(length(paquetes_no_instalados) > 0){
  install.packages(paquetes_no_instalados)
}

library(readxl)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(janitor)
## 
## Adjuntando el paquete: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(FactoMineR)
library(factoextra)
## Cargando paquete requerido: ggplot2
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
library(ggplot2)
library(openxlsx)

# ============================================================
# 2. IMPORTAR BASE DE DATOS
# ============================================================

ruta <- "C:\\Users\\Nina Sanchez\\Documents\\ACER -LENOVO-NINA 2019-20\\1. USCO\\ESTADISTICA\\2026_Estadistica\\Malaver\\Woman Work Perc.xlsx"

datos <- read_excel(ruta)

# Revisar estructura inicial
str(datos)
## tibble [3,418 × 5] (S3: tbl_df/tbl/data.frame)
##  $ Id: chr [1:3418] "001" "002" "003" "004" ...
##  $ Q1: chr [1:3418] "SA" "SA" "A" "DK" ...
##  $ Q2: chr [1:3418] "A" "SA" "SA" "SA" ...
##  $ Q3: chr [1:3418] "A" "SA" "SD" "A" ...
##  $ Q4: chr [1:3418] "SA" "SA" "SA" "A" ...
names(datos)
## [1] "Id" "Q1" "Q2" "Q3" "Q4"
head(datos)
## # A tibble: 6 × 5
##   Id    Q1    Q2    Q3    Q4   
##   <chr> <chr> <chr> <chr> <chr>
## 1 001   SA    A     A     SA   
## 2 002   SA    SA    SA    SA   
## 3 003   A     SA    SD    SA   
## 4 004   DK    SA    A     A    
## 5 005   A     SA    SD    A    
## 6 006   SA    A     A     SA
# Limpiar nombres de variables
datos <- datos %>%
  clean_names()

# Convertir variables a factor
datos_cat <- datos %>%
  mutate(across(everything(), as.factor))

# Revisar valores perdidos
colSums(is.na(datos_cat))
## id q1 q2 q3 q4 
##  0  0  0  0  0
# Crear carpeta de resultados
dir.create("resultados_mujer_trabajo", showWarnings = FALSE)

# ============================================================
# 3. SELECCIONAR VARIABLES DE PERCEPCIÓN
# ============================================================

# Se excluye id porque solo identifica a cada participante.
# El análisis se realiza únicamente con q1, q2, q3 y q4.

datos_percepcion <- datos_cat %>%
  select(q1, q2, q3, q4)

# ============================================================
# 4. TABLAS UNIVARIADAS
# ============================================================

tablas_uni <- lapply(names(datos_percepcion), function(var){
  datos_percepcion %>%
    tabyl(!!sym(var)) %>%
    adorn_totals("row") %>%
    adorn_pct_formatting(digits = 1)
})

names(tablas_uni) <- names(datos_percepcion)

# Ver tabla de Q1
tablas_uni$q1
##     q1    n percent
##      A  476   13.9%
##     DK  362   10.6%
##     SA 2501   73.2%
##     SD   79    2.3%
##  Total 3418  100.0%
# Exportar tablas univariadas a Excel
wb_uni <- createWorkbook()

for(i in seq_along(tablas_uni)){
  addWorksheet(wb_uni, paste0("Uni_", names(tablas_uni)[i]))
  writeData(wb_uni, sheet = i, tablas_uni[[i]])
}

saveWorkbook(
  wb_uni,
  "resultados_mujer_trabajo/tablas_univariadas.xlsx",
  overwrite = TRUE
)

# ============================================================
# 5. TABLAS BIVARIADAS
# ============================================================

pares <- combn(names(datos_percepcion), 2, simplify = FALSE)

crear_bivariada <- function(var1, var2){
  datos_percepcion %>%
    tabyl(!!sym(var1), !!sym(var2)) %>%
    adorn_totals(c("row", "col")) %>%
    adorn_percentages("row") %>%
    adorn_pct_formatting(digits = 1)
}

tablas_bi <- lapply(pares, function(p){
  crear_bivariada(p[1], p[2])
})

names(tablas_bi) <- sapply(pares, function(p){
  paste0(p[1], "_vs_", p[2])
})

# Ver primera tabla bivariada
tablas_bi[[1]]
##     q1     A    DK    SA  Total
##      A 38.2%  9.7% 52.1% 100.0%
##     DK 37.6%  8.8% 53.6% 100.0%
##     SA 38.3%  8.2% 53.5% 100.0%
##     SD 29.1% 11.4% 59.5% 100.0%
##  Total 38.0%  8.5% 53.5% 100.0%
# Exportar tablas bivariadas a Excel
wb_bi <- createWorkbook()

for(i in seq_along(tablas_bi)){
  nombre_hoja <- substr(names(tablas_bi)[i], 1, 31)
  addWorksheet(wb_bi, nombre_hoja)
  writeData(wb_bi, sheet = i, tablas_bi[[i]])
}

saveWorkbook(
  wb_bi,
  "resultados_mujer_trabajo/tablas_bivariadas.xlsx",
  overwrite = TRUE
)

# ============================================================
# 6. ANÁLISIS DE CORRESPONDENCIAS MÚLTIPLES MCA
# ============================================================

mca <- MCA(datos_percepcion, graph = FALSE)

# Resumen del MCA
summary(mca)
## 
## Call:
## MCA(X = datos_percepcion, graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance               0.269   0.263   0.259   0.257   0.255   0.249   0.247
## % of var.              9.779   9.567   9.407   9.331   9.276   9.049   8.999
## Cumulative % of var.   9.779  19.345  28.752  38.084  47.359  56.408  65.408
##                        Dim.8   Dim.9  Dim.10  Dim.11
## Variance               0.242   0.240   0.236   0.233
## % of var.              8.812   8.736   8.585   8.460
## Cumulative % of var.  74.219  82.955  91.540 100.000
## 
## Individuals (the 10 first)
##          Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr   cos2  
## 1     | -0.085  0.001  0.009 |  0.212  0.005  0.053 | -0.180  0.004  0.038 |
## 2     |  0.376  0.015  0.056 | -0.454  0.023  0.082 | -0.506  0.029  0.103 |
## 3     | -0.424  0.020  0.059 |  0.108  0.001  0.004 | -0.384  0.017  0.049 |
## 4     |  0.162  0.003  0.008 | -0.092  0.001  0.003 |  0.849  0.081  0.226 |
## 5     |  0.251  0.007  0.018 |  0.147  0.002  0.006 | -0.386  0.017  0.042 |
## 6     | -0.085  0.001  0.009 |  0.212  0.005  0.053 | -0.180  0.004  0.038 |
## 7     |  0.148  0.002  0.019 | -0.290  0.009  0.072 | -0.098  0.001  0.008 |
## 8     |  0.589  0.038  0.255 |  0.251  0.007  0.046 | -0.182  0.004  0.024 |
## 9     | -0.526  0.030  0.423 | -0.329  0.012  0.165 | -0.095  0.001  0.014 |
## 10    |  0.817  0.073  0.248 |  0.087  0.001  0.003 | -0.591  0.039  0.130 |
## 
## Categories (the 10 first)
##           Dim.1     ctr    cos2  v.test     Dim.2     ctr    cos2  v.test  
## q1_A  |  -0.485   3.044   0.038 -11.400 |   0.953  12.017   0.147  22.406 |
## q1_DK |   0.089   0.078   0.001   1.788 |   0.271   0.741   0.009   5.460 |
## q1_SA |   0.062   0.259   0.010   5.960 |  -0.134   1.241   0.049 -12.896 |
## q1_SD |   0.560   0.673   0.007   5.031 |  -2.756  16.685   0.180 -24.782 |
## q2_A  |   0.588  12.226   0.212  26.923 |   0.661  15.801   0.268  30.273 |
## q2_DK |  -0.574   2.612   0.031 -10.246 |  -0.141   0.160   0.002  -2.512 |
## q2_SA |  -0.327   5.300   0.122 -20.457 |  -0.448  10.187   0.230 -28.052 |
## q3_A  |  -0.444  11.196   0.309 -32.471 |   0.176   1.795   0.048  12.859 |
## q3_DK |   0.587   2.935   0.035  10.897 |  -1.046   9.524   0.110 -19.416 |
## q3_SA |   1.427  20.986   0.254  29.454 |  -0.080   0.067   0.001  -1.646 |
##         Dim.3     ctr    cos2  v.test  
## q1_A   -0.673   6.089   0.073 -15.816 |
## q1_DK   1.841  34.700   0.402  37.044 |
## q1_SA  -0.084   0.504   0.019  -8.148 |
## q1_SD  -1.712   6.548   0.069 -15.395 |
## q2_A   -0.140   0.717   0.012  -6.394 |
## q2_DK   0.421   1.465   0.017   7.526 |
## q2_SA   0.032   0.053   0.001   2.005 |
## q3_A   -0.077   0.354   0.009  -5.661 |
## q3_DK   1.780  28.026   0.319  33.028 |
## q3_SA  -0.914   8.950   0.104 -18.866 |
## 
## Categorical variables (eta2)
##         Dim.1 Dim.2 Dim.3  
## q1    | 0.044 0.323 0.495 |
## q2    | 0.217 0.275 0.023 |
## q3    | 0.396 0.120 0.387 |
## q4    | 0.419 0.334 0.129 |
# Eigenvalues: varianza explicada por dimensiones
eig <- get_eigenvalue(mca)
eig
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1   0.2689142         9.778700                     9.77870
## Dim.2   0.2630834         9.566668                    19.34537
## Dim.3   0.2586942         9.407061                    28.75243
## Dim.4   0.2566081         9.331205                    38.08363
## Dim.5   0.2550840         9.275780                    47.35941
## Dim.6   0.2488480         9.049018                    56.40843
## Dim.7   0.2474809         8.999305                    65.40774
## Dim.8   0.2423166         8.811512                    74.21925
## Dim.9   0.2402315         8.735692                    82.95494
## Dim.10  0.2360844         8.584888                    91.53983
## Dim.11  0.2326547         8.460169                   100.00000
write.xlsx(
  eig,
  "resultados_mujer_trabajo/eigenvalues_mca.xlsx"
)

# Coordenadas, contribuciones y calidad de representación
var_mca <- get_mca_var(mca)

coord_cat <- as.data.frame(var_mca$coord)
contrib_cat <- as.data.frame(var_mca$contrib)
cos2_cat <- as.data.frame(var_mca$cos2)

write.xlsx(
  coord_cat,
  "resultados_mujer_trabajo/coordenadas_categorias_mca.xlsx"
)

write.xlsx(
  contrib_cat,
  "resultados_mujer_trabajo/contribuciones_categorias_mca.xlsx"
)

write.xlsx(
  cos2_cat,
  "resultados_mujer_trabajo/calidad_representacion_cos2_mca.xlsx"
)

# ============================================================
# 7. GRÁFICOS DEL MCA
# ============================================================

# Gráfico de eigenvalues
grafico_eig <- fviz_screeplot(
  mca,
  addlabels = TRUE,
  ylim = c(0, 50)
) +
  ggtitle("Varianza explicada por dimensiones del MCA") +
  theme_minimal()

grafico_eig

ggsave(
  "resultados_mujer_trabajo/grafico_eigenvalues_mca.png",
  plot = grafico_eig,
  width = 8,
  height = 5,
  dpi = 300
)

# Mapa perceptual de categorías
grafico_var <- fviz_mca_var(
  mca,
  repel = TRUE,
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
) +
  ggtitle("Mapa perceptual de categorías del MCA") +
  theme_minimal()

grafico_var

ggsave(
  "resultados_mujer_trabajo/mapa_perceptual_categorias_mca.png",
  plot = grafico_var,
  width = 9,
  height = 7,
  dpi = 300
)
exists("mca")
## [1] TRUE
# ============================================================
# Mapa de individuos sin etiquetas
# ============================================================

grafico_ind <- fviz_mca_ind(
  mca,
  label = "none",
  geom = "point",
  alpha.ind = 0.4
) +
  ggtitle("Mapa de individuos del MCA") +
  theme_minimal()

print(grafico_ind)

ggsave(
  filename = "resultados_mujer_trabajo/mapa_individuos_mca.png",
  plot = grafico_ind,
  width = 9,
  height = 7,
  dpi = 300
)

# ============================================================
# Biplot del MCA mostrando categorías
# ============================================================

grafico_biplot <- fviz_mca_biplot(
  mca,
  label = "var",
  repel = TRUE,
  invisible = "ind",
  ggtheme = theme_minimal()
) +
  ggtitle("Biplot del MCA: categorías de percepción")

grafico_biplot

ggsave(
  "resultados_mujer_trabajo/biplot_mca.png",
  plot = grafico_biplot,
  width = 9,
  height = 7,
  dpi = 300
)

# ============================================================
# 8. CONCLUSIONES Y RECOMENDACIONES
# ============================================================

# Conclusiones:
# 1. La base Woman Work Perc.xlsx contiene 3418 registros y cuatro preguntas
#    de percepción: Q1, Q2, Q3 y Q4.
# 2. Las tablas univariadas permiten identificar la frecuencia y porcentaje
#    de cada respuesta: SA, A, DK y SD.
# 3. Las tablas bivariadas permiten observar asociaciones descriptivas entre
#    pares de preguntas, sin interpretar causalidad.
# 4. El MCA es adecuado porque las variables son categóricas y permite visualizar
#    perfiles de respuesta mediante un mapa perceptual.
# 5. La variable Id no se interpreta porque solo identifica a cada participante.

# Recomendaciones:
# 1. Excluir Id de las tablas interpretativas y del MCA.
# 2. Analizar solo Q1, Q2, Q3 y Q4 como variables categóricas.
# 3. Recodificar las respuestas para facilitar la lectura:
#    SA = totalmente de acuerdo
#    A  = de acuerdo
#    DK = no sabe
#    SD = totalmente en desacuerdo
# 4. Presentar en el informe una tabla univariada, una tabla bivariada
#    representativa y el gráfico principal del MCA.
# 5. Usar lenguaje descriptivo: “se observa”, “se identifica” o “se asocia”,
#    evitando afirmar relaciones causales.

# ============================================================
# CIERRE DEL ANÁLISIS
# ============================================================