2. Recodificación de las variables en las 4 formas de violencia

violec_especif16

# Physical violence (2016): items P13_1_1 to P13_1_9.
# Each item becomes a two-level factor: answer codes 1, 2 and 3 -> "1"
# (the act was reported), code 4 -> "0" (it was not).
# The levels are spelled out so that the labels are tied to the codes
# themselves and not to whichever distinct values happen to occur in a
# column; any other value becomes NA instead of being mislabelled.
# NOTE(review): assumes the raw answers are coded 1-4 -- confirm against the
# ENDIREH 2016 data dictionary.
preguntas_fisica16 <- paste0("P13_1_", 1:9)
for (k in seq_along(preguntas_fisica16)) {
  violec_especif16[[paste0("violencia16_fisica", k)]] <- factor(
    violec_especif16[[preguntas_fisica16[k]]],
    levels = c(1, 2, 3, 4),
    labels = c("1", "1", "1", "0")
  )
}



# Psychological violence (2016): items P13_1_10 to P13_1_22 plus the combined
# items P13_1_23AB and P13_1_24AB, stored as violencia16_psicologica1..15.
# Answer codes 1, 2 and 3 -> "1" (reported), code 4 -> "0" (not reported).
# Explicit levels keep the labels tied to the codes; any other value
# becomes NA instead of shifting the labels.
# NOTE(review): assumes the raw answers are coded 1-4 -- confirm against the
# ENDIREH 2016 data dictionary.
preguntas_psicologica16 <- c(
  paste0("P13_1_", 10:22),
  "P13_1_23AB",
  "P13_1_24AB"
)
for (k in seq_along(preguntas_psicologica16)) {
  violec_especif16[[paste0("violencia16_psicologica", k)]] <- factor(
    violec_especif16[[preguntas_psicologica16[k]]],
    levels = c(1, 2, 3, 4),
    labels = c("1", "1", "1", "0")
  )
}



# Sexual violence (2016): items P13_1_25 to P13_1_29, stored as
# violencia16_sexual1..5.
# Answer codes 1, 2 and 3 -> "1" (reported), code 4 -> "0" (not reported).
# Explicit levels keep the labels tied to the codes; any other value
# becomes NA instead of shifting the labels.
# NOTE(review): assumes the raw answers are coded 1-4 -- confirm against the
# ENDIREH 2016 data dictionary.
preguntas_sexual16 <- paste0("P13_1_", 25:29)
for (k in seq_along(preguntas_sexual16)) {
  violec_especif16[[paste0("violencia16_sexual", k)]] <- factor(
    violec_especif16[[preguntas_sexual16[k]]],
    levels = c(1, 2, 3, 4),
    labels = c("1", "1", "1", "0")
  )
}


# Economic violence (2016): items P13_1_30 to P13_1_32 plus the combined
# items P13_1_33AB to P13_1_36AB, stored as violencia16_economica1..7.
# Answer codes 1, 2 and 3 -> "1" (reported), code 4 -> "0" (not reported).
# Explicit levels keep the labels tied to the codes; any other value
# becomes NA instead of shifting the labels.
# NOTE(review): assumes the raw answers are coded 1-4 -- confirm against the
# ENDIREH 2016 data dictionary.
preguntas_economica16 <- c(
  paste0("P13_1_", 30:32),
  paste0("P13_1_", 33:36, "AB")
)
for (k in seq_along(preguntas_economica16)) {
  violec_especif16[[paste0("violencia16_economica", k)]] <- factor(
    violec_especif16[[preguntas_economica16[k]]],
    levels = c(1, 2, 3, 4),
    labels = c("1", "1", "1", "0")
  )
}

violec_especif21

# Physical violence (2021): items P14_1_1 to P14_1_9, stored as
# violencia21_fisica1..9.
# Answer codes 1, 2 and 3 -> "1" (reported), code 4 -> "0" (not reported).
# The levels are spelled out so that the labels are tied to the codes
# themselves and not to whichever distinct values happen to occur in a
# column; any other value becomes NA instead of being mislabelled.
# NOTE(review): assumes the raw answers are coded 1-4 -- confirm against the
# ENDIREH 2021 data dictionary.
preguntas_fisica21 <- paste0("P14_1_", 1:9)
for (k in seq_along(preguntas_fisica21)) {
  violec_especif21[[paste0("violencia21_fisica", k)]] <- factor(
    violec_especif21[[preguntas_fisica21[k]]],
    levels = c(1, 2, 3, 4),
    labels = c("1", "1", "1", "0")
  )
}


# Psychological violence (2021): items P14_1_10 to P14_1_22, the combined
# items P14_1_23AB and P14_1_24AB, and item P14_1_31, stored as
# violencia21_psicologica1..16.
# Answer codes 1, 2 and 3 -> "1" (reported), code 4 -> "0" (not reported).
# Explicit levels keep the labels tied to the codes; any other value
# becomes NA instead of shifting the labels.
# NOTE(review): assumes the raw answers are coded 1-4 -- confirm against the
# ENDIREH 2021 data dictionary.
preguntas_psicologica21 <- c(
  paste0("P14_1_", 10:22),
  "P14_1_23AB",
  "P14_1_24AB",
  "P14_1_31"
)
for (k in seq_along(preguntas_psicologica21)) {
  violec_especif21[[paste0("violencia21_psicologica", k)]] <- factor(
    violec_especif21[[preguntas_psicologica21[k]]],
    levels = c(1, 2, 3, 4),
    labels = c("1", "1", "1", "0")
  )
}


# Sexual violence (2021): items P14_1_25 to P14_1_30, stored as
# violencia21_sexual1..6.
# Answer codes 1, 2 and 3 -> "1" (reported), code 4 -> "0" (not reported).
# Explicit levels keep the labels tied to the codes; any other value
# becomes NA instead of shifting the labels.
# NOTE(review): assumes the raw answers are coded 1-4 -- confirm against the
# ENDIREH 2021 data dictionary.
preguntas_sexual21 <- paste0("P14_1_", 25:30)
for (k in seq_along(preguntas_sexual21)) {
  violec_especif21[[paste0("violencia21_sexual", k)]] <- factor(
    violec_especif21[[preguntas_sexual21[k]]],
    levels = c(1, 2, 3, 4),
    labels = c("1", "1", "1", "0")
  )
}


# Economic violence (2021): items P14_1_32 to P14_1_34 plus the combined
# items P14_1_35AB to P14_1_38AB, stored as violencia21_economica1..7.
# Answer codes 1, 2 and 3 -> "1" (reported), code 4 -> "0" (not reported).
# Explicit levels keep the labels tied to the codes; any other value
# becomes NA instead of shifting the labels.
# NOTE(review): assumes the raw answers are coded 1-4 -- confirm against the
# ENDIREH 2021 data dictionary.
preguntas_economica21 <- c(
  paste0("P14_1_", 32:34),
  paste0("P14_1_", 35:38, "AB")
)
for (k in seq_along(preguntas_economica21)) {
  violec_especif21[[paste0("violencia21_economica", k)]] <- factor(
    violec_especif21[[preguntas_economica21[k]]],
    levels = c(1, 2, 3, 4),
    labels = c("1", "1", "1", "0")
  )
}

EDAD

# Load the 2016 table from CSV and make sure EDAD is numeric.
datos_basicos1_16 <- read.csv(
  "C:/Master/3. Semester/R/Dataset/2016/conjunto_de_datos_tsdem_endireh_2016.csv",
  header = TRUE,
  sep = ","
)
datos_basicos1_16$EDAD <- as.numeric(datos_basicos1_16$EDAD)

# Same for the 2021 table.
datos_basicos1_21 <- read.csv(
  "C:/Master/3. Semester/R/Dataset/TSDem.csv",
  header = TRUE,
  sep = ","
)
datos_basicos1_21$EDAD <- as.numeric(datos_basicos1_21$EDAD)
library(dplyr)

# Merge the 2016 violence items with the 2016 table read above; every row of
# violec_especif16 is kept (left join on the five identifier columns).
data_merged16 <- left_join(
  violec_especif16,
  datos_basicos1_16,
  by = c("ID_VIV", "ID_MUJ", "UPM", "VIV_SEL", "HOGAR")
)

# Keep only respondents aged 15 to 18 (rows with a missing EDAD are dropped).
ad_violec_16 <- data_merged16 %>%
  mutate(EDAD = as.numeric(EDAD)) %>%
  filter(EDAD >= 15, EDAD <= 18)

# Merge the 2021 violence items with the 2021 table read above; every row of
# violec_especif21 is kept (left join on the five identifier columns).
data_merged21 <- left_join(
  violec_especif21,
  datos_basicos1_21,
  by = c("ID_VIV", "ID_PER", "UPM", "VIV_SEL", "HOGAR")
)

# Keep only respondents aged 15 to 18 (rows with a missing EDAD are dropped).
ad_violec_21 <- data_merged21 %>%
  mutate(EDAD = as.numeric(EDAD)) %>%
  filter(EDAD >= 15, EDAD <= 18)

0 = no sufrió violencia, 1 = sí sufrió violencia

Recodificación

library(dplyr)
library(tidyr)
library(ggplot2)

# 1. Reshape the nine physical-violence items (2016) into long format:
#    one row per woman and question.
violencia_long <- violec_especif16 %>%
  select(starts_with("violencia16_fisica")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  # Drop unanswered items so they neither enter the percentages nor show up
  # as an extra NA bar (the other per-question sections do the same).
  filter(!is.na(respuesta)) %>%
  mutate(
    # Question number 1-9, taken from the column name.
    pregunta_num = as.numeric(gsub("violencia16_fisica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# 2. Count and percentage of each answer within every question.
resumen <- violencia_long %>%
  group_by(pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# 3. Bar chart with the two answers side by side for every question.
ggplot(resumen, aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)) +
  geom_col(position = position_dodge()) +
  geom_text(aes(label = paste0(porcentaje, "%")),
            position = position_dodge(width = 0.9), vjust = -0.25, size = 3) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Distribución de respuestas de violencia física por pregunta (2016)",
    x = "Pregunta (1-9)",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()

library(dplyr)
library(ggplot2)

# 1. Yes/no indicator: "Sí" = at least one physical-violence item was
#    reported, "No" = none was.
#    The item columns are factors labelled "1"/"0" and rowSums() rejects
#    non-numeric input, so the labels are converted to numbers first
#    (via as.character() so the labels, not the internal codes, are used).
items_fis16 <- violec_especif16 %>%
  select(starts_with("violencia16_fisica")) %>%
  mutate(across(everything(), ~ as.numeric(as.character(.x))))

# na.rm = TRUE: a row whose items are all missing sums to 0 and is
# therefore counted as "No".
violec_especif16$viol16_fis_disc <- if_else(
  rowSums(items_fis16, na.rm = TRUE) > 0,
  "Sí",
  "No"
)

# 2. Totals and percentages per answer.
datos_plot <- violec_especif16 %>%
  filter(!is.na(viol16_fis_disc)) %>%
  group_by(viol16_fis_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1)
  )

# 3. Bar chart labelled with the absolute count and the percentage.
ggplot(datos_plot, aes(x = viol16_fis_disc, y = porcentaje, fill = viol16_fis_disc)) +
  geom_col() +
  geom_text(aes(label = paste0(total, " (", porcentaje, "%)")),
            vjust = -0.5, size = 4) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia física (2016)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()

Explicación: cualquier mujer que tenga al menos un “Sí” en las 9 preguntas será considerada como Sí. Solo las mujeres que nunca respondieron “Sí” en ninguna pregunta se cuentan como No. El gráfico muestra dos barras: No vs. Sí, con el número absoluto y el porcentaje encima.

library(dplyr)
library(ggplot2)

# 1. Yes/no indicator: "Sí" = at least one psychological-violence item was
#    reported, "No" = none was.
#    The item columns are factors labelled "1"/"0" and rowSums() rejects
#    non-numeric input, so the labels are converted to numbers first
#    (via as.character() so the labels, not the internal codes, are used).
items_psi16 <- violec_especif16 %>%
  select(starts_with("violencia16_psicologica")) %>%
  mutate(across(everything(), ~ as.numeric(as.character(.x))))

# na.rm = TRUE: a row whose items are all missing sums to 0 and is
# therefore counted as "No".
violec_especif16$viol16_psi_disc <- if_else(
  rowSums(items_psi16, na.rm = TRUE) > 0,
  "Sí",
  "No"
)

# 2. Totals and percentages per answer.
datos_plot_psi <- violec_especif16 %>%
  filter(!is.na(viol16_psi_disc)) %>%
  group_by(viol16_psi_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1)
  )

# 3. Bar chart labelled with the absolute count and the percentage.
ggplot(datos_plot_psi, aes(x = viol16_psi_disc, y = porcentaje, fill = viol16_psi_disc)) +
  geom_col() +
  geom_text(aes(label = paste0(total, " (", porcentaje, "%)")),
            vjust = -0.5, size = 4) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia psicológica (2016)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()

Explicación: cualquier mujer que tenga al menos un “Sí” en las 15 preguntas será considerada como Sí. Solo las mujeres que nunca respondieron “Sí” en ninguna pregunta se cuentan como No. El gráfico muestra dos barras: No vs. Sí, con el número absoluto y el porcentaje encima.

library(dplyr)
library(tidyr)
library(ggplot2)

# 1. Long format for the 15 psychological-violence items (2016): one row per
#    woman and question, unanswered items removed.
violencia_psi_long <- pivot_longer(
  select(violec_especif16, starts_with("violencia16_psicologica")),
  cols = everything(),
  names_to = "pregunta",
  values_to = "respuesta"
)
violencia_psi_long <- violencia_psi_long %>%
  drop_na(respuesta) %>%
  mutate(
    # Question number 1-15, i.e. the column name without its prefix.
    pregunta_num = as.numeric(
      sub("violencia16_psicologica", "", pregunta, fixed = TRUE)
    ),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# 2. Answer counts per question and their share within the question.
resumen_psi <- violencia_psi_long %>%
  count(pregunta_num, respuesta, name = "total") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# 3. Dodged bar chart with a percentage label above each bar.
ggplot(
  resumen_psi,
  aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)
) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  ggtitle("Distribución de respuestas de violencia psicológica por pregunta (2016)") +
  xlab("Pregunta") +
  ylab("Porcentaje") +
  labs(fill = "Respuesta") +
  theme_minimal()

library(dplyr)
library(tidyr)
library(ggplot2)

# 1. Long format for the 5 sexual-violence items (2016): one row per woman
#    and question, unanswered items removed.
violencia_sex_long <- pivot_longer(
  select(violec_especif16, starts_with("violencia16_sexual")),
  cols = everything(),
  names_to = "pregunta",
  values_to = "respuesta"
)
violencia_sex_long <- violencia_sex_long %>%
  drop_na(respuesta) %>%
  mutate(
    # Question number 1-5, i.e. the column name without its prefix.
    pregunta_num = as.numeric(
      sub("violencia16_sexual", "", pregunta, fixed = TRUE)
    ),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# 2. Answer counts per question and their share within the question.
resumen_sex <- violencia_sex_long %>%
  count(pregunta_num, respuesta, name = "total") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# 3. Dodged bar chart with a percentage label above each bar.
ggplot(
  resumen_sex,
  aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)
) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  ggtitle("Distribución de respuestas de violencia sexual por pregunta (2016)") +
  xlab("Pregunta (1-5)") +
  ylab("Porcentaje") +
  labs(fill = "Respuesta") +
  theme_minimal()

library(dplyr)
library(ggplot2)

# 1. Yes/no indicator: "Sí" = at least one sexual-violence item was
#    reported, "No" = none was.
#    The item columns are factors labelled "1"/"0" and rowSums() rejects
#    non-numeric input, so the labels are converted to numbers first
#    (via as.character() so the labels, not the internal codes, are used).
items_sex16 <- violec_especif16 %>%
  select(starts_with("violencia16_sexual")) %>%
  mutate(across(everything(), ~ as.numeric(as.character(.x))))

# na.rm = TRUE: a row whose items are all missing sums to 0 and is
# therefore counted as "No".
violec_especif16$viol16_sex_disc_total <- if_else(
  rowSums(items_sex16, na.rm = TRUE) > 0,
  "Sí",
  "No"
)

# 2. Totals and percentages per answer.
datos_plot_sex <- violec_especif16 %>%
  filter(!is.na(viol16_sex_disc_total)) %>%
  group_by(viol16_sex_disc_total) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1)
  )

# 3. Bar chart labelled with the absolute count and the percentage.
ggplot(datos_plot_sex, aes(x = viol16_sex_disc_total, y = porcentaje, fill = viol16_sex_disc_total)) +
  geom_col() +
  geom_text(aes(label = paste0(total, " (", porcentaje, "%)")),
            vjust = -0.5, size = 4) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia sexual (2016)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()

library(dplyr)
library(tidyr)
library(ggplot2)

# ==============================
# 1. Datos en formato largo por pregunta (Si/No)
# ==============================

# Convert the item columns of one type of violence to long format.
#
# data:          data frame holding the recoded item columns (values 0/1,
#                as numbers or as factor labels).
# prefix:        common column-name prefix of the items, e.g.
#                "violencia16_economica"; the rest of each name must be the
#                question number.
# num_preguntas: optional expected number of item columns. When given, a
#                warning is raised if a different number of columns matches
#                `prefix`; the result itself does not depend on it.
#
# Returns one row per non-missing answer with the columns `pregunta`
# (original column name), `respuesta` (factor "No"/"Sí") and `pregunta_num`
# (numeric question number).
long_format_violencia <- function(data, prefix, num_preguntas = NULL) {
  items <- data %>%
    select(starts_with(prefix))

  if (!is.null(num_preguntas) && ncol(items) != num_preguntas) {
    warning(
      "Expected ", num_preguntas, " columns starting with '", prefix,
      "' but found ", ncol(items), ".",
      call. = FALSE
    )
  }

  items %>%
    pivot_longer(
      cols = everything(),
      names_to = "pregunta",
      values_to = "respuesta"
    ) %>%
    filter(!is.na(respuesta)) %>%
    mutate(
      # fixed = TRUE: strip the prefix literally, the same way starts_with()
      # matched it, instead of interpreting it as a regular expression.
      pregunta_num = as.numeric(sub(prefix, "", pregunta, fixed = TRUE)),
      respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
    )
}

# Example: economic violence (2016), per question.
# Long format first, then the answer counts and their share per question.
violencia_eco_long <- long_format_violencia(
  violec_especif16,
  "violencia16_economica",
  7
)
resumen_eco <- violencia_eco_long %>%
  count(pregunta_num, respuesta, name = "total") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# Dodged bar chart of the economic-violence questions.
ggplot(
  resumen_eco,
  aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)
) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  ggtitle("Distribución de respuestas de violencia económica por pregunta (2016)") +
  xlab("Pregunta (1-7)") +
  ylab("Porcentaje") +
  labs(fill = "Respuesta") +
  theme_minimal()

# ==============================
# 2. Resumen Si/No: al menos una vez
# ==============================

# Summarise how many rows report at least one item of a type of violence.
#
# data:   data frame holding the recoded item columns (values 0/1, as
#         numbers or as factor labels).
# prefix: common column-name prefix of the items to combine.
#
# Returns a tibble with one row per answer: `disc_total` ("No"/"Sí"),
# `total` (row count) and `porcentaje` (share in percent, one decimal).
resumen_total <- function(data, prefix) {
  # rowSums() only accepts numeric input, while the item columns are factors
  # labelled "1"/"0"; convert the labels (not the internal codes) to numbers.
  items <- data %>%
    select(starts_with(prefix)) %>%
    mutate(across(everything(), ~ as.numeric(as.character(.x))))

  # na.rm = TRUE: rows whose items are all missing sum to 0 and count as "No".
  al_menos_una <- rowSums(items, na.rm = TRUE) > 0

  tibble(disc_total = if_else(al_menos_una, "Sí", "No")) %>%
    filter(!is.na(disc_total)) %>%
    group_by(disc_total) %>%
    summarise(total = n(), .groups = "drop") %>%
    mutate(porcentaje = round(100 * total / sum(total), 1))
}

# Example: economic violence (2016), at least one item reported vs. none.
datos_plot_eco <- violec_especif16 %>%
  resumen_total("violencia16_economica")

# Bar chart of the yes/no totals with count and percentage above each bar.
ggplot(
  datos_plot_eco,
  aes(x = disc_total, y = porcentaje, fill = disc_total)
) +
  geom_col() +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  ggtitle("Mujeres que han sufrido violencia económica (2016)") +
  xlab("") +
  ylab("Porcentaje") +
  labs(fill = "Respuesta") +
  theme_minimal()

library(dplyr)
library(ggplot2)
library(tidyr)

# ==============================
# 1. Yes/no totals for each type of violence (2016)
# ==============================
# Helper: "Sí" for rows with at least one item (columns starting with
# `prefijo`) equal to 1, "No" otherwise. The item columns are factors
# labelled "1"/"0"; rowSums() needs numeric input, so the labels are
# converted first. With na.rm = TRUE, rows whose items are all missing sum
# to 0 and count as "No".
si_no_por_tipo16 <- function(datos, prefijo) {
  items <- datos %>%
    select(starts_with(prefijo)) %>%
    mutate(across(everything(), ~ as.numeric(as.character(.x))))
  if_else(rowSums(items, na.rm = TRUE) > 0, "Sí", "No")
}

violec_especif16$fisica_total <-
  si_no_por_tipo16(violec_especif16, "violencia16_fisica")
violec_especif16$psicologica_total <-
  si_no_por_tipo16(violec_especif16, "violencia16_psicologica")
violec_especif16$sexual_total <-
  si_no_por_tipo16(violec_especif16, "violencia16_sexual")
violec_especif16$economica_total <-
  si_no_por_tipo16(violec_especif16, "violencia16_economica")

# ==============================
# 2. Long format for ggplot: counts and percentages per type and answer
# ==============================
resumen_violencia <- violec_especif16 %>%
  select(fisica_total, psicologica_total, sexual_total, economica_total) %>%
  pivot_longer(
    cols = everything(),
    names_to = "tipo_violencia",
    values_to = "respuesta"
  ) %>%
  group_by(tipo_violencia, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(tipo_violencia) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# ==============================
# 3. Comparison chart with custom axis labels
# ==============================
ggplot(resumen_violencia, aes(x = tipo_violencia, y = porcentaje, fill = respuesta)) +
  geom_col(position = position_dodge()) +   # bars side by side
  geom_text(aes(label = paste0(total, " (", porcentaje, "%)")),
            position = position_dodge(width = 0.9), vjust = -0.5, size = 3) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  scale_x_discrete(labels = c(
    fisica_total = "vio.fisica",
    psicologica_total = "vio.psicologica",
    sexual_total = "vio.sexual",
    economica_total = "vio.economica"
  )) +
  labs(
    title = "Mujeres que han experimentado diferentes tipos de violencia (2016)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()

 library(dplyr)
library(tidyr)
library(ggplot2)

# 1. Reshape the nine physical-violence items (2021) into long format:
#    one row per woman and question.
violencia_long21 <- violec_especif21 %>%
  select(starts_with("violencia21_fisica")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  # Drop unanswered items so they neither enter the percentages nor show up
  # as an extra NA bar (the other per-question sections do the same).
  filter(!is.na(respuesta)) %>%
  mutate(
    # Question number 1-9, taken from the column name.
    pregunta_num = as.numeric(gsub("violencia21_fisica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# 2. Count and percentage of each answer within every question.
resumen <- violencia_long21 %>%
  group_by(pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# 3. Bar chart with the two answers side by side for every question.
ggplot(resumen, aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)) +
  geom_col(position = position_dodge()) +
  geom_text(aes(label = paste0(porcentaje, "%")),
            position = position_dodge(width = 0.9), vjust = -0.25, size = 3) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Distribución de respuestas de violencia física por pregunta (2021)",
    x = "Pregunta (1-9)",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()

library(dplyr)
library(ggplot2)

# 1. Yes/no indicator: "Sí" = at least one physical-violence item was
#    reported, "No" = none was.
#    The item columns are factors labelled "1"/"0" and rowSums() rejects
#    non-numeric input, so the labels are converted to numbers first
#    (via as.character() so the labels, not the internal codes, are used).
items_fis21 <- violec_especif21 %>%
  select(starts_with("violencia21_fisica")) %>%
  mutate(across(everything(), ~ as.numeric(as.character(.x))))

# na.rm = TRUE: a row whose items are all missing sums to 0 and is
# therefore counted as "No".
violec_especif21$viol21_fis_disc <- if_else(
  rowSums(items_fis21, na.rm = TRUE) > 0,
  "Sí",
  "No"
)

# 2. Totals and percentages per answer.
datos_plot <- violec_especif21 %>%
  filter(!is.na(viol21_fis_disc)) %>%
  group_by(viol21_fis_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1)
  )

# 3. Bar chart labelled with the absolute count and the percentage.
ggplot(datos_plot, aes(x = viol21_fis_disc, y = porcentaje, fill = viol21_fis_disc)) +
  geom_col() +
  geom_text(aes(label = paste0(total, " (", porcentaje, "%)")),
            vjust = -0.5, size = 4) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia física (2021)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()

library(dplyr)
library(ggplot2)

# 1. Yes/no indicator: "Sí" = at least one psychological-violence item was
#    reported, "No" = none was.
#    The item columns are factors labelled "1"/"0" and rowSums() rejects
#    non-numeric input, so the labels are converted to numbers first
#    (via as.character() so the labels, not the internal codes, are used).
items_psi21 <- violec_especif21 %>%
  select(starts_with("violencia21_psicologica")) %>%
  mutate(across(everything(), ~ as.numeric(as.character(.x))))

# na.rm = TRUE: a row whose items are all missing sums to 0 and is
# therefore counted as "No".
violec_especif21$viol21_psi_disc <- if_else(
  rowSums(items_psi21, na.rm = TRUE) > 0,
  "Sí",
  "No"
)

# 2. Totals and percentages per answer.
datos_plot_psi21 <- violec_especif21 %>%
  filter(!is.na(viol21_psi_disc)) %>%
  group_by(viol21_psi_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1)
  )

# 3. Bar chart labelled with the absolute count and the percentage.
ggplot(datos_plot_psi21, aes(x = viol21_psi_disc, y = porcentaje, fill = viol21_psi_disc)) +
  geom_col() +
  geom_text(aes(label = paste0(total, " (", porcentaje, "%)")),
            vjust = -0.5, size = 4) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia psicológica (2021)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()

library(dplyr)
library(tidyr)
library(ggplot2)

# 1. Long format for the 16 psychological-violence items (2021): one row per
#    woman and question, unanswered items removed.
violencia_psi_long21 <- pivot_longer(
  select(violec_especif21, starts_with("violencia21_psicologica")),
  cols = everything(),
  names_to = "pregunta",
  values_to = "respuesta"
)
violencia_psi_long21 <- violencia_psi_long21 %>%
  drop_na(respuesta) %>%
  mutate(
    # Question number 1-16, i.e. the column name without its prefix.
    pregunta_num = as.numeric(
      sub("violencia21_psicologica", "", pregunta, fixed = TRUE)
    ),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# 2. Answer counts per question and their share within the question.
resumen_psi <- violencia_psi_long21 %>%
  count(pregunta_num, respuesta, name = "total") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# 3. Dodged bar chart with a percentage label above each bar.
ggplot(
  resumen_psi,
  aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)
) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  ggtitle("Distribución de respuestas de violencia psicológica por pregunta (2021)") +
  xlab("Pregunta") +
  ylab("Porcentaje") +
  labs(fill = "Respuesta") +
  theme_minimal()

library(dplyr)
library(tidyr)
library(ggplot2)

# 1. Long format for the 6 sexual-violence items (2021,
#    violencia21_sexual1..6): one row per woman and question.
violencia_sex_long21 <- violec_especif21 %>%
  select(starts_with("violencia21_sexual")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta)) %>%  # drop unanswered items
  mutate(
    # Question number 1-6, taken from the column name.
    pregunta_num = as.numeric(gsub("violencia21_sexual", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# 2. Count and percentage of each answer within every question.
resumen_sex <- violencia_sex_long21 %>%
  group_by(pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# 3. Bar chart with the two answers side by side for every question.
#    The axis label states 1-6 because the 2021 data has six sexual-violence
#    items (one more than 2016).
ggplot(resumen_sex, aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)) +
  geom_col(position = position_dodge()) +
  geom_text(aes(label = paste0(porcentaje, "%")),
            position = position_dodge(width = 0.9), vjust = -0.25, size = 3) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Distribución de respuestas de violencia sexual por pregunta (2021)",
    x = "Pregunta (1-6)",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()

library(dplyr)
library(ggplot2)

# 1. Yes/no indicator: "Sí" = at least one sexual-violence item was
#    reported, "No" = none was.
#    The item columns are factors labelled "1"/"0" and rowSums() rejects
#    non-numeric input, so the labels are converted to numbers first
#    (via as.character() so the labels, not the internal codes, are used).
items_sex21 <- violec_especif21 %>%
  select(starts_with("violencia21_sexual")) %>%
  mutate(across(everything(), ~ as.numeric(as.character(.x))))

# na.rm = TRUE: a row whose items are all missing sums to 0 and is
# therefore counted as "No".
violec_especif21$viol21_sex_disc_total <- if_else(
  rowSums(items_sex21, na.rm = TRUE) > 0,
  "Sí",
  "No"
)

# 2. Totals and percentages per answer.
datos_plot_sex21 <- violec_especif21 %>%
  filter(!is.na(viol21_sex_disc_total)) %>%
  group_by(viol21_sex_disc_total) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1)
  )

# 3. Bar chart labelled with the absolute count and the percentage.
ggplot(datos_plot_sex21, aes(x = viol21_sex_disc_total, y = porcentaje, fill = viol21_sex_disc_total)) +
  geom_col() +
  geom_text(aes(label = paste0(total, " (", porcentaje, "%)")),
            vjust = -0.5, size = 4) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia sexual (2021)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()

library(dplyr)
library(tidyr)
library(ggplot2)

# ==============================
# 1. Long format for the 7 economic-violence items (2021): one row per
#    woman and question, unanswered items removed
# ==============================
violencia_eco_long21 <- pivot_longer(
  select(violec_especif21, starts_with("violencia21_economica")),
  cols = everything(),
  names_to = "pregunta",
  values_to = "respuesta"
)
violencia_eco_long21 <- violencia_eco_long21 %>%
  drop_na(respuesta) %>%
  mutate(
    # Question number 1-7, i.e. the column name without its prefix.
    pregunta_num = as.numeric(
      sub("violencia21_economica", "", pregunta, fixed = TRUE)
    ),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# ==============================
# 2. Answer counts per question and their share within the question
# ==============================
resumen_eco21 <- violencia_eco_long21 %>%
  count(pregunta_num, respuesta, name = "total") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# ==============================
# 3. Dodged bar chart per question
# ==============================
ggplot(
  resumen_eco21,
  aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)
) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  ggtitle("Distribución de respuestas de violencia económica 2021 por pregunta") +
  xlab("Pregunta (1-7)") +
  ylab("Porcentaje") +
  labs(fill = "Respuesta") +
  theme_minimal()

library(dplyr)
library(ggplot2)

# ==============================
# 1. Overall yes/no flag: "Sí" when at least one economic-violence item is positive
# ==============================
# NOTE(review): because of na.rm = TRUE, a respondent whose items are all missing
# gets a row sum of 0 and is labelled "No" rather than NA — confirm this is intended.
violec_especif21$viol21_eco_total_disc <- if_else(
  rowSums(
    select(violec_especif21, starts_with("violencia21_economica")),
    na.rm = TRUE
  ) > 0,
  "Sí",
  "No"
)

# ==============================
# 2. Counts and percentages of the yes/no flag
# ==============================
# The NA filter is kept as a safeguard; as the flag is built above it never
# contains NA, so no rows are dropped here.
datos_plot_eco21 <- violec_especif21 %>%
  filter(!is.na(viol21_eco_total_disc)) %>%
  group_by(viol21_eco_total_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1)
  )

# ==============================
# 3. Simple bar chart of the yes/no flag
# ==============================
ggplot(
  data = datos_plot_eco21,
  mapping = aes(
    x = viol21_eco_total_disc,
    y = porcentaje,
    fill = viol21_eco_total_disc
  )
) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste0(total, " (", porcentaje, "%)")),
    size = 4,
    vjust = -0.5
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    fill = "Respuesta",
    y = "Porcentaje",
    x = "",
    title = "Mujeres que han sufrido violencia económica (2021)"
  )

library(dplyr)
library(tidyr)
library(ggplot2)

# ==============================
# 1. Yes/no totals per violence type (2021):
#    "Sí" when any item of that type is positive
# ==============================
# NOTE(review): na.rm = TRUE means respondents with every item missing count as "No".
violec_especif21$fisica_total <- if_else(
  rowSums(select(violec_especif21, starts_with("violencia21_fisica")), na.rm = TRUE) > 0,
  "Sí", "No"
)
violec_especif21$psicologica_total <- if_else(
  rowSums(select(violec_especif21, starts_with("violencia21_psicologica")), na.rm = TRUE) > 0,
  "Sí", "No"
)
violec_especif21$sexual_total <- if_else(
  rowSums(select(violec_especif21, starts_with("violencia21_sexual")), na.rm = TRUE) > 0,
  "Sí", "No"
)
violec_especif21$economica_total <- if_else(
  rowSums(select(violec_especif21, starts_with("violencia21_economica")), na.rm = TRUE) > 0,
  "Sí", "No"
)

# ==============================
# 2. Long format for ggplot: counts and within-type percentages
# ==============================
# The result stays grouped by tipo_violencia.
resumen_violencia21 <- violec_especif21 %>%
  select(fisica_total, psicologica_total, sexual_total, economica_total) %>%
  pivot_longer(
    cols = everything(),
    values_to = "respuesta",
    names_to = "tipo_violencia"
  ) %>%
  count(tipo_violencia, respuesta, name = "total") %>%
  group_by(tipo_violencia) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# ==============================
# 3. Comparative chart across the four violence types
# ==============================
ggplot(
  data = resumen_violencia21,
  mapping = aes(x = tipo_violencia, y = porcentaje, fill = respuesta)
) +
  geom_col(position = position_dodge()) +
  geom_text(
    mapping = aes(label = paste0(total, " (", porcentaje, "%)")),
    position = position_dodge(width = 0.9),
    size = 3,
    vjust = -0.5
  ) +
  theme_minimal() +
  # Shorter axis labels in place of the raw column names.
  scale_x_discrete(
    labels = c(
      fisica_total = "vio.fisica",
      psicologica_total = "vio.psicologica",
      sexual_total = "vio.sexual",
      economica_total = "vio.economica"
    )
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    fill = "Respuesta",
    y = "Porcentaje",
    x = "",
    title = "Mujeres que han experimentado diferentes tipos de violencia (2021)"
  )

library(dplyr)
library(tidyr)
library(ggplot2)

# ==============================
# 1. Yes/no totals per violence type (2016)
# ==============================
# A type is "Sí" when the row sum of its items is above zero.
# NOTE(review): na.rm = TRUE means respondents with every item missing count as "No".
violec_especif16$fisica_total <- if_else(
  rowSums(select(violec_especif16, starts_with("violencia16_fisica")), na.rm = TRUE) > 0,
  "Sí", "No"
)
violec_especif16$psicologica_total <- if_else(
  rowSums(select(violec_especif16, starts_with("violencia16_psicologica")), na.rm = TRUE) > 0,
  "Sí", "No"
)
violec_especif16$sexual_total <- if_else(
  rowSums(select(violec_especif16, starts_with("violencia16_sexual")), na.rm = TRUE) > 0,
  "Sí", "No"
)
violec_especif16$economica_total <- if_else(
  rowSums(select(violec_especif16, starts_with("violencia16_economica")), na.rm = TRUE) > 0,
  "Sí", "No"
)

# ==============================
# 2. Yes/no totals per violence type (2021)
# ==============================
violec_especif21$fisica_total <- if_else(
  rowSums(select(violec_especif21, starts_with("violencia21_fisica")), na.rm = TRUE) > 0,
  "Sí", "No"
)
violec_especif21$psicologica_total <- if_else(
  rowSums(select(violec_especif21, starts_with("violencia21_psicologica")), na.rm = TRUE) > 0,
  "Sí", "No"
)
violec_especif21$sexual_total <- if_else(
  rowSums(select(violec_especif21, starts_with("violencia21_sexual")), na.rm = TRUE) > 0,
  "Sí", "No"
)
violec_especif21$economica_total <- if_else(
  rowSums(select(violec_especif21, starts_with("violencia21_economica")), na.rm = TRUE) > 0,
  "Sí", "No"
)

# ==============================
# 3. "Sí"-only summary for one survey year
# ==============================
# Counts the "Sí" answers per violence type.
#
# data: data frame holding the four *_total yes/no columns.
# year: value stored in the `year` column of the result.
# Returns one row per violence type that has at least one "Sí", with columns
# tipo_violencia (factor in a fixed display order), total and year.
resumen_si <- function(data, year) {
  orden_tipos <- c("fisica_total", "psicologica_total", "sexual_total", "economica_total")

  solo_si <- data %>%
    select(fisica_total, psicologica_total, sexual_total, economica_total) %>%
    pivot_longer(cols = everything(), names_to = "tipo_violencia", values_to = "respuesta") %>%
    filter(respuesta == "Sí")

  conteo <- count(solo_si, tipo_violencia, name = "total")

  mutate(
    conteo,
    year = year,
    tipo_violencia = factor(tipo_violencia, levels = orden_tipos)
  )
}

resumen_16 <- resumen_si(violec_especif16, "2016")
resumen_21 <- resumen_si(violec_especif21, "2021")

# ==============================
# 4. Comparative data ("Sí" only): shares and bar labels
# ==============================
# porcentaje is each type's share of all "Sí" counts within the same year (the
# types of one year add up to 100); it is not the share of surveyed women.
resumen_comparativo <- bind_rows(resumen_16, resumen_21) %>%
  group_by(year) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup() %>%
  # porcentaje is already numeric, so no text clean-up is needed; the label now
  # carries the percent sign like the other count/percent labels in this script.
  mutate(label = paste0(total, " (", porcentaje, "%)"))



# Dodged bars of the absolute "Sí" counts per violence type, one bar per year.
ggplot(
  data = resumen_comparativo,
  mapping = aes(x = tipo_violencia, y = total, fill = year)
) +
  geom_col(position = position_dodge(width = 0.9)) +
  geom_text(
    mapping = aes(label = label),
    size = 3,
    vjust = -0.4,
    position = position_dodge(width = 0.9)
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  # Readable axis labels in place of the raw column names.
  scale_x_discrete(
    labels = c(
      fisica_total = "vio. física",
      psicologica_total = "vio. psicológica",
      sexual_total = "vio. sexual",
      economica_total = "vio. económica"
    )
  ) +
  labs(
    fill = "Año",
    y = "Número de mujeres",
    x = "",
    title = "Comparación de mujeres que han sufrido violencia (solo Sí) entre 2016 y 2021"
  )

library(dplyr)
library(tidyr)
library(ggplot2)

# ==============================
# 1. Yes/no totals per violence type (2016, ad_violec_16)
# ==============================
# A type is "Sí" when the row sum of its items is above zero.
# NOTE(review): na.rm = TRUE means respondents with every item missing count as "No".
ad_violec_16$fisica_total <- if_else(
  rowSums(select(ad_violec_16, starts_with("violencia16_fisica")), na.rm = TRUE) > 0,
  "Sí", "No"
)
ad_violec_16$psicologica_total <- if_else(
  rowSums(select(ad_violec_16, starts_with("violencia16_psicologica")), na.rm = TRUE) > 0,
  "Sí", "No"
)
ad_violec_16$sexual_total <- if_else(
  rowSums(select(ad_violec_16, starts_with("violencia16_sexual")), na.rm = TRUE) > 0,
  "Sí", "No"
)
ad_violec_16$economica_total <- if_else(
  rowSums(select(ad_violec_16, starts_with("violencia16_economica")), na.rm = TRUE) > 0,
  "Sí", "No"
)

# ==============================
# 2. Yes/no totals per violence type (2021, ad_violec_21)
# ==============================
ad_violec_21$fisica_total <- if_else(
  rowSums(select(ad_violec_21, starts_with("violencia21_fisica")), na.rm = TRUE) > 0,
  "Sí", "No"
)
ad_violec_21$psicologica_total <- if_else(
  rowSums(select(ad_violec_21, starts_with("violencia21_psicologica")), na.rm = TRUE) > 0,
  "Sí", "No"
)
ad_violec_21$sexual_total <- if_else(
  rowSums(select(ad_violec_21, starts_with("violencia21_sexual")), na.rm = TRUE) > 0,
  "Sí", "No"
)
ad_violec_21$economica_total <- if_else(
  rowSums(select(ad_violec_21, starts_with("violencia21_economica")), na.rm = TRUE) > 0,
  "Sí", "No"
)

# ==============================
# 3. "Sí"-only summary for one survey year
# ==============================
# Counts the "Sí" answers per violence type.
#
# data: data frame holding the four *_total yes/no columns.
# year: value stored in the `year` column of the result.
# Returns one row per violence type that has at least one "Sí", with columns
# tipo_violencia (factor in a fixed display order), total and year.
resumen_si <- function(data, year) {
  orden_tipos <- c("fisica_total", "psicologica_total", "sexual_total", "economica_total")

  solo_si <- data %>%
    select(fisica_total, psicologica_total, sexual_total, economica_total) %>%
    pivot_longer(cols = everything(), names_to = "tipo_violencia", values_to = "respuesta") %>%
    filter(respuesta == "Sí")

  conteo <- count(solo_si, tipo_violencia, name = "total")

  mutate(
    conteo,
    year = year,
    tipo_violencia = factor(tipo_violencia, levels = orden_tipos)
  )
}

resumen_16 <- resumen_si(ad_violec_16, "2016")
resumen_21 <- resumen_si(ad_violec_21, "2021")

# ==============================
# 4. Comparative data ("Sí" only): shares and bar labels
# ==============================
# porcentaje is each type's share of all "Sí" counts within the same year (the
# types of one year add up to 100); it is not the share of surveyed women.
resumen_comparativo <- bind_rows(resumen_16, resumen_21) %>%
  group_by(year) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup() %>%
  # porcentaje is already numeric, so no text clean-up is needed; the label now
  # carries the percent sign like the other count/percent labels in this script.
  mutate(label = paste0(total, " (", porcentaje, "%)"))



# Dodged bars of the absolute "Sí" counts per violence type, one bar per year.
ggplot(
  data = resumen_comparativo,
  mapping = aes(x = tipo_violencia, y = total, fill = year)
) +
  geom_col(position = position_dodge(width = 0.9)) +
  geom_text(
    mapping = aes(label = label),
    size = 3,
    vjust = -0.4,
    position = position_dodge(width = 0.9)
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  # Readable axis labels in place of the raw column names.
  scale_x_discrete(
    labels = c(
      fisica_total = "vio. física",
      psicologica_total = "vio. psicológica",
      sexual_total = "vio. sexual",
      economica_total = "vio. económica"
    )
  ) +
  labs(
    fill = "Año",
    y = "Número de mujeres",
    x = "",
    title = "Comparación de mujeres que han sufrido violencia (15-18) entre 2016 y 2021"
  )

library(dplyr)
library(tidyr)
library(ggplot2)

### --- Prepare 2016 ---
# Long format: one row per respondent and physical-violence question.
violencia_long16 <- violec_especif16 %>%
  select(starts_with("violencia16_fisica")) %>%
  pivot_longer(cols = everything(), values_to = "respuesta", names_to = "pregunta") %>%
  mutate(
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    # The question number is whatever follows the shared column prefix.
    pregunta_num = as.numeric(gsub("violencia16_fisica", "", pregunta)),
    anio = 2016
  )

# Share of "Sí" per question. Rows with a missing answer are not removed
# beforehand, so they are part of each question's denominator.
# NOTE(review): the economic-violence section of this script drops missing
# answers first — confirm which denominator is intended.
resumen16 <- violencia_long16 %>%
  count(pregunta_num, respuesta, anio, name = "total") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

### --- Prepare 2021 ---
violencia_long21 <- violec_especif21 %>%
  select(starts_with("violencia21_fisica")) %>%
  pivot_longer(cols = everything(), values_to = "respuesta", names_to = "pregunta") %>%
  mutate(
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    pregunta_num = as.numeric(gsub("violencia21_fisica", "", pregunta)),
    anio = 2021
  )

# Same summary as for 2016 (missing answers included in the denominator).
resumen21 <- violencia_long21 %>%
  count(pregunta_num, respuesta, anio, name = "total") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

### --- Combine both years ---
resumen_total <- bind_rows(resumen16, resumen21)

### --- Bar chart: percentage of "Sí" per question, years side by side ---
ggplot(
  data = resumen_total,
  mapping = aes(x = factor(pregunta_num), y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge()) +
  geom_text(
    mapping = aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    size = 3,
    vjust = -0.25
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    fill = "Año",
    y = "Porcentaje",
    x = "Pregunta (1-9)",
    title = "Distribución de respuestas 'Sí' de violencia física (2016 vs 2021)"
  )

library(dplyr)
library(tidyr)
library(ggplot2)

### --- Prepare 2016 (ad_violec_16) ---
# Long format: one row per respondent and physical-violence question.
violencia_long16 <- ad_violec_16 %>%
  select(starts_with("violencia16_fisica")) %>%
  pivot_longer(cols = everything(), values_to = "respuesta", names_to = "pregunta") %>%
  mutate(
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    # The question number is whatever follows the shared column prefix.
    pregunta_num = as.numeric(gsub("violencia16_fisica", "", pregunta)),
    anio = 2016
  )

# Share of "Sí" per question. Rows with a missing answer are not removed
# beforehand, so they are part of each question's denominator.
# NOTE(review): confirm that missing answers should count in the denominator.
resumen16 <- violencia_long16 %>%
  count(pregunta_num, respuesta, anio, name = "total") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

### --- Prepare 2021 (ad_violec_21) ---
violencia_long21 <- ad_violec_21 %>%
  select(starts_with("violencia21_fisica")) %>%
  pivot_longer(cols = everything(), values_to = "respuesta", names_to = "pregunta") %>%
  mutate(
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    pregunta_num = as.numeric(gsub("violencia21_fisica", "", pregunta)),
    anio = 2021
  )

# Same summary as for 2016 (missing answers included in the denominator).
resumen21 <- violencia_long21 %>%
  count(pregunta_num, respuesta, anio, name = "total") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

### --- Combine both years ---
resumen_total <- bind_rows(resumen16, resumen21)

### --- Bar chart: percentage of "Sí" per question, years side by side ---
ggplot(
  data = resumen_total,
  mapping = aes(x = factor(pregunta_num), y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge()) +
  geom_text(
    mapping = aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    size = 3,
    vjust = -0.25
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    fill = "Año",
    y = "Porcentaje",
    x = "Pregunta (1-9)",
    title = "Distribución de respuestas 'Sí' de violencia física (2016 vs 2021) (15-18)"
  )

library(dplyr)
library(ggplot2)

# ==============================
# 2016: dichotomous physical-violence flag
# ==============================
# "Sí" when at least one physical-violence item is positive.
# NOTE(review): with na.rm = TRUE, respondents with every item missing become "No".
violec_especif16 <- violec_especif16 %>%
  mutate(
    viol_fis_disc = if_else(rowSums(select(., starts_with("violencia16_fisica")), na.rm = TRUE) > 0, "Sí", "No")
  ) %>%
  mutate(anio = 2016)

# ==============================
# 2021: dichotomous physical-violence flag
# ==============================
violec_especif21 <- violec_especif21 %>%
  mutate(
    viol_fis_disc = if_else(rowSums(select(., starts_with("violencia21_fisica")), na.rm = TRUE) > 0, "Sí", "No")
  ) %>%
  mutate(anio = 2021)

# ==============================
# Both years stacked
# ==============================
datos_total <- bind_rows(
  violec_especif16 %>% select(viol_fis_disc, anio),
  violec_especif21 %>% select(viol_fis_disc, anio)
)

# ==============================
# Counts and within-year percentages
# ==============================
# The result stays grouped by anio.
datos_plot <- datos_total %>%
  filter(!is.na(viol_fis_disc)) %>%
  group_by(anio, viol_fis_disc) %>%
  tally(name = "total") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# ==============================
# Comparative bar chart (years side by side)
# ==============================
ggplot(
  data = datos_plot,
  mapping = aes(x = viol_fis_disc, y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    mapping = aes(label = paste0(total, " (", porcentaje, "%)")),
    position = position_dodge(width = 0.8),
    size = 4,
    vjust = -0.5
  ) +
  theme_minimal() +
  scale_fill_manual(
    name = "Año",
    values = c("2016" = "lightblue", "2021" = "salmon")
  ) +
  labs(
    y = "Porcentaje",
    x = "",
    title = "Mujeres que han sufrido violencia física (2016 vs 2021)"
  )

# Adds an age group, a yes/no physical-violence flag and the survey year (2016).
# NOTE(review): as.numeric() returns level codes if EDAD is a factor — confirm
# EDAD is character or numeric at this point.
data_merged16 <- data_merged16 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    # Ages outside every bracket (including missing ages) get NA.
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),
    # "Sí" when at least one physical-violence item is positive.
    viol_fis_disc = if_else(
      rowSums(select(., starts_with("violencia16_fisica")), na.rm = TRUE) > 0,
      "Sí",
      "No"
    ),
    anio = "2016"
  )

# Same derivation for 2021.
data_merged21 <- data_merged21 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),
    viol_fis_disc = if_else(
      rowSums(select(., starts_with("violencia21_fisica")), na.rm = TRUE) > 0,
      "Sí",
      "No"
    ),
    anio = "2021"
  )


# Stack both years, keeping only the columns needed for the chart.
datos_total <- bind_rows(
  data_merged16 %>% select(grupo_edad, viol_fis_disc, anio),
  data_merged21 %>% select(grupo_edad, viol_fis_disc, anio)
)

# Counts and percentages of Sí/No within each year and age group.
# The result stays grouped by anio and grupo_edad.
datos_plot <- datos_total %>%
  filter(!is.na(grupo_edad), !is.na(viol_fis_disc)) %>%
  group_by(anio, grupo_edad, viol_fis_disc) %>%
  tally(name = "total") %>%
  group_by(anio, grupo_edad) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# Fix the display order of the age groups.
datos_plot$grupo_edad <- factor(
  datos_plot$grupo_edad,
  levels = c("15-18", "19-25", "26-35", "36-49", "50-64", "65-80+")
)

# Stacked bars scaled to 1 per age group, one panel per year.
ggplot(
  data = datos_plot,
  mapping = aes(x = grupo_edad, y = porcentaje, fill = viol_fis_disc)
) +
  geom_col(position = "fill") +
  geom_text(
    mapping = aes(label = paste0(round(porcentaje, 1), "%")),
    position = position_fill(vjust = 0.5),
    size = 3,
    color = "white"
  ) +
  facet_wrap(~anio) +
  theme_minimal() +
  scale_fill_manual(values = c("No" = "lightblue", "Sí" = "salmon")) +
  labs(
    fill = "Violencia",
    y = "Proporción",
    x = "Grupo de edad",
    title = "Violencia física por grupo de edad (2016 vs 2021)"
  )

library(dplyr)
library(ggplot2)

# ==============================
# 2016: dichotomous physical-violence flag (ad_violec_16)
# ==============================
# "Sí" when at least one physical-violence item is positive.
# NOTE(review): with na.rm = TRUE, respondents with every item missing become "No".
ad_violec_16 <- ad_violec_16 %>%
  mutate(
    viol_fis_disc = if_else(rowSums(select(., starts_with("violencia16_fisica")), na.rm = TRUE) > 0, "Sí", "No")
  ) %>%
  mutate(anio = 2016)

# ==============================
# 2021: dichotomous physical-violence flag (ad_violec_21)
# ==============================
ad_violec_21 <- ad_violec_21 %>%
  mutate(
    viol_fis_disc = if_else(rowSums(select(., starts_with("violencia21_fisica")), na.rm = TRUE) > 0, "Sí", "No")
  ) %>%
  mutate(anio = 2021)

# ==============================
# Both years stacked
# ==============================
datos_total <- bind_rows(
  ad_violec_16 %>% select(viol_fis_disc, anio),
  ad_violec_21 %>% select(viol_fis_disc, anio)
)

# ==============================
# Counts and within-year percentages
# ==============================
# The result stays grouped by anio.
datos_plot <- datos_total %>%
  filter(!is.na(viol_fis_disc)) %>%
  group_by(anio, viol_fis_disc) %>%
  tally(name = "total") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# ==============================
# Comparative bar chart (years side by side)
# ==============================
ggplot(
  data = datos_plot,
  mapping = aes(x = viol_fis_disc, y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    mapping = aes(label = paste0(total, " (", porcentaje, "%)")),
    position = position_dodge(width = 0.8),
    size = 4,
    vjust = -0.5
  ) +
  theme_minimal() +
  scale_fill_manual(
    name = "Año",
    values = c("2016" = "lightblue", "2021" = "salmon")
  ) +
  labs(
    y = "Porcentaje",
    x = "",
    title = "Mujeres que han sufrido violencia física (2016 vs 2021) (15-18)"
  )

# Rebuild grupo_edad with separate "65-80" and "80+" brackets. This overwrites
# any grupo_edad column already present in the data.
# NOTE(review): as.numeric() returns level codes if EDAD is a factor — confirm
# EDAD is character or numeric at this point.
data_merged16 <- data_merged16 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    # Ages outside every bracket (including missing ages) get NA.
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      between(EDAD, 65, 80) ~ "65-80",
      EDAD > 80 ~ "80+",
      TRUE ~ NA_character_
    )
  )

# Same brackets for 2021.
data_merged21 <- data_merged21 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      between(EDAD, 65, 80) ~ "65-80",
      EDAD > 80 ~ "80+",
      TRUE ~ NA_character_
    )
  )

# Long format per age group: one row per respondent and physical-violence question.
violencia_long16 <- data_merged16 %>%
  select(grupo_edad, starts_with("violencia16_fisica")) %>%
  # Every selected column except grupo_edad is an item column.
  pivot_longer(cols = -grupo_edad, values_to = "respuesta", names_to = "pregunta") %>%
  mutate(
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    pregunta_num = as.numeric(gsub("violencia16_fisica", "", pregunta))
  )

violencia_long21 <- data_merged21 %>%
  select(grupo_edad, starts_with("violencia21_fisica")) %>%
  pivot_longer(cols = -grupo_edad, values_to = "respuesta", names_to = "pregunta") %>%
  mutate(
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    pregunta_num = as.numeric(gsub("violencia21_fisica", "", pregunta))
  )

# Unrounded share of "Sí" per age group and question. Rows with a missing age
# group or a missing answer are not removed beforehand.
# NOTE(review): confirm that missing answers should count in the denominator
# and that an NA age group should appear in the charts.
resumen16 <- violencia_long16 %>%
  count(grupo_edad, pregunta_num, respuesta, name = "total") %>%
  group_by(grupo_edad, pregunta_num) %>%
  mutate(porcentaje = 100 * total / sum(total)) %>%
  filter(respuesta == "Sí")

resumen21 <- violencia_long21 %>%
  count(grupo_edad, pregunta_num, respuesta, name = "total") %>%
  group_by(grupo_edad, pregunta_num) %>%
  mutate(porcentaje = 100 * total / sum(total)) %>%
  filter(respuesta == "Sí")
# Percentage of "Sí" per question, one bar per age group (2016).
ggplot(
  data = resumen16,
  mapping = aes(x = factor(pregunta_num), y = porcentaje, fill = grupo_edad)
) +
  geom_col(position = position_dodge()) +
  theme_minimal() +
  labs(
    fill = "Grupo de edad",
    y = "Porcentaje",
    x = "Pregunta",
    title = "Violencia física por grupo de edad (2016)"
  )

# Same chart for 2021.
ggplot(
  data = resumen21,
  mapping = aes(x = factor(pregunta_num), y = porcentaje, fill = grupo_edad)
) +
  geom_col(position = position_dodge()) +
  theme_minimal() +
  labs(
    fill = "Grupo de edad",
    y = "Porcentaje",
    x = "Pregunta",
    title = "Violencia física por grupo de edad (2021)"
  )

library(dplyr)
library(tidyr)
library(ggplot2)

# ==============================
# 1. Long data per question (psychological violence)
# ==============================
long_psi16 <- violec_especif16 %>%
  select(starts_with("violencia16_psicologica")) %>%
  pivot_longer(cols = everything(), values_to = "respuesta", names_to = "pregunta") %>%
  mutate(
    # The question number is whatever follows the shared column prefix.
    pregunta_num = as.numeric(gsub("violencia16_psicologica", "", pregunta)),
    anio = 2016
  )

long_psi21 <- violec_especif21 %>%
  select(starts_with("violencia21_psicologica")) %>%
  pivot_longer(cols = everything(), values_to = "respuesta", names_to = "pregunta") %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia21_psicologica", "", pregunta)),
    anio = 2021
  )

# ==============================
# 2. "Sí" answers only, then 3. manual bar positions
# ==============================
# porcentaje is each question's share of all "Sí" answers within the same year.
# Bars are placed by hand: every question occupies a slot 3 units wide and the
# two years sit 0.4 to the left (2016) and right (2021) of the slot centre.
resumen_psi_total <- bind_rows(long_psi16, long_psi21) %>%
  filter(respuesta == 1) %>%   # 1 = Sí
  count(anio, pregunta_num, name = "total") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup() %>%
  mutate(
    pregunta_scaled = pregunta_num * 3,
    offset = ifelse(anio == 2016, -0.4, 0.4),
    xpos = pregunta_scaled + offset
  )

# ==============================
# 4. Chart
# ==============================
# Axis ticks come from the questions actually present in the data: the 2021
# survey has a 16th item, which fixed 1:15 breaks left without an axis label.
preguntas_psi <- sort(unique(resumen_psi_total$pregunta_num))

ggplot(resumen_psi_total, aes(x = xpos, y = porcentaje, fill = factor(anio))) +
  geom_col(width = 0.8) +
  scale_x_continuous(
    # Slot centres are at pregunta_num * 3 (see how xpos is built).
    breaks = preguntas_psi * 3,
    labels = preguntas_psi,
    expand = expansion(mult = c(0.02, 0.02))
  ) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia psicológica (2016 vs 2021)",
    # The range in the axis title follows the data instead of a fixed "1-15".
    x = paste0("Pregunta (", min(preguntas_psi), "-", max(preguntas_psi), ")"),
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal()

library(dplyr)
library(tidyr)
library(knitr)
library(kableExtra)

# 1. Combine percentage and absolute count into one text cell, then spread the
#    years into columns (one row per question).
tabla_valores <- resumen_psi_total %>%
  transmute(
    pregunta_num,
    anio,
    Porcentaje_Total = paste0(porcentaje, "% (", total, ")")
  ) %>%
  pivot_wider(
    values_from = Porcentaje_Total,
    names_from = anio,
    names_prefix = "Año_"
  ) %>%
  arrange(pregunta_num)

# 2. Show the table
kable_styling(
  kable(
    tabla_valores,
    col.names = c("Pregunta", "2016", "2021"),
    align = "c",
    caption = "Respuestas 'Sí' de violencia psicológica (2016 vs 2021)"
  ),
  full_width = FALSE,
  position = "center"
)
Respuestas ‘Sí’ de violencia psicológica (2016 vs 2021)
Pregunta 2016 2021
1 11.5% (16884) 11% (15193)
2 10.6% (15585) 10.9% (15147)
3 9.1% (13353) 10% (13852)
4 8% (11833) 8.1% (11269)
5 8.6% (12656) 8% (11155)
6 3.3% (4909) 3.5% (4879)
7 3.9% (5675) 4% (5600)
8 5.5% (8153) 6.7% (9349)
9 1.1% (1672) 1.1% (1512)
10 2.5% (3648) 2.5% (3412)
11 4.1% (6007) 3.9% (5474)
12 16.9% (24838) 15.4% (21289)
13 4.7% (6952) 5.1% (7006)
14 3% (4484) 3.5% (4799)
15 7.3% (10701) 6.1% (8419)
16 NA 0.2% (267)
library(dplyr)
library(tidyr)
library(ggplot2)

# ==============================
# 1. Long data per question (psychological violence, ad_violec_* data)
# ==============================
long_psi16 <- ad_violec_16 %>%
  select(starts_with("violencia16_psicologica")) %>%
  pivot_longer(cols = everything(), values_to = "respuesta", names_to = "pregunta") %>%
  mutate(
    # The question number is whatever follows the shared column prefix.
    pregunta_num = as.numeric(gsub("violencia16_psicologica", "", pregunta)),
    anio = 2016
  )

long_psi21 <- ad_violec_21 %>%
  select(starts_with("violencia21_psicologica")) %>%
  pivot_longer(cols = everything(), values_to = "respuesta", names_to = "pregunta") %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia21_psicologica", "", pregunta)),
    anio = 2021
  )

# ==============================
# 2. "Sí" answers only, then 3. manual bar positions
# ==============================
# porcentaje is each question's share of all "Sí" answers within the same year.
# Bars are placed by hand: every question occupies a slot 3 units wide and the
# two years sit 0.4 to the left (2016) and right (2021) of the slot centre.
resumen_psi_total <- bind_rows(long_psi16, long_psi21) %>%
  filter(respuesta == 1) %>%   # 1 = Sí
  count(anio, pregunta_num, name = "total") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup() %>%
  mutate(
    pregunta_scaled = pregunta_num * 3,
    offset = ifelse(anio == 2016, -0.4, 0.4),
    xpos = pregunta_scaled + offset
  )

# ==============================
# 4. Chart
# ==============================
# Axis ticks come from the questions actually present in the data: the 2021
# survey has a 16th item, which fixed 1:15 breaks left without an axis label.
preguntas_psi <- sort(unique(resumen_psi_total$pregunta_num))

ggplot(resumen_psi_total, aes(x = xpos, y = porcentaje, fill = factor(anio))) +
  geom_col(width = 0.8) +
  scale_x_continuous(
    # Slot centres are at pregunta_num * 3 (see how xpos is built).
    breaks = preguntas_psi * 3,
    labels = preguntas_psi,
    expand = expansion(mult = c(0.02, 0.02))
  ) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia psicológica (2016 vs 2021) (15-18)",
    # The range in the axis title follows the data instead of a fixed "1-15".
    x = paste0("Pregunta (", min(preguntas_psi), "-", max(preguntas_psi), ")"),
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal()

library(dplyr)
library(tidyr)
library(knitr)
library(kableExtra)

# 1. Combine percentage and absolute count into one text cell, then spread the
#    years into columns (one row per question).
tabla_valores <- resumen_psi_total %>%
  transmute(
    pregunta_num,
    anio,
    Porcentaje_Total = paste0(porcentaje, "% (", total, ")")
  ) %>%
  pivot_wider(
    values_from = Porcentaje_Total,
    names_from = anio,
    names_prefix = "Año_"
  ) %>%
  arrange(pregunta_num)

# 2. Show the table
kable_styling(
  kable(
    tabla_valores,
    col.names = c("Pregunta", "2016", "2021"),
    align = "c",
    caption = "Respuestas 'Sí' de violencia psicológica (2016 vs 2021)"
  ),
  full_width = FALSE,
  position = "center"
)
Respuestas ‘Sí’ de violencia psicológica (2016 vs 2021)
Pregunta 2016 2021
1 6.1% (277) 6.1% (207)
2 8% (364) 8.8% (300)
3 14.6% (667) 14.6% (500)
4 5.1% (231) 5.5% (189)
5 5.6% (255) 5.1% (174)
6 2.8% (128) 3.3% (112)
7 4.4% (199) 3.5% (120)
8 14.1% (643) 16.1% (551)
9 0.2% (10) 0.4% (13)
10 1.6% (74) 2.3% (77)
11 2.2% (100) 2% (68)
12 19.5% (889) 16.9% (578)
13 12.2% (556) 12.3% (420)
14 0.7% (31) 0.8% (29)
15 3.2% (144) 1.8% (62)
16 NA 0.4% (14)
library(dplyr)
library(ggplot2)

# ==============================
# 2016: dichotomous psychological-violence flag
# ==============================
# "Sí" when at least one psychological-violence item is positive.
# NOTE(review): with na.rm = TRUE, respondents with every item missing become "No".
violec_especif16 <- violec_especif16 %>%
  mutate(
    viol_psi_disc = if_else(rowSums(select(., starts_with("violencia16_psicologica")), na.rm = TRUE) > 0, "Sí", "No")
  ) %>%
  mutate(anio = 2016)

# ==============================
# 2021: dichotomous psychological-violence flag
# ==============================
violec_especif21 <- violec_especif21 %>%
  mutate(
    viol_psi_disc = if_else(rowSums(select(., starts_with("violencia21_psicologica")), na.rm = TRUE) > 0, "Sí", "No")
  ) %>%
  mutate(anio = 2021)

# ==============================
# Both years stacked
# ==============================
datos_total_psi <- bind_rows(
  violec_especif16 %>% select(viol_psi_disc, anio),
  violec_especif21 %>% select(viol_psi_disc, anio)
)

# ==============================
# Counts and within-year percentages
# ==============================
# The result stays grouped by anio.
datos_plot_psi <- datos_total_psi %>%
  filter(!is.na(viol_psi_disc)) %>%
  group_by(anio, viol_psi_disc) %>%
  tally(name = "total") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# ==============================
# Comparative bar chart (years side by side)
# ==============================
ggplot(
  data = datos_plot_psi,
  mapping = aes(x = viol_psi_disc, y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    mapping = aes(label = paste0(total, " (", porcentaje, "%)")),
    position = position_dodge(width = 0.8),
    size = 4,
    vjust = -0.5
  ) +
  theme_minimal() +
  scale_fill_manual(
    name = "Año",
    values = c("2016" = "lightblue", "2021" = "salmon")
  ) +
  labs(
    y = "Porcentaje",
    x = "",
    title = "Mujeres que han sufrido violencia psicológica (2016 vs 2021)"
  )

library(dplyr)
library(ggplot2)

# ==============================
# 1. Prepare 2016: age group, psychological-violence flag, survey year
# ==============================
# NOTE(review): as.numeric() returns level codes if EDAD is a factor — confirm
# EDAD is character or numeric at this point.
data_merged16 <- data_merged16 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    # Ages outside every bracket (including missing ages) get NA.
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),
    # "Sí" when at least one psychological-violence item is positive.
    viol_psi_disc = if_else(
      rowSums(select(., starts_with("violencia16_psicologica")), na.rm = TRUE) > 0,
      "Sí",
      "No"
    ),
    anio = "2016"
  )

# ==============================
# 2. Prepare 2021
# ==============================
data_merged21 <- data_merged21 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),
    viol_psi_disc = if_else(
      rowSums(select(., starts_with("violencia21_psicologica")), na.rm = TRUE) > 0,
      "Sí",
      "No"
    ),
    anio = "2021"
  )

# ==============================
# 3. Combine both years
# ==============================
datos_total_psi <- bind_rows(
  data_merged16 %>% select(grupo_edad, viol_psi_disc, anio),
  data_merged21 %>% select(grupo_edad, viol_psi_disc, anio)
)

# ==============================
# 4. Compute percentages
# ==============================
# Unrounded share of Sí/No within each year and age group; the result stays
# grouped by anio and grupo_edad.
datos_plot_psi <- datos_total_psi %>%
  filter(!is.na(grupo_edad), !is.na(viol_psi_disc)) %>%
  group_by(anio, grupo_edad, viol_psi_disc) %>%
  tally(name = "total") %>%
  group_by(anio, grupo_edad) %>%
  mutate(porcentaje = 100 * total / sum(total))

# Fix the display order of the age groups.
datos_plot_psi$grupo_edad <- factor(
  datos_plot_psi$grupo_edad,
  levels = c("15-18", "19-25", "26-35", "36-49", "50-64", "65-80+")
)

# ==============================
# 5. Chart
# ==============================
# Stacked bars scaled to 1 per age group, one panel per year; the y axis is
# formatted as percent.
ggplot(
  data = datos_plot_psi,
  mapping = aes(x = grupo_edad, y = porcentaje, fill = viol_psi_disc)
) +
  geom_col(position = "fill") +
  geom_text(
    mapping = aes(label = paste0(round(porcentaje, 1), "%")),
    position = position_fill(vjust = 0.5),
    size = 3,
    color = "white"
  ) +
  facet_wrap(~anio) +
  theme_minimal() +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values = c("No" = "lightblue", "Sí" = "salmon")) +
  labs(
    fill = "Violencia",
    y = "Proporción",
    x = "Grupo de edad",
    title = "Violencia psicológica por grupo de edad (2016 vs 2021)"
  )

library(dplyr)
library(tidyr)
library(ggplot2)

### --- Prepare 2016 ---
# One row per non-missing answer to a violencia16_sexual* item.
violencia_sex_long16 <- violec_especif16 %>%
  select(starts_with("violencia16_sexual")) %>%
  pivot_longer(everything(), names_to = "pregunta", values_to = "respuesta") %>%
  filter(!is.na(respuesta)) %>%
  mutate(
    # Item number = column name without its prefix
    pregunta_num = as.numeric(gsub("violencia16_sexual", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    anio = 2016
  )

# Answers per item and category, share within the item; keep "Sí" only.
resumen_sex16 <- violencia_sex_long16 %>%
  count(pregunta_num, respuesta, anio, name = "total") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

### --- Prepare 2021 ---
# Same steps on the violencia21_sexual* items.
violencia_sex_long21 <- violec_especif21 %>%
  select(starts_with("violencia21_sexual")) %>%
  pivot_longer(everything(), names_to = "pregunta", values_to = "respuesta") %>%
  filter(!is.na(respuesta)) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia21_sexual", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    anio = 2021
  )

resumen_sex21 <- violencia_sex_long21 %>%
  count(pregunta_num, respuesta, anio, name = "total") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

### --- Merge both years ---
resumen_sex_total <- bind_rows(resumen_sex16, resumen_sex21)

### --- Bar chart with percentage + absolute counts ---
ggplot(
  resumen_sex_total,
  aes(x = factor(pregunta_num), y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(porcentaje, "%\n", total)),
    position = position_dodge(width = 0.8),
    vjust = -0.25,
    size = 3
  ) +
  # Extra head-room so the two-line labels above the bars are not clipped
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia sexual (2016 vs 2021)",
    x = "Pregunta (1-5)",
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal()

library(dplyr)
library(tidyr)
library(ggplot2)

# ==============================
# 1. Define age groups
# ==============================
# Ages that match no bracket (below 15 or NA) become NA.
data_merged16 <- data_merged16 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      EDAD >= 15 & EDAD <= 18 ~ "15-18",
      EDAD >= 19 & EDAD <= 25 ~ "19-25",
      EDAD >= 26 & EDAD <= 35 ~ "26-35",
      EDAD >= 36 & EDAD <= 49 ~ "36-49",
      EDAD >= 50 & EDAD <= 64 ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    )
  )

data_merged21 <- data_merged21 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      EDAD >= 15 & EDAD <= 18 ~ "15-18",
      EDAD >= 19 & EDAD <= 25 ~ "19-25",
      EDAD >= 26 & EDAD <= 35 ~ "26-35",
      EDAD >= 36 & EDAD <= 49 ~ "36-49",
      EDAD >= 50 & EDAD <= 64 ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    )
  )

# ==============================
# 2. Long format, keeping the age group
# ==============================

### --- 2016 ---
# One row per woman and violencia16_sexual* item; rows with a missing answer
# or a missing age group are dropped.
violencia_sex_long16 <- data_merged16 %>%
  select(grupo_edad, starts_with("violencia16_sexual")) %>%
  pivot_longer(
    cols = starts_with("violencia16_sexual"),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta), !is.na(grupo_edad)) %>%
  mutate(
    # Item number = column name without its prefix
    pregunta_num = as.numeric(gsub("violencia16_sexual", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0,1), labels = c("No","Sí")),
    anio = "2016"
  )

### --- 2021 ---
violencia_sex_long21 <- data_merged21 %>%
  select(grupo_edad, starts_with("violencia21_sexual")) %>%
  pivot_longer(
    cols = starts_with("violencia21_sexual"),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta), !is.na(grupo_edad)) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia21_sexual", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0,1), labels = c("No","Sí")),
    anio = "2021"
  )

# ==============================
# 3. Merge
# ==============================
violencia_sex_total <- bind_rows(violencia_sex_long16, violencia_sex_long21)

# ==============================
# 4. Compute percentages (by age group)
# ==============================
# Share of each answer within year / age group / item; only "Sí" is kept.
resumen_sex_total <- violencia_sex_total %>%
  group_by(anio, grupo_edad, pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio, grupo_edad, pregunta_num) %>%
  mutate(porcentaje = 100 * total / sum(total)) %>%
  filter(respuesta == "Sí") %>%
  # Drop the grouping: grupo_edad is a grouping column and is overwritten
  # below, so the result should be a plain tibble.
  ungroup()

# Fix the display order of the age groups
resumen_sex_total$grupo_edad <- factor(
  resumen_sex_total$grupo_edad,
  levels = c("15-18", "19-25", "26-35", "36-49", "50-64", "65-80+")
)

# ==============================
# 5. Plot (now with age groups)
# ==============================
# Dodged bars per item, coloured by age group, one facet per year.
ggplot(resumen_sex_total,
       aes(x = factor(pregunta_num),
           y = porcentaje,
           fill = grupo_edad)) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(round(porcentaje,1), "%\n", total)),
    position = position_dodge(width = 0.8),
    vjust = -0.25,
    size = 3
  ) +
  facet_wrap(~anio) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Violencia sexual por grupo de edad (2016 vs 2021)",
    x = "Pregunta",
    y = "Porcentaje",
    fill = "Grupo de edad"
  ) +
  theme_minimal()

library(dplyr)
library(ggplot2)

# ==============================
# 2016: dichotomous variable
# ==============================
# "Sí" when the row sum over the violencia16_sexual* columns is positive;
# NAs are ignored in the sum, so an all-NA row counts as "No".
violec_especif16 <- mutate(
  violec_especif16,
  viol_sex_disc = if_else(
    rowSums(
      select(violec_especif16, starts_with("violencia16_sexual")),
      na.rm = TRUE
    ) > 0,
    "Sí",
    "No"
  ),
  anio = 2016
)

# ==============================
# 2021: dichotomous variable
# ==============================
violec_especif21 <- mutate(
  violec_especif21,
  viol_sex_disc = if_else(
    rowSums(
      select(violec_especif21, starts_with("violencia21_sexual")),
      na.rm = TRUE
    ) > 0,
    "Sí",
    "No"
  ),
  anio = 2021
)

# ==============================
# Both years together
# ==============================
datos_total_sex <- bind_rows(
  violec_especif16 %>% select(viol_sex_disc, anio),
  violec_especif21 %>% select(viol_sex_disc, anio)
)

# ==============================
# Totals and percentages
# ==============================
# Rows per year and indicator value, plus the share within the year.
datos_plot_sex <- datos_total_sex %>%
  filter(!is.na(viol_sex_disc)) %>%
  count(anio, viol_sex_disc, name = "total") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# ==============================
# Comparative bar chart
# ==============================
ggplot(
  datos_plot_sex,
  aes(x = viol_sex_disc, y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    position = position_dodge(width = 0.8),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(
    name = "Año",
    values = c("2016" = "lightblue", "2021" = "salmon")
  ) +
  labs(
    title = "Mujeres que han sufrido violencia sexual (2016 vs 2021)",
    x = "",
    y = "Porcentaje"
  ) +
  theme_minimal()

library(dplyr)
library(ggplot2)

# ==============================
# 1. Prepare 2016
# ==============================
# Adds an age group, a yes/no sexual-violence indicator and the survey year.
data_merged16 <- data_merged16 %>%
  mutate(
    EDAD = as.numeric(EDAD),

    # Ages that match no bracket (below 15 or NA) become NA.
    grupo_edad = case_when(
      EDAD >= 15 & EDAD <= 18 ~ "15-18",
      EDAD >= 19 & EDAD <= 25 ~ "19-25",
      EDAD >= 26 & EDAD <= 35 ~ "26-35",
      EDAD >= 36 & EDAD <= 49 ~ "36-49",
      EDAD >= 50 & EDAD <= 64 ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),

    # "Sí" when the row sum over the violencia16_sexual* columns is positive.
    # With na.rm = TRUE an all-NA row sums to 0 and is labelled "No", not NA.
    viol_sex_disc = if_else(
      rowSums(select(., starts_with("violencia16_sexual")), na.rm = TRUE) > 0,
      "Sí", "No"
    ),

    anio = "2016"
  )

# ==============================
# 2. Prepare 2021
# ==============================
data_merged21 <- data_merged21 %>%
  mutate(
    EDAD = as.numeric(EDAD),

    grupo_edad = case_when(
      EDAD >= 15 & EDAD <= 18 ~ "15-18",
      EDAD >= 19 & EDAD <= 25 ~ "19-25",
      EDAD >= 26 & EDAD <= 35 ~ "26-35",
      EDAD >= 36 & EDAD <= 49 ~ "36-49",
      EDAD >= 50 & EDAD <= 64 ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),

    viol_sex_disc = if_else(
      rowSums(select(., starts_with("violencia21_sexual")), na.rm = TRUE) > 0,
      "Sí", "No"
    ),

    anio = "2021"
  )

# ==============================
# 3. Combine both years
# ==============================
datos_total_sex <- bind_rows(
  select(data_merged16, grupo_edad, viol_sex_disc, anio),
  select(data_merged21, grupo_edad, viol_sex_disc, anio)
)

# ==============================
# 4. Compute percentages (by age group)
# ==============================
# Count per year / age group / indicator, then the rounded share of each
# indicator value within its year and age group.
datos_plot_sex <- datos_total_sex %>%
  filter(!is.na(grupo_edad), !is.na(viol_sex_disc)) %>%
  group_by(anio, grupo_edad, viol_sex_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio, grupo_edad) %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1)
  ) %>%
  # Drop the grouping: grupo_edad is a grouping column and is overwritten
  # below, so the result should be a plain tibble.
  ungroup()

# Fix the display order of the age groups
datos_plot_sex$grupo_edad <- factor(
  datos_plot_sex$grupo_edad,
  levels = c("15-18", "19-25", "26-35", "36-49", "50-64", "65-80+")
)

# ==============================
# 5. Plot
# ==============================
# Stacked bars per age group, one facet per year; position = "fill" rescales
# each bar to 1, which is why the y axis is formatted as percent.
ggplot(datos_plot_sex,
       aes(x = grupo_edad,
           y = porcentaje,
           fill = viol_sex_disc)) +
  geom_col(position = "fill") +
  geom_text(
    aes(label = paste0(round(porcentaje,1), "%")),
    position = position_fill(vjust = 0.5),
    color = "white",
    size = 3
  ) +
  facet_wrap(~anio) +
  scale_fill_manual(values = c("No" = "lightblue", "Sí" = "salmon")) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Violencia sexual por grupo de edad (2016 vs 2021)",
    x = "Grupo de edad",
    y = "Proporción",
    fill = "Violencia"
  ) +
  theme_minimal()

library(dplyr)
library(tidyr)
library(ggplot2)

# ==============================
# Helper: reshape violence items to long format
# ==============================
# Selects the columns of `data` whose names start with `prefix`, stacks them
# into one row per non-missing answer and decodes the answers.
#
# Args:
#   data:   data frame holding the item columns.
#   prefix: common column-name prefix, e.g. "violencia16_economica".
#
# Returns a tibble with the columns `pregunta` (original column name),
# `respuesta` (factor "No"/"Sí" decoded from 0/1) and `pregunta_num` (the part
# of the column name after the prefix, converted to a number).
long_format_violencia <- function(data, prefix) {
  data %>%
    select(starts_with(prefix)) %>%
    pivot_longer(
      cols = everything(),
      names_to = "pregunta",
      values_to = "respuesta"
    ) %>%
    filter(!is.na(respuesta)) %>%
    mutate(
      # Cut the prefix off by position instead of passing it to gsub() as a
      # regular expression, so metacharacters in `prefix` cannot alter the
      # match and only the leading occurrence is removed.
      pregunta_num = as.numeric(substring(pregunta, nchar(prefix) + 1L)),
      respuesta = factor(respuesta, levels = c(0,1), labels = c("No","Sí"))
    )
}

# ==============================
# 2016: economic violence
# ==============================
# Long-format 2016 items, tagged with the survey year.
violencia_eco_long16 <- mutate(
  long_format_violencia(violec_especif16, "violencia16_economica"),
  anio = 2016
)

# Answers per item and category, share within the item; keep "Sí" only.
resumen_eco16 <- violencia_eco_long16 %>%
  count(pregunta_num, respuesta, anio, name = "total") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

# ==============================
# 2021: economic violence
# ==============================
violencia_eco_long21 <- mutate(
  long_format_violencia(violec_especif21, "violencia21_economica"),
  anio = 2021
)

resumen_eco21 <- violencia_eco_long21 %>%
  count(pregunta_num, respuesta, anio, name = "total") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

# ==============================
# Combine 2016 and 2021
# ==============================
resumen_eco_total <- bind_rows(resumen_eco16, resumen_eco21)

# ==============================
# Bar chart with percentage and count
# ==============================
ggplot(
  resumen_eco_total,
  aes(x = factor(pregunta_num), y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(porcentaje, "%\n", total)),
    position = position_dodge(width = 0.8),
    vjust = -0.25,
    size = 3
  ) +
  # Extra head-room so the two-line labels above the bars are not clipped
  scale_y_continuous(expand = expansion(mult = c(0, 0.18))) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia económica (2016 vs 2021)",
    x = "Pregunta (1-7)",
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(margin = margin(b = 22)))

library(dplyr)
library(tidyr)
library(ggplot2)

# ==============================
# Helper: reshape violence items to long format
# ==============================
# Selects the columns of `data` whose names start with `prefix`, stacks them
# into one row per non-missing answer and decodes the answers.
#
# Args:
#   data:   data frame holding the item columns.
#   prefix: common column-name prefix, e.g. "violencia16_economica".
#
# Returns a tibble with the columns `pregunta` (original column name),
# `respuesta` (factor "No"/"Sí" decoded from 0/1) and `pregunta_num` (the part
# of the column name after the prefix, converted to a number).
long_format_violencia <- function(data, prefix) {
  data %>%
    select(starts_with(prefix)) %>%
    pivot_longer(
      cols = everything(),
      names_to = "pregunta",
      values_to = "respuesta"
    ) %>%
    filter(!is.na(respuesta)) %>%
    mutate(
      # Cut the prefix off by position instead of passing it to gsub() as a
      # regular expression, so metacharacters in `prefix` cannot alter the
      # match and only the leading occurrence is removed.
      pregunta_num = as.numeric(substring(pregunta, nchar(prefix) + 1L)),
      respuesta = factor(respuesta, levels = c(0,1), labels = c("No","Sí"))
    )
}

# ==============================
# 2016: economic violence
# ==============================
# NOTE(review): the plot title below carries "(15-18)"; presumably
# ad_violec_16 / ad_violec_21 hold only that age group - confirm where they
# are built.
violencia_eco_long16 <- mutate(
  long_format_violencia(ad_violec_16, "violencia16_economica"),
  anio = 2016
)

# Answers per item and category, share within the item; keep "Sí" only.
resumen_eco16 <- violencia_eco_long16 %>%
  count(pregunta_num, respuesta, anio, name = "total") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

# ==============================
# 2021: economic violence
# ==============================
violencia_eco_long21 <- mutate(
  long_format_violencia(ad_violec_21, "violencia21_economica"),
  anio = 2021
)

resumen_eco21 <- violencia_eco_long21 %>%
  count(pregunta_num, respuesta, anio, name = "total") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

# ==============================
# Combine 2016 and 2021
# ==============================
resumen_eco_total <- bind_rows(resumen_eco16, resumen_eco21)

# ==============================
# Bar chart with percentage and count
# ==============================
ggplot(
  resumen_eco_total,
  aes(x = factor(pregunta_num), y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(porcentaje, "%\n", total)),
    position = position_dodge(width = 0.8),
    vjust = -0.25,
    size = 3
  ) +
  # Extra head-room so the two-line labels above the bars are not clipped
  scale_y_continuous(expand = expansion(mult = c(0, 0.18))) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia económica (2016 vs 2021) (15-18)",
    x = "Pregunta (1-7)",
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(margin = margin(b = 22)))

library(dplyr)
library(tidyr)
library(ggplot2)

# ==============================
# Helper: reshape violence items to long format
# ==============================
# Selects the columns of `data` whose names start with `prefix`, stacks them
# into one row per non-missing answer and decodes the answers.
#
# Args:
#   data:   data frame holding the item columns.
#   prefix: common column-name prefix, e.g. "violencia16_economica".
#
# Returns a tibble with the columns `pregunta` (original column name),
# `respuesta` (factor "No"/"Sí" decoded from 0/1) and `pregunta_num` (the part
# of the column name after the prefix, converted to a number).
long_format_violencia <- function(data, prefix) {
  data %>%
    select(starts_with(prefix)) %>%
    pivot_longer(
      cols = everything(),
      names_to = "pregunta",
      values_to = "respuesta"
    ) %>%
    filter(!is.na(respuesta)) %>%
    mutate(
      # Cut the prefix off by position instead of passing it to gsub() as a
      # regular expression, so metacharacters in `prefix` cannot alter the
      # match and only the leading occurrence is removed.
      pregunta_num = as.numeric(substring(pregunta, nchar(prefix) + 1L)),
      respuesta = factor(respuesta, levels = c(0,1), labels = c("No","Sí"))
    )
}

# ==============================
# 2016: economic violence
# ==============================
# NOTE(review): this section reads the same inputs and builds the same plot
# as the section directly above it in the file - confirm whether both are
# needed.
violencia_eco_long16 <- mutate(
  long_format_violencia(ad_violec_16, "violencia16_economica"),
  anio = 2016
)

# Answers per item and category, share within the item; keep "Sí" only.
resumen_eco16 <- violencia_eco_long16 %>%
  count(pregunta_num, respuesta, anio, name = "total") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

# ==============================
# 2021: economic violence
# ==============================
violencia_eco_long21 <- mutate(
  long_format_violencia(ad_violec_21, "violencia21_economica"),
  anio = 2021
)

resumen_eco21 <- violencia_eco_long21 %>%
  count(pregunta_num, respuesta, anio, name = "total") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

# ==============================
# Combine 2016 and 2021
# ==============================
resumen_eco_total <- bind_rows(resumen_eco16, resumen_eco21)

# ==============================
# Bar chart with percentage and count
# ==============================
ggplot(
  resumen_eco_total,
  aes(x = factor(pregunta_num), y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(porcentaje, "%\n", total)),
    position = position_dodge(width = 0.8),
    vjust = -0.25,
    size = 3
  ) +
  # Extra head-room so the two-line labels above the bars are not clipped
  scale_y_continuous(expand = expansion(mult = c(0, 0.18))) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia económica (2016 vs 2021) (15-18)",
    x = "Pregunta (1-7)",
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(margin = margin(b = 22)))

library(dplyr)
library(ggplot2)

# ==============================
# Helper: overall yes/no indicator
# ==============================
# Returns `data` with an added column `disc_total`: "Sí" when the row sum
# over the columns whose names start with `prefix` is positive, "No"
# otherwise. NAs are ignored in the sum, so an all-NA row yields "No".
resumen_total <- function(data, prefix) {
  mutate(
    data,
    disc_total = if_else(
      rowSums(select(data, starts_with(prefix)), na.rm = TRUE) > 0,
      "Sí",
      "No"
    )
  )
}

# ==============================
# 2016: prepare data
# ==============================
# Rows per indicator value and their share of all rows.
datos_eco16 <- violec_especif16 %>%
  resumen_total("violencia16_economica") %>%
  filter(!is.na(disc_total)) %>%
  count(disc_total, name = "total") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1),
    anio = 2016
  )

# ==============================
# 2021: prepare data
# ==============================
datos_eco21 <- violec_especif21 %>%
  resumen_total("violencia21_economica") %>%
  filter(!is.na(disc_total)) %>%
  count(disc_total, name = "total") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1),
    anio = 2021
  )

# ==============================
# Combine years
# ==============================
datos_eco_total <- bind_rows(datos_eco16, datos_eco21)

# ==============================
# Comparative chart
# ==============================
ggplot(
  datos_eco_total,
  aes(x = disc_total, y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    position = position_dodge(width = 0.8),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(
    name = "Año",
    values = c("2016" = "lightblue", "2021" = "salmon")
  ) +
  labs(
    title = "Mujeres que han sufrido violencia económica (2016 vs 2021)",
    x = "",
    y = "Porcentaje"
  ) +
  theme_minimal()

library(dplyr)
library(ggplot2)

# ==============================
# 1. Prepare 2016
# ==============================
# Adds an age group, a yes/no economic-violence indicator and the survey year.
data_merged16 <- data_merged16 %>%
  mutate(
    EDAD = as.numeric(EDAD),

    # Ages that match no bracket (below 15 or NA) become NA.
    grupo_edad = case_when(
      EDAD >= 15 & EDAD <= 18 ~ "15-18",
      EDAD >= 19 & EDAD <= 25 ~ "19-25",
      EDAD >= 26 & EDAD <= 35 ~ "26-35",
      EDAD >= 36 & EDAD <= 49 ~ "36-49",
      EDAD >= 50 & EDAD <= 64 ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),

    # "Sí" when the row sum over the violencia16_economica* columns is
    # positive. With na.rm = TRUE an all-NA row sums to 0 and is labelled
    # "No", not NA.
    viol_eco_disc = if_else(
      rowSums(select(., starts_with("violencia16_economica")), na.rm = TRUE) > 0,
      "Sí", "No"
    ),

    anio = "2016"
  )

# ==============================
# 2. Prepare 2021
# ==============================
data_merged21 <- data_merged21 %>%
  mutate(
    EDAD = as.numeric(EDAD),

    grupo_edad = case_when(
      EDAD >= 15 & EDAD <= 18 ~ "15-18",
      EDAD >= 19 & EDAD <= 25 ~ "19-25",
      EDAD >= 26 & EDAD <= 35 ~ "26-35",
      EDAD >= 36 & EDAD <= 49 ~ "36-49",
      EDAD >= 50 & EDAD <= 64 ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),

    viol_eco_disc = if_else(
      rowSums(select(., starts_with("violencia21_economica")), na.rm = TRUE) > 0,
      "Sí", "No"
    ),

    anio = "2021"
  )

# ==============================
# 3. Combine both years
# ==============================
datos_total_eco <- bind_rows(
  select(data_merged16, grupo_edad, viol_eco_disc, anio),
  select(data_merged21, grupo_edad, viol_eco_disc, anio)
)

# ==============================
# 4. Compute percentages
# ==============================
# Count per year / age group / indicator, then the share of each indicator
# value within its year and age group.
datos_plot_eco <- datos_total_eco %>%
  filter(!is.na(grupo_edad), !is.na(viol_eco_disc)) %>%
  group_by(anio, grupo_edad, viol_eco_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio, grupo_edad) %>%
  mutate(
    porcentaje = 100 * total / sum(total)
  ) %>%
  # Drop the grouping: grupo_edad is a grouping column and is overwritten
  # below, so the result should be a plain tibble.
  ungroup()

# Fix the display order of the age groups
datos_plot_eco$grupo_edad <- factor(
  datos_plot_eco$grupo_edad,
  levels = c("15-18", "19-25", "26-35", "36-49", "50-64", "65-80+")
)

# ==============================
# 5. Plot
# ==============================
# Stacked bars per age group, one facet per year; position = "fill" rescales
# each bar to 1, which is why the y axis is formatted as percent.
ggplot(datos_plot_eco,
       aes(x = grupo_edad,
           y = porcentaje,
           fill = viol_eco_disc)) +
  geom_col(position = "fill") +
  geom_text(
    aes(label = paste0(round(porcentaje,1), "%")),
    position = position_fill(vjust = 0.5),
    color = "white",
    size = 3
  ) +
  facet_wrap(~anio) +
  scale_fill_manual(values = c("No" = "lightblue", "Sí" = "salmon")) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Violencia económica por grupo de edad (2016 vs 2021)",
    x = "Grupo de edad",
    y = "Proporción",
    fill = "Violencia"
  ) +
  theme_minimal()

library(dplyr)
library(ggplot2)

# ==============================
# Helper: overall yes/no indicator
# ==============================
# Returns `data` with an added column `disc_total`: "Sí" when the row sum
# over the columns whose names start with `prefix` is positive, "No"
# otherwise. NAs are ignored in the sum, so an all-NA row yields "No".
resumen_total <- function(data, prefix) {
  mutate(
    data,
    disc_total = if_else(
      rowSums(select(data, starts_with(prefix)), na.rm = TRUE) > 0,
      "Sí",
      "No"
    )
  )
}

# ==============================
# 2016: prepare data
# ==============================
# NOTE(review): the plot title below carries "(15-18)"; presumably
# ad_violec_16 / ad_violec_21 hold only that age group - confirm where they
# are built.
# Rows per indicator value and their share of all rows.
datos_eco16 <- ad_violec_16 %>%
  resumen_total("violencia16_economica") %>%
  filter(!is.na(disc_total)) %>%
  count(disc_total, name = "total") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1),
    anio = 2016
  )

# ==============================
# 2021: prepare data
# ==============================
datos_eco21 <- ad_violec_21 %>%
  resumen_total("violencia21_economica") %>%
  filter(!is.na(disc_total)) %>%
  count(disc_total, name = "total") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1),
    anio = 2021
  )

# ==============================
# Combine years
# ==============================
datos_eco_total <- bind_rows(datos_eco16, datos_eco21)

# ==============================
# Comparative chart
# ==============================
ggplot(
  datos_eco_total,
  aes(x = disc_total, y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    position = position_dodge(width = 0.8),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(
    name = "Año",
    values = c("2016" = "lightblue", "2021" = "salmon")
  ) +
  labs(
    title = "Mujeres que han sufrido violencia económica (2016 vs 2021) (15-18)",
    x = "",
    y = "Porcentaje"
  ) +
  theme_minimal()

EDAD

library(dplyr)
library(dplyr) 


library(dplyr)

# Simple variant: keep only the rows with SEXO == 2; all columns are retained.
datos_mujeres16 <- filter(datos_basicos1_16, SEXO == 2)

# Check the result
glimpse(datos_mujeres16)
## Rows: 229,854
## Columns: 42
## $ ID_VIV     <dbl> 100006.0, 100025.0, 100031.1, 100035.0, 100041.0, 100126.0,…
## $ ID_MUJ     <chr> "0100006.04.01.02\n", "0100025.03.02.03\n", "0100031.05.01.…
## $ UPM        <dbl> 100006, 100025, 100031, 100035, 100041, 100126, 100133, 100…
## $ PROG       <dbl> 39, 46, 7, 46, 71, 16, 95, 10, 6, 51, 32, 75, 122, 41, 93, …
## $ VIV_SEL    <dbl> 4, 3, 5, 3, 1, 1, 2, 3, 1, 3, 3, 2, 5, 1, 5, 2, 4, 1, 1, 1,…
## $ CVE_ENT    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ NOM_ENT    <chr> "Aguascalientes\n", "Aguascalientes\n", "Aguascalientes\n",…
## $ CVE_MUN    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ NOM_MUN    <chr> "Aguascalientes\n", "Aguascalientes\n", "Aguascalientes\n",…
## $ HOGAR      <dbl> 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1,…
## $ N_REN      <dbl> 2, 3, 3, 2, 3, 2, 1, 3, 2, 2, 1, 3, 2, 1, 2, 1, 3, 2, 7, 1,…
## $ COD_RES    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ COD_RES_E  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ NOMBRE     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ PAREN      <dbl> 3, 1, 3, 1, 3, 2, 1, 8, 2, 3, 1, 3, 2, 1, 1, 1, 3, 2, 9, 1,…
## $ SEXO       <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ EDAD       <dbl> 20, 42, 50, 44, 28, 34, 46, 26, 33, 24, 52, 29, 55, 59, 25,…
## $ P2_5       <dbl> 1, 97, 97, 97, 1, 96, 96, 2, 97, 1, 97, 2, 97, 96, 96, 97, …
## $ P2_6       <dbl> 98, 97, 1, 97, 96, 96, 96, 96, 97, 96, 97, 1, 97, 97, 96, 9…
## $ NIV        <dbl> 10, 4, 2, 3, 10, 10, 10, 7, 4, 10, 5, 10, 10, 10, 3, 5, 7, …
## $ GRA        <dbl> 1, 3, 6, 3, 4, 4, 3, 3, 3, 4, 2, 3, 5, 5, 3, 3, 3, 3, 6, 3,…
## $ P2_8       <dbl> NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ P2_9       <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ P2_10      <dbl> 3, 3, 3, 3, 3, 2, 3, 1, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 8, 3,…
## $ P2_11      <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ P2_12      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ P2_13      <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1,…
## $ P2_14      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 10, 12, 10, NA, NA, 9, NA, …
## $ P2_15      <dbl> 1, 1, 2, 1, 1, 5, 4, 1, NA, NA, NA, 1, 4, NA, 1, NA, 1, NA,…
## $ P2_16      <dbl> 6, 5, 3, 1, 5, 5, 3, 6, 1, 6, 1, 6, 5, 3, 2, 2, 3, 5, 5, 4,…
## $ CODIGO     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ REN_MUJ_EL <dbl> 2, 3, 3, 2, 3, 2, 1, 3, 2, 2, 1, 3, 2, 1, 2, 1, 3, 2, 7, 1,…
## $ REN_INF_AD <dbl> NA, NA, 3, NA, NA, 2, 1, NA, NA, NA, 1, NA, NA, NA, NA, NA,…
## $ FN_DIA     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ FN_MES     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ FAC_VIV    <dbl> 86, 92, 92, 80, 101, 78, 84, 84, 80, 80, 102, 94, 90, 76, 7…
## $ FAC_MUJ    <dbl> 172, 184, 184, 80, 203, 78, 84, 168, 80, 160, 205, 187, 90,…
## $ DOMINIO    <chr> "U\n", "U\n", "U\n", "U\n", "U\n", "U\n", "U\n", "U\n", "U\…
## $ ESTRATO    <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 2,…
## $ EST_DIS    <dbl> 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,…
## $ UPM_DIS    <dbl> 3, 9, 11, 12, 14, 36, 37, 38, 39, 39, 43, 46, 56, 65, 87, 1…
## $ COD_M15    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
# Frequency table of SEXO (NA shown if present) to confirm the filter.
table(datos_mujeres16[["SEXO"]], useNA = "ifany")
## 
##      2 
## 229854
# Same SEXO == 2 filter for the 2021 data.
datos_mujeres21 <- filter(datos_basicos1_21, SEXO == 2)


``` r
## Age of the women
# Histogram of EDAD for the 2016 data
hist(
  x = datos_mujeres16[["EDAD"]],
  main = "Distribución de la edad (mujeres) 2016",
  xlab = "Edad",
  col = "lightblue",
  border = "white"
)

# Histogram of EDAD for the 2021 data
hist(
  x = datos_mujeres21[["EDAD"]],
  main = "Distribución de la edad (mujeres) 2021",
  xlab = "Edad",
  col = "#DDA0DD",
  border = "white"
)

library(tidyverse)

# Age groups for 2016: rows with EDAD >= 15, seven brackets, year tag.
edad_2016 <- datos_mujeres16 %>%
  filter(EDAD >= 15) %>%
  mutate(
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      between(EDAD, 65, 79) ~ "65-79",
      EDAD >= 80 ~ "80+",
      TRUE ~ NA_character_
    ),
    anio = "2016"
  )

# Same brackets for 2021.
edad_2021 <- datos_mujeres21 %>%
  filter(EDAD >= 15) %>%
  mutate(
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      between(EDAD, 65, 79) ~ "65-79",
      EDAD >= 80 ~ "80+",
      TRUE ~ NA_character_
    ),
    anio = "2021"
  )

edad_all <- bind_rows(edad_2016, edad_2021)

# Rows per year and age group, the share within the year, and a fixed
# display order for the age groups (needed for the x axis of the plot).
tabla_edad <- edad_all %>%
  filter(!is.na(grupo_edad)) %>%
  count(anio, grupo_edad, name = "n") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(n / sum(n) * 100, 1)) %>%
  ungroup() %>%
  mutate(
    grupo_edad = factor(
      grupo_edad,
      levels = c("15-18", "19-25", "26-35", "36-49", "50-64", "65-79", "80+")
    )
  )

# Dodged bars per age group and year; the label shows the count and, on a
# second line, the percentage value.
ggplot(tabla_edad, aes(x = grupo_edad, y = porcentaje, fill = anio)) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0( n, "\n", porcentaje)),
    position = position_dodge(width = 0.8),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "#DDA0DD")) +
  scale_y_continuous(
    name = "Porcentaje",
    expand = expansion(mult = c(0, 0.18))
  ) +
  labs(
    title = "Distribución por grupos de edad",
    subtitle = "ENDIREH 2016 vs 2021",
    x = "Grupo de edad",
    fill = "Año"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "top"
  )

library(dplyr)

# ----------------------------------------------------------
# 1. Select women aged 15 or older
# ----------------------------------------------------------
# Keeps the rows with SEXO == 2 and a non-missing EDAD of at least 15, and
# only the columns used as join keys plus EDAD, NIV, P2_8 and P2_10.

datos_mujeres16 <- datos_basicos1_16 %>%
  filter(SEXO == 2 & !is.na(EDAD) & EDAD >= 15) %>%
  select(ID_VIV, UPM, PROG, VIV_SEL, HOGAR, ID_MUJ, EDAD, NIV, P2_8, P2_10)

# ----------------------------------------------------------
# 2. Create binary violence variables
# ----------------------------------------------------------
# Each *_bin column is 1 when the matching *_total is positive and 0
# otherwise (NA stays NA). viol16_any_bin is 1 when at least one of the four
# indicators is 1; viol16_any_disc is the same value as a "No"/"Sí" factor.

violec_especif16 <- violec_especif16 %>%
  mutate(
    viol16_fis_bin = as.numeric(viol16_fis_total > 0),
    viol16_psi_bin = as.numeric(viol16_psi_total > 0),
    viol16_sex_bin = as.numeric(viol16_sex_total > 0),
    viol16_eco_bin = as.numeric(viol16_eco_total > 0)
  ) %>%
  mutate(
    viol16_any_bin = as.numeric(
      viol16_fis_bin + viol16_psi_bin + viol16_sex_bin + viol16_eco_bin > 0
    ),
    viol16_any_disc = factor(
      viol16_any_bin,
      levels = c(0, 1),
      labels = c("No", "Sí")
    )
  )

# ----------------------------------------------------------
# 3. Join both data sets
# ----------------------------------------------------------

# Inner join: only women present in both tables are kept.
joined16 <- inner_join(
  datos_mujeres16,
  violec_especif16,
  by = c("ID_VIV", "UPM", "PROG", "VIV_SEL", "HOGAR", "ID_MUJ")
)

# Drop rows for which the overall violence indicator is missing.
joined16 <- filter(joined16, !is.na(viol16_any_disc))


# Check: frequency of the overall indicator (NAs would be listed if present).
table(joined16$viol16_any_disc, useNA = "ifany")
## 
##    No    Sí 
## 52623 39819
library(dplyr)

# Assign every woman to an age group based on EDAD.
# The oldest group is labelled "65-80+" so that it matches the factor levels
# used when the summary table is ordered for plotting and the grouping used
# for the 2021 data. The previous labels "65-80" and "80+" were not among
# those levels, so all women aged 65 or older were turned into NA.
# Rows that match no range (e.g. missing EDAD) get NA.
joined16 <- joined16 %>%
  mutate(
    grupo_edad = case_when(
      EDAD >= 15 & EDAD <= 18 ~ "15-18",
      EDAD >= 19 & EDAD <= 25 ~ "19-25",
      EDAD >= 26 & EDAD <= 35 ~ "26-35",
      EDAD >= 36 & EDAD <= 49 ~ "36-49",
      EDAD >= 50 & EDAD <= 64 ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    )
  )

# Summary table for the 2016 plot: number of women per age group and violence
# indicator, plus the share of each indicator value within its age group.
#   porcentaje       - share in percent (unrounded)
#   porcentaje_label - the same value rounded to one decimal (number only,
#                      no "%" sign), used as the text label
# The result is explicitly ungrouped so that later operations on plot_data do
# not silently run per age group.
plot_data <- joined16 %>%
  group_by(grupo_edad, viol16_any_disc) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(grupo_edad) %>%
  mutate(
    porcentaje = n / sum(n) * 100,
    porcentaje_label = round(porcentaje, 1)
  ) %>%
  ungroup() %>%
  # Fix the display order of the age groups; labels that are not listed in
  # `levels` become NA.
  mutate(
    grupo_edad = factor(
      grupo_edad,
      levels = c("15-18", "19-25", "26-35", "36-49", "50-64", "65-80+")
    )
  )

library(ggplot2)

# Stacked bar chart (2016): share of women with / without violence per age
# group, with the rounded percentage printed in the middle of each segment.
ggplot(plot_data,
       aes(x = grupo_edad,
           y = porcentaje,
           fill = viol16_any_disc)) +
  # position = "fill" rescales every bar to a total height of 1.
  geom_col(position = "fill") +
  geom_text(
    aes(label = porcentaje_label),
    position = position_fill(vjust = 0.5),
    color = "white",
    size = 4
  ) +
  # The axis therefore runs from 0 to 1; format it as 0%-100% so that it
  # agrees with the axis title "Porcentaje" and with the segment labels.
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(
    values = c("No" = "#00BFC4", "Sí" = "#F8766D")
  ) +
  labs(
    x = "Grupo de edad",
    y = "Porcentaje",
    fill = "¿Violencia?",
    title = "Porcentaje de mujeres que experimentaron violencia\nsegún grupo de edad (ENDIREH 2016)"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 12)
  )

# Inspect the column names available in the 2021 violence table.
names(violec_especif21)
##   [1] "ID_VIV"                    "ID_PER"                   
##   [3] "UPM"                       "VIV_SEL"                  
##   [5] "HOGAR"                     "N_REN"                    
##   [7] "DOMINIO"                   "CVE_ENT"                  
##   [9] "NOM_ENT"                   "CVE_MUN"                  
##  [11] "NOM_MUN"                   "T_INSTRUM"                
##  [13] "P14_1_1"                   "P14_1_2"                  
##  [15] "P14_1_3"                   "P14_1_4"                  
##  [17] "P14_1_5"                   "P14_1_6"                  
##  [19] "P14_1_7"                   "P14_1_8"                  
##  [21] "P14_1_9"                   "P14_1_10"                 
##  [23] "P14_1_11"                  "P14_1_12"                 
##  [25] "P14_1_13"                  "P14_1_14"                 
##  [27] "P14_1_15"                  "P14_1_16"                 
##  [29] "P14_1_17"                  "P14_1_18"                 
##  [31] "P14_1_19"                  "P14_1_20"                 
##  [33] "P14_1_21"                  "P14_1_22"                 
##  [35] "P14_1_23AB"                "P14_1_24AB"               
##  [37] "P14_1_25"                  "P14_1_26"                 
##  [39] "P14_1_27"                  "P14_1_28"                 
##  [41] "P14_1_29"                  "P14_1_30"                 
##  [43] "P14_1_31"                  "P14_1_32"                 
##  [45] "P14_1_33"                  "P14_1_34"                 
##  [47] "P14_1_35AB"                "P14_1_36AB"               
##  [49] "P14_1_37AB"                "P14_1_38AB"               
##  [51] "P14_2_1"                   "P14_3_1"                  
##  [53] "P14_2_2"                   "P14_3_2"                  
##  [55] "P14_2_3"                   "P14_3_3"                  
##  [57] "P14_2_4"                   "P14_3_4"                  
##  [59] "P14_2_5"                   "P14_3_5"                  
##  [61] "P14_2_6"                   "P14_3_6"                  
##  [63] "P14_2_7"                   "P14_3_7"                  
##  [65] "P14_2_8"                   "P14_3_8"                  
##  [67] "P14_2_9"                   "P14_3_9"                  
##  [69] "P14_2_10"                  "P14_3_10"                 
##  [71] "P14_2_11"                  "P14_3_11"                 
##  [73] "P14_2_12"                  "P14_3_12"                 
##  [75] "P14_2_13"                  "P14_3_13"                 
##  [77] "P14_2_14"                  "P14_3_14"                 
##  [79] "P14_2_15"                  "P14_3_15"                 
##  [81] "P14_2_16"                  "P14_3_16"                 
##  [83] "P14_2_17"                  "P14_3_17"                 
##  [85] "P14_2_18"                  "P14_3_18"                 
##  [87] "P14_2_19"                  "P14_3_19"                 
##  [89] "P14_2_20"                  "P14_3_20"                 
##  [91] "P14_2_21"                  "P14_3_21"                 
##  [93] "P14_2_22"                  "P14_3_22"                 
##  [95] "P14_2_23AB"                "P14_3_23AB"               
##  [97] "P14_2_24AB"                "P14_3_24AB"               
##  [99] "P14_2_25"                  "P14_3_25"                 
## [101] "P14_2_26"                  "P14_3_26"                 
## [103] "P14_2_27"                  "P14_3_27"                 
## [105] "P14_2_28"                  "P14_3_28"                 
## [107] "P14_2_29"                  "P14_3_29"                 
## [109] "P14_2_30"                  "P14_3_30"                 
## [111] "P14_2_31"                  "P14_3_31"                 
## [113] "P14_2_32"                  "P14_3_32"                 
## [115] "P14_2_33"                  "P14_3_33"                 
## [117] "P14_2_34"                  "P14_3_34"                 
## [119] "P14_2_35AB"                "P14_3_35AB"               
## [121] "P14_2_36AB"                "P14_3_36AB"               
## [123] "P14_2_37AB"                "P14_3_37AB"               
## [125] "P14_2_38AB"                "P14_3_38AB"               
## [127] "P14_4"                     "P14_5_1"                  
## [129] "P14_5_2"                   "P14_5_3"                  
## [131] "P14_5_4"                   "P14_5_5"                  
## [133] "P14_5_6"                   "P14_5_7"                  
## [135] "P14_6"                     "P14_7_1"                  
## [137] "P14_7_2"                   "P14_8_1"                  
## [139] "P14_8_2"                   "P14_8_3"                  
## [141] "P14_8_4"                   "P14_8_5"                  
## [143] "P14_8_6"                   "P14_8_7"                  
## [145] "P14_8_8"                   "P14_8_9"                  
## [147] "P14_8_10"                  "P14_8_10E"                
## [149] "R14_8_10X"                 "P14_9_1_1"                
## [151] "P14_9_1_2"                 "P14_9_1_3"                
## [153] "P14_10_1"                  "P14_11_1"                 
## [155] "P14_12_1"                  "P14_13_1"                 
## [157] "P14_9_2_1"                 "P14_9_2_2"                
## [159] "P14_9_2_3"                 "P14_10_2"                 
## [161] "P14_11_2"                  "P14_12_2"                 
## [163] "P14_13_2"                  "P14_9_3_1"                
## [165] "P14_9_3_2"                 "P14_9_3_3"                
## [167] "P14_10_3"                  "P14_11_3"                 
## [169] "P14_12_3"                  "P14_13_3"                 
## [171] "P14_9_4_1"                 "P14_9_4_2"                
## [173] "P14_9_4_3"                 "P14_10_4"                 
## [175] "P14_11_4"                  "P14_12_4"                 
## [177] "P14_13_4"                  "P14_9_5_1"                
## [179] "P14_9_5_2"                 "P14_9_5_3"                
## [181] "P14_10_5"                  "P14_11_5"                 
## [183] "P14_12_5"                  "P14_13_5"                 
## [185] "P14_9_6_1"                 "P14_9_6_2"                
## [187] "P14_9_6_3"                 "P14_10_6"                 
## [189] "P14_11_6"                  "P14_12_6"                 
## [191] "P14_13_6"                  "P14_9_7_1"                
## [193] "P14_9_7_2"                 "P14_9_7_3"                
## [195] "P14_10_7"                  "P14_11_7"                 
## [197] "P14_12_7"                  "P14_13_7"                 
## [199] "P14_9_8_1"                 "P14_9_8_2"                
## [201] "P14_9_8_3"                 "P14_10_8"                 
## [203] "P14_11_8"                  "P14_12_8"                 
## [205] "P14_13_8"                  "P14_9_9_1"                
## [207] "P14_9_9_2"                 "P14_9_9_3"                
## [209] "P14_10_9"                  "P14_11_9"                 
## [211] "P14_12_9"                  "P14_13_9"                 
## [213] "P14_9_10_1"                "P14_9_10_2"               
## [215] "P14_9_10_3"                "P14_10_10"                
## [217] "P14_11_10"                 "P14_12_10"                
## [219] "P14_13_10"                 "FAC_VIV"                  
## [221] "FAC_MUJ"                   "ESTRATO"                  
## [223] "UPM_DIS"                   "EST_DIS"                  
## [225] "violencia21_fisica1"       "violencia21_fisica2"      
## [227] "violencia21_fisica3"       "violencia21_fisica4"      
## [229] "violencia21_fisica5"       "violencia21_fisica6"      
## [231] "violencia21_fisica7"       "violencia21_fisica8"      
## [233] "violencia21_fisica9"       "violencia21_psicologica1" 
## [235] "violencia21_psicologica2"  "violencia21_psicologica3" 
## [237] "violencia21_psicologica4"  "violencia21_psicologica5" 
## [239] "violencia21_psicologica6"  "violencia21_psicologica7" 
## [241] "violencia21_psicologica8"  "violencia21_psicologica9" 
## [243] "violencia21_psicologica10" "violencia21_psicologica11"
## [245] "violencia21_psicologica12" "violencia21_psicologica13"
## [247] "violencia21_psicologica14" "violencia21_psicologica15"
## [249] "violencia21_psicologica16" "violencia21_sexual1"      
## [251] "violencia21_sexual2"       "violencia21_sexual3"      
## [253] "violencia21_sexual4"       "violencia21_sexual5"      
## [255] "violencia21_sexual6"       "violencia21_economica1"   
## [257] "violencia21_economica2"    "violencia21_economica3"   
## [259] "violencia21_economica4"    "violencia21_economica5"   
## [261] "violencia21_economica6"    "violencia21_economica7"   
## [263] "viol21_fis_total"          "viol21_fis_disc"          
## [265] "viol21_psi_disc"           "viol21_sex_total"         
## [267] "viol21_sex_disc"           "viol21_sex_disc_total"    
## [269] "viol21_eco_total"          "viol21_eco_disc"          
## [271] "viol21_eco_total_disc"     "fisica_total"             
## [273] "psicologica_total"         "sexual_total"             
## [275] "economica_total"           "viol_fis_disc"            
## [277] "anio"                      "viol_psi_disc"            
## [279] "viol_sex_disc"
library(dplyr)
library(ggplot2)
library(scales)

# ==========================================================
# 1. Dichotomous violence variables (row sums, NAs ignored)
# ==========================================================

# For each form of violence, sum all item columns whose name starts with the
# given prefix and flag (1/0) whether that sum is positive. Then flag whether
# at least one form is present and store that flag as a "No"/"Sí" factor.
# NOTE(review): because of na.rm = TRUE a row whose items are all NA gets a
# sum of 0 and is therefore coded 0 / "No" - confirm this is intended.
violec_especif21 <- mutate(
  violec_especif21,
  viol21_fis_bin = if_else(
    rowSums(
      select(violec_especif21, starts_with("violencia21_fisica")),
      na.rm = TRUE
    ) > 0,
    1, 0
  ),
  viol21_psi_bin = if_else(
    rowSums(
      select(violec_especif21, starts_with("violencia21_psicologica")),
      na.rm = TRUE
    ) > 0,
    1, 0
  ),
  viol21_sex_bin = if_else(
    rowSums(
      select(violec_especif21, starts_with("violencia21_sexual")),
      na.rm = TRUE
    ) > 0,
    1, 0
  ),
  viol21_eco_bin = if_else(
    rowSums(
      select(violec_especif21, starts_with("violencia21_economica")),
      na.rm = TRUE
    ) > 0,
    1, 0
  ),
  # at least one form of violence
  viol21_any_bin = if_else(
    (viol21_fis_bin + viol21_psi_bin + viol21_sex_bin + viol21_eco_bin) > 0,
    1, 0
  ),
  viol21_any_disc = factor(
    viol21_any_bin,
    levels = c(0, 1),
    labels = c("No", "Sí")
  )
)

# Check: frequency of the overall indicator
table(violec_especif21$viol21_any_disc)
## 
##    No    Sí 
## 63640 41576
# ==========================================================
# 2. Merge with the base data
# ==========================================================

# Inner join on the identifier columns; from the violence table only the keys
# and the overall indicator are carried over.
joined21 <- inner_join(
  datos_mujeres21,
  select(
    violec_especif21,
    ID_VIV, ID_PER, UPM, VIV_SEL, HOGAR, viol21_any_disc
  ),
  by = c("ID_VIV", "ID_PER", "UPM", "VIV_SEL", "HOGAR")
)

# Check: dimensions and indicator frequencies after the join
dim(joined21)
## [1] 105216     38
table(joined21$viol21_any_disc, useNA = "ifany")
## 
##    No    Sí 
## 63640 41576
# ==========================================================
# 3. Create age groups
# ==========================================================

# Convert EDAD to numeric, then assign each woman to an age group.
# Everyone aged 65 or older falls into "65-80+"; rows that match no range
# (e.g. missing EDAD) get NA.
joined21 <- joined21 %>%
  mutate(EDAD = as.numeric(EDAD)) %>%
  mutate(
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    )
  )

# ==========================================================
# 4. Summary by age
# ==========================================================

# Summary table for the 2021 plot: number of women per age group and violence
# indicator (rows with a missing age group or indicator are dropped), plus the
# share of each indicator value within its age group.
#   porcentaje       - share in percent (unrounded)
#   porcentaje_label - rounded to one decimal with a trailing "%", used as
#                      the text label
# The result is explicitly ungrouped so that later operations on plot_data21
# do not silently run per age group.
plot_data21 <- joined21 %>%
  filter(!is.na(grupo_edad), !is.na(viol21_any_disc)) %>%
  group_by(grupo_edad, viol21_any_disc) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(grupo_edad) %>%
  mutate(
    porcentaje = n / sum(n) * 100,
    porcentaje_label = paste0(round(porcentaje, 1), "%")
  ) %>%
  ungroup() %>%
  # Fix the display order of the age groups.
  mutate(
    grupo_edad = factor(
      grupo_edad,
      levels = c("15-18", "19-25", "26-35", "36-49", "50-64", "65-80+")
    )
  )

# ==========================================================
# 5. Plot
# ==========================================================

# Stacked bar chart (2021): share of women with / without violence per age
# group, with the percentage printed in the middle of each segment.
ggplot(plot_data21,
       aes(x = grupo_edad,
           y = porcentaje,
           fill = viol21_any_disc)) +
  # position = "fill" rescales every bar to a total height of 1.
  geom_col(position = "fill") +
  geom_text(
    aes(label = porcentaje_label),
    position = position_fill(vjust = 0.5),
    color = "white",
    size = 4
  ) +
  scale_fill_manual(
    values = c("No" = "#00BFC4", "Sí" = "#F8766D")
  ) +
  # The axis runs from 0 to 1, so the default percent scaling (x 100) is
  # required; scale = 1 would label the axis 0%-1% instead of 0%-100%.
  scale_y_continuous(labels = percent_format()) +
  labs(
    x = "Grupo de edad",
    y = "Proporción",
    fill = "¿Violencia?",
    title = "Distribución de mujeres que experimentaron violencia\nsegún grupo de edad (ENDIREH 2021)"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1)
  )