# Display the 2016 violence table (bare expression: prints when run
# interactively).
violec_especif16

# ---- 2016: recode the P13_1_* items into 0/1 indicators ----
# Mapping "new indicator column" -> "source item", grouped by type of violence.
items16 <- c(
  # physical violence: items 1-9
  setNames(paste0("P13_1_", 1:9), paste0("violencia16_fisica", 1:9)),
  # psychological violence: items 10-22 plus 23AB and 24AB
  setNames(
    c(paste0("P13_1_", 10:22), "P13_1_23AB", "P13_1_24AB"),
    paste0("violencia16_psicologica", 1:15)
  ),
  # sexual violence: items 25-29
  setNames(paste0("P13_1_", 25:29), paste0("violencia16_sexual", 1:5)),
  # economic violence: items 30-32 plus 33AB-36AB
  setNames(
    c(paste0("P13_1_", 30:32), paste0("P13_1_", 33:36, "AB")),
    paste0("violencia16_economica", 1:7)
  )
)

# Fail early, with a readable message, if an expected item is absent.
faltantes16 <- setdiff(items16, names(violec_especif16))
if (length(faltantes16) > 0) {
  stop(
    "Missing item columns in violec_especif16: ",
    paste(faltantes16, collapse = ", "),
    call. = FALSE
  )
}

# Codes 1-3 -> 1, code 4 -> 0, anything else -> NA.
# The previous factor(x, labels = c("1", "1", "1", "0")) gave the same mapping
# only when a column held exactly four distinct values and stopped with an
# "invalid 'labels'" error otherwise. The indicators are stored as integers
# because the later rowSums() calls need numeric columns.
# NOTE(review): assumes the items are coded 1-4 with 4 meaning "did not
# happen" -- confirm against the survey codebook.
for (nueva16 in names(items16)) {
  codigo16 <- violec_especif16[[items16[[nueva16]]]]
  violec_especif16[[nueva16]] <- ifelse(
    codigo16 %in% 1:3,
    1L,
    ifelse(codigo16 %in% 4, 0L, NA_integer_)
  )
}

# Display the 2021 violence table.
violec_especif21
# ---- 2021: recode the P14_1_* items into 0/1 indicators ----
# Mapping "new indicator column" -> "source item", grouped by type of violence.
items21 <- c(
  # physical violence: items 1-9
  setNames(paste0("P14_1_", 1:9), paste0("violencia21_fisica", 1:9)),
  # psychological violence: items 10-22, 23AB, 24AB and item 31
  setNames(
    c(paste0("P14_1_", 10:22), "P14_1_23AB", "P14_1_24AB", "P14_1_31"),
    paste0("violencia21_psicologica", 1:16)
  ),
  # sexual violence: items 25-30
  setNames(paste0("P14_1_", 25:30), paste0("violencia21_sexual", 1:6)),
  # economic violence: items 32-34 plus 35AB-38AB
  setNames(
    c(paste0("P14_1_", 32:34), paste0("P14_1_", 35:38, "AB")),
    paste0("violencia21_economica", 1:7)
  )
)

# Fail early, with a readable message, if an expected item is absent.
faltantes21 <- setdiff(items21, names(violec_especif21))
if (length(faltantes21) > 0) {
  stop(
    "Missing item columns in violec_especif21: ",
    paste(faltantes21, collapse = ", "),
    call. = FALSE
  )
}

# Codes 1-3 -> 1, code 4 -> 0, anything else -> NA.
# The previous factor(x, labels = c("1", "1", "1", "0")) gave the same mapping
# only when a column held exactly four distinct values and stopped with an
# "invalid 'labels'" error otherwise. The indicators are stored as integers
# because the later rowSums() calls need numeric columns.
# NOTE(review): assumes the items are coded 1-4 with 4 meaning "did not
# happen" -- confirm against the survey codebook.
for (nueva21 in names(items21)) {
  codigo21 <- violec_especif21[[items21[[nueva21]]]]
  violec_especif21[[nueva21]] <- ifelse(
    codigo21 %in% 1:3,
    1L,
    ifelse(codigo21 %in% 4, 0L, NA_integer_)
  )
}

# ---- Demographic tables for 2016 and 2021 ----
# Each statement now sits on its own line; several were fused on one line
# before, which R cannot parse.
datos_basicos1_16 <- read.csv(
  "C:/Master/3. Semester/R/Dataset/2016/conjunto_de_datos_tsdem_endireh_2016.csv",
  header = TRUE,
  sep = ","
)
datos_basicos1_21 <- read.csv(
  "C:/Master/3. Semester/R/Dataset/TSDem.csv",
  header = TRUE,
  sep = ","
)

# Age as a number so it can be range-filtered later.
datos_basicos1_16$EDAD <- as.numeric(datos_basicos1_16$EDAD)
datos_basicos1_21$EDAD <- as.numeric(datos_basicos1_21$EDAD)
library(dplyr)
# Merge the 2016 violence table with the 2016 demographic table.
# NOTE(review): if both tables carry further columns with the same name
# (EDAD included), left_join() suffixes them with .x/.y -- confirm that EDAD
# exists only in the demographic table.
data_merged16 <- violec_especif16 %>%
  left_join(
    datos_basicos1_16,
    by = c("ID_VIV", "ID_MUJ", "UPM", "VIV_SEL", "HOGAR")
  )

# Keep respondents aged 15 to 18 (inclusive).
ad_violec_16 <- data_merged16 %>%
  mutate(EDAD = as.numeric(EDAD)) %>%
  filter(EDAD >= 15 & EDAD <= 18)

# Merge the 2021 violence table with the 2021 demographic table
# (the person key is ID_PER here, ID_MUJ in 2016).
data_merged21 <- violec_especif21 %>%
  left_join(
    datos_basicos1_21,
    by = c("ID_VIV", "ID_PER", "UPM", "VIV_SEL", "HOGAR")
  )

# Keep respondents aged 15 to 18 (inclusive).
ad_violec_21 <- data_merged21 %>%
  mutate(EDAD = as.numeric(EDAD)) %>%
  filter(EDAD >= 15 & EDAD <= 18)
library(dplyr)
library(tidyr)
library(ggplot2)
# 1. Long format: one row per respondent and physical-violence item (2016).
#    Missing answers are dropped, as in the other per-question charts of this
#    script, so they neither enter the percentages nor appear as an "NA" bar.
violencia_long <- violec_especif16 %>%
  select(starts_with("violencia16_fisica")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta)) %>%
  mutate(
    # item number (1-9) taken from the column name
    pregunta_num = as.numeric(gsub("violencia16_fisica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# 2. Count and percentage of each answer within every item.
resumen <- violencia_long %>%
  group_by(pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# 3. Bar chart with the two answers side by side for every item.
ggplot(
  resumen,
  aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)
) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Distribución de respuestas de violencia física por pregunta (2016)",
    x = "Pregunta (1-9)",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(ggplot2)
# 1. Binary indicator: "Sí" when at least one physical-violence item equals 1,
#    otherwise "No" (rows whose items are all missing also end up as "No").
#    The items are compared with "1" before summing: rowSums() on factor
#    columns stops with "'x' must be numeric", while the comparison works for
#    indicators stored as factors ("1"/"0") as well as for numeric ones.
violec_especif16 <- violec_especif16 %>%
  mutate(
    viol16_fis_disc = if_else(
      rowSums(
        select(., starts_with("violencia16_fisica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí",
      "No"
    )
  )

# 2. Totals and percentages per answer.
datos_plot <- violec_especif16 %>%
  filter(!is.na(viol16_fis_disc)) %>%
  group_by(viol16_fis_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# 3. Bar chart labelled with absolute number and percentage.
ggplot(
  datos_plot,
  aes(x = viol16_fis_disc, y = porcentaje, fill = viol16_fis_disc)
) +
  geom_col() +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia física (2016)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
# Explicación: cualquier mujer que tenga al menos un “Sí” en las 9
# preguntas será considerada como Sí. Solo las mujeres que nunca
# respondieron “Sí” en ninguna pregunta se cuentan como No. El gráfico
# muestra dos barras: No vs. Sí, con el número absoluto y el porcentaje
# encima.
library(dplyr)
library(ggplot2)
# 1. Binary indicator: "Sí" when at least one psychological-violence item
#    equals 1, otherwise "No" (rows whose items are all missing also end up
#    as "No").
#    The items are compared with "1" before summing: rowSums() on factor
#    columns stops with "'x' must be numeric", while the comparison works for
#    indicators stored as factors ("1"/"0") as well as for numeric ones.
violec_especif16 <- violec_especif16 %>%
  mutate(
    viol16_psi_disc = if_else(
      rowSums(
        select(., starts_with("violencia16_psicologica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí",
      "No"
    )
  )

# 2. Totals and percentages per answer.
datos_plot_psi <- violec_especif16 %>%
  filter(!is.na(viol16_psi_disc)) %>%
  group_by(viol16_psi_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# 3. Bar chart labelled with absolute number and percentage.
ggplot(
  datos_plot_psi,
  aes(x = viol16_psi_disc, y = porcentaje, fill = viol16_psi_disc)
) +
  geom_col() +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia psicológica (2016)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
# Explicación: cualquier mujer que tenga al menos un “Sí” en las 15
# preguntas será considerada como Sí. Solo las mujeres que nunca
# respondieron “Sí” en ninguna pregunta se cuentan como No. El gráfico
# muestra dos barras: No vs. Sí, con el número absoluto y el porcentaje
# encima.
library(dplyr)
library(tidyr)
library(ggplot2)
# 1. Long format: one row per respondent and psychological-violence item
#    (2016, 15 items); missing answers are dropped.
violencia_psi_long <- violec_especif16 %>%
  select(starts_with("violencia16_psicologica")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta)) %>%
  mutate(
    # item number (1-15) taken from the column name
    pregunta_num = as.numeric(gsub("violencia16_psicologica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# 2. Count and percentage of each answer within every item.
resumen_psi <- violencia_psi_long %>%
  group_by(pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# 3. Bar chart with the two answers side by side for every item.
ggplot(
  resumen_psi,
  aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)
) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Distribución de respuestas de violencia psicológica por pregunta (2016)",
    x = "Pregunta",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(tidyr)
library(ggplot2)
# 1. Long format: one row per respondent and sexual-violence item
#    (2016, 5 items); missing answers are dropped.
violencia_sex_long <- violec_especif16 %>%
  select(starts_with("violencia16_sexual")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta)) %>%
  mutate(
    # item number (1-5) taken from the column name
    pregunta_num = as.numeric(gsub("violencia16_sexual", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# 2. Count and percentage of each answer within every item.
resumen_sex <- violencia_sex_long %>%
  group_by(pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# 3. Bar chart with the two answers side by side for every item.
ggplot(
  resumen_sex,
  aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)
) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Distribución de respuestas de violencia sexual por pregunta (2016)",
    x = "Pregunta (1-5)",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(ggplot2)
# 1. Binary indicator: "Sí" when at least one sexual-violence item equals 1,
#    otherwise "No" (rows whose items are all missing also end up as "No").
#    The items are compared with "1" before summing: rowSums() on factor
#    columns stops with "'x' must be numeric", while the comparison works for
#    indicators stored as factors ("1"/"0") as well as for numeric ones.
violec_especif16 <- violec_especif16 %>%
  mutate(
    viol16_sex_disc_total = if_else(
      rowSums(
        select(., starts_with("violencia16_sexual")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí",
      "No"
    )
  )

# 2. Totals and percentages per answer.
datos_plot_sex <- violec_especif16 %>%
  filter(!is.na(viol16_sex_disc_total)) %>%
  group_by(viol16_sex_disc_total) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# 3. Bar chart labelled with absolute number and percentage.
ggplot(
  datos_plot_sex,
  aes(
    x = viol16_sex_disc_total,
    y = porcentaje,
    fill = viol16_sex_disc_total
  )
) +
  geom_col() +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia sexual (2016)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(tidyr)
library(ggplot2)
# ==============================
# 1. Long-format data per question (Yes/No)
# ==============================
# Reshape the indicator columns of one type of violence to long format.
#   data          : data frame holding the indicator columns
#   prefix        : shared column-name prefix; it selects the columns and is
#                   stripped from their names to obtain the question number
#   num_preguntas : accepted but never read inside the function
# Returns one row per non-missing answer with the columns `pregunta`
# (original column name), `respuesta` (factor "No"/"Sí") and `pregunta_num`.
long_format_violencia <- function(data, prefix, num_preguntas) {
  columnas <- select(data, starts_with(prefix))
  largo <- pivot_longer(
    columnas,
    cols = everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  )
  contestadas <- filter(largo, !is.na(respuesta))
  mutate(
    contestadas,
    pregunta_num = as.numeric(gsub(prefix, "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )
}
# Example: economic violence (2016) per question.
violencia_eco_long <- long_format_violencia(
  violec_especif16,
  "violencia16_economica",
  7
)

# Answers per question and their share within each question
# (the result stays grouped by question).
resumen_eco <- violencia_eco_long %>%
  count(pregunta_num, respuesta, name = "total") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# Chart of the economic-violence questions.
resumen_eco %>%
  ggplot(aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Distribución de respuestas de violencia económica por pregunta (2016)",
    x = "Pregunta (1-7)",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
# ==============================
# 2. Yes/No summary: at least once
# ==============================
# Summarise how many rows report at least one item of a type of violence.
#   data   : data frame holding the indicator columns
#   prefix : shared column-name prefix selecting the indicator columns
# Returns one row per answer ("No"/"Sí") with the columns `disc_total`,
# `total` (row count) and `porcentaje` (share of all rows, one decimal).
resumen_total <- function(data, prefix) {
  # Compare with "1" before summing: rowSums() on factor columns stops with
  # "'x' must be numeric", while the comparison works for indicators stored
  # as factors ("1"/"0") as well as for numeric ones. Rows whose items are
  # all missing count as "No".
  alguna_vez <- rowSums(
    select(data, starts_with(prefix)) == "1",
    na.rm = TRUE
  ) > 0

  data %>%
    mutate(disc_total = if_else(alguna_vez, "Sí", "No")) %>%
    filter(!is.na(disc_total)) %>%
    group_by(disc_total) %>%
    summarise(total = n(), .groups = "drop") %>%
    mutate(porcentaje = round(100 * total / sum(total), 1))
}
# Example: economic violence (2016), at least once.
datos_plot_eco <- resumen_total(violec_especif16, "violencia16_economica")

# Yes/No bar chart labelled with absolute number and percentage.
ggplot(
  datos_plot_eco,
  aes(x = disc_total, y = porcentaje, fill = disc_total)
) +
  geom_col() +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia económica (2016)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(ggplot2)
library(tidyr)
# ==============================
# 1. Yes/No totals for every type of violence (2016)
# ==============================
# "Sí" when at least one item of the type equals 1, otherwise "No".
# The items are compared with "1" before summing: rowSums() on factor columns
# stops with "'x' must be numeric", while the comparison works for indicators
# stored as factors ("1"/"0") as well as for numeric ones.
violec_especif16 <- violec_especif16 %>%
  mutate(
    fisica_total = if_else(
      rowSums(
        select(., starts_with("violencia16_fisica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    ),
    psicologica_total = if_else(
      rowSums(
        select(., starts_with("violencia16_psicologica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    ),
    sexual_total = if_else(
      rowSums(
        select(., starts_with("violencia16_sexual")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    ),
    economica_total = if_else(
      rowSums(
        select(., starts_with("violencia16_economica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    )
  )

# ==============================
# 2. Long format for ggplot: count and percentage per type and answer
# ==============================
resumen_violencia <- violec_especif16 %>%
  select(fisica_total, psicologica_total, sexual_total, economica_total) %>%
  pivot_longer(
    cols = everything(),
    names_to = "tipo_violencia",
    values_to = "respuesta"
  ) %>%
  group_by(tipo_violencia, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(tipo_violencia) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# ==============================
# 3. Comparative chart with custom axis labels
# ==============================
ggplot(
  resumen_violencia,
  aes(x = tipo_violencia, y = porcentaje, fill = respuesta)
) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  scale_x_discrete(labels = c(
    fisica_total = "vio.fisica",
    psicologica_total = "vio.psicologica",
    sexual_total = "vio.sexual",
    economica_total = "vio.economica"
  )) +
  labs(
    title = "Mujeres que han experimentado diferentes tipos de violencia (2016)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(tidyr)
library(ggplot2)
# 1. Long format: one row per respondent and physical-violence item (2021).
#    Missing answers are dropped, as in the other per-question charts of this
#    script, so they neither enter the percentages nor appear as an "NA" bar.
violencia_long21 <- violec_especif21 %>%
  select(starts_with("violencia21_fisica")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta)) %>%
  mutate(
    # item number (1-9) taken from the column name
    pregunta_num = as.numeric(gsub("violencia21_fisica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# 2. Count and percentage of each answer within every item.
resumen <- violencia_long21 %>%
  group_by(pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# 3. Bar chart with the two answers side by side for every item.
ggplot(
  resumen,
  aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)
) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Distribución de respuestas de violencia física por pregunta (2021)",
    x = "Pregunta (1-9)",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(ggplot2)
# 1. Binary indicator: "Sí" when at least one physical-violence item equals 1,
#    otherwise "No" (rows whose items are all missing also end up as "No").
#    The items are compared with "1" before summing: rowSums() on factor
#    columns stops with "'x' must be numeric", while the comparison works for
#    indicators stored as factors ("1"/"0") as well as for numeric ones.
violec_especif21 <- violec_especif21 %>%
  mutate(
    viol21_fis_disc = if_else(
      rowSums(
        select(., starts_with("violencia21_fisica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí",
      "No"
    )
  )

# 2. Totals and percentages per answer.
datos_plot <- violec_especif21 %>%
  filter(!is.na(viol21_fis_disc)) %>%
  group_by(viol21_fis_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# 3. Bar chart labelled with absolute number and percentage.
ggplot(
  datos_plot,
  aes(x = viol21_fis_disc, y = porcentaje, fill = viol21_fis_disc)
) +
  geom_col() +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia física (2021)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(ggplot2)
# 1. Binary indicator: "Sí" when at least one psychological-violence item
#    equals 1, otherwise "No" (rows whose items are all missing also end up
#    as "No").
#    The items are compared with "1" before summing: rowSums() on factor
#    columns stops with "'x' must be numeric", while the comparison works for
#    indicators stored as factors ("1"/"0") as well as for numeric ones.
violec_especif21 <- violec_especif21 %>%
  mutate(
    viol21_psi_disc = if_else(
      rowSums(
        select(., starts_with("violencia21_psicologica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí",
      "No"
    )
  )

# 2. Totals and percentages per answer.
datos_plot_psi21 <- violec_especif21 %>%
  filter(!is.na(viol21_psi_disc)) %>%
  group_by(viol21_psi_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# 3. Bar chart labelled with absolute number and percentage.
ggplot(
  datos_plot_psi21,
  aes(x = viol21_psi_disc, y = porcentaje, fill = viol21_psi_disc)
) +
  geom_col() +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia psicológica (2021)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(tidyr)
library(ggplot2)
# 1. Long format: one row per respondent and psychological-violence item
#    (2021, 16 items); missing answers are dropped.
violencia_psi_long21 <- violec_especif21 %>%
  select(starts_with("violencia21_psicologica")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta)) %>%
  mutate(
    # item number (1-16) taken from the column name
    pregunta_num = as.numeric(gsub("violencia21_psicologica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# 2. Count and percentage of each answer within every item.
resumen_psi <- violencia_psi_long21 %>%
  group_by(pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# 3. Bar chart with the two answers side by side for every item.
ggplot(
  resumen_psi,
  aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)
) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Distribución de respuestas de violencia psicológica por pregunta (2021)",
    x = "Pregunta",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(tidyr)
library(ggplot2)
# 1. Long format: one row per respondent and sexual-violence item
#    (2021, 6 items: violencia21_sexual1 to violencia21_sexual6);
#    missing answers are dropped.
violencia_sex_long21 <- violec_especif21 %>%
  select(starts_with("violencia21_sexual")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta)) %>%
  mutate(
    # item number (1-6) taken from the column name
    pregunta_num = as.numeric(gsub("violencia21_sexual", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# 2. Count and percentage of each answer within every item.
resumen_sex <- violencia_sex_long21 %>%
  group_by(pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# 3. Bar chart with the two answers side by side for every item.
#    The axis label now says 1-6: the 2021 data has six sexual-violence
#    items, not five as in 2016.
ggplot(
  resumen_sex,
  aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)
) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Distribución de respuestas de violencia sexual por pregunta (2021)",
    x = "Pregunta (1-6)",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(ggplot2)
# 1. Binary indicator: "Sí" when at least one sexual-violence item equals 1,
#    otherwise "No" (rows whose items are all missing also end up as "No").
#    The items are compared with "1" before summing: rowSums() on factor
#    columns stops with "'x' must be numeric", while the comparison works for
#    indicators stored as factors ("1"/"0") as well as for numeric ones.
violec_especif21 <- violec_especif21 %>%
  mutate(
    viol21_sex_disc_total = if_else(
      rowSums(
        select(., starts_with("violencia21_sexual")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí",
      "No"
    )
  )

# 2. Totals and percentages per answer.
datos_plot_sex21 <- violec_especif21 %>%
  filter(!is.na(viol21_sex_disc_total)) %>%
  group_by(viol21_sex_disc_total) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# 3. Bar chart labelled with absolute number and percentage.
ggplot(
  datos_plot_sex21,
  aes(
    x = viol21_sex_disc_total,
    y = porcentaje,
    fill = viol21_sex_disc_total
  )
) +
  geom_col() +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia sexual (2021)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(tidyr)
library(ggplot2)
# ==============================
# 1. Reshape the data to long format (per question)
# ==============================
# One row per respondent and economic-violence item (2021, 7 items);
# missing answers are dropped.
violencia_eco_long21 <- violec_especif21 %>%
  select(starts_with("violencia21_economica")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta)) %>%
  mutate(
    # item number (1-7) taken from the column name
    pregunta_num = as.numeric(gsub("violencia21_economica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

# ==============================
# 2. Totals and percentages per question
# ==============================
resumen_eco21 <- violencia_eco_long21 %>%
  group_by(pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# ==============================
# 3. Bar chart per question (answers side by side)
# ==============================
ggplot(
  resumen_eco21,
  aes(x = factor(pregunta_num), y = porcentaje, fill = respuesta)
) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Distribución de respuestas de violencia económica 2021 por pregunta",
    x = "Pregunta (1-7)",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(ggplot2)
# ==============================
# 1. Yes/No total: at least once
# ==============================
# "Sí" when at least one economic-violence item equals 1, otherwise "No"
# (rows whose items are all missing also end up as "No").
# The items are compared with "1" before summing: rowSums() on factor columns
# stops with "'x' must be numeric", while the comparison works for indicators
# stored as factors ("1"/"0") as well as for numeric ones.
violec_especif21 <- violec_especif21 %>%
  mutate(
    viol21_eco_total_disc = if_else(
      rowSums(
        select(., starts_with("violencia21_economica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí",
      "No"
    )
  )

# ==============================
# 2. Totals and percentages per answer
# ==============================
datos_plot_eco21 <- violec_especif21 %>%
  filter(!is.na(viol21_eco_total_disc)) %>%
  group_by(viol21_eco_total_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# ==============================
# 3. Bar chart labelled with absolute number and percentage
# ==============================
ggplot(
  datos_plot_eco21,
  aes(
    x = viol21_eco_total_disc,
    y = porcentaje,
    fill = viol21_eco_total_disc
  )
) +
  geom_col() +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  labs(
    title = "Mujeres que han sufrido violencia económica (2021)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(tidyr)
library(ggplot2)
# ==============================
# 1. Yes/No totals for every type of violence (2021)
# ==============================
# "Sí" when at least one item of the type equals 1, otherwise "No".
# The items are compared with "1" before summing: rowSums() on factor columns
# stops with "'x' must be numeric", while the comparison works for indicators
# stored as factors ("1"/"0") as well as for numeric ones.
violec_especif21 <- violec_especif21 %>%
  mutate(
    fisica_total = if_else(
      rowSums(
        select(., starts_with("violencia21_fisica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    ),
    psicologica_total = if_else(
      rowSums(
        select(., starts_with("violencia21_psicologica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    ),
    sexual_total = if_else(
      rowSums(
        select(., starts_with("violencia21_sexual")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    ),
    economica_total = if_else(
      rowSums(
        select(., starts_with("violencia21_economica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    )
  )

# ==============================
# 2. Long format for ggplot: count and percentage per type and answer
# ==============================
resumen_violencia21 <- violec_especif21 %>%
  select(fisica_total, psicologica_total, sexual_total, economica_total) %>%
  pivot_longer(
    cols = everything(),
    names_to = "tipo_violencia",
    values_to = "respuesta"
  ) %>%
  group_by(tipo_violencia, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(tipo_violencia) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup()

# ==============================
# 3. Comparative chart
# ==============================
ggplot(
  resumen_violencia21,
  aes(x = tipo_violencia, y = porcentaje, fill = respuesta)
) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  ) +
  scale_fill_manual(values = c("No" = "steelblue", "Sí" = "firebrick")) +
  scale_x_discrete(labels = c(
    fisica_total = "vio.fisica",
    psicologica_total = "vio.psicologica",
    sexual_total = "vio.sexual",
    economica_total = "vio.economica"
  )) +
  labs(
    title = "Mujeres que han experimentado diferentes tipos de violencia (2021)",
    x = "",
    y = "Porcentaje",
    fill = "Respuesta"
  ) +
  theme_minimal()
library(dplyr)
library(tidyr)
library(ggplot2)
# ==============================
# 1. Yes/No totals for every type of violence (2016)
# ==============================
# "Sí" when at least one item of the type equals 1, otherwise "No".
# The items are compared with "1" before summing: rowSums() on factor columns
# stops with "'x' must be numeric", while the comparison works for indicators
# stored as factors ("1"/"0") as well as for numeric ones.
violec_especif16 <- violec_especif16 %>%
  mutate(
    fisica_total = if_else(
      rowSums(
        select(., starts_with("violencia16_fisica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    ),
    psicologica_total = if_else(
      rowSums(
        select(., starts_with("violencia16_psicologica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    ),
    sexual_total = if_else(
      rowSums(
        select(., starts_with("violencia16_sexual")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    ),
    economica_total = if_else(
      rowSums(
        select(., starts_with("violencia16_economica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    )
  )

# ==============================
# 2. Yes/No totals for every type of violence (2021)
# ==============================
violec_especif21 <- violec_especif21 %>%
  mutate(
    fisica_total = if_else(
      rowSums(
        select(., starts_with("violencia21_fisica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    ),
    psicologica_total = if_else(
      rowSums(
        select(., starts_with("violencia21_psicologica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    ),
    sexual_total = if_else(
      rowSums(
        select(., starts_with("violencia21_sexual")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    ),
    economica_total = if_else(
      rowSums(
        select(., starts_with("violencia21_economica")) == "1",
        na.rm = TRUE
      ) > 0,
      "Sí", "No"
    )
  )
# ==============================
# 3. Summary of "Sí" answers only, per year
# ==============================
# Counts the rows answering "Sí" for each of the four *_total columns.
#   data: data frame holding fisica_total, psicologica_total, sexual_total
#         and economica_total.
#   year: value stored in the `year` column of the result.
# Returns one row per violence type with columns tipo_violencia (factor in a
# fixed order), total and year. Types without any "Sí" produce no row.
resumen_si <- function(data, year) {
  orden_tipos <- c(
    "fisica_total", "psicologica_total", "sexual_total", "economica_total"
  )
  solo_si <- data %>%
    select(fisica_total, psicologica_total, sexual_total, economica_total) %>%
    pivot_longer(
      everything(),
      names_to = "tipo_violencia",
      values_to = "respuesta"
    ) %>%
    filter(respuesta == "Sí") # keep "Sí" only
  solo_si %>%
    group_by(tipo_violencia) %>%
    summarise(total = n(), .groups = "drop") %>%
    mutate(
      year = year,
      tipo_violencia = factor(tipo_violencia, levels = orden_tipos)
    )
}

resumen_16 <- resumen_si(violec_especif16, "2016")
resumen_21 <- resumen_si(violec_especif21, "2021")
resumen_comparativo <- bind_rows(resumen_16, resumen_21)
# ==============================
# 4. Percentages and bar labels for the "Sí"-only comparison
# ==============================
# porcentaje is the share of each violence type among all "Sí" counts of the
# same year (the shares add up to 100 within a year); it is not the share of
# surveyed women.
# The former as.numeric(gsub("%", "", porcentaje)) round trip was dropped:
# porcentaje is already numeric, so it had no effect. The label now carries
# the percent sign, matching the "count (percent%)" labels of the other charts.
resumen_comparativo <- resumen_comparativo %>%
  group_by(year) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup() %>%
  mutate(label = paste0(total, " (", porcentaje, "%)"))
# Dodged bars of the number of "Sí" answers per violence type, one bar per
# year, labelled with the precomputed `label` column.
ggplot(
  resumen_comparativo,
  aes(x = tipo_violencia, y = total, fill = year)
) +
  geom_col(position = position_dodge(width = 0.9)) +
  geom_text(
    aes(label = label),
    position = position_dodge(width = 0.9),
    vjust = -0.4,
    size = 3
  ) +
  scale_x_discrete(labels = c(
    fisica_total = "vio. física",
    psicologica_total = "vio. psicológica",
    sexual_total = "vio. sexual",
    economica_total = "vio. económica"
  )) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Comparación de mujeres que han sufrido violencia (solo Sí) entre 2016 y 2021",
    x = "",
    y = "Número de mujeres",
    fill = "Año"
  ) +
  theme_minimal()
# library() was fused onto the previous line (parse error); now separate.
library(dplyr)
library(tidyr)
library(ggplot2)
# ==============================
# 1-2. Yes/No totals per violence type for ad_violec_16 / ad_violec_21
# ==============================
# Returns "Sí" for every row whose columns starting with `prefijo` sum to more
# than 0 (missing values ignored) and "No" otherwise.
# NOTE(review): assumes the item columns are numeric 0/1 -- TODO confirm.
marcar_si_no <- function(datos, prefijo) {
  if_else(
    rowSums(select(datos, starts_with(prefijo)), na.rm = TRUE) > 0,
    "Sí",
    "No"
  )
}

# 2016
ad_violec_16 <- ad_violec_16 %>%
  mutate(
    fisica_total = marcar_si_no(., "violencia16_fisica"),
    psicologica_total = marcar_si_no(., "violencia16_psicologica"),
    sexual_total = marcar_si_no(., "violencia16_sexual"),
    economica_total = marcar_si_no(., "violencia16_economica")
  )

# 2021
ad_violec_21 <- ad_violec_21 %>%
  mutate(
    fisica_total = marcar_si_no(., "violencia21_fisica"),
    psicologica_total = marcar_si_no(., "violencia21_psicologica"),
    sexual_total = marcar_si_no(., "violencia21_sexual"),
    economica_total = marcar_si_no(., "violencia21_economica")
  )
# ==============================
# 3. Summary of "Sí" answers only, per year
# ==============================
# Counts the rows answering "Sí" for each of the four *_total columns.
#   data: data frame holding fisica_total, psicologica_total, sexual_total
#         and economica_total.
#   year: value stored in the `year` column of the result.
# Returns one row per violence type with columns tipo_violencia (factor in a
# fixed order), total and year. Types without any "Sí" produce no row.
resumen_si <- function(data, year) {
  orden_tipos <- c(
    "fisica_total", "psicologica_total", "sexual_total", "economica_total"
  )
  solo_si <- data %>%
    select(fisica_total, psicologica_total, sexual_total, economica_total) %>%
    pivot_longer(
      everything(),
      names_to = "tipo_violencia",
      values_to = "respuesta"
    ) %>%
    filter(respuesta == "Sí") # keep "Sí" only
  solo_si %>%
    group_by(tipo_violencia) %>%
    summarise(total = n(), .groups = "drop") %>%
    mutate(
      year = year,
      tipo_violencia = factor(tipo_violencia, levels = orden_tipos)
    )
}

resumen_16 <- resumen_si(ad_violec_16, "2016")
resumen_21 <- resumen_si(ad_violec_21, "2021")
resumen_comparativo <- bind_rows(resumen_16, resumen_21)
# ==============================
# 4. Percentages and bar labels for the "Sí"-only comparison
# ==============================
# porcentaje is the share of each violence type among all "Sí" counts of the
# same year (the shares add up to 100 within a year); it is not the share of
# surveyed women.
# The former as.numeric(gsub("%", "", porcentaje)) round trip was dropped:
# porcentaje is already numeric, so it had no effect. The label now carries
# the percent sign, matching the "count (percent%)" labels of the other charts.
resumen_comparativo <- resumen_comparativo %>%
  group_by(year) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup() %>%
  mutate(label = paste0(total, " (", porcentaje, "%)"))
# Dodged bars of the number of "Sí" answers per violence type, one bar per
# year, labelled with the precomputed `label` column.
ggplot(
  resumen_comparativo,
  aes(x = tipo_violencia, y = total, fill = year)
) +
  geom_col(position = position_dodge(width = 0.9)) +
  geom_text(
    aes(label = label),
    position = position_dodge(width = 0.9),
    vjust = -0.4,
    size = 3
  ) +
  scale_x_discrete(labels = c(
    fisica_total = "vio. física",
    psicologica_total = "vio. psicológica",
    sexual_total = "vio. sexual",
    economica_total = "vio. económica"
  )) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Comparación de mujeres que han sufrido violencia (15-18) entre 2016 y 2021",
    x = "",
    y = "Número de mujeres",
    fill = "Año"
  ) +
  theme_minimal()
# library() was fused onto the previous line (parse error); now separate.
library(dplyr)
library(tidyr)
library(ggplot2)
### --- Prepare 2016 ---
# Long table of the physical-violence items: one row per (respondent, item).
# pregunta_num is the numeric suffix of the column name; respuesta is recoded
# from 0/1 to "No"/"Sí" (anything else becomes NA).
violencia_long16 <- violec_especif16 %>%
  select(starts_with("violencia16_fisica")) %>%
  pivot_longer(
    everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia16_fisica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    anio = 2016
  )

# Share of "Sí" per item. Rows with a missing answer are not removed first, so
# they count towards the denominator. The result stays grouped by
# (pregunta_num, anio).
resumen16 <- violencia_long16 %>%
  group_by(pregunta_num, respuesta, anio) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

### --- Prepare 2021 ---
# Same steps as for 2016, on the 2021 columns.
violencia_long21 <- violec_especif21 %>%
  select(starts_with("violencia21_fisica")) %>%
  pivot_longer(
    everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia21_fisica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    anio = 2021
  )

resumen21 <- violencia_long21 %>%
  group_by(pregunta_num, respuesta, anio) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")
### --- Combine both years ---
resumen_total <- bind_rows(resumen16, resumen21)

### --- Bar chart ---
# Percentage of "Sí" per physical-violence item, one dodged bar per year.
ggplot(
  resumen_total,
  aes(x = factor(pregunta_num), y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9), vjust = -0.25, size = 3
  ) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia física (2016 vs 2021)",
    x = "Pregunta (1-9)",
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal()
# library() was fused onto the previous line (parse error); now separate.
library(dplyr)
library(tidyr)
library(ggplot2)
### --- Prepare 2016 ---
# Long table of the physical-violence items of ad_violec_16: one row per
# (respondent, item). pregunta_num is the numeric suffix of the column name;
# respuesta is recoded from 0/1 to "No"/"Sí" (anything else becomes NA).
violencia_long16 <- ad_violec_16 %>%
  select(starts_with("violencia16_fisica")) %>%
  pivot_longer(
    everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia16_fisica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    anio = 2016
  )

# Share of "Sí" per item. Rows with a missing answer are not removed first, so
# they count towards the denominator. The result stays grouped by
# (pregunta_num, anio).
resumen16 <- violencia_long16 %>%
  group_by(pregunta_num, respuesta, anio) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

### --- Prepare 2021 ---
# Same steps as for 2016, on ad_violec_21.
violencia_long21 <- ad_violec_21 %>%
  select(starts_with("violencia21_fisica")) %>%
  pivot_longer(
    everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia21_fisica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    anio = 2021
  )

resumen21 <- violencia_long21 %>%
  group_by(pregunta_num, respuesta, anio) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")
### --- Combine both years ---
resumen_total <- bind_rows(resumen16, resumen21)

### --- Bar chart ---
# Percentage of "Sí" per physical-violence item, one dodged bar per year.
ggplot(
  resumen_total,
  aes(x = factor(pregunta_num), y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = paste0(porcentaje, "%")),
    position = position_dodge(width = 0.9), vjust = -0.25, size = 3
  ) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia física (2016 vs 2021) (15-18)",
    x = "Pregunta (1-9)",
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal()
# library() was fused onto the previous line (parse error); now separate.
library(dplyr)
library(ggplot2)
# ==============================
# 2016: dichotomous variable
# ==============================
# viol_fis_disc is "Sí" when the physical-violence item columns of a row sum
# to more than 0 (missing values ignored), otherwise "No".
violec_especif16 <- violec_especif16 %>%
  mutate(
    viol_fis_disc = if_else(
      rowSums(select(., starts_with("violencia16_fisica")), na.rm = TRUE) > 0,
      "Sí",
      "No"
    ),
    anio = 2016
  )

# ==============================
# 2021: dichotomous variable
# ==============================
violec_especif21 <- violec_especif21 %>%
  mutate(
    viol_fis_disc = if_else(
      rowSums(select(., starts_with("violencia21_fisica")), na.rm = TRUE) > 0,
      "Sí",
      "No"
    ),
    anio = 2021
  )
# ==============================
# Both years together
# ==============================
datos_total <- bind_rows(
  select(violec_especif16, viol_fis_disc, anio),
  select(violec_especif21, viol_fis_disc, anio)
)

# ==============================
# Counts and percentages
# ==============================
# One row per (year, answer); porcentaje is the share of the answer within
# its year, rounded to one decimal. The result stays grouped by anio.
datos_plot <- datos_total %>%
  filter(!is.na(viol_fis_disc)) %>%
  group_by(anio, viol_fis_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))
# ==============================
# Comparative bar chart
# ==============================
# Percentage of "Sí"/"No" for physical violence, one dodged bar per year,
# labelled "count (percent%)".
ggplot(
  datos_plot,
  aes(x = viol_fis_disc, y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    position = position_dodge(width = 0.8),
    vjust = -0.5, size = 4
  ) +
  scale_fill_manual(
    values = c("2016" = "lightblue", "2021" = "salmon"),
    name = "Año"
  ) +
  labs(
    title = "Mujeres que han sufrido violencia física (2016 vs 2021)",
    x = "",
    y = "Porcentaje"
  ) +
  theme_minimal()

# The assignment below was fused onto the theme_minimal() line, which does not
# parse; it now starts on its own line.
# 2016: age groups (ages below 15 or missing become NA), the dichotomous
# physical-violence flag and the year as a string.
data_merged16 <- data_merged16 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      EDAD >= 15 & EDAD <= 18 ~ "15-18",
      EDAD >= 19 & EDAD <= 25 ~ "19-25",
      EDAD >= 26 & EDAD <= 35 ~ "26-35",
      EDAD >= 36 & EDAD <= 49 ~ "36-49",
      EDAD >= 50 & EDAD <= 64 ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),
    viol_fis_disc = if_else(
      rowSums(select(., starts_with("violencia16_fisica")), na.rm = TRUE) > 0,
      "Sí", "No"
    ),
    anio = "2016"
  )
# 2021: age groups (ages below 15 or missing become NA), the dichotomous
# physical-violence flag and the year as a string.
data_merged21 <- data_merged21 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),
    viol_fis_disc = if_else(
      rowSums(select(., starts_with("violencia21_fisica")), na.rm = TRUE) > 0,
      "Sí", "No"
    ),
    anio = "2021"
  )
# Stack both years, keeping only the columns needed for the chart.
datos_total <- bind_rows(
  select(data_merged16, grupo_edad, viol_fis_disc, anio),
  select(data_merged21, grupo_edad, viol_fis_disc, anio)
)

# Counts per (year, age group, answer) and the share of each answer within
# its year and age group. The result stays grouped by (anio, grupo_edad).
datos_plot <- datos_total %>%
  filter(!is.na(grupo_edad), !is.na(viol_fis_disc)) %>%
  group_by(anio, grupo_edad, viol_fis_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio, grupo_edad) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))

# Fix the display order of the age groups.
datos_plot$grupo_edad <- factor(
  datos_plot$grupo_edad,
  levels = c("15-18", "19-25", "26-35", "36-49", "50-64", "65-80+")
)
# Stacked, normalised bars ("fill") of Sí/No per age group, one facet per
# year; each segment is labelled with its percentage.
ggplot(datos_plot, aes(x = grupo_edad, y = porcentaje, fill = viol_fis_disc)) +
  geom_col(position = "fill") +
  geom_text(
    aes(label = paste0(round(porcentaje, 1), "%")),
    position = position_fill(vjust = 0.5),
    color = "white",
    size = 3
  ) +
  facet_wrap(~anio) +
  scale_fill_manual(values = c("No" = "lightblue", "Sí" = "salmon")) +
  labs(
    title = "Violencia física por grupo de edad (2016 vs 2021)",
    x = "Grupo de edad",
    y = "Proporción",
    fill = "Violencia"
  ) +
  theme_minimal()
# library() was fused onto the previous line (parse error); now separate.
library(dplyr)
library(ggplot2)
# ==============================
# 2016: dichotomous variable
# ==============================
# viol_fis_disc is "Sí" when the physical-violence item columns of a row sum
# to more than 0 (missing values ignored), otherwise "No".
ad_violec_16 <- ad_violec_16 %>%
  mutate(
    viol_fis_disc = if_else(
      rowSums(select(., starts_with("violencia16_fisica")), na.rm = TRUE) > 0,
      "Sí",
      "No"
    ),
    anio = 2016
  )

# ==============================
# 2021: dichotomous variable
# ==============================
ad_violec_21 <- ad_violec_21 %>%
  mutate(
    viol_fis_disc = if_else(
      rowSums(select(., starts_with("violencia21_fisica")), na.rm = TRUE) > 0,
      "Sí",
      "No"
    ),
    anio = 2021
  )
# ==============================
# Both years together
# ==============================
datos_total <- bind_rows(
  select(ad_violec_16, viol_fis_disc, anio),
  select(ad_violec_21, viol_fis_disc, anio)
)

# ==============================
# Counts and percentages
# ==============================
# One row per (year, answer); porcentaje is the share of the answer within
# its year, rounded to one decimal. The result stays grouped by anio.
datos_plot <- datos_total %>%
  filter(!is.na(viol_fis_disc)) %>%
  group_by(anio, viol_fis_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))
# ==============================
# Comparative bar chart
# ==============================
# Percentage of "Sí"/"No" for physical violence, one dodged bar per year,
# labelled "count (percent%)".
ggplot(
  datos_plot,
  aes(x = viol_fis_disc, y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    position = position_dodge(width = 0.8),
    vjust = -0.5, size = 4
  ) +
  scale_fill_manual(
    values = c("2016" = "lightblue", "2021" = "salmon"),
    name = "Año"
  ) +
  labs(
    title = "Mujeres que han sufrido violencia física (2016 vs 2021) (15-18)",
    x = "",
    y = "Porcentaje"
  ) +
  theme_minimal()

# The assignment below was fused onto the theme_minimal() line, which does not
# parse; it now starts on its own line.
# 2016: age groups with a separate "80+" bucket (ages below 15 or missing
# become NA). This overwrites any earlier grupo_edad column.
data_merged16 <- data_merged16 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      EDAD >= 15 & EDAD <= 18 ~ "15-18",
      EDAD >= 19 & EDAD <= 25 ~ "19-25",
      EDAD >= 26 & EDAD <= 35 ~ "26-35",
      EDAD >= 36 & EDAD <= 49 ~ "36-49",
      EDAD >= 50 & EDAD <= 64 ~ "50-64",
      EDAD >= 65 & EDAD <= 80 ~ "65-80",
      EDAD > 80 ~ "80+",
      TRUE ~ NA_character_
    )
  )
# 2021: age groups with a separate "80+" bucket (ages below 15 or missing
# become NA). This overwrites any earlier grupo_edad column.
data_merged21 <- data_merged21 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      between(EDAD, 65, 80) ~ "65-80",
      EDAD > 80 ~ "80+",
      TRUE ~ NA_character_
    )
  )
# Long tables of the physical-violence items with the age group kept: one row
# per (respondent, item). pregunta_num is the numeric suffix of the column
# name; respuesta is recoded from 0/1 to "No"/"Sí" (anything else becomes NA).
violencia_long16 <- data_merged16 %>%
  select(grupo_edad, starts_with("violencia16_fisica")) %>%
  pivot_longer(
    starts_with("violencia16_fisica"),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia16_fisica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )

violencia_long21 <- data_merged21 %>%
  select(grupo_edad, starts_with("violencia21_fisica")) %>%
  pivot_longer(
    starts_with("violencia21_fisica"),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia21_fisica", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
  )
# Share of "Sí" per (age group, item) for 2016. Missing answers and missing
# age groups are not removed, so NA answers count towards the denominator and
# an NA age group is kept. The result stays grouped by
# (grupo_edad, pregunta_num).
resumen16 <- violencia_long16 %>%
  group_by(grupo_edad, pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(grupo_edad, pregunta_num) %>%
  mutate(porcentaje = 100 * total / sum(total)) %>%
  filter(respuesta == "Sí")
# Share of "Sí" per (age group, item) for 2021; same steps as resumen16.
# Three statements in this section were fused onto shared lines (summary/plot,
# plot/plot, plot/library), which does not parse; each now starts on its own
# line.
resumen21 <- violencia_long21 %>%
  group_by(grupo_edad, pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(grupo_edad, pregunta_num) %>%
  mutate(porcentaje = 100 * total / sum(total)) %>%
  filter(respuesta == "Sí")

# 2016: percentage of "Sí" per item, one dodged bar per age group.
ggplot(
  resumen16,
  aes(x = factor(pregunta_num), y = porcentaje, fill = grupo_edad)
) +
  geom_col(position = position_dodge()) +
  labs(
    title = "Violencia física por grupo de edad (2016)",
    x = "Pregunta",
    y = "Porcentaje",
    fill = "Grupo de edad"
  ) +
  theme_minimal()

# 2021: same chart on the 2021 summary.
ggplot(
  resumen21,
  aes(x = factor(pregunta_num), y = porcentaje, fill = grupo_edad)
) +
  geom_col(position = position_dodge()) +
  labs(
    title = "Violencia física por grupo de edad (2021)",
    x = "Pregunta",
    y = "Porcentaje",
    fill = "Grupo de edad"
  ) +
  theme_minimal()
library(dplyr)
library(tidyr)
library(ggplot2)
# ==============================
# 1. Long data, one row per (respondent, item)
# ==============================
# pregunta_num is the numeric suffix of the psychological-violence column
# name; respuesta keeps the original coding.
long_psi16 <- violec_especif16 %>%
  select(starts_with("violencia16_psicologica")) %>%
  pivot_longer(
    everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia16_psicologica", "", pregunta)),
    anio = 2016
  )

long_psi21 <- violec_especif21 %>%
  select(starts_with("violencia21_psicologica")) %>%
  pivot_longer(
    everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia21_psicologica", "", pregunta)),
    anio = 2021
  )
# ==============================
# 2-3. "Sí" answers only, plus manual bar positions
# ==============================
# total counts the answers equal to 1 per (year, item); porcentaje is the
# share of each item among all such answers of the same year.
# Bar positions: items are spaced 3 units apart and the two years are shifted
# by -0.4 (2016) and +0.4 (any other year) around the item position.
resumen_psi_total <- bind_rows(long_psi16, long_psi21) %>%
  filter(respuesta == 1) %>% # 1 = Sí
  group_by(anio, pregunta_num) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup() %>%
  mutate(
    pregunta_scaled = pregunta_num * 3,
    offset = ifelse(anio == 2016, -0.4, 0.4),
    xpos = pregunta_scaled + offset
  )
# ==============================
# 4. Chart
# ==============================
# Axis breaks and the axis title are derived from the items present in the
# data. They were hard-coded to 1-15 although the 2021 data contains item 16,
# whose bar was drawn without a tick label.
preguntas_psi <- sort(unique(resumen_psi_total$pregunta_num))

ggplot(resumen_psi_total, aes(x = xpos, y = porcentaje, fill = factor(anio))) +
  geom_col(width = 0.8) +
  scale_x_continuous(
    breaks = preguntas_psi * 3, # item centres, see pregunta_scaled
    labels = preguntas_psi,
    expand = expansion(mult = c(0.02, 0.02))
  ) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia psicológica (2016 vs 2021)",
    x = paste0("Pregunta (", min(preguntas_psi), "-", max(preguntas_psi), ")"),
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal()
# library() was fused onto the previous line (parse error); now separate.
library(dplyr)
library(tidyr)
library(knitr)
library(kableExtra)
# 1. Combine percentage and absolute count into one text cell, then spread
#    the years into columns (one row per item, columns prefixed "Año_").
tabla_valores <- resumen_psi_total %>%
  mutate(Porcentaje_Total = paste0(porcentaje, "% (", total, ")")) %>%
  select(pregunta_num, anio, Porcentaje_Total) %>%
  pivot_wider(
    names_from = anio,
    values_from = Porcentaje_Total,
    names_prefix = "Año_"
  ) %>%
  arrange(pregunta_num)
# 2. Show the table
# The rendered table header used to be fused onto the kable_styling() line,
# which does not parse; the call now ends on its own line.
tabla_valores %>%
  kable(
    col.names = c("Pregunta", "2016", "2021"),
    align = "c",
    caption = "Respuestas 'Sí' de violencia psicológica (2016 vs 2021)"
  ) %>%
  kable_styling(full_width = FALSE, position = "center")
| Pregunta | 2016 | 2021 |
|---|---|---|
| 1 | 11.5% (16884) | 11% (15193) |
| 2 | 10.6% (15585) | 10.9% (15147) |
| 3 | 9.1% (13353) | 10% (13852) |
| 4 | 8% (11833) | 8.1% (11269) |
| 5 | 8.6% (12656) | 8% (11155) |
| 6 | 3.3% (4909) | 3.5% (4879) |
| 7 | 3.9% (5675) | 4% (5600) |
| 8 | 5.5% (8153) | 6.7% (9349) |
| 9 | 1.1% (1672) | 1.1% (1512) |
| 10 | 2.5% (3648) | 2.5% (3412) |
| 11 | 4.1% (6007) | 3.9% (5474) |
| 12 | 16.9% (24838) | 15.4% (21289) |
| 13 | 4.7% (6952) | 5.1% (7006) |
| 14 | 3% (4484) | 3.5% (4799) |
| 15 | 7.3% (10701) | 6.1% (8419) |
| 16 | NA | 0.2% (267) |
library(dplyr)
library(tidyr)
library(ggplot2)
# ==============================
# 1. Long data, one row per (respondent, item)
# ==============================
# pregunta_num is the numeric suffix of the psychological-violence column
# name; respuesta keeps the original coding.
long_psi16 <- ad_violec_16 %>%
  select(starts_with("violencia16_psicologica")) %>%
  pivot_longer(
    everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia16_psicologica", "", pregunta)),
    anio = 2016
  )

long_psi21 <- ad_violec_21 %>%
  select(starts_with("violencia21_psicologica")) %>%
  pivot_longer(
    everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia21_psicologica", "", pregunta)),
    anio = 2021
  )
# ==============================
# 2-3. "Sí" answers only, plus manual bar positions
# ==============================
# total counts the answers equal to 1 per (year, item); porcentaje is the
# share of each item among all such answers of the same year.
# Bar positions: items are spaced 3 units apart and the two years are shifted
# by -0.4 (2016) and +0.4 (any other year) around the item position.
resumen_psi_total <- bind_rows(long_psi16, long_psi21) %>%
  filter(respuesta == 1) %>% # 1 = Sí
  group_by(anio, pregunta_num) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  ungroup() %>%
  mutate(
    pregunta_scaled = pregunta_num * 3,
    offset = ifelse(anio == 2016, -0.4, 0.4),
    xpos = pregunta_scaled + offset
  )
# ==============================
# 4. Chart
# ==============================
# Axis breaks and the axis title are derived from the items present in the
# data. They were hard-coded to 1-15 although the 2021 data contains item 16,
# whose bar was drawn without a tick label.
preguntas_psi <- sort(unique(resumen_psi_total$pregunta_num))

ggplot(resumen_psi_total, aes(x = xpos, y = porcentaje, fill = factor(anio))) +
  geom_col(width = 0.8) +
  scale_x_continuous(
    breaks = preguntas_psi * 3, # item centres, see pregunta_scaled
    labels = preguntas_psi,
    expand = expansion(mult = c(0.02, 0.02))
  ) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia psicológica (2016 vs 2021) (15-18)",
    x = paste0("Pregunta (", min(preguntas_psi), "-", max(preguntas_psi), ")"),
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal()
# library() was fused onto the previous line (parse error); now separate.
library(dplyr)
library(tidyr)
library(knitr)
library(kableExtra)
# 1. Combine percentage and absolute count into one text cell, then spread
#    the years into columns (one row per item, columns prefixed "Año_").
tabla_valores <- resumen_psi_total %>%
  mutate(Porcentaje_Total = paste0(porcentaje, "% (", total, ")")) %>%
  select(pregunta_num, anio, Porcentaje_Total) %>%
  pivot_wider(
    names_from = anio,
    values_from = Porcentaje_Total,
    names_prefix = "Año_"
  ) %>%
  arrange(pregunta_num)
# 2. Show the table
# The rendered table header used to be fused onto the kable_styling() line,
# which does not parse; the call now ends on its own line.
# The caption now carries the "(15-18)" marker used by the other outputs built
# from ad_violec_16 / ad_violec_21, so it cannot be confused with the
# full-sample table.
tabla_valores %>%
  kable(
    col.names = c("Pregunta", "2016", "2021"),
    align = "c",
    caption = "Respuestas 'Sí' de violencia psicológica (2016 vs 2021) (15-18)"
  ) %>%
  kable_styling(full_width = FALSE, position = "center")
| Pregunta | 2016 | 2021 |
|---|---|---|
| 1 | 6.1% (277) | 6.1% (207) |
| 2 | 8% (364) | 8.8% (300) |
| 3 | 14.6% (667) | 14.6% (500) |
| 4 | 5.1% (231) | 5.5% (189) |
| 5 | 5.6% (255) | 5.1% (174) |
| 6 | 2.8% (128) | 3.3% (112) |
| 7 | 4.4% (199) | 3.5% (120) |
| 8 | 14.1% (643) | 16.1% (551) |
| 9 | 0.2% (10) | 0.4% (13) |
| 10 | 1.6% (74) | 2.3% (77) |
| 11 | 2.2% (100) | 2% (68) |
| 12 | 19.5% (889) | 16.9% (578) |
| 13 | 12.2% (556) | 12.3% (420) |
| 14 | 0.7% (31) | 0.8% (29) |
| 15 | 3.2% (144) | 1.8% (62) |
| 16 | NA | 0.4% (14) |
library(dplyr)
library(ggplot2)
# ==============================
# 2016: dichotomous variable
# ==============================
# viol_psi_disc is "Sí" when the psychological-violence item columns of a row
# sum to more than 0 (missing values ignored), otherwise "No".
violec_especif16 <- violec_especif16 %>%
  mutate(
    viol_psi_disc = if_else(
      rowSums(select(., starts_with("violencia16_psicologica")), na.rm = TRUE) > 0,
      "Sí",
      "No"
    ),
    anio = 2016
  )

# ==============================
# 2021: dichotomous variable
# ==============================
violec_especif21 <- violec_especif21 %>%
  mutate(
    viol_psi_disc = if_else(
      rowSums(select(., starts_with("violencia21_psicologica")), na.rm = TRUE) > 0,
      "Sí",
      "No"
    ),
    anio = 2021
  )
# ==============================
# Both years together
# ==============================
datos_total_psi <- bind_rows(
  select(violec_especif16, viol_psi_disc, anio),
  select(violec_especif21, viol_psi_disc, anio)
)

# ==============================
# Counts and percentages
# ==============================
# One row per (year, answer); porcentaje is the share of the answer within
# its year, rounded to one decimal. The result stays grouped by anio.
datos_plot_psi <- datos_total_psi %>%
  filter(!is.na(viol_psi_disc)) %>%
  group_by(anio, viol_psi_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))
# ==============================
# Comparative bar chart
# ==============================
# Percentage of "Sí"/"No" for psychological violence, one dodged bar per
# year, labelled "count (percent%)".
ggplot(
  datos_plot_psi,
  aes(x = viol_psi_disc, y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    position = position_dodge(width = 0.8),
    vjust = -0.5, size = 4
  ) +
  scale_fill_manual(
    values = c("2016" = "lightblue", "2021" = "salmon"),
    name = "Año"
  ) +
  labs(
    title = "Mujeres que han sufrido violencia psicológica (2016 vs 2021)",
    x = "",
    y = "Porcentaje"
  ) +
  theme_minimal()
# library() was fused onto the previous line (parse error); now separate.
library(dplyr)
library(ggplot2)
# ==============================
# 1. Prepare 2016
# ==============================
# Age groups (ages below 15 or missing become NA), the dichotomous
# psychological-violence flag and the year as a string.
data_merged16 <- data_merged16 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),
    viol_psi_disc = if_else(
      rowSums(select(., starts_with("violencia16_psicologica")), na.rm = TRUE) > 0,
      "Sí", "No"
    ),
    anio = "2016"
  )

# ==============================
# 2. Prepare 2021
# ==============================
data_merged21 <- data_merged21 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),
    viol_psi_disc = if_else(
      rowSums(select(., starts_with("violencia21_psicologica")), na.rm = TRUE) > 0,
      "Sí", "No"
    ),
    anio = "2021"
  )
# ==============================
# 3. Combine both years
# ==============================
datos_total_psi <- bind_rows(
  select(data_merged16, grupo_edad, viol_psi_disc, anio),
  select(data_merged21, grupo_edad, viol_psi_disc, anio)
)

# ==============================
# 4. Compute percentages
# ==============================
# Counts per (year, age group, answer) and the unrounded share of each answer
# within its year and age group. The result stays grouped by
# (anio, grupo_edad).
datos_plot_psi <- datos_total_psi %>%
  filter(!is.na(grupo_edad), !is.na(viol_psi_disc)) %>%
  group_by(anio, grupo_edad, viol_psi_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio, grupo_edad) %>%
  mutate(porcentaje = 100 * total / sum(total))

# Fix the display order of the age groups.
datos_plot_psi$grupo_edad <- factor(
  datos_plot_psi$grupo_edad,
  levels = c("15-18", "19-25", "26-35", "36-49", "50-64", "65-80+")
)
# ==============================
# 5. Chart
# ==============================
# Stacked, normalised bars ("fill") of Sí/No per age group, one facet per
# year; each segment is labelled with its percentage and the y axis is shown
# in percent.
ggplot(
  datos_plot_psi,
  aes(x = grupo_edad, y = porcentaje, fill = viol_psi_disc)
) +
  geom_col(position = "fill") +
  geom_text(
    aes(label = paste0(round(porcentaje, 1), "%")),
    position = position_fill(vjust = 0.5),
    color = "white",
    size = 3
  ) +
  facet_wrap(~anio) +
  scale_fill_manual(values = c("No" = "lightblue", "Sí" = "salmon")) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Violencia psicológica por grupo de edad (2016 vs 2021)",
    x = "Grupo de edad",
    y = "Proporción",
    fill = "Violencia"
  ) +
  theme_minimal()
# library() was fused onto the previous line (parse error); now separate.
library(dplyr)
library(tidyr)
library(ggplot2)
### --- Prepare 2016 ---
# Long table of the sexual-violence items: one row per (respondent, item),
# missing answers removed. pregunta_num is the numeric suffix of the column
# name; respuesta is recoded from 0/1 to "No"/"Sí".
violencia_sex_long16 <- violec_especif16 %>%
  select(starts_with("violencia16_sexual")) %>%
  pivot_longer(
    everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta)) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia16_sexual", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    anio = 2016
  )

# Share of "Sí" per item; the result stays grouped by (pregunta_num, anio).
resumen_sex16 <- violencia_sex_long16 %>%
  group_by(pregunta_num, respuesta, anio) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")

### --- Prepare 2021 ---
# Same steps as for 2016, on the 2021 columns.
violencia_sex_long21 <- violec_especif21 %>%
  select(starts_with("violencia21_sexual")) %>%
  pivot_longer(
    everything(),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta)) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia21_sexual", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    anio = 2021
  )

resumen_sex21 <- violencia_sex_long21 %>%
  group_by(pregunta_num, respuesta, anio) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí")
### --- Combine both years ---
resumen_sex_total <- bind_rows(resumen_sex16, resumen_sex21)

### --- Bar chart with percentage and absolute count ---
# Percentage of "Sí" per sexual-violence item, one dodged bar per year; each
# bar is labelled with the percentage and, on a second line, the count.
ggplot(
  resumen_sex_total,
  aes(x = factor(pregunta_num), y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(porcentaje, "%\n", total)),
    position = position_dodge(width = 0.8),
    vjust = -0.25,
    size = 3
  ) +
  # extra head room so the two-line labels are not clipped
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia sexual (2016 vs 2021)",
    x = "Pregunta (1-5)",
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal()
# library() was fused onto the previous line (parse error); now separate.
library(dplyr)
library(tidyr)
library(ggplot2)
# ==============================
# 1. Define age groups
# ==============================
# EDAD is coerced to numeric; ages below 15 or missing fall through to NA.
data_merged16 <- data_merged16 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    )
  )

data_merged21 <- data_merged21 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    )
  )
# ==============================
# 2. Long format, keeping the age groups
# ==============================
# One row per (respondent, item); rows with a missing answer or a missing age
# group are dropped. respuesta is recoded from 0/1 to "No"/"Sí".

### --- 2016 ---
violencia_sex_long16 <- data_merged16 %>%
  select(grupo_edad, starts_with("violencia16_sexual")) %>%
  pivot_longer(
    starts_with("violencia16_sexual"),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta), !is.na(grupo_edad)) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia16_sexual", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    anio = "2016"
  )

### --- 2021 ---
violencia_sex_long21 <- data_merged21 %>%
  select(grupo_edad, starts_with("violencia21_sexual")) %>%
  pivot_longer(
    starts_with("violencia21_sexual"),
    names_to = "pregunta",
    values_to = "respuesta"
  ) %>%
  filter(!is.na(respuesta), !is.na(grupo_edad)) %>%
  mutate(
    pregunta_num = as.numeric(gsub("violencia21_sexual", "", pregunta)),
    respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí")),
    anio = "2021"
  )
# ==============================
# 3. Combine both years
# ==============================
violencia_sex_total <- bind_rows(violencia_sex_long16, violencia_sex_long21)

# ==============================
# 4. Compute percentages (by age group)
# ==============================
# Unrounded share of "Sí" per (year, age group, item). The result stays
# grouped by (anio, grupo_edad, pregunta_num).
resumen_sex_total <- violencia_sex_total %>%
  group_by(anio, grupo_edad, pregunta_num, respuesta) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio, grupo_edad, pregunta_num) %>%
  mutate(porcentaje = 100 * total / sum(total)) %>%
  filter(respuesta == "Sí")

# Fix the display order of the age groups.
resumen_sex_total$grupo_edad <- factor(
  resumen_sex_total$grupo_edad,
  levels = c("15-18", "19-25", "26-35", "36-49", "50-64", "65-80+")
)
# ==============================
# 5. Chart (now with age groups)
# ==============================
# Percentage of "Sí" per sexual-violence item, one dodged bar per age group
# and one facet per year; bars are labelled with percentage and count.
ggplot(
  resumen_sex_total,
  aes(
    x = factor(pregunta_num),
    y = porcentaje,
    fill = grupo_edad
  )
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(round(porcentaje, 1), "%\n", total)),
    position = position_dodge(width = 0.8),
    vjust = -0.25,
    size = 3
  ) +
  facet_wrap(~anio) +
  # extra head room so the two-line labels are not clipped
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Violencia sexual por grupo de edad (2016 vs 2021)",
    x = "Pregunta",
    y = "Porcentaje",
    fill = "Grupo de edad"
  ) +
  theme_minimal()
# library() was fused onto the previous line (parse error); now separate.
library(dplyr)
library(ggplot2)
# ==============================
# 2016: dichotomous variable
# ==============================
# "Sí" when the row sum over the violencia16_sexual* columns is positive,
# "No" otherwise. NAs are skipped in the sum, so a row whose items are all
# NA is classified as "No".
violec_especif16 <- violec_especif16 %>%
  mutate(
    viol_sex_disc = if_else(
      rowSums(select(., starts_with("violencia16_sexual")), na.rm = TRUE) > 0,
      true = "Sí",
      false = "No"
    ),
    anio = 2016
  )
# ==============================
# 2021: dichotomous variable
# ==============================
# Same rule applied to the violencia21_sexual* columns.
violec_especif21 <- violec_especif21 %>%
  mutate(
    viol_sex_disc = if_else(
      rowSums(select(., starts_with("violencia21_sexual")), na.rm = TRUE) > 0,
      true = "Sí",
      false = "No"
    ),
    anio = 2021
  )
# ==============================
# Both years together
# ==============================
datos_total_sex <- bind_rows(
  violec_especif16 %>% select(viol_sex_disc, anio),
  violec_especif21 %>% select(viol_sex_disc, anio)
)
# ==============================
# Summary of totals and percentages
# ==============================
# Count per year and category, then the rounded share within each year.
# The result stays grouped by anio.
datos_plot_sex <- datos_total_sex %>%
  filter(!is.na(viol_sex_disc)) %>%
  group_by(anio, viol_sex_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))
# ==============================
# Comparative bar chart
# ==============================
# One pair of bars (2016, 2021) per category ("No" / "Sí"); each bar is
# labelled with "count (percentage%)".
ggplot(
  datos_plot_sex,
  aes(x = viol_sex_disc, y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.8),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(
    values = c("2016" = "lightblue", "2021" = "salmon"),
    name = "Año"
  ) +
  labs(
    title = "Mujeres que han sufrido violencia sexual (2016 vs 2021)",
    x = "",
    y = "Porcentaje"
  ) +
  theme_minimal()
library(dplyr)
library(ggplot2)
# ==============================
# 1. Prepare 2016
# ==============================
# Adds an age bracket, a Sí/No flag for the sexual-violence items and the
# survey year (as a string) to the merged 2016 data.
data_merged16 <- data_merged16 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    # Ages not covered by any bracket (e.g. below 15, or NA) become NA.
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),
    # "Sí" when the row sum over the item columns is positive; NAs are
    # skipped in the sum, so an all-NA row is classified as "No".
    viol_sex_disc = if_else(
      rowSums(select(., starts_with("violencia16_sexual")), na.rm = TRUE) > 0,
      true = "Sí",
      false = "No"
    ),
    anio = "2016"
  )
# ==============================
# 2. Prepare 2021
# ==============================
# Same derivation for the merged 2021 data.
data_merged21 <- data_merged21 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),
    viol_sex_disc = if_else(
      rowSums(select(., starts_with("violencia21_sexual")), na.rm = TRUE) > 0,
      true = "Sí",
      false = "No"
    ),
    anio = "2021"
  )
# ==============================
# 3. Combine both years
# ==============================
datos_total_sex <- bind_rows(
  data_merged16 %>% select(grupo_edad, viol_sex_disc, anio),
  data_merged21 %>% select(grupo_edad, viol_sex_disc, anio)
)
# ==============================
# 4. Compute percentages (per age group)
# ==============================
# Count per year / age group / category, then the rounded share of each
# category within its year and age group. The result stays grouped by
# anio and grupo_edad.
datos_plot_sex <- datos_total_sex %>%
  filter(!(is.na(grupo_edad) | is.na(viol_sex_disc))) %>%
  group_by(anio, grupo_edad, viol_sex_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio, grupo_edad) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1))
# Fix the display order of the age groups
datos_plot_sex$grupo_edad <- factor(
  datos_plot_sex$grupo_edad,
  levels = c(
    "15-18", "19-25", "26-35",
    "36-49", "50-64", "65-80+"
  )
)
# ==============================
# 5. Plot
# ==============================
# Stacked bars per age group, one facet per year. position = "fill"
# rescales every bar to a total height of 1, so the y axis is a proportion
# and is formatted as a percentage; the text inside each segment shows the
# percentage computed beforehand.
ggplot(
  datos_plot_sex,
  aes(
    x = grupo_edad,
    y = porcentaje,
    fill = viol_sex_disc
  )
) +
  geom_col(position = "fill") +
  geom_text(
    aes(label = paste0(round(porcentaje, 1), "%")),
    # vjust = 0.5 centres each label inside its segment.
    position = position_fill(vjust = 0.5),
    color = "white",
    size = 3
  ) +
  facet_wrap(~anio) +
  scale_fill_manual(values = c("No" = "lightblue", "Sí" = "salmon")) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Violencia sexual por grupo de edad (2016 vs 2021)",
    x = "Grupo de edad",
    y = "Proporción",
    fill = "Violencia"
  ) +
  theme_minimal()
library(dplyr)
library(tidyr)
library(ggplot2)
# ==============================
# Function to reshape the data to long format
# ==============================
#' Reshape a family of item columns to long format
#'
#' Selects every column whose name starts with `prefix`, stacks them into
#' one row per (original row, item) and drops rows with a missing answer.
#'
#' @param data A data frame containing the item columns.
#' @param prefix Common column-name prefix of the items, for example
#'   "violencia16_economica". It is treated as a literal string.
#' @return A tibble with the columns `pregunta` (original column name),
#'   `respuesta` (factor with levels "No"/"Sí", built from the values 0/1)
#'   and `pregunta_num` (the column name with the prefix removed, converted
#'   to numeric).
long_format_violencia <- function(data, prefix) {
  data %>%
    select(starts_with(prefix)) %>%
    pivot_longer(
      cols = everything(),
      names_to = "pregunta",
      values_to = "respuesta"
    ) %>%
    filter(!is.na(respuesta)) %>%
    mutate(
      # fixed = TRUE: remove the prefix literally instead of interpreting it
      # as a regular expression (safe for prefixes containing ".", "(", ...).
      pregunta_num = as.numeric(gsub(prefix, "", pregunta, fixed = TRUE)),
      respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
    )
}
# ==============================
# 2016: economic violence
# ==============================
violencia_eco_long16 <- violec_especif16 %>%
  long_format_violencia("violencia16_economica") %>%
  mutate(anio = 2016)
# Count each answer per item, convert to a rounded percentage within the
# item and year, and keep only the "Sí" rows.
resumen_eco16 <- violencia_eco_long16 %>%
  group_by(pregunta_num, respuesta, anio) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí") # "Sí" only
# ==============================
# 2021: economic violence
# ==============================
violencia_eco_long21 <- violec_especif21 %>%
  long_format_violencia("violencia21_economica") %>%
  mutate(anio = 2021)
resumen_eco21 <- violencia_eco_long21 %>%
  group_by(pregunta_num, respuesta, anio) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí") # "Sí" only
# ==============================
# Combine 2016 and 2021
# ==============================
resumen_eco_total <- bind_rows(
  resumen_eco16,
  resumen_eco21
)
# ==============================
# Bar chart with percentage and total
# ==============================
# Dodged bars: percentage of "Sí" answers per item, one bar per year; each
# bar is labelled with its percentage and, on a second line, the count.
ggplot(
  resumen_eco_total,
  aes(x = factor(pregunta_num), y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(porcentaje, "%\n", total)),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.8),
    vjust = -0.25,
    size = 3
  ) +
  # No padding below zero, 18% padding above the tallest bar.
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.18))
  ) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia económica (2016 vs 2021)",
    x = "Pregunta (1-7)",
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal() +
  theme(
    # Extra space between the title and the plot area.
    plot.title = element_text(margin = margin(b = 22))
  )
library(dplyr)
library(tidyr)
library(ggplot2)
# ==============================
# Function to reshape the data to long format
# ==============================
#' Reshape a family of item columns to long format
#'
#' Selects every column whose name starts with `prefix`, stacks them into
#' one row per (original row, item) and drops rows with a missing answer.
#'
#' @param data A data frame containing the item columns.
#' @param prefix Common column-name prefix of the items, for example
#'   "violencia16_economica". It is treated as a literal string.
#' @return A tibble with the columns `pregunta` (original column name),
#'   `respuesta` (factor with levels "No"/"Sí", built from the values 0/1)
#'   and `pregunta_num` (the column name with the prefix removed, converted
#'   to numeric).
long_format_violencia <- function(data, prefix) {
  data %>%
    select(starts_with(prefix)) %>%
    pivot_longer(
      cols = everything(),
      names_to = "pregunta",
      values_to = "respuesta"
    ) %>%
    filter(!is.na(respuesta)) %>%
    mutate(
      # fixed = TRUE: remove the prefix literally instead of interpreting it
      # as a regular expression (safe for prefixes containing ".", "(", ...).
      pregunta_num = as.numeric(gsub(prefix, "", pregunta, fixed = TRUE)),
      respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
    )
}
# ==============================
# 2016: economic violence
# ==============================
# NOTE(review): ad_violec_16 / ad_violec_21 are defined elsewhere; the plot
# title used with this summary suggests the 15-18 subset -- confirm.
violencia_eco_long16 <- ad_violec_16 %>%
  long_format_violencia("violencia16_economica") %>%
  mutate(anio = 2016)
# Count each answer per item, convert to a rounded percentage within the
# item and year, and keep only the "Sí" rows.
resumen_eco16 <- violencia_eco_long16 %>%
  group_by(pregunta_num, respuesta, anio) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí") # "Sí" only
# ==============================
# 2021: economic violence
# ==============================
violencia_eco_long21 <- ad_violec_21 %>%
  long_format_violencia("violencia21_economica") %>%
  mutate(anio = 2021)
resumen_eco21 <- violencia_eco_long21 %>%
  group_by(pregunta_num, respuesta, anio) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí") # "Sí" only
# ==============================
# Combine 2016 and 2021
# ==============================
resumen_eco_total <- bind_rows(
  resumen_eco16,
  resumen_eco21
)
# ==============================
# Bar chart with percentage and total
# ==============================
# Dodged bars: percentage of "Sí" answers per item, one bar per year; each
# bar is labelled with its percentage and, on a second line, the count.
ggplot(
  resumen_eco_total,
  aes(x = factor(pregunta_num), y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(porcentaje, "%\n", total)),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.8),
    vjust = -0.25,
    size = 3
  ) +
  # No padding below zero, 18% padding above the tallest bar.
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.18))
  ) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia económica (2016 vs 2021) (15-18)",
    x = "Pregunta (1-7)",
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal() +
  theme(
    # Extra space between the title and the plot area.
    plot.title = element_text(margin = margin(b = 22))
  )
library(dplyr)
library(tidyr)
library(ggplot2)
# ==============================
# Function to reshape the data to long format
# ==============================
#' Reshape a family of item columns to long format
#'
#' Selects every column whose name starts with `prefix`, stacks them into
#' one row per (original row, item) and drops rows with a missing answer.
#'
#' @param data A data frame containing the item columns.
#' @param prefix Common column-name prefix of the items, for example
#'   "violencia16_economica". It is treated as a literal string.
#' @return A tibble with the columns `pregunta` (original column name),
#'   `respuesta` (factor with levels "No"/"Sí", built from the values 0/1)
#'   and `pregunta_num` (the column name with the prefix removed, converted
#'   to numeric).
long_format_violencia <- function(data, prefix) {
  data %>%
    select(starts_with(prefix)) %>%
    pivot_longer(
      cols = everything(),
      names_to = "pregunta",
      values_to = "respuesta"
    ) %>%
    filter(!is.na(respuesta)) %>%
    mutate(
      # fixed = TRUE: remove the prefix literally instead of interpreting it
      # as a regular expression (safe for prefixes containing ".", "(", ...).
      pregunta_num = as.numeric(gsub(prefix, "", pregunta, fixed = TRUE)),
      respuesta = factor(respuesta, levels = c(0, 1), labels = c("No", "Sí"))
    )
}
# ==============================
# 2016: economic violence
# ==============================
# NOTE(review): ad_violec_16 / ad_violec_21 are defined elsewhere; the plot
# title used with this summary suggests the 15-18 subset -- confirm.
violencia_eco_long16 <- ad_violec_16 %>%
  long_format_violencia("violencia16_economica") %>%
  mutate(anio = 2016)
# Count each answer per item, convert to a rounded percentage within the
# item and year, and keep only the "Sí" rows.
resumen_eco16 <- violencia_eco_long16 %>%
  group_by(pregunta_num, respuesta, anio) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí") # "Sí" only
# ==============================
# 2021: economic violence
# ==============================
violencia_eco_long21 <- ad_violec_21 %>%
  long_format_violencia("violencia21_economica") %>%
  mutate(anio = 2021)
resumen_eco21 <- violencia_eco_long21 %>%
  group_by(pregunta_num, respuesta, anio) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(pregunta_num, anio) %>%
  mutate(porcentaje = round(100 * total / sum(total), 1)) %>%
  filter(respuesta == "Sí") # "Sí" only
# ==============================
# Combine 2016 and 2021
# ==============================
resumen_eco_total <- bind_rows(
  resumen_eco16,
  resumen_eco21
)
# ==============================
# Bar chart with percentage and total
# ==============================
# Dodged bars: percentage of "Sí" answers per item, one bar per year; each
# bar is labelled with its percentage and, on a second line, the count.
ggplot(
  resumen_eco_total,
  aes(x = factor(pregunta_num), y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(porcentaje, "%\n", total)),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.8),
    vjust = -0.25,
    size = 3
  ) +
  # No padding below zero, 18% padding above the tallest bar.
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.18))
  ) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "salmon")) +
  labs(
    title = "Distribución de respuestas 'Sí' de violencia económica (2016 vs 2021) (15-18)",
    x = "Pregunta (1-7)",
    y = "Porcentaje",
    fill = "Año"
  ) +
  theme_minimal() +
  theme(
    # Extra space between the title and the plot area.
    plot.title = element_text(margin = margin(b = 22))
  )
library(dplyr)
library(ggplot2)
# ==============================
# Overall summary function
# ==============================
#' Add a Sí/No flag summarising a family of item columns
#'
#' @param data A data frame containing the item columns.
#' @param prefix Common column-name prefix of the items to sum.
#' @return `data` with an extra character column `disc_total`: "Sí" when
#'   the row sum over the matching columns is positive, "No" otherwise.
#'   NAs are skipped in the sum, so an all-NA row yields "No".
resumen_total <- function(data, prefix) {
  data %>%
    mutate(
      disc_total = if_else(
        rowSums(select(., starts_with(prefix)), na.rm = TRUE) > 0,
        true = "Sí",
        false = "No"
      )
    )
}
# ==============================
# 2016: prepare data
# ==============================
# Count the "Sí"/"No" flags and express each count as a rounded percentage
# of all counted rows of that year.
datos_eco16 <- violec_especif16 %>%
  resumen_total("violencia16_economica") %>%
  filter(!is.na(disc_total)) %>%
  group_by(disc_total) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1),
    anio = 2016
  )
# ==============================
# 2021: prepare data
# ==============================
datos_eco21 <- violec_especif21 %>%
  resumen_total("violencia21_economica") %>%
  filter(!is.na(disc_total)) %>%
  group_by(disc_total) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1),
    anio = 2021
  )
# ==============================
# Combine years
# ==============================
datos_eco_total <- bind_rows(
  datos_eco16,
  datos_eco21
)
# ==============================
# Comparative chart
# ==============================
# One pair of bars (2016, 2021) per category ("No" / "Sí"); each bar is
# labelled with "count (percentage%)".
ggplot(
  datos_eco_total,
  aes(x = disc_total, y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.8),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(
    values = c("2016" = "lightblue", "2021" = "salmon"),
    name = "Año"
  ) +
  labs(
    title = "Mujeres que han sufrido violencia económica (2016 vs 2021)",
    x = "",
    y = "Porcentaje"
  ) +
  theme_minimal()
library(dplyr)
library(ggplot2)
# ==============================
# 1. Prepare 2016
# ==============================
# Adds an age bracket, a Sí/No flag for the economic-violence items and the
# survey year (as a string) to the merged 2016 data.
data_merged16 <- data_merged16 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    # Ages not covered by any bracket (e.g. below 15, or NA) become NA.
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),
    # "Sí" when the row sum over the item columns is positive; NAs are
    # skipped in the sum, so an all-NA row is classified as "No".
    viol_eco_disc = if_else(
      rowSums(select(., starts_with("violencia16_economica")), na.rm = TRUE) > 0,
      true = "Sí",
      false = "No"
    ),
    anio = "2016"
  )
# ==============================
# 2. Prepare 2021
# ==============================
# Same derivation for the merged 2021 data.
data_merged21 <- data_merged21 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    ),
    viol_eco_disc = if_else(
      rowSums(select(., starts_with("violencia21_economica")), na.rm = TRUE) > 0,
      true = "Sí",
      false = "No"
    ),
    anio = "2021"
  )
# ==============================
# 3. Combine both years
# ==============================
datos_total_eco <- bind_rows(
  data_merged16 %>% select(grupo_edad, viol_eco_disc, anio),
  data_merged21 %>% select(grupo_edad, viol_eco_disc, anio)
)
# ==============================
# 4. Compute percentages
# ==============================
# Count per year / age group / category, then the (unrounded) share of each
# category within its year and age group. The result stays grouped by anio
# and grupo_edad.
datos_plot_eco <- datos_total_eco %>%
  filter(!(is.na(grupo_edad) | is.na(viol_eco_disc))) %>%
  group_by(anio, grupo_edad, viol_eco_disc) %>%
  summarise(total = n(), .groups = "drop") %>%
  group_by(anio, grupo_edad) %>%
  mutate(porcentaje = 100 * total / sum(total))
# Fix the display order of the age groups
datos_plot_eco$grupo_edad <- factor(
  datos_plot_eco$grupo_edad,
  levels = c(
    "15-18", "19-25", "26-35",
    "36-49", "50-64", "65-80+"
  )
)
# ==============================
# 5. Plot
# ==============================
# Stacked bars per age group, one facet per year. position = "fill"
# rescales every bar to a total height of 1, so the y axis is a proportion
# and is formatted as a percentage; the text inside each segment shows the
# percentage computed beforehand.
ggplot(
  datos_plot_eco,
  aes(
    x = grupo_edad,
    y = porcentaje,
    fill = viol_eco_disc
  )
) +
  geom_col(position = "fill") +
  geom_text(
    aes(label = paste0(round(porcentaje, 1), "%")),
    # vjust = 0.5 centres each label inside its segment.
    position = position_fill(vjust = 0.5),
    color = "white",
    size = 3
  ) +
  facet_wrap(~anio) +
  scale_fill_manual(values = c("No" = "lightblue", "Sí" = "salmon")) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Violencia económica por grupo de edad (2016 vs 2021)",
    x = "Grupo de edad",
    y = "Proporción",
    fill = "Violencia"
  ) +
  theme_minimal()
library(dplyr)
library(ggplot2)
# ==============================
# Overall summary function
# ==============================
#' Add a Sí/No flag summarising a family of item columns
#'
#' @param data A data frame containing the item columns.
#' @param prefix Common column-name prefix of the items to sum.
#' @return `data` with an extra character column `disc_total`: "Sí" when
#'   the row sum over the matching columns is positive, "No" otherwise.
#'   NAs are skipped in the sum, so an all-NA row yields "No".
resumen_total <- function(data, prefix) {
  data %>%
    mutate(
      disc_total = if_else(
        rowSums(select(., starts_with(prefix)), na.rm = TRUE) > 0,
        true = "Sí",
        false = "No"
      )
    )
}
# ==============================
# 2016: prepare data
# ==============================
# NOTE(review): ad_violec_16 / ad_violec_21 are defined elsewhere; the plot
# title used with this summary suggests the 15-18 subset -- confirm.
# Count the "Sí"/"No" flags and express each count as a rounded percentage
# of all counted rows of that year.
datos_eco16 <- ad_violec_16 %>%
  resumen_total("violencia16_economica") %>%
  filter(!is.na(disc_total)) %>%
  group_by(disc_total) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1),
    anio = 2016
  )
# ==============================
# 2021: prepare data
# ==============================
datos_eco21 <- ad_violec_21 %>%
  resumen_total("violencia21_economica") %>%
  filter(!is.na(disc_total)) %>%
  group_by(disc_total) %>%
  summarise(total = n(), .groups = "drop") %>%
  mutate(
    porcentaje = round(100 * total / sum(total), 1),
    anio = 2021
  )
# ==============================
# Combine years
# ==============================
datos_eco_total <- bind_rows(
  datos_eco16,
  datos_eco21
)
# ==============================
# Comparative chart
# ==============================
# One pair of bars (2016, 2021) per category ("No" / "Sí"); each bar is
# labelled with "count (percentage%)".
ggplot(
  datos_eco_total,
  aes(x = disc_total, y = porcentaje, fill = factor(anio))
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0(total, " (", porcentaje, "%)")),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.8),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(
    values = c("2016" = "lightblue", "2021" = "salmon"),
    name = "Año"
  ) +
  labs(
    title = "Mujeres que han sufrido violencia económica (2016 vs 2021) (15-18)",
    x = "",
    y = "Porcentaje"
  ) +
  theme_minimal()
library(dplyr)
library(dplyr)
library(dplyr)
# Simple variant: keep only women (SEXO == 2); all columns are retained
datos_mujeres16 <- filter(datos_basicos1_16, SEXO == 2)
# Check
glimpse(datos_mujeres16)
## Rows: 229,854
## Columns: 42
## $ ID_VIV <dbl> 100006.0, 100025.0, 100031.1, 100035.0, 100041.0, 100126.0,…
## $ ID_MUJ <chr> "0100006.04.01.02\n", "0100025.03.02.03\n", "0100031.05.01.…
## $ UPM <dbl> 100006, 100025, 100031, 100035, 100041, 100126, 100133, 100…
## $ PROG <dbl> 39, 46, 7, 46, 71, 16, 95, 10, 6, 51, 32, 75, 122, 41, 93, …
## $ VIV_SEL <dbl> 4, 3, 5, 3, 1, 1, 2, 3, 1, 3, 3, 2, 5, 1, 5, 2, 4, 1, 1, 1,…
## $ CVE_ENT <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ NOM_ENT <chr> "Aguascalientes\n", "Aguascalientes\n", "Aguascalientes\n",…
## $ CVE_MUN <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ NOM_MUN <chr> "Aguascalientes\n", "Aguascalientes\n", "Aguascalientes\n",…
## $ HOGAR <dbl> 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1,…
## $ N_REN <dbl> 2, 3, 3, 2, 3, 2, 1, 3, 2, 2, 1, 3, 2, 1, 2, 1, 3, 2, 7, 1,…
## $ COD_RES <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ COD_RES_E <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ NOMBRE <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ PAREN <dbl> 3, 1, 3, 1, 3, 2, 1, 8, 2, 3, 1, 3, 2, 1, 1, 1, 3, 2, 9, 1,…
## $ SEXO <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ EDAD <dbl> 20, 42, 50, 44, 28, 34, 46, 26, 33, 24, 52, 29, 55, 59, 25,…
## $ P2_5 <dbl> 1, 97, 97, 97, 1, 96, 96, 2, 97, 1, 97, 2, 97, 96, 96, 97, …
## $ P2_6 <dbl> 98, 97, 1, 97, 96, 96, 96, 96, 97, 96, 97, 1, 97, 97, 96, 9…
## $ NIV <dbl> 10, 4, 2, 3, 10, 10, 10, 7, 4, 10, 5, 10, 10, 10, 3, 5, 7, …
## $ GRA <dbl> 1, 3, 6, 3, 4, 4, 3, 3, 3, 4, 2, 3, 5, 5, 3, 3, 3, 3, 6, 3,…
## $ P2_8 <dbl> NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ P2_9 <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ P2_10 <dbl> 3, 3, 3, 3, 3, 2, 3, 1, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 8, 3,…
## $ P2_11 <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ P2_12 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ P2_13 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1,…
## $ P2_14 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 10, 12, 10, NA, NA, 9, NA, …
## $ P2_15 <dbl> 1, 1, 2, 1, 1, 5, 4, 1, NA, NA, NA, 1, 4, NA, 1, NA, 1, NA,…
## $ P2_16 <dbl> 6, 5, 3, 1, 5, 5, 3, 6, 1, 6, 1, 6, 5, 3, 2, 2, 3, 5, 5, 4,…
## $ CODIGO <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ REN_MUJ_EL <dbl> 2, 3, 3, 2, 3, 2, 1, 3, 2, 2, 1, 3, 2, 1, 2, 1, 3, 2, 7, 1,…
## $ REN_INF_AD <dbl> NA, NA, 3, NA, NA, 2, 1, NA, NA, NA, 1, NA, NA, NA, NA, NA,…
## $ FN_DIA <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ FN_MES <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ FAC_VIV <dbl> 86, 92, 92, 80, 101, 78, 84, 84, 80, 80, 102, 94, 90, 76, 7…
## $ FAC_MUJ <dbl> 172, 184, 184, 80, 203, 78, 84, 168, 80, 160, 205, 187, 90,…
## $ DOMINIO <chr> "U\n", "U\n", "U\n", "U\n", "U\n", "U\n", "U\n", "U\n", "U\…
## $ ESTRATO <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 2,…
## $ EST_DIS <dbl> 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,…
## $ UPM_DIS <dbl> 3, 9, 11, 12, 14, 36, 37, 38, 39, 39, 43, 46, 56, 65, 87, 1…
## $ COD_M15 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
##
## 2
## 229854
``` r
## Age of women
# Histogram of the EDAD column in the 2016 women data.
hist(
  datos_mujeres16$EDAD,
  main = "Distribución de la edad (mujeres) 2016",
  xlab = "Edad",
  col = "lightblue",
  border = "white"
)
# Same histogram for the 2021 women data.
hist(
  datos_mujeres21$EDAD,
  main = "Distribución de la edad (mujeres) 2021",
  xlab = "Edad",
  col = "#DDA0DD",
  border = "white"
)
library(tidyverse)
# Women aged 15 or older in 2016, with a seven-bracket age group and the
# survey year attached.
edad_2016 <- filter(datos_mujeres16, EDAD >= 15) %>%
  mutate(
    grupo_edad = case_when(
      EDAD >= 15 & EDAD <= 18 ~ "15-18",
      EDAD >= 19 & EDAD <= 25 ~ "19-25",
      EDAD >= 26 & EDAD <= 35 ~ "26-35",
      EDAD >= 36 & EDAD <= 49 ~ "36-49",
      EDAD >= 50 & EDAD <= 64 ~ "50-64",
      EDAD >= 65 & EDAD <= 79 ~ "65-79",
      EDAD >= 80 ~ "80+",
      # Anything not matched above is left missing.
      TRUE ~ NA_character_
    ),
    anio = "2016"
  )
# Same derivation for 2021.
edad_2021 <- filter(datos_mujeres21, EDAD >= 15) %>%
  mutate(
    grupo_edad = case_when(
      EDAD >= 15 & EDAD <= 18 ~ "15-18",
      EDAD >= 19 & EDAD <= 25 ~ "19-25",
      EDAD >= 26 & EDAD <= 35 ~ "26-35",
      EDAD >= 36 & EDAD <= 49 ~ "36-49",
      EDAD >= 50 & EDAD <= 64 ~ "50-64",
      EDAD >= 65 & EDAD <= 79 ~ "65-79",
      EDAD >= 80 ~ "80+",
      TRUE ~ NA_character_
    ),
    anio = "2021"
  )
# Stack both years.
edad_all <- bind_rows(
  edad_2016,
  edad_2021
)
# Number of women per year and age group, plus the rounded share of each
# age group within its year.
tabla_edad <- edad_all %>%
  filter(!is.na(grupo_edad)) %>%
  group_by(anio, grupo_edad) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(anio) %>%
  mutate(porcentaje = round(n / sum(n) * 100, 1)) %>%
  ungroup()
# Fix the display order of the age groups (very important!)
tabla_edad$grupo_edad <- factor(
  tabla_edad$grupo_edad,
  levels = c(
    "15-18", "19-25", "26-35", "36-49",
    "50-64", "65-79", "80+"
  )
)
# Dodged bars: share of each age group per survey year. Each bar is
# labelled with the count and, on a second line, the percentage value.
ggplot(tabla_edad, aes(x = grupo_edad, y = porcentaje, fill = anio)) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    aes(label = paste0( n, "\n", porcentaje)),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.8),
    vjust = -0.25,
    size = 3
  ) +
  scale_fill_manual(values = c("2016" = "lightblue", "2021" = "#DDA0DD")) +
  # No padding below zero, 18% padding above the tallest bar.
  scale_y_continuous(
    name = "Porcentaje",
    expand = expansion(mult = c(0, 0.18))
  ) +
  labs(
    title = "Distribución por grupos de edad",
    subtitle = "ENDIREH 2016 vs 2021",
    x = "Grupo de edad",
    fill = "Año"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "top"
  )
library(dplyr)
# ----------------------------------------------------------
# 1. Select women aged 15 or older
# ----------------------------------------------------------
# Keeps rows with SEXO == 2 and a known EDAD of at least 15, and only the
# identifier columns plus EDAD, NIV, P2_8 and P2_10.
datos_mujeres16 <- datos_basicos1_16 %>%
  filter(SEXO == 2 & !is.na(EDAD) & EDAD >= 15) %>%
  select(
    ID_VIV, UPM, PROG, VIV_SEL, HOGAR, ID_MUJ,
    EDAD, NIV, P2_8, P2_10
  )
# ----------------------------------------------------------
# 2. Create binary violence variables
# ----------------------------------------------------------
# One 0/1 indicator per violence type (1 when the corresponding *_total
# column is positive), an "any type" indicator, and its No/Sí factor.
violec_especif16 <- violec_especif16 %>%
  mutate(
    viol16_fis_bin = if_else(viol16_fis_total > 0, true = 1, false = 0),
    viol16_psi_bin = if_else(viol16_psi_total > 0, true = 1, false = 0),
    viol16_sex_bin = if_else(viol16_sex_total > 0, true = 1, false = 0),
    viol16_eco_bin = if_else(viol16_eco_total > 0, true = 1, false = 0),
    # 1 when at least one of the four indicators is 1.
    viol16_any_bin = if_else(
      viol16_fis_bin + viol16_psi_bin + viol16_sex_bin + viol16_eco_bin > 0,
      true = 1,
      false = 0
    ),
    viol16_any_disc = factor(
      viol16_any_bin,
      levels = c(0, 1),
      labels = c("No", "Sí")
    )
  )
# ----------------------------------------------------------
# 3. Join both data sets
# ----------------------------------------------------------
# Inner join on the six identifier columns; rows without an "any violence"
# value are dropped afterwards.
joined16 <- inner_join(
  datos_mujeres16,
  violec_especif16,
  by = c("ID_VIV", "UPM", "PROG", "VIV_SEL", "HOGAR", "ID_MUJ")
) %>%
  filter(!is.na(viol16_any_disc)) # <- correct
# Frequency table of the result, showing NAs if any are present.
table(joined16$viol16_any_disc, useNA = "ifany")
##
## No Sí
## 52623 39819
library(dplyr)
# Assign every woman in the joined 2016 data to an age bracket.
joined16 <- joined16 %>%
  mutate(
    grupo_edad = case_when(
      EDAD >= 15 & EDAD <= 18 ~ "15-18",
      EDAD >= 19 & EDAD <= 25 ~ "19-25",
      EDAD >= 26 & EDAD <= 35 ~ "26-35",
      EDAD >= 36 & EDAD <= 49 ~ "36-49",
      EDAD >= 50 & EDAD <= 64 ~ "50-64",
      # Single open-ended top bracket. The label has to match the factor
      # levels assigned to plot_data below; labels missing from that list
      # would turn into NA and drop women aged 65+ from the ordered plot.
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    )
  )
# Count per age group and violence category, then the share of each
# category within its age group. The result stays grouped by grupo_edad.
plot_data <- joined16 %>%
  # Rows without an age bracket cannot be placed on the x axis.
  filter(!is.na(grupo_edad)) %>%
  group_by(grupo_edad, viol16_any_disc) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(grupo_edad) %>%
  mutate(
    porcentaje = n / sum(n) * 100,
    porcentaje_label = round(porcentaje, 1) # number only, no "%" sign
  )
# Fix the display order of the age groups
plot_data$grupo_edad <- factor(
  plot_data$grupo_edad,
  levels = c("15-18", "19-25", "26-35", "36-49", "50-64", "65-80+")
)
library(ggplot2)
# Stacked bars per age group. position = "fill" rescales every bar to a
# total height of 1; the text inside each segment is the rounded percentage
# computed beforehand (a plain number without "%" sign).
ggplot(
  plot_data,
  aes(x = grupo_edad, y = porcentaje, fill = viol16_any_disc)
) +
  geom_col(position = "fill") +
  geom_text(
    aes(label = porcentaje_label),
    size = 4,
    color = "white",
    # vjust = 0.5 centres each label inside its segment.
    position = position_fill(vjust = 0.5)
  ) +
  scale_y_continuous(labels = scales::label_number()) +
  scale_fill_manual(
    values = c("No" = "#00BFC4", "Sí" = "#F8766D")
  ) +
  labs(
    x = "Grupo de edad",
    y = "Porcentaje",
    fill = "¿Violencia?",
    title = "Porcentaje de mujeres que experimentaron violencia\nsegún grupo de edad (ENDIREH 2016)"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Slanted x labels.
    axis.text.x = element_text(angle = 45, hjust = 1, size = 12)
  )
## [1] "ID_VIV" "ID_PER"
## [3] "UPM" "VIV_SEL"
## [5] "HOGAR" "N_REN"
## [7] "DOMINIO" "CVE_ENT"
## [9] "NOM_ENT" "CVE_MUN"
## [11] "NOM_MUN" "T_INSTRUM"
## [13] "P14_1_1" "P14_1_2"
## [15] "P14_1_3" "P14_1_4"
## [17] "P14_1_5" "P14_1_6"
## [19] "P14_1_7" "P14_1_8"
## [21] "P14_1_9" "P14_1_10"
## [23] "P14_1_11" "P14_1_12"
## [25] "P14_1_13" "P14_1_14"
## [27] "P14_1_15" "P14_1_16"
## [29] "P14_1_17" "P14_1_18"
## [31] "P14_1_19" "P14_1_20"
## [33] "P14_1_21" "P14_1_22"
## [35] "P14_1_23AB" "P14_1_24AB"
## [37] "P14_1_25" "P14_1_26"
## [39] "P14_1_27" "P14_1_28"
## [41] "P14_1_29" "P14_1_30"
## [43] "P14_1_31" "P14_1_32"
## [45] "P14_1_33" "P14_1_34"
## [47] "P14_1_35AB" "P14_1_36AB"
## [49] "P14_1_37AB" "P14_1_38AB"
## [51] "P14_2_1" "P14_3_1"
## [53] "P14_2_2" "P14_3_2"
## [55] "P14_2_3" "P14_3_3"
## [57] "P14_2_4" "P14_3_4"
## [59] "P14_2_5" "P14_3_5"
## [61] "P14_2_6" "P14_3_6"
## [63] "P14_2_7" "P14_3_7"
## [65] "P14_2_8" "P14_3_8"
## [67] "P14_2_9" "P14_3_9"
## [69] "P14_2_10" "P14_3_10"
## [71] "P14_2_11" "P14_3_11"
## [73] "P14_2_12" "P14_3_12"
## [75] "P14_2_13" "P14_3_13"
## [77] "P14_2_14" "P14_3_14"
## [79] "P14_2_15" "P14_3_15"
## [81] "P14_2_16" "P14_3_16"
## [83] "P14_2_17" "P14_3_17"
## [85] "P14_2_18" "P14_3_18"
## [87] "P14_2_19" "P14_3_19"
## [89] "P14_2_20" "P14_3_20"
## [91] "P14_2_21" "P14_3_21"
## [93] "P14_2_22" "P14_3_22"
## [95] "P14_2_23AB" "P14_3_23AB"
## [97] "P14_2_24AB" "P14_3_24AB"
## [99] "P14_2_25" "P14_3_25"
## [101] "P14_2_26" "P14_3_26"
## [103] "P14_2_27" "P14_3_27"
## [105] "P14_2_28" "P14_3_28"
## [107] "P14_2_29" "P14_3_29"
## [109] "P14_2_30" "P14_3_30"
## [111] "P14_2_31" "P14_3_31"
## [113] "P14_2_32" "P14_3_32"
## [115] "P14_2_33" "P14_3_33"
## [117] "P14_2_34" "P14_3_34"
## [119] "P14_2_35AB" "P14_3_35AB"
## [121] "P14_2_36AB" "P14_3_36AB"
## [123] "P14_2_37AB" "P14_3_37AB"
## [125] "P14_2_38AB" "P14_3_38AB"
## [127] "P14_4" "P14_5_1"
## [129] "P14_5_2" "P14_5_3"
## [131] "P14_5_4" "P14_5_5"
## [133] "P14_5_6" "P14_5_7"
## [135] "P14_6" "P14_7_1"
## [137] "P14_7_2" "P14_8_1"
## [139] "P14_8_2" "P14_8_3"
## [141] "P14_8_4" "P14_8_5"
## [143] "P14_8_6" "P14_8_7"
## [145] "P14_8_8" "P14_8_9"
## [147] "P14_8_10" "P14_8_10E"
## [149] "R14_8_10X" "P14_9_1_1"
## [151] "P14_9_1_2" "P14_9_1_3"
## [153] "P14_10_1" "P14_11_1"
## [155] "P14_12_1" "P14_13_1"
## [157] "P14_9_2_1" "P14_9_2_2"
## [159] "P14_9_2_3" "P14_10_2"
## [161] "P14_11_2" "P14_12_2"
## [163] "P14_13_2" "P14_9_3_1"
## [165] "P14_9_3_2" "P14_9_3_3"
## [167] "P14_10_3" "P14_11_3"
## [169] "P14_12_3" "P14_13_3"
## [171] "P14_9_4_1" "P14_9_4_2"
## [173] "P14_9_4_3" "P14_10_4"
## [175] "P14_11_4" "P14_12_4"
## [177] "P14_13_4" "P14_9_5_1"
## [179] "P14_9_5_2" "P14_9_5_3"
## [181] "P14_10_5" "P14_11_5"
## [183] "P14_12_5" "P14_13_5"
## [185] "P14_9_6_1" "P14_9_6_2"
## [187] "P14_9_6_3" "P14_10_6"
## [189] "P14_11_6" "P14_12_6"
## [191] "P14_13_6" "P14_9_7_1"
## [193] "P14_9_7_2" "P14_9_7_3"
## [195] "P14_10_7" "P14_11_7"
## [197] "P14_12_7" "P14_13_7"
## [199] "P14_9_8_1" "P14_9_8_2"
## [201] "P14_9_8_3" "P14_10_8"
## [203] "P14_11_8" "P14_12_8"
## [205] "P14_13_8" "P14_9_9_1"
## [207] "P14_9_9_2" "P14_9_9_3"
## [209] "P14_10_9" "P14_11_9"
## [211] "P14_12_9" "P14_13_9"
## [213] "P14_9_10_1" "P14_9_10_2"
## [215] "P14_9_10_3" "P14_10_10"
## [217] "P14_11_10" "P14_12_10"
## [219] "P14_13_10" "FAC_VIV"
## [221] "FAC_MUJ" "ESTRATO"
## [223] "UPM_DIS" "EST_DIS"
## [225] "violencia21_fisica1" "violencia21_fisica2"
## [227] "violencia21_fisica3" "violencia21_fisica4"
## [229] "violencia21_fisica5" "violencia21_fisica6"
## [231] "violencia21_fisica7" "violencia21_fisica8"
## [233] "violencia21_fisica9" "violencia21_psicologica1"
## [235] "violencia21_psicologica2" "violencia21_psicologica3"
## [237] "violencia21_psicologica4" "violencia21_psicologica5"
## [239] "violencia21_psicologica6" "violencia21_psicologica7"
## [241] "violencia21_psicologica8" "violencia21_psicologica9"
## [243] "violencia21_psicologica10" "violencia21_psicologica11"
## [245] "violencia21_psicologica12" "violencia21_psicologica13"
## [247] "violencia21_psicologica14" "violencia21_psicologica15"
## [249] "violencia21_psicologica16" "violencia21_sexual1"
## [251] "violencia21_sexual2" "violencia21_sexual3"
## [253] "violencia21_sexual4" "violencia21_sexual5"
## [255] "violencia21_sexual6" "violencia21_economica1"
## [257] "violencia21_economica2" "violencia21_economica3"
## [259] "violencia21_economica4" "violencia21_economica5"
## [261] "violencia21_economica6" "violencia21_economica7"
## [263] "viol21_fis_total" "viol21_fis_disc"
## [265] "viol21_psi_disc" "viol21_sex_total"
## [267] "viol21_sex_disc" "viol21_sex_disc_total"
## [269] "viol21_eco_total" "viol21_eco_disc"
## [271] "viol21_eco_total_disc" "fisica_total"
## [273] "psicologica_total" "sexual_total"
## [275] "economica_total" "viol_fis_disc"
## [277] "anio" "viol_psi_disc"
## [279] "viol_sex_disc"
library(dplyr)
library(ggplot2)
library(scales)
# ==========================================================
# 1. Dichotomous violence variable (robust, using rowSums)
# ==========================================================
# One 0/1 indicator per violence type: 1 when the row sum over the matching
# item columns is positive. NAs are skipped in the sums, so a row whose
# items are all NA gets 0.
violec_especif21 <- violec_especif21 %>%
  mutate(
    viol21_fis_bin = if_else(
      rowSums(select(., starts_with("violencia21_fisica")), na.rm = TRUE) > 0,
      true = 1,
      false = 0
    ),
    viol21_psi_bin = if_else(
      rowSums(select(., starts_with("violencia21_psicologica")), na.rm = TRUE) > 0,
      true = 1,
      false = 0
    ),
    viol21_sex_bin = if_else(
      rowSums(select(., starts_with("violencia21_sexual")), na.rm = TRUE) > 0,
      true = 1,
      false = 0
    ),
    viol21_eco_bin = if_else(
      rowSums(select(., starts_with("violencia21_economica")), na.rm = TRUE) > 0,
      true = 1,
      false = 0
    ),
    # at least one form of violence
    viol21_any_bin = if_else(
      viol21_fis_bin + viol21_psi_bin + viol21_sex_bin + viol21_eco_bin > 0,
      true = 1,
      false = 0
    ),
    viol21_any_disc = factor(
      viol21_any_bin,
      levels = c(0, 1),
      labels = c("No", "Sí")
    )
  )
# Check
table(violec_especif21$viol21_any_disc)
##
## No Sí
## 63640 41576
# ==========================================================
# 2. Merge with the base data
# ==========================================================
# Inner join on the five identifier columns; only the "any violence" factor
# is brought over from the violence table.
joined21 <- inner_join(
  datos_mujeres21,
  select(violec_especif21, ID_VIV, ID_PER, UPM, VIV_SEL, HOGAR, viol21_any_disc),
  by = c("ID_VIV", "ID_PER", "UPM", "VIV_SEL", "HOGAR")
)
# Check
dim(joined21)
## [1] 105216 38
##
## No Sí
## 63640 41576
# ==========================================================
# 3. Create age groups
# ==========================================================
joined21 <- joined21 %>%
  mutate(
    EDAD = as.numeric(EDAD),
    # Ages not covered by any bracket (e.g. below 15, or NA) become NA.
    grupo_edad = case_when(
      between(EDAD, 15, 18) ~ "15-18",
      between(EDAD, 19, 25) ~ "19-25",
      between(EDAD, 26, 35) ~ "26-35",
      between(EDAD, 36, 49) ~ "36-49",
      between(EDAD, 50, 64) ~ "50-64",
      EDAD >= 65 ~ "65-80+",
      TRUE ~ NA_character_
    )
  )
# ==========================================================
# 4. Summary by age
# ==========================================================
# Count per age group and violence category, then the share of each
# category within its age group plus a ready-made "x%" label. The result
# stays grouped by grupo_edad.
plot_data21 <- joined21 %>%
  filter(!(is.na(grupo_edad) | is.na(viol21_any_disc))) %>%
  group_by(grupo_edad, viol21_any_disc) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(grupo_edad) %>%
  mutate(
    porcentaje = n / sum(n) * 100,
    porcentaje_label = paste0(round(porcentaje, 1), "%")
  )
# Fix the display order of the age groups
plot_data21$grupo_edad <- factor(
  plot_data21$grupo_edad,
  levels = c(
    "15-18", "19-25", "26-35",
    "36-49", "50-64", "65-80+"
  )
)
# ==========================================================
# 5. Plot
# ==========================================================
# Stacked bars per age group; the text inside each segment is the
# percentage label computed beforehand.
ggplot(
  plot_data21,
  aes(
    x = grupo_edad,
    y = porcentaje,
    fill = viol21_any_disc
  )
) +
  geom_col(position = "fill") +
  geom_text(
    aes(label = porcentaje_label),
    # vjust = 0.5 centres each label inside its segment.
    position = position_fill(vjust = 0.5),
    color = "white",
    size = 4
  ) +
  scale_fill_manual(
    values = c("No" = "#00BFC4", "Sí" = "#F8766D")
  ) +
  # position = "fill" rescales every bar to the range 0-1, so the default
  # scale (multiply by 100) is required for the axis to read 0%-100%;
  # scale = 1 would label the axis 0%-1%.
  scale_y_continuous(labels = percent_format()) +
  labs(
    x = "Grupo de edad",
    y = "Proporción",
    fill = "¿Violencia?",
    title = "Distribución de mujeres que experimentaron violencia\nsegún grupo de edad (ENDIREH 2021)"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Slanted x labels.
    axis.text.x = element_text(angle = 45, hjust = 1)
  )